From f925407a19eeb9bf5f7640143979638adce2c677 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 25 Jun 2020 20:39:12 +0300 Subject: [3.9] bpo-41069: Make TESTFN and the CWD for tests containing non-ascii characters. (GH-21035). (GH-21156) (cherry picked from commit 700cfa8c90a90016638bac13c4efd03786b2b2a0) --- Lib/test/libregrtest/main.py | 1 + Lib/test/support/__init__.py | 21 +++++++------ Lib/test/test_binhex.py | 7 +++-- Lib/test/test_cgitb.py | 10 +++--- Lib/test/test_compileall.py | 6 ++-- Lib/test/test_embed.py | 4 +-- Lib/test/test_fstring.py | 5 +-- Lib/test/test_genericpath.py | 2 +- Lib/test/test_gzip.py | 18 ++++++++--- Lib/test/test_msilib.py | 4 +-- Lib/test/test_ntpath.py | 2 +- Lib/test/test_os.py | 6 ++-- Lib/test/test_pdb.py | 11 ++++--- Lib/test/test_posixpath.py | 2 +- Lib/test/test_tarfile.py | 9 ++++-- Lib/test/test_tools/test_pathfix.py | 8 +++-- Lib/test/test_trace.py | 9 ++++-- Lib/test/test_urllib.py | 7 +++-- Lib/test/test_uu.py | 4 +-- Lib/test/test_venv.py | 4 +-- Lib/test/test_warnings/__init__.py | 6 ++-- .../Tests/2020-06-22-00-21-12.bpo-41069.bLZkX-.rst | 2 ++ Modules/_testcapimodule.c | 36 +++++++++++----------- 23 files changed, 108 insertions(+), 76 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2020-06-22-00-21-12.bpo-41069.bLZkX-.rst diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 95b4856..555a340 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -596,6 +596,7 @@ class Regrtest: test_cwd = 'test_python_worker_{}'.format(pid) else: test_cwd = 'test_python_{}'.format(pid) + test_cwd += support.FS_NONASCII test_cwd = os.path.join(self.tmp_dir, test_cwd) return test_cwd diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 1bd9f47..c0138b8 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -720,21 +720,21 @@ else: # Filename used for testing if os.name == 'java': # Jython disallows @ in module names - TESTFN = '$test' + TESTFN_ASCII = '$test' else: - TESTFN = '@test' + TESTFN_ASCII = '@test' # Disambiguate TESTFN for parallel testing, while letting it remain a valid # module name. -TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid()) +TESTFN_ASCII = "{}_{}_tmp".format(TESTFN_ASCII, os.getpid()) # Define the URL of a dedicated HTTP server for the network tests. # The URL must use clear-text HTTP: no redirection to encrypted HTTPS. TEST_HTTP_URL = "http://www.pythontest.net" # FS_NONASCII: non-ASCII character encodable by os.fsencode(), -# or None if there is no such character. -FS_NONASCII = None +# or an empty string if there is no such character. +FS_NONASCII = '' for character in ( # First try printable and common characters to have a readable filename. # For each character, the encoding list are just example of encodings able @@ -781,7 +781,7 @@ for character in ( break # TESTFN_UNICODE is a non-ascii filename -TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f" +TESTFN_UNICODE = TESTFN_ASCII + "-\xe0\xf2\u0258\u0141\u011f" if sys.platform == 'darwin': # In Mac OS X's VFS API file names are, by definition, canonically # decomposed Unicode, encoded using UTF-8. See QA1173: @@ -799,7 +799,7 @@ if os.name == 'nt': if sys.getwindowsversion().platform >= 2: # Different kinds of characters from various languages to minimize the # probability that the whole name is encodable to MBCS (issue #9819) - TESTFN_UNENCODABLE = TESTFN + "-\u5171\u0141\u2661\u0363\uDC80" + TESTFN_UNENCODABLE = TESTFN_ASCII + "-\u5171\u0141\u2661\u0363\uDC80" try: TESTFN_UNENCODABLE.encode(TESTFN_ENCODING) except UnicodeEncodeError: @@ -816,7 +816,7 @@ elif sys.platform != 'darwin': b'\xff'.decode(TESTFN_ENCODING) except UnicodeDecodeError: # 0xff will be encoded using the surrogate character u+DCFF - TESTFN_UNENCODABLE = TESTFN \ + TESTFN_UNENCODABLE = TESTFN_ASCII \ + b'-\xff'.decode(TESTFN_ENCODING, 'surrogateescape') else: # File system encoding (eg. ISO-8859-* encodings) can encode @@ -850,13 +850,14 @@ for name in ( try: name.decode(TESTFN_ENCODING) except UnicodeDecodeError: - TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name + TESTFN_UNDECODABLE = os.fsencode(TESTFN_ASCII) + name break if FS_NONASCII: - TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII + TESTFN_NONASCII = TESTFN_ASCII + FS_NONASCII else: TESTFN_NONASCII = None +TESTFN = TESTFN_NONASCII or TESTFN_ASCII # Save the initial cwd SAVEDCWD = os.getcwd() diff --git a/Lib/test/test_binhex.py b/Lib/test/test_binhex.py index 8595532..591f32a 100644 --- a/Lib/test/test_binhex.py +++ b/Lib/test/test_binhex.py @@ -13,9 +13,10 @@ with support.check_warnings(('', DeprecationWarning)): class BinHexTestCase(unittest.TestCase): def setUp(self): - self.fname1 = support.TESTFN + "1" - self.fname2 = support.TESTFN + "2" - self.fname3 = support.TESTFN + "very_long_filename__very_long_filename__very_long_filename__very_long_filename__" + # binhex supports only file names encodable to Latin1 + self.fname1 = support.TESTFN_ASCII + "1" + self.fname2 = support.TESTFN_ASCII + "2" + self.fname3 = support.TESTFN_ASCII + "very_long_filename__very_long_filename__very_long_filename__very_long_filename__" def tearDown(self): support.unlink(self.fname1) diff --git a/Lib/test/test_cgitb.py b/Lib/test/test_cgitb.py index 8991bc1..bab152d 100644 --- a/Lib/test/test_cgitb.py +++ b/Lib/test/test_cgitb.py @@ -41,8 +41,9 @@ class TestCgitb(unittest.TestCase): rc, out, err = assert_python_failure( '-c', ('import cgitb; cgitb.enable(logdir=%s); ' - 'raise ValueError("Hello World")') % repr(tracedir)) - out = out.decode(sys.getfilesystemencoding()) + 'raise ValueError("Hello World")') % repr(tracedir), + PYTHONIOENCODING='utf-8') + out = out.decode() self.assertIn("ValueError", out) self.assertIn("Hello World", out) self.assertIn("<module>", out) @@ -56,8 +57,9 @@ class TestCgitb(unittest.TestCase): rc, out, err = assert_python_failure( '-c', ('import cgitb; cgitb.enable(format="text", logdir=%s); ' - 'raise ValueError("Hello World")') % repr(tracedir)) - out = out.decode(sys.getfilesystemencoding()) + 'raise ValueError("Hello World")') % repr(tracedir), + PYTHONIOENCODING='utf-8') + out = out.decode() self.assertIn("ValueError", out) self.assertIn("Hello World", out) self.assertNotIn('

', out) diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index b4061b7..3bbc681 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -456,13 +456,15 @@ class CommandLineTestsBase: def assertRunOK(self, *args, **env_vars): rc, out, err = script_helper.assert_python_ok( - *self._get_run_args(args), **env_vars) + *self._get_run_args(args), **env_vars, + PYTHONIOENCODING='utf-8') self.assertEqual(b'', err) return out def assertRunNotOK(self, *args, **env_vars): rc, out, err = script_helper.assert_python_failure( - *self._get_run_args(args), **env_vars) + *self._get_run_args(args), **env_vars, + PYTHONIOENCODING='utf-8') return rc, out, err def assertCompiled(self, fn): diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 4d29f2c..b7b7058 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1353,7 +1353,7 @@ class AuditingTests(EmbeddingTestsMixin, unittest.TestCase): returncode=1) def test_audit_run_interactivehook(self): - startup = os.path.join(self.oldcwd, support.TESTFN) + (support.FS_NONASCII or '') + ".py" + startup = os.path.join(self.oldcwd, support.TESTFN) + ".py" with open(startup, "w", encoding="utf-8") as f: print("import sys", file=f) print("sys.__interactivehook__ = lambda: None", file=f) @@ -1366,7 +1366,7 @@ class AuditingTests(EmbeddingTestsMixin, unittest.TestCase): os.unlink(startup) def test_audit_run_startup(self): - startup = os.path.join(self.oldcwd, support.TESTFN) + (support.FS_NONASCII or '') + ".py" + startup = os.path.join(self.oldcwd, support.TESTFN) + ".py" with open(startup, "w", encoding="utf-8") as f: print("pass", file=f) try: diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index f6be64b..6d96627 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1061,8 +1061,9 @@ non-important content file_path = os.path.join(cwd, 't.py') with open(file_path, 'w') as f: f.write('f"{a b}"') # This generates a SyntaxError - _, _, stderr = assert_python_failure(file_path) - self.assertIn(file_path, stderr.decode('utf-8')) + _, _, stderr = assert_python_failure(file_path, + PYTHONIOENCODING='ascii') + self.assertIn(file_path.encode('ascii', 'backslashreplace'), stderr) def test_loop(self): for i in range(1000): diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index 9d5ac44..e7acbcd 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -534,7 +534,7 @@ class CommonTest(GenericTest): class PathLikeTests(unittest.TestCase): def setUp(self): - self.file_name = support.TESTFN.lower() + self.file_name = support.TESTFN self.file_path = FakePath(support.TESTFN) self.addCleanup(support.unlink, self.file_name) create_file(self.file_name, b"test_genericpath.PathLikeTests") diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index 7833421..0f235d1 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -328,8 +328,15 @@ class TestGzip(BaseTest): cmByte = fRead.read(1) self.assertEqual(cmByte, b'\x08') # deflate + try: + expectedname = self.filename.encode('Latin-1') + b'\x00' + expectedflags = b'\x08' # only the FNAME flag is set + except UnicodeEncodeError: + expectedname = b'' + expectedflags = b'\x00' + flagsByte = fRead.read(1) - self.assertEqual(flagsByte, b'\x08') # only the FNAME flag is set + self.assertEqual(flagsByte, expectedflags) mtimeBytes = fRead.read(4) self.assertEqual(mtimeBytes, struct.pack('