summaryrefslogtreecommitdiffstats
path: root/Lib
diff options
context:
space:
mode:
authorVictor Stinner <victor.stinner@gmail.com>2012-11-12 00:23:15 (GMT)
committerVictor Stinner <victor.stinner@gmail.com>2012-11-12 00:23:15 (GMT)
commite667e98faad93e76b8b569d853eb02d91591f5fb (patch)
tree009075f72468af1156312858aada27dd389678f3 /Lib
parent37bfa4e7ec5df1528aa208150933a3fc54508cf9 (diff)
downloadcpython-e667e98faad93e76b8b569d853eb02d91591f5fb.zip
cpython-e667e98faad93e76b8b569d853eb02d91591f5fb.tar.gz
cpython-e667e98faad93e76b8b569d853eb02d91591f5fb.tar.bz2
Issue #16218, #16444: Backport improvement on tests for non-ASCII characters
Diffstat (limited to 'Lib')
-rw-r--r--Lib/test/support.py75
-rw-r--r--Lib/test/test_cmd_line.py6
-rw-r--r--Lib/test/test_cmd_line_script.py30
-rw-r--r--Lib/test/test_os.py4
4 files changed, 105 insertions, 10 deletions
diff --git a/Lib/test/support.py b/Lib/test/support.py
index c5640e0..d0a37ea 100644
--- a/Lib/test/support.py
+++ b/Lib/test/support.py
@@ -603,6 +603,49 @@ else:
# module name.
TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid())
+# FS_NONASCII: non-ASCII character encodable by os.fsencode(),
+# or None if there is no such character.
+FS_NONASCII = None
+for character in (
+ # First try printable and common characters to have a readable filename.
+ # For each character, the listed encodings are just examples of encodings
+ # able to encode the character (the list is not exhaustive).
+
+ # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
+ '\u00E6',
+ # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
+ '\u0130',
+ # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
+ '\u0141',
+ # U+03C6 (Greek Small Letter Phi): cp1253
+ '\u03C6',
+ # U+041A (Cyrillic Capital Letter Ka): cp1251
+ '\u041A',
+ # U+05D0 (Hebrew Letter Alef): cp424
+ '\u05D0',
+ # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
+ '\u060C',
+ # U+062A (Arabic Letter Teh): cp720
+ '\u062A',
+ # U+0E01 (Thai Character Ko Kai): cp874
+ '\u0E01',
+
+ # Then try more "special" characters. "special" because they may be
+ # interpreted or displayed differently depending on the exact locale
+ # encoding and the font.
+
+ # U+00A0 (No-Break Space)
+ '\u00A0',
+ # U+20AC (Euro Sign)
+ '\u20AC',
+):
+ try:
+ os.fsdecode(os.fsencode(character))
+ except UnicodeError:
+ pass
+ else:
+ FS_NONASCII = character
+ break
# TESTFN_UNICODE is a non-ascii filename
TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f"
@@ -647,6 +690,38 @@ elif sys.platform != 'darwin':
# the byte 0xff. Skip some unicode filename tests.
pass
+# TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be able to be
+# decoded from the filesystem encoding (in strict mode). It can be None if we
+# cannot generate such filename (ex: the latin1 encoding can decode any byte
+# sequence). On UNIX, TESTFN_UNDECODABLE can be decoded by os.fsdecode() thanks
+# to the surrogateescape error handler (PEP 383), but not from the filesystem
+# encoding in strict mode.
+TESTFN_UNDECODABLE = None
+for name in (
+ # b'\xff' is not decodable by os.fsdecode() with code page 932. Windows
+ # accepts it to create a file or a directory, or does not accept to enter
+ # such a directory (when the bytes name is used). So test b'\xe7' first: it is
+ # not decodable from cp932.
+ b'\xe7w\xf0',
+ # undecodable from ASCII, UTF-8
+ b'\xff',
+ # undecodable from iso8859-3, iso8859-6, iso8859-7, cp424, iso8859-8, cp856
+ # and cp857
+ b'\xae\xd5',
+ # undecodable from UTF-8 (UNIX and Mac OS X)
+ b'\xed\xb2\x80', b'\xed\xb4\x80',
+):
+ try:
+ name.decode(TESTFN_ENCODING)
+ except UnicodeDecodeError:
+ TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name
+ break
+
+if FS_NONASCII:
+ TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII
+else:
+ TESTFN_NONASCII = None
+
# Save the initial cwd
SAVEDCWD = os.getcwd()
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 2b0c6e2..f617c2f 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -93,15 +93,15 @@ class CmdLineTest(unittest.TestCase):
# All good if execution is successful
assert_python_ok('-c', 'pass')
- @unittest.skipIf(sys.getfilesystemencoding() == 'ascii',
- 'need a filesystem encoding different than ASCII')
+ @unittest.skipUnless(test.support.FS_NONASCII, 'need support.FS_NONASCII')
def test_non_ascii(self):
# Test handling of non-ascii data
if test.support.verbose:
import locale
print('locale encoding = %s, filesystem encoding = %s'
% (locale.getpreferredencoding(), sys.getfilesystemencoding()))
- command = "assert(ord('\xe9') == 0xe9)"
+ command = ("assert(ord(%r) == %s)"
+ % (test.support.FS_NONASCII, ord(test.support.FS_NONASCII)))
assert_python_ok('-c', command)
# On Windows, pass bytes to subprocess doesn't test how Python decodes the
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
index 6e097e3..f066204 100644
--- a/Lib/test/test_cmd_line_script.py
+++ b/Lib/test/test_cmd_line_script.py
@@ -363,14 +363,30 @@ class CmdLineTest(unittest.TestCase):
self.assertTrue(text[1].startswith(' File '))
self.assertTrue(text[3].startswith('NameError'))
- def test_non_utf8(self):
+ def test_non_ascii(self):
+ # Mac OS X denies the creation of a file with an invalid UTF-8 name.
+ # Windows allows creating a file with an arbitrary bytes name, but
+ # Python cannot pass an undecodable bytes argument to a subprocess.
+ #if (support.TESTFN_UNDECODABLE
+ #and sys.platform not in ('win32', 'darwin')):
+ # name = os.fsdecode(support.TESTFN_UNDECODABLE)
+ #elif support.TESTFN_NONASCII:
+ if support.TESTFN_NONASCII:
+ name = support.TESTFN_NONASCII
+ else:
+ self.skipTest("need support.TESTFN_NONASCII")
+
# Issue #16218
- with temp_dir() as script_dir:
- script_name = _make_test_script(script_dir,
- '\udcf1\udcea\udcf0\udce8\udcef\udcf2')
- self._check_script(script_name, script_name, script_name,
- script_dir, None,
- importlib.machinery.SourceFileLoader)
+ source = 'print(ascii(__file__))\n'
+ script_name = _make_test_script(os.curdir, name, source)
+ self.addCleanup(support.unlink, script_name)
+ rc, stdout, stderr = assert_python_ok(script_name)
+ self.assertEqual(
+ ascii(script_name),
+ stdout.rstrip().decode('ascii'),
+ 'stdout=%r stderr=%r' % (stdout, stderr))
+ self.assertEqual(0, rc)
+
def test_main():
support.run_unittest(CmdLineTest)
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index 7d6b377..13a2b38 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -1243,6 +1243,8 @@ if sys.platform != 'win32':
def setUp(self):
if support.TESTFN_UNENCODABLE:
self.dir = support.TESTFN_UNENCODABLE
+ elif support.TESTFN_NONASCII:
+ self.dir = support.TESTFN_NONASCII
else:
self.dir = support.TESTFN
self.bdir = os.fsencode(self.dir)
@@ -1257,6 +1259,8 @@ if sys.platform != 'win32':
add_filename(support.TESTFN_UNICODE)
if support.TESTFN_UNENCODABLE:
add_filename(support.TESTFN_UNENCODABLE)
+ if support.TESTFN_NONASCII:
+ add_filename(support.TESTFN_NONASCII)
if not bytesfn:
self.skipTest("couldn't create any non-ascii filename")