diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2010-08-13 13:02:04 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2010-08-13 13:02:04 (GMT) |
commit | 3d85a6fa043ddab8b289ccf5fb507242d45d2c28 (patch) | |
tree | b45dcd68116766b5b526400db854474dcb2e09fa /Lib | |
parent | 994addc414cfac472e62d813067b486298bec0be (diff) | |
download | cpython-3d85a6fa043ddab8b289ccf5fb507242d45d2c28.zip cpython-3d85a6fa043ddab8b289ccf5fb507242d45d2c28.tar.gz cpython-3d85a6fa043ddab8b289ccf5fb507242d45d2c28.tar.bz2 |
Set TESTFN_UNENCODEABLE on non-Windows OSes
* Use 0xff byte on non-Windows OSes
* mbcs is now really strict by default: I closed issue #850997, so use the
filesystem encoding and not Latin-1
* Rename TESTFN_UNICODE_UNENCODEABLE to TESTFN_UNENCODEABLE
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/support.py | 49 | ||||
-rw-r--r-- | Lib/test/test_unicode_file.py | 15 |
2 files changed, 37 insertions, 27 deletions
diff --git a/Lib/test/support.py b/Lib/test/support.py index 0372f16..42e2acb 100644 --- a/Lib/test/support.py +++ b/Lib/test/support.py @@ -382,29 +382,38 @@ TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid()) # file system encoding, but *not* with the default (ascii) encoding TESTFN_UNICODE = TESTFN + "-\xe0\xf2" TESTFN_ENCODING = sys.getfilesystemencoding() -# TESTFN_UNICODE_UNENCODEABLE is a filename that should *not* be -# able to be encoded by *either* the default or filesystem encoding. -# This test really only makes sense on Windows NT platforms -# which have special Unicode support in posixmodule. -if (not hasattr(sys, "getwindowsversion") or - sys.getwindowsversion()[3] < 2): # 0=win32s or 1=9x/ME - TESTFN_UNICODE_UNENCODEABLE = None + +# TESTFN_UNENCODEABLE is a filename (str type) that should *not* be able to be +# encoded by the filesystem encoding (in strict mode). It can be None if we +# cannot generate such filename. +if os.name in ('nt', 'ce'): + if sys.getwindowsversion().platform < 2: + # win32s (0) or Windows 9x/ME (1) + TESTFN_UNENCODEABLE = None + else: + # Japanese characters (I think - from bug 846133) + TESTFN_UNENCODEABLE = TESTFN + "-\u5171\u6709\u3055\u308c\u308b" + try: + TESTFN_UNENCODEABLE.encode(TESTFN_ENCODING) + except UnicodeEncodeError: + pass + else: + print('WARNING: The filename %r CAN be encoded by the filesystem encoding (%s). ' + 'Unicode filename tests may not be effective' + % (TESTFN_UNENCODEABLE, TESTFN_ENCODING)) + TESTFN_UNENCODEABLE = None else: - # Japanese characters (I think - from bug 846133) - TESTFN_UNICODE_UNENCODEABLE = TESTFN + "-\u5171\u6709\u3055\u308c\u308b" try: - # XXX - Note - should be using TESTFN_ENCODING here - but for - # Windows, "mbcs" currently always operates as if in - # errors=ignore' mode - hence we get '?' characters rather than - # the exception. 'Latin1' operates as we expect - ie, fails. 
- # See [ 850997 ] mbcs encoding ignores errors - TESTFN_UNICODE_UNENCODEABLE.encode("Latin1") - except UnicodeEncodeError: - pass + # ascii and utf-8 cannot encode the byte 0xff + b'\xff'.decode(TESTFN_ENCODING) + except UnicodeDecodeError: + # 0xff will be encoded using the surrogate character u+DCFF + TESTFN_UNENCODEABLE = TESTFN_UNICODE \ + + b'-\xff'.decode(TESTFN_ENCODING, 'surrogateescape') else: - print('WARNING: The filename %r CAN be encoded by the filesystem. ' - 'Unicode filename tests may not be effective' - % TESTFN_UNICODE_UNENCODEABLE) + # File system encoding (eg. ISO-8859-* encodings) can encode + # the byte 0xff. Skip some unicode filename tests. + TESTFN_UNENCODEABLE = None # Save the initial cwd SAVEDCWD = os.getcwd() diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py index 0ff1f71..7c2a1b4 100644 --- a/Lib/test/test_unicode_file.py +++ b/Lib/test/test_unicode_file.py @@ -5,8 +5,9 @@ import os, glob, time, shutil import unicodedata import unittest -from test.support import run_unittest, TESTFN_UNICODE, rmtree -from test.support import TESTFN_ENCODING, TESTFN_UNICODE_UNENCODEABLE +from test.support import (run_unittest, rmtree, + TESTFN_ENCODING, TESTFN_UNICODE, TESTFN_UNENCODEABLE) + try: TESTFN_UNICODE.encode(TESTFN_ENCODING) except (UnicodeError, TypeError): @@ -146,8 +147,8 @@ class TestUnicodeFiles(unittest.TestCase): # _test functions with each of the filename combinations we wish to test def test_single_files(self): self._test_single(TESTFN_UNICODE) - if TESTFN_UNICODE_UNENCODEABLE is not None: - self._test_single(TESTFN_UNICODE_UNENCODEABLE) + if TESTFN_UNENCODEABLE is not None: + self._test_single(TESTFN_UNENCODEABLE) def test_directories(self): # For all 'equivalent' combinations: @@ -156,9 +157,9 @@ class TestUnicodeFiles(unittest.TestCase): ext = ".dir" self._do_directory(TESTFN_UNICODE+ext, TESTFN_UNICODE+ext, False) # Our directory name that can't use a non-unicode name. 
- if TESTFN_UNICODE_UNENCODEABLE is not None: - self._do_directory(TESTFN_UNICODE_UNENCODEABLE+ext, - TESTFN_UNICODE_UNENCODEABLE+ext, + if TESTFN_UNENCODEABLE is not None: + self._do_directory(TESTFN_UNENCODEABLE+ext, + TESTFN_UNENCODEABLE+ext, False) def test_main(): |