diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2010-10-20 22:58:25 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2010-10-20 22:58:25 (GMT) |
commit | f933e1ab6fdea76973384e38ea95520de422c340 (patch) | |
tree | 88a9a55449b4eb3a2167630127f2b9640f678e3e /Lib | |
parent | 073f759d65118674c4c7b82f778dde44ae22c6c9 (diff) | |
download | cpython-f933e1ab6fdea76973384e38ea95520de422c340.zip cpython-f933e1ab6fdea76973384e38ea95520de422c340.tar.gz cpython-f933e1ab6fdea76973384e38ea95520de422c340.tar.bz2 |
Issue #4388: On Mac OS X, decode command line arguments from UTF-8, instead of
the locale encoding. If none of the LANG, LC_ALL and LC_CTYPE environment variables
is set, the locale encoding is ISO-8859-1, whereas most programs (including
Python) expect UTF-8. Python already uses UTF-8 for the filesystem encoding and
to encode command line arguments on this OS.
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_cmd_line.py | 32 |
1 files changed, 32 insertions, 0 deletions
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 73acb9f..c864cdd 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -148,6 +148,38 @@ class CmdLineTest(unittest.TestCase):
         if not stdout.startswith(pattern):
             raise AssertionError("%a doesn't start with %a" % (stdout, pattern))
 
+    @unittest.skipUnless(sys.platform == 'darwin', 'test specific to Mac OS X')
+    def test_osx_utf8(self):
+        def check_output(text):
+            decoded = text.decode('utf8', 'surrogateescape')
+            expected = ascii(decoded).encode('ascii') + b'\n'
+
+            env = os.environ.copy()
+            # C locale gives ASCII locale encoding, but Python uses UTF-8
+            # to parse the command line arguments on Mac OS X
+            env['LC_ALL'] = 'C'
+
+            p = subprocess.Popen(
+                (sys.executable, "-c", "import sys; print(ascii(sys.argv[1]))", text),
+                stdout=subprocess.PIPE,
+                env=env)
+            stdout, stderr = p.communicate()
+            self.assertEqual(stdout, expected)
+            self.assertEqual(p.returncode, 0)
+
+        # test valid utf-8
+        text = 'e:\xe9, euro:\u20ac, non-bmp:\U0010ffff'.encode('utf-8')
+        check_output(text)
+
+        # test invalid utf-8
+        text = (
+            b'\xff' # invalid byte
+            b'\xc3\xa9' # valid utf-8 character
+            b'\xc3\xff' # invalid byte sequence
+            b'\xed\xa0\x80' # lone surrogate character (invalid)
+        )
+        check_output(text)
+
     def test_unbuffered_output(self):
         # Test expected operation of the '-u' switch
         for stream in ('stdout', 'stderr'):