diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2010-08-19 01:05:19 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2010-08-19 01:05:19 (GMT) |
commit | e8d5145e18318e0b5003371d4d666c4e445f610e (patch) | |
tree | 4777af9b6fd7369e380fb79c5ec0dd7cbed859ee /Lib | |
parent | dbe6042f0a5b8c193efbd75cab0733bbadad4efd (diff) | |
download | cpython-e8d5145e18318e0b5003371d4d666c4e445f610e.zip cpython-e8d5145e18318e0b5003371d4d666c4e445f610e.tar.gz cpython-e8d5145e18318e0b5003371d4d666c4e445f610e.tar.bz2 |
Create os.fsdecode(): decode from the filesystem encoding with the
'surrogateescape' error handler, or the 'strict' error handler on Windows.
* Rewrite os.fsencode() documentation
* Improve the os.fsencode() and os.fsdecode() tests using the new PYTHONFSENCODING
environment variable
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/os.py | 41 | ||||
-rw-r--r-- | Lib/test/test_os.py | 53 |
2 files changed, 68 insertions, 26 deletions
@@ -402,8 +402,7 @@ def get_exec_path(env=None): path_list = path_listb if path_list is not None and isinstance(path_list, bytes): - path_list = path_list.decode(sys.getfilesystemencoding(), - 'surrogateescape') + path_list = fsdecode(path_list) if path_list is None: path_list = defpath @@ -536,19 +535,39 @@ if supports_bytes_environ: __all__.extend(("environb", "getenvb")) -def fsencode(value): - """Encode value for use in the file system, environment variables - or the command line.""" - if isinstance(value, bytes): - return value - elif isinstance(value, str): +def fsencode(filename): + """ + Encode filename to the filesystem encoding with 'surrogateescape' error + handler, return bytes unchanged. On Windows, use 'strict' error handler if + the file system encoding is 'mbcs' (which is the default encoding). + """ + if isinstance(filename, bytes): + return filename + elif isinstance(filename, str): + encoding = sys.getfilesystemencoding() + if encoding == 'mbcs': + return filename.encode(encoding) + else: + return filename.encode(encoding, 'surrogateescape') + else: + raise TypeError("expect bytes or str, not %s" % type(filename).__name__) + +def fsdecode(filename): + """ + Decode filename from the filesystem encoding with 'surrogateescape' error + handler, return str unchanged. On Windows, use 'strict' error handler if + the file system encoding is 'mbcs' (which is the default encoding). 
+ """ + if isinstance(filename, str): + return filename + elif isinstance(filename, bytes): encoding = sys.getfilesystemencoding() if encoding == 'mbcs': - return value.encode(encoding) + return filename.decode(encoding) else: - return value.encode(encoding, 'surrogateescape') + return filename.decode(encoding, 'surrogateescape') else: - raise TypeError("expect bytes or str, not %s" % type(value).__name__) + raise TypeError("expect bytes or str, not %s" % type(filename).__name__) def _exists(name): return name in globals() diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index f56280a..cd8a1b9 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -897,14 +897,6 @@ if sys.platform != 'win32': class Pep383Tests(unittest.TestCase): def setUp(self): - def fsdecode(filename): - encoding = sys.getfilesystemencoding() - if encoding == 'mbcs': - errors = 'strict' - else: - errors = 'surrogateescape' - return filename.decode(encoding, errors) - if support.TESTFN_UNENCODABLE: self.dir = support.TESTFN_UNENCODABLE else: @@ -930,7 +922,7 @@ if sys.platform != 'win32': for fn in bytesfn: f = open(os.path.join(self.bdir, fn), "w") f.close() - fn = fsdecode(fn) + fn = os.fsdecode(fn) if fn in self.unicodefn: raise ValueError("duplicate filename") self.unicodefn.add(fn) @@ -1139,12 +1131,43 @@ class Win32SymlinkTests(unittest.TestCase): self.assertNotEqual(os.lstat(link), os.stat(link)) -class MiscTests(unittest.TestCase): +class FSEncodingTests(unittest.TestCase): + def test_nop(self): + self.assertEquals(os.fsencode(b'abc\xff'), b'abc\xff') + self.assertEquals(os.fsdecode('abc\u0141'), 'abc\u0141') - @unittest.skipIf(os.name == "nt", "POSIX specific test") - def test_fsencode(self): - self.assertEquals(os.fsencode(b'ab\xff'), b'ab\xff') - self.assertEquals(os.fsencode('ab\uDCFF'), b'ab\xff') + def test_identity(self): + # assert fsdecode(fsencode(x)) == x + for fn in ('unicode\u0141', 'latin\xe9', 'ascii'): + try: + bytesfn = os.fsencode(fn) + except 
UnicodeEncodeError: + continue + self.assertEquals(os.fsdecode(bytesfn), fn) + + def get_output(self, fs_encoding, func): + env = os.environ.copy() + env['PYTHONIOENCODING'] = 'utf-8' + env['PYTHONFSENCODING'] = fs_encoding + code = 'import os; print(%s, end="")' % func + process = subprocess.Popen( + [sys.executable, "-c", code], + stdout=subprocess.PIPE, env=env) + stdout, stderr = process.communicate() + self.assertEqual(process.returncode, 0) + return stdout.decode('utf-8') + + def test_encodings(self): + def check(encoding, bytesfn, unicodefn): + encoded = self.get_output(encoding, 'repr(os.fsencode(%a))' % unicodefn) + self.assertEqual(encoded, repr(bytesfn)) + + decoded = self.get_output(encoding, 'repr(os.fsdecode(%a))' % bytesfn) + self.assertEqual(decoded, repr(unicodefn)) + + check('ascii', b'abc\xff', 'abc\udcff') + check('utf-8', b'\xc3\xa9\x80', '\xe9\udc80') + check('iso-8859-15', b'\xef\xa4', '\xef\u20ac') def test_main(): @@ -1163,7 +1186,7 @@ def test_main(): Pep383Tests, Win32KillTests, Win32SymlinkTests, - MiscTests, + FSEncodingTests, ) if __name__ == "__main__": |