summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/os.rst25
-rw-r--r--Doc/whatsnew/3.2.rst7
-rw-r--r--Lib/os.py41
-rw-r--r--Lib/test/test_os.py53
-rw-r--r--Misc/NEWS3
5 files changed, 95 insertions, 34 deletions
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 9ab5fb1..8df98cf 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -155,13 +155,26 @@ process and user.
These functions are described in :ref:`os-file-dir`.
-.. function:: fsencode(value)
+.. function:: fsencode(filename)
- Encode *value* to bytes for use in the file system, environment variables or
- the command line. Use :func:`sys.getfilesystemencoding` and
- ``'surrogateescape'`` error handler for strings and return bytes unchanged.
- On Windows, use ``'strict'`` error handler for strings if the file system
- encoding is ``'mbcs'`` (which is the default encoding).
+ Encode *filename* to the filesystem encoding with ``'surrogateescape'``
+ error handler, return :class:`bytes` unchanged. On Windows, use ``'strict'``
+ error handler if the filesystem encoding is ``'mbcs'`` (which is the default
+ encoding).
+
+ :func:`fsdecode` is the reverse function.
+
+ .. versionadded:: 3.2
+
+
+.. function:: fsdecode(filename)
+
+ Decode *filename* from the filesystem encoding with ``'surrogateescape'``
+ error handler, return :class:`str` unchanged. On Windows, use ``'strict'``
+ error handler if the filesystem encoding is ``'mbcs'`` (which is the default
+ encoding).
+
+ :func:`fsencode` is the reverse function.
.. versionadded:: 3.2
diff --git a/Doc/whatsnew/3.2.rst b/Doc/whatsnew/3.2.rst
index 3927f4b..2ec93d2 100644
--- a/Doc/whatsnew/3.2.rst
+++ b/Doc/whatsnew/3.2.rst
@@ -237,13 +237,16 @@ Major performance enhancements have been added:
* Stub
-Unicode
-=======
+Filenames and Unicode
+=====================
The filesystem encoding can be specified by setting the
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
+The :mod:`os` module has two new functions: :func:`os.fsencode` and
+:func:`os.fsdecode`.
+
IDLE
====
diff --git a/Lib/os.py b/Lib/os.py
index c7abc2a..60dc12f 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -402,8 +402,7 @@ def get_exec_path(env=None):
path_list = path_listb
if path_list is not None and isinstance(path_list, bytes):
- path_list = path_list.decode(sys.getfilesystemencoding(),
- 'surrogateescape')
+ path_list = fsdecode(path_list)
if path_list is None:
path_list = defpath
@@ -536,19 +535,39 @@ if supports_bytes_environ:
__all__.extend(("environb", "getenvb"))
-def fsencode(value):
- """Encode value for use in the file system, environment variables
- or the command line."""
- if isinstance(value, bytes):
- return value
- elif isinstance(value, str):
+def fsencode(filename):
+ """
+ Encode filename to the filesystem encoding with 'surrogateescape' error
+ handler, return bytes unchanged. On Windows, use 'strict' error handler if
+ the file system encoding is 'mbcs' (which is the default encoding).
+ """
+ if isinstance(filename, bytes):
+ return filename
+ elif isinstance(filename, str):
+ encoding = sys.getfilesystemencoding()
+ if encoding == 'mbcs':
+ return filename.encode(encoding)
+ else:
+ return filename.encode(encoding, 'surrogateescape')
+ else:
+ raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
+
+def fsdecode(filename):
+ """
+ Decode filename from the filesystem encoding with 'surrogateescape' error
+ handler, return str unchanged. On Windows, use 'strict' error handler if
+ the file system encoding is 'mbcs' (which is the default encoding).
+ """
+ if isinstance(filename, str):
+ return filename
+ elif isinstance(filename, bytes):
encoding = sys.getfilesystemencoding()
if encoding == 'mbcs':
- return value.encode(encoding)
+ return filename.decode(encoding)
else:
- return value.encode(encoding, 'surrogateescape')
+ return filename.decode(encoding, 'surrogateescape')
else:
- raise TypeError("expect bytes or str, not %s" % type(value).__name__)
+ raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
def _exists(name):
return name in globals()
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py
index f56280a..cd8a1b9 100644
--- a/Lib/test/test_os.py
+++ b/Lib/test/test_os.py
@@ -897,14 +897,6 @@ if sys.platform != 'win32':
class Pep383Tests(unittest.TestCase):
def setUp(self):
- def fsdecode(filename):
- encoding = sys.getfilesystemencoding()
- if encoding == 'mbcs':
- errors = 'strict'
- else:
- errors = 'surrogateescape'
- return filename.decode(encoding, errors)
-
if support.TESTFN_UNENCODABLE:
self.dir = support.TESTFN_UNENCODABLE
else:
@@ -930,7 +922,7 @@ if sys.platform != 'win32':
for fn in bytesfn:
f = open(os.path.join(self.bdir, fn), "w")
f.close()
- fn = fsdecode(fn)
+ fn = os.fsdecode(fn)
if fn in self.unicodefn:
raise ValueError("duplicate filename")
self.unicodefn.add(fn)
@@ -1139,12 +1131,43 @@ class Win32SymlinkTests(unittest.TestCase):
self.assertNotEqual(os.lstat(link), os.stat(link))
-class MiscTests(unittest.TestCase):
+class FSEncodingTests(unittest.TestCase):
+ def test_nop(self):
+ self.assertEquals(os.fsencode(b'abc\xff'), b'abc\xff')
+ self.assertEquals(os.fsdecode('abc\u0141'), 'abc\u0141')
- @unittest.skipIf(os.name == "nt", "POSIX specific test")
- def test_fsencode(self):
- self.assertEquals(os.fsencode(b'ab\xff'), b'ab\xff')
- self.assertEquals(os.fsencode('ab\uDCFF'), b'ab\xff')
+ def test_identity(self):
+ # assert fsdecode(fsencode(x)) == x
+ for fn in ('unicode\u0141', 'latin\xe9', 'ascii'):
+ try:
+ bytesfn = os.fsencode(fn)
+ except UnicodeEncodeError:
+ continue
+ self.assertEquals(os.fsdecode(bytesfn), fn)
+
+ def get_output(self, fs_encoding, func):
+ env = os.environ.copy()
+ env['PYTHONIOENCODING'] = 'utf-8'
+ env['PYTHONFSENCODING'] = fs_encoding
+ code = 'import os; print(%s, end="")' % func
+ process = subprocess.Popen(
+ [sys.executable, "-c", code],
+ stdout=subprocess.PIPE, env=env)
+ stdout, stderr = process.communicate()
+ self.assertEqual(process.returncode, 0)
+ return stdout.decode('utf-8')
+
+ def test_encodings(self):
+ def check(encoding, bytesfn, unicodefn):
+ encoded = self.get_output(encoding, 'repr(os.fsencode(%a))' % unicodefn)
+ self.assertEqual(encoded, repr(bytesfn))
+
+ decoded = self.get_output(encoding, 'repr(os.fsdecode(%a))' % bytesfn)
+ self.assertEqual(decoded, repr(unicodefn))
+
+ check('ascii', b'abc\xff', 'abc\udcff')
+ check('utf-8', b'\xc3\xa9\x80', '\xe9\udc80')
+ check('iso-8859-15', b'\xef\xa4', '\xef\u20ac')
def test_main():
@@ -1163,7 +1186,7 @@ def test_main():
Pep383Tests,
Win32KillTests,
Win32SymlinkTests,
- MiscTests,
+ FSEncodingTests,
)
if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
index 23245a8..31fec7a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -116,6 +116,9 @@ Extensions
Library
-------
+- Create os.fsdecode(): decode from the filesystem encoding with
+ surrogateescape error handler, or strict error handler on Windows.
+
- Issue #3488: Provide convenient shorthand functions ``gzip.compress``
and ``gzip.decompress``. Original patch by Anand B. Pillai.