summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Victor Stinner <victor.stinner@haypocalc.com> 2010-06-11 23:56:51 (GMT)
committer: Victor Stinner <victor.stinner@haypocalc.com> 2010-06-11 23:56:51 (GMT)
commit: 313a120ab6515f1bcddb13a9403a857078a9e474 (patch)
tree: 7304789294142106cef3076af6249fa611f041e3
parent: 0f35e2c0f44b2012e4e32aaccde6fa42756e61f1 (diff)
download: cpython-313a120ab6515f1bcddb13a9403a857078a9e474.zip
cpython-313a120ab6515f1bcddb13a9403a857078a9e474.tar.gz
cpython-313a120ab6515f1bcddb13a9403a857078a9e474.tar.bz2
Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
filenames and enable os.fsencode().
-rw-r--r-- Doc/library/os.rst | 8
-rw-r--r-- Lib/os.py | 21
-rw-r--r-- Lib/test/test_ssl.py | 9
-rw-r--r-- Misc/NEWS | 3
-rw-r--r-- Objects/unicodeobject.c | 14
5 files changed, 33 insertions, 22 deletions
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 970725a..7503697 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -159,10 +159,10 @@ process and user.
.. function:: fsencode(value)
Encode *value* to bytes for use in the file system, environment variables or
- the command line. Uses :func:`sys.getfilesystemencoding` and
- ``'surrogateescape'`` error handler for strings and returns bytes unchanged.
-
- Availability: Unix.
+ the command line. Use :func:`sys.getfilesystemencoding` and
+ ``'surrogateescape'`` error handler for strings and return bytes unchanged.
+ On Windows, use ``'strict'`` error handler for strings if the file system
+ encoding is ``'mbcs'`` (which is the default encoding).
.. versionadded:: 3.2
diff --git a/Lib/os.py b/Lib/os.py
index 8f47137..e9d44cc6 100644
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -533,16 +533,19 @@ if supports_bytes_environ:
return environb.get(key, default)
__all__.append("getenvb")
-if name != 'nt':
- def fsencode(value):
- """Encode value for use in the file system, environment variables
- or the command line."""
- if isinstance(value, bytes):
- return value
- elif isinstance(value, str):
- return value.encode(sys.getfilesystemencoding(), 'surrogateescape')
+def fsencode(value):
+ """Encode value for use in the file system, environment variables
+ or the command line."""
+ if isinstance(value, bytes):
+ return value
+ elif isinstance(value, str):
+ encoding = sys.getfilesystemencoding()
+ if encoding == 'mbcs':
+ return value.encode(encoding)
else:
- raise TypeError("expect bytes or str, not %s" % type(value).__name__)
+ return value.encode(encoding, 'surrogateescape')
+ else:
+ raise TypeError("expect bytes or str, not %s" % type(value).__name__)
def _exists(name):
return name in globals()
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index c464440..9c0d263 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -33,16 +33,15 @@ else:
HOST = support.HOST
data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
-fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
CERTFILE = data_file("keycert.pem")
-BYTES_CERTFILE = fsencode(CERTFILE)
+BYTES_CERTFILE = os.fsencode(CERTFILE)
ONLYCERT = data_file("ssl_cert.pem")
ONLYKEY = data_file("ssl_key.pem")
-BYTES_ONLYCERT = fsencode(ONLYCERT)
-BYTES_ONLYKEY = fsencode(ONLYKEY)
+BYTES_ONLYCERT = os.fsencode(ONLYCERT)
+BYTES_ONLYKEY = os.fsencode(ONLYKEY)
CAPATH = data_file("capath")
-BYTES_CAPATH = fsencode(CAPATH)
+BYTES_CAPATH = os.fsencode(CAPATH)
SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")
diff --git a/Misc/NEWS b/Misc/NEWS
index 5a59310..3fd0178 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
Core and Builtins
-----------------
+- Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
+ filenames and enable os.fsencode().
+
- Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
the interpreter with characters outside the Basic Multilingual Plane
(higher than 0x10000).
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index de92787..8d75b20 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
{
- if (Py_FileSystemDefaultEncoding)
+ if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+ if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
+ return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
+ PyUnicode_GET_SIZE(unicode),
+ NULL);
+#endif
return PyUnicode_AsEncodedString(unicode,
Py_FileSystemDefaultEncoding,
"surrogateescape");
- else
+ } else
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
"surrogateescape");
@@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
if (Py_FileSystemDefaultEncoding) {
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
- return PyUnicode_DecodeMBCS(s, size, "surrogateescape");
+ return PyUnicode_DecodeMBCS(s, size, NULL);
}
#elif defined(__APPLE__)
if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
@@ -2745,7 +2751,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
#endif
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
-
+
q = (unsigned char *)s;
e = q + size;