diff options
author | Victor Stinner <victor.stinner@haypocalc.com> | 2010-06-11 23:56:51 (GMT) |
---|---|---|
committer | Victor Stinner <victor.stinner@haypocalc.com> | 2010-06-11 23:56:51 (GMT) |
commit | 313a120ab6515f1bcddb13a9403a857078a9e474 (patch) | |
tree | 7304789294142106cef3076af6249fa611f041e3 | |
parent | 0f35e2c0f44b2012e4e32aaccde6fa42756e61f1 (diff) | |
download | cpython-313a120ab6515f1bcddb13a9403a857078a9e474.zip cpython-313a120ab6515f1bcddb13a9403a857078a9e474.tar.gz cpython-313a120ab6515f1bcddb13a9403a857078a9e474.tar.bz2 |
Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
filenames and enable os.fsencode().
-rw-r--r-- | Doc/library/os.rst | 8 | ||||
-rw-r--r-- | Lib/os.py | 21 | ||||
-rw-r--r-- | Lib/test/test_ssl.py | 9 | ||||
-rw-r--r-- | Misc/NEWS | 3 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 14 |
5 files changed, 33 insertions, 22 deletions
```diff
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
index 970725a..7503697 100644
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -159,10 +159,10 @@ process and user.
 .. function:: fsencode(value)
 
    Encode *value* to bytes for use in the file system, environment variables or
-   the command line. Uses :func:`sys.getfilesystemencoding` and
-   ``'surrogateescape'`` error handler for strings and returns bytes unchanged.
-
-   Availability: Unix.
+   the command line. Use :func:`sys.getfilesystemencoding` and
+   ``'surrogateescape'`` error handler for strings and return bytes unchanged.
+   On Windows, use ``'strict'`` error handler for strings if the file system
+   encoding is ``'mbcs'`` (which is the default encoding).
 
    .. versionadded:: 3.2
 
diff --git a/Lib/os.py b/Lib/os.py
--- a/Lib/os.py
+++ b/Lib/os.py
@@ -533,16 +533,19 @@ if supports_bytes_environ:
         return environb.get(key, default)
     __all__.append("getenvb")
 
-if name != 'nt':
-    def fsencode(value):
-        """Encode value for use in the file system, environment variables
-        or the command line."""
-        if isinstance(value, bytes):
-            return value
-        elif isinstance(value, str):
-            return value.encode(sys.getfilesystemencoding(), 'surrogateescape')
+def fsencode(value):
+    """Encode value for use in the file system, environment variables
+    or the command line."""
+    if isinstance(value, bytes):
+        return value
+    elif isinstance(value, str):
+        encoding = sys.getfilesystemencoding()
+        if encoding == 'mbcs':
+            return value.encode(encoding)
         else:
-            raise TypeError("expect bytes or str, not %s" % type(value).__name__)
+            return value.encode(encoding, 'surrogateescape')
+    else:
+        raise TypeError("expect bytes or str, not %s" % type(value).__name__)
 
 def _exists(name):
     return name in globals()
diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py
index c464440..9c0d263 100644
--- a/Lib/test/test_ssl.py
+++ b/Lib/test/test_ssl.py
@@ -33,16 +33,15 @@ else:
 HOST = support.HOST
 
 data_file = lambda name: os.path.join(os.path.dirname(__file__), name)
-fsencode = lambda name: name.encode(sys.getfilesystemencoding(), "surrogateescape")
 
 CERTFILE = data_file("keycert.pem")
-BYTES_CERTFILE = fsencode(CERTFILE)
+BYTES_CERTFILE = os.fsencode(CERTFILE)
 ONLYCERT = data_file("ssl_cert.pem")
 ONLYKEY = data_file("ssl_key.pem")
-BYTES_ONLYCERT = fsencode(ONLYCERT)
-BYTES_ONLYKEY = fsencode(ONLYKEY)
+BYTES_ONLYCERT = os.fsencode(ONLYCERT)
+BYTES_ONLYKEY = os.fsencode(ONLYKEY)
 CAPATH = data_file("capath")
-BYTES_CAPATH = fsencode(CAPATH)
+BYTES_CAPATH = os.fsencode(CAPATH)
 
 SVN_PYTHON_ORG_ROOT_CERT = data_file("https_svn_python_org_root.pem")
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.2 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #8969: On Windows, use mbcs codec in strict mode to encode and decode
+  filenames and enable os.fsencode().
+
 - Issue #8941: decoding big endian UTF-32 data in UCS-2 builds could crash
   the interpreter with characters outside the Basic Multilingual Plane
   (higher than 0x10000).
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index de92787..8d75b20 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1478,11 +1478,17 @@ PyObject *PyUnicode_AsEncodedObject(PyObject *unicode,
 
 PyObject *PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
-    if (Py_FileSystemDefaultEncoding)
+    if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0)
+            return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
+                                        PyUnicode_GET_SIZE(unicode),
+                                        NULL);
+#endif
         return PyUnicode_AsEncodedString(unicode,
                                          Py_FileSystemDefaultEncoding,
                                          "surrogateescape");
-    else
+    } else
         return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                     PyUnicode_GET_SIZE(unicode),
                                     "surrogateescape");
@@ -1639,7 +1645,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
     if (Py_FileSystemDefaultEncoding) {
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
         if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
-            return PyUnicode_DecodeMBCS(s, size, "surrogateescape");
+            return PyUnicode_DecodeMBCS(s, size, NULL);
         }
 #elif defined(__APPLE__)
         if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
@@ -2745,7 +2751,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
 #endif
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
-    
+
     q = (unsigned char *)s;
     e = q + size;
 
```