summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Include/internal/pycore_pystate.h | 1
-rw-r--r-- Objects/unicodeobject.c | 93
2 files changed, 48 insertions, 46 deletions
diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h
index b78ed69..405efb9 100644
--- a/Include/internal/pycore_pystate.h
+++ b/Include/internal/pycore_pystate.h
@@ -102,6 +102,7 @@ struct _is {
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
struct {
char *encoding; /* Filesystem encoding (encoded to UTF-8) */
+ int utf8; /* encoding=="utf-8"? */
char *errors; /* Filesystem errors (encoded to UTF-8) */
_Py_error_handler error_handler;
} fs_codec;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5f10437..7c8bc06 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3615,39 +3615,32 @@ PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
- if (interp->fs_codec.encoding) {
+ if (interp->fs_codec.utf8) {
return unicode_encode_utf8(unicode,
interp->fs_codec.error_handler,
interp->fs_codec.errors);
}
- else {
- const wchar_t *filesystem_errors = interp->config.filesystem_errors;
- _Py_error_handler errors;
- errors = get_error_handler_wide(filesystem_errors);
- assert(errors != _Py_ERROR_UNKNOWN);
- return unicode_encode_utf8(unicode, errors, NULL);
- }
-#else
- /* Bootstrap check: if the filesystem codec is implemented in Python, we
- cannot use it to encode and decode filenames before it is loaded. Load
- the Python codec requires to encode at least its own filename. Use the C
- implementation of the locale codec until the codec registry is
- initialized and the Python codec is loaded.
- See _PyUnicode_InitEncodings(). */
- if (interp->fs_codec.encoding) {
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+ else if (interp->fs_codec.encoding) {
return PyUnicode_AsEncodedString(unicode,
interp->fs_codec.encoding,
interp->fs_codec.errors);
}
+#endif
else {
+ /* Before _PyUnicode_InitEncodings() is called, the Python codec
+ machinery is not ready and so cannot be used:
+ use wcstombs() in this case. */
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
- _Py_error_handler errors;
- errors = get_error_handler_wide(filesystem_errors);
+ assert(filesystem_errors != NULL);
+ _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ return unicode_encode_utf8(unicode, errors, NULL);
+#else
return unicode_encode_locale(unicode, errors, 0);
- }
#endif
+ }
}
PyObject *
@@ -3857,39 +3850,33 @@ PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
- if (interp->fs_codec.encoding) {
+ if (interp->fs_codec.utf8) {
return unicode_decode_utf8(s, size,
interp->fs_codec.error_handler,
interp->fs_codec.errors,
NULL);
}
- else {
- const wchar_t *filesystem_errors = interp->config.filesystem_errors;
- _Py_error_handler errors;
- errors = get_error_handler_wide(filesystem_errors);
- assert(errors != _Py_ERROR_UNKNOWN);
- return unicode_decode_utf8(s, size, errors, NULL, NULL);
- }
-#else
- /* Bootstrap check: if the filesystem codec is implemented in Python, we
- cannot use it to encode and decode filenames before it is loaded. Load
- the Python codec requires to encode at least its own filename. Use the C
- implementation of the locale codec until the codec registry is
- initialized and the Python codec is loaded.
- See _PyUnicode_InitEncodings(). */
- if (interp->fs_codec.encoding) {
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+ else if (interp->fs_codec.encoding) {
return PyUnicode_Decode(s, size,
interp->fs_codec.encoding,
interp->fs_codec.errors);
}
+#endif
else {
+ /* Before _PyUnicode_InitEncodings() is called, the Python codec
+ machinery is not ready and so cannot be used:
+ use mbstowcs() in this case. */
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
- _Py_error_handler errors;
- errors = get_error_handler_wide(filesystem_errors);
+ assert(filesystem_errors != NULL);
+ _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
+ assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ return unicode_decode_utf8(s, size, errors, NULL, NULL);
+#else
return unicode_decode_locale(s, size, errors, 0);
- }
#endif
+ }
}
@@ -15849,10 +15836,16 @@ init_fs_codec(PyInterpreterState *interp)
PyMem_RawFree(interp->fs_codec.encoding);
interp->fs_codec.encoding = encoding;
+ /* encoding has been normalized by init_fs_encoding() */
+ interp->fs_codec.utf8 = (strcmp(encoding, "utf-8") == 0);
PyMem_RawFree(interp->fs_codec.errors);
interp->fs_codec.errors = errors;
interp->fs_codec.error_handler = error_handler;
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ assert(interp->fs_codec.utf8 == 1);
+#endif
+
/* At this point, PyUnicode_EncodeFSDefault() and
PyUnicode_DecodeFSDefault() can now use the Python codec rather than
the C implementation of the filesystem encoding. */
@@ -15902,6 +15895,19 @@ _PyUnicode_InitEncodings(PyThreadState *tstate)
}
+static void
+_PyUnicode_FiniEncodings(PyThreadState *tstate)
+{
+ PyInterpreterState *interp = tstate->interp;
+ PyMem_RawFree(interp->fs_codec.encoding);
+ interp->fs_codec.encoding = NULL;
+ interp->fs_codec.utf8 = 0;
+ PyMem_RawFree(interp->fs_codec.errors);
+ interp->fs_codec.errors = NULL;
+ interp->fs_codec.error_handler = _Py_ERROR_UNKNOWN;
+}
+
+
#ifdef MS_WINDOWS
int
_PyUnicode_EnableLegacyWindowsFSEncoding(void)
@@ -15954,12 +15960,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
_PyUnicode_ClearStaticStrings();
}
- PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
- PyMem_RawFree(interp->fs_codec.encoding);
- interp->fs_codec.encoding = NULL;
- PyMem_RawFree(interp->fs_codec.errors);
- interp->fs_codec.errors = NULL;
- interp->config.filesystem_errors = (wchar_t *)_Py_ERROR_UNKNOWN;
+ _PyUnicode_FiniEncodings(tstate);
}