summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Victor Stinner <vstinner@python.org>, 2020-02-05 16:39:57 (GMT)
committer: GitHub <noreply@github.com>, 2020-02-05 16:39:57 (GMT)
commit: bf305cc6f05948f264349a6a6c6fd7d49c1839d3 (patch)
tree: 29db3b1a8fb376e8cca2d847515acb47884136c5 /Objects
parent: 0e4e735d06967145b49fd00693627f3624991dbc (diff)
download: cpython-bf305cc6f05948f264349a6a6c6fd7d49c1839d3.zip
cpython-bf305cc6f05948f264349a6a6c6fd7d49c1839d3.tar.gz
cpython-bf305cc6f05948f264349a6a6c6fd7d49c1839d3.tar.bz2
Add PyInterpreterState.fs_codec.utf8 (GH-18367)
Add a fast-path for UTF-8 encoding in PyUnicode_EncodeFSDefault() and PyUnicode_DecodeFSDefaultAndSize(). Add _PyUnicode_FiniEncodings() helper function for _PyUnicode_Fini().
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/unicodeobject.c 93
1 file changed, 47 insertions, 46 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 5f10437..7c8bc06 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3615,39 +3615,32 @@ PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
- if (interp->fs_codec.encoding) {
+ if (interp->fs_codec.utf8) {
return unicode_encode_utf8(unicode,
interp->fs_codec.error_handler,
interp->fs_codec.errors);
}
- else {
- const wchar_t *filesystem_errors = interp->config.filesystem_errors;
- _Py_error_handler errors;
- errors = get_error_handler_wide(filesystem_errors);
- assert(errors != _Py_ERROR_UNKNOWN);
- return unicode_encode_utf8(unicode, errors, NULL);
- }
-#else
- /* Bootstrap check: if the filesystem codec is implemented in Python, we
- cannot use it to encode and decode filenames before it is loaded. Load
- the Python codec requires to encode at least its own filename. Use the C
- implementation of the locale codec until the codec registry is
- initialized and the Python codec is loaded.
- See _PyUnicode_InitEncodings(). */
- if (interp->fs_codec.encoding) {
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+ else if (interp->fs_codec.encoding) {
return PyUnicode_AsEncodedString(unicode,
interp->fs_codec.encoding,
interp->fs_codec.errors);
}
+#endif
else {
+ /* Before _PyUnicode_InitEncodings() is called, the Python codec
+ machinery is not ready and so cannot be used:
+ use wcstombs() in this case. */
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
- _Py_error_handler errors;
- errors = get_error_handler_wide(filesystem_errors);
+ assert(filesystem_errors != NULL);
+ _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ return unicode_encode_utf8(unicode, errors, NULL);
+#else
return unicode_encode_locale(unicode, errors, 0);
- }
#endif
+ }
}
PyObject *
@@ -3857,39 +3850,33 @@ PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
-#ifdef _Py_FORCE_UTF8_FS_ENCODING
- if (interp->fs_codec.encoding) {
+ if (interp->fs_codec.utf8) {
return unicode_decode_utf8(s, size,
interp->fs_codec.error_handler,
interp->fs_codec.errors,
NULL);
}
- else {
- const wchar_t *filesystem_errors = interp->config.filesystem_errors;
- _Py_error_handler errors;
- errors = get_error_handler_wide(filesystem_errors);
- assert(errors != _Py_ERROR_UNKNOWN);
- return unicode_decode_utf8(s, size, errors, NULL, NULL);
- }
-#else
- /* Bootstrap check: if the filesystem codec is implemented in Python, we
- cannot use it to encode and decode filenames before it is loaded. Load
- the Python codec requires to encode at least its own filename. Use the C
- implementation of the locale codec until the codec registry is
- initialized and the Python codec is loaded.
- See _PyUnicode_InitEncodings(). */
- if (interp->fs_codec.encoding) {
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+ else if (interp->fs_codec.encoding) {
return PyUnicode_Decode(s, size,
interp->fs_codec.encoding,
interp->fs_codec.errors);
}
+#endif
else {
+ /* Before _PyUnicode_InitEncodings() is called, the Python codec
+ machinery is not ready and so cannot be used:
+ use mbstowcs() in this case. */
const wchar_t *filesystem_errors = interp->config.filesystem_errors;
- _Py_error_handler errors;
- errors = get_error_handler_wide(filesystem_errors);
+ assert(filesystem_errors != NULL);
+ _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
+ assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ return unicode_decode_utf8(s, size, errors, NULL, NULL);
+#else
return unicode_decode_locale(s, size, errors, 0);
- }
#endif
+ }
}
@@ -15849,10 +15836,16 @@ init_fs_codec(PyInterpreterState *interp)
PyMem_RawFree(interp->fs_codec.encoding);
interp->fs_codec.encoding = encoding;
+ /* encoding has been normalized by init_fs_encoding() */
+ interp->fs_codec.utf8 = (strcmp(encoding, "utf-8") == 0);
PyMem_RawFree(interp->fs_codec.errors);
interp->fs_codec.errors = errors;
interp->fs_codec.error_handler = error_handler;
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+ assert(interp->fs_codec.utf8 == 1);
+#endif
+
/* At this point, PyUnicode_EncodeFSDefault() and
PyUnicode_DecodeFSDefault() can now use the Python codec rather than
the C implementation of the filesystem encoding. */
@@ -15902,6 +15895,19 @@ _PyUnicode_InitEncodings(PyThreadState *tstate)
}
+static void
+_PyUnicode_FiniEncodings(PyThreadState *tstate)
+{
+ PyInterpreterState *interp = tstate->interp;
+ PyMem_RawFree(interp->fs_codec.encoding);
+ interp->fs_codec.encoding = NULL;
+ interp->fs_codec.utf8 = 0;
+ PyMem_RawFree(interp->fs_codec.errors);
+ interp->fs_codec.errors = NULL;
+ interp->fs_codec.error_handler = _Py_ERROR_UNKNOWN;
+}
+
+
#ifdef MS_WINDOWS
int
_PyUnicode_EnableLegacyWindowsFSEncoding(void)
@@ -15954,12 +15960,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
_PyUnicode_ClearStaticStrings();
}
- PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
- PyMem_RawFree(interp->fs_codec.encoding);
- interp->fs_codec.encoding = NULL;
- PyMem_RawFree(interp->fs_codec.errors);
- interp->fs_codec.errors = NULL;
- interp->config.filesystem_errors = (wchar_t *)_Py_ERROR_UNKNOWN;
+ _PyUnicode_FiniEncodings(tstate);
}