From cb064fc2321ce8673fe365e9ef60445a27657f54 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 15 Jan 2018 15:58:02 +0100 Subject: bpo-31900: Fix localeconv() encoding for LC_NUMERIC (#4174) * Add _Py_GetLocaleconvNumeric() function: decode decimal_point and thousands_sep fields of localeconv() from the LC_NUMERIC encoding, rather than decoding from the LC_CTYPE encoding. * Modify locale.localeconv() and "n" formatter of str.format() (for int, float and complex to use _Py_GetLocaleconvNumeric() internally. --- Doc/library/locale.rst | 10 +++ Doc/library/stdtypes.rst | 14 ++++ Doc/whatsnew/3.7.rst | 3 + Include/fileutils.h | 5 ++ .../2017-10-30-15-55-32.bpo-31900.-S9xc4.rst | 9 +++ Modules/_localemodule.c | 37 ++++++++--- Python/fileutils.c | 77 ++++++++++++++++++++++ Python/formatter_unicode.c | 15 ++--- 8 files changed, 151 insertions(+), 19 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-10-30-15-55-32.bpo-31900.-S9xc4.rst diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 7da94a2..2fd44fe 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -147,6 +147,16 @@ The :mod:`locale` module defines the following exception and functions: | ``CHAR_MAX`` | Nothing is specified in this locale. | +--------------+-----------------------------------------+ + The function sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` + locale to decode ``decimal_point`` and ``thousands_sep`` byte strings if + they are non-ASCII or longer than 1 byte, and the ``LC_NUMERIC`` locale is + different than the ``LC_CTYPE`` locale. This temporary change affects other + threads. + + .. versionchanged:: 3.7 + The function now sets temporarily the ``LC_CTYPE`` locale to the + ``LC_NUMERIC`` locale in some cases. + .. function:: nl_langinfo(option) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index de2fb27..120b0d3 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1599,6 +1599,20 @@ expression support in the :mod:`re` module). See :ref:`formatstrings` for a description of the various formatting options that can be specified in format strings. + .. note:: + When formatting a number (:class:`int`, :class:`float`, :class:`float` + and subclasses) with the ``n`` type (ex: ``'{:n}'.format(1234)``), the + function sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` + locale to decode ``decimal_point`` and ``thousands_sep`` fields of + :c:func:`localeconv` if they are non-ASCII or longer than 1 byte, and the + ``LC_NUMERIC`` locale is different than the ``LC_CTYPE`` locale. This + temporary change affects other threads. + + .. versionchanged:: 3.7 + When formatting a number with the ``n`` type, the function sets + temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` locale in some + cases. + .. method:: str.format_map(mapping) diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 1041d31..009df38 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -866,6 +866,9 @@ Changes in Python behavior Changes in the Python API ------------------------- +* The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE`` + locale to the ``LC_NUMERIC`` locale in some cases. + * The ``asyncio.windows_utils.socketpair()`` function has been removed: use directly :func:`socket.socketpair` which is available on all platforms since Python 3.5 (before, it wasn't available on Windows). diff --git a/Include/fileutils.h b/Include/fileutils.h index b4f8b11..21eefde 100644 --- a/Include/fileutils.h +++ b/Include/fileutils.h @@ -160,6 +160,11 @@ PyAPI_FUNC(int) _Py_get_blocking(int fd); PyAPI_FUNC(int) _Py_set_blocking(int fd, int blocking); #endif /* !MS_WINDOWS */ +PyAPI_FUNC(int) _Py_GetLocaleconvNumeric( + PyObject **decimal_point, + PyObject **thousands_sep, + const char **grouping); + #endif /* Py_LIMITED_API */ #ifdef __cplusplus diff --git a/Misc/NEWS.d/next/Library/2017-10-30-15-55-32.bpo-31900.-S9xc4.rst b/Misc/NEWS.d/next/Library/2017-10-30-15-55-32.bpo-31900.-S9xc4.rst new file mode 100644 index 0000000..2d8e3ce --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-10-30-15-55-32.bpo-31900.-S9xc4.rst @@ -0,0 +1,9 @@ +The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE`` +locale to the ``LC_NUMERIC`` locale to decode ``decimal_point`` and +``thousands_sep`` byte strings if they are non-ASCII or longer than 1 byte, and +the ``LC_NUMERIC`` locale is different than the ``LC_CTYPE`` locale. This +temporary change affects other threads. + +Same change for the :meth:`str.format` method when formatting a number +(:class:`int`, :class:`float`, :class:`float` and subclasses) with the ``n`` +type (ex: ``'{:n}'.format(1234)``). diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 324b694..f9eeeb7 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -139,8 +139,9 @@ PyLocale_localeconv(PyObject* self) PyObject *x; result = PyDict_New(); - if (!result) + if (!result) { return NULL; + } /* if LC_NUMERIC is different in the C library, use saved value */ l = localeconv(); @@ -171,12 +172,6 @@ PyLocale_localeconv(PyObject* self) RESULT(#i, x); \ } while (0) - /* Numeric information */ - RESULT_STRING(decimal_point); - RESULT_STRING(thousands_sep); - x = copy_grouping(l->grouping); - RESULT("grouping", x); - /* Monetary information */ RESULT_STRING(int_curr_symbol); RESULT_STRING(currency_symbol); @@ -195,10 +190,36 @@ PyLocale_localeconv(PyObject* self) RESULT_INT(n_sep_by_space); RESULT_INT(p_sign_posn); RESULT_INT(n_sign_posn); + + /* Numeric information */ + PyObject *decimal_point, *thousands_sep; + const char *grouping; + if (_Py_GetLocaleconvNumeric(&decimal_point, + &thousands_sep, + &grouping) < 0) { + goto failed; + } + + if (PyDict_SetItemString(result, "decimal_point", decimal_point) < 0) { + Py_DECREF(decimal_point); + Py_DECREF(thousands_sep); + goto failed; + } + Py_DECREF(decimal_point); + + if (PyDict_SetItemString(result, "thousands_sep", thousands_sep) < 0) { + Py_DECREF(thousands_sep); + goto failed; + } + Py_DECREF(thousands_sep); + + x = copy_grouping(grouping); + RESULT("grouping", x); + return result; failed: - Py_XDECREF(result); + Py_DECREF(result); return NULL; } diff --git a/Python/fileutils.c b/Python/fileutils.c index a50075e..9a1435c 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -1746,3 +1746,80 @@ error: return -1; } #endif + + +int +_Py_GetLocaleconvNumeric(PyObject **decimal_point, PyObject **thousands_sep, + const char **grouping) +{ + int res = -1; + + struct lconv *lc = localeconv(); + + int change_locale = 0; + if (decimal_point != NULL && + (strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) + { + change_locale = 1; + } + if (thousands_sep != NULL && + (strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) + { + change_locale = 1; + } + + /* Keep a copy of the LC_CTYPE locale */ + char *oldloc = NULL, *loc = NULL; + if (change_locale) { + oldloc = setlocale(LC_CTYPE, NULL); + if (!oldloc) { + PyErr_SetString(PyExc_RuntimeWarning, "faild to get LC_CTYPE locale"); + return -1; + } + + oldloc = _PyMem_Strdup(oldloc); + if (!oldloc) { + PyErr_NoMemory(); + return -1; + } + + loc = setlocale(LC_NUMERIC, NULL); + if (loc != NULL && strcmp(loc, oldloc) == 0) { + loc = NULL; + } + + if (loc != NULL) { + /* Only set the locale temporarilty the LC_CTYPE locale + if LC_NUMERIC locale is different than LC_CTYPE locale and + decimal_point and/or thousands_sep are non-ASCII or longer than + 1 byte */ + setlocale(LC_CTYPE, loc); + } + } + + if (decimal_point != NULL) { + *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL); + if (*decimal_point == NULL) { + goto error; + } + } + if (thousands_sep != NULL) { + *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL); + if (*thousands_sep == NULL) { + goto error; + } + } + + if (grouping != NULL) { + *grouping = lc->grouping; + } + + res = 0; + +error: + if (loc != NULL) { + setlocale(LC_CTYPE, oldloc); + } + PyMem_Free(oldloc); + return res; +} diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 397ae7f..71e673d 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -704,18 +704,11 @@ get_locale_info(enum LocaleType type, LocaleInfo *locale_info) { switch (type) { case LT_CURRENT_LOCALE: { - struct lconv *locale_data = localeconv(); - locale_info->decimal_point = PyUnicode_DecodeLocale( - locale_data->decimal_point, - NULL); - if (locale_info->decimal_point == NULL) + if (_Py_GetLocaleconvNumeric(&locale_info->decimal_point, + &locale_info->thousands_sep, + &locale_info->grouping) < 0) { return -1; - locale_info->thousands_sep = PyUnicode_DecodeLocale( - locale_data->thousands_sep, - NULL); - if (locale_info->thousands_sep == NULL) - return -1; - locale_info->grouping = locale_data->grouping; + } break; } case LT_DEFAULT_LOCALE: -- cgit v0.12