diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2024-10-08 08:27:49 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-08 08:27:49 (GMT) |
commit | 93b9e6bd7d48150d8a5d16cea39246a980e073cb (patch) | |
tree | 1c685755350231585c12d2a4d76dc41aa314aaff /Modules/_localemodule.c | |
parent | 27390990fa9306e2a797a4eb2bd83c5bfc7cb186 (diff) | |
download | cpython-93b9e6bd7d48150d8a5d16cea39246a980e073cb.zip cpython-93b9e6bd7d48150d8a5d16cea39246a980e073cb.tar.gz cpython-93b9e6bd7d48150d8a5d16cea39246a980e073cb.tar.bz2 |
gh-69998: Fix decoding error in locale.nl_langinfo() (GH-124963)
The function now sets temporarily the LC_CTYPE locale to the locale
of the category that determines the requested value if the locales are
different and the resulting string is non-ASCII.
This temporary change affects other threads.
Diffstat (limited to 'Modules/_localemodule.c')
-rw-r--r-- | Modules/_localemodule.c | 193 |
1 files changed, 127 insertions, 66 deletions
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 9452df4..ce77c40 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -145,6 +145,17 @@ locale_is_ascii(const char *str) } static int +is_all_ascii(const char *str) +{ + for (; *str; str++) { + if ((unsigned char)*str > 127) { + return 0; + } + } + return 1; +} + +static int locale_decode_monetary(PyObject *dict, struct lconv *lc) { #ifndef MS_WINDOWS @@ -478,113 +489,153 @@ _locale__getdefaultlocale_impl(PyObject *module) #endif #ifdef HAVE_LANGINFO_H -#define LANGINFO(X) {#X, X} +#define LANGINFO(X, Y) {#X, X, Y} static struct langinfo_constant{ - char* name; + const char *name; int value; + int category; } langinfo_constants[] = { /* These constants should exist on any langinfo implementation */ - LANGINFO(DAY_1), - LANGINFO(DAY_2), - LANGINFO(DAY_3), - LANGINFO(DAY_4), - LANGINFO(DAY_5), - LANGINFO(DAY_6), - LANGINFO(DAY_7), - - LANGINFO(ABDAY_1), - LANGINFO(ABDAY_2), - LANGINFO(ABDAY_3), - LANGINFO(ABDAY_4), - LANGINFO(ABDAY_5), - LANGINFO(ABDAY_6), - LANGINFO(ABDAY_7), - - LANGINFO(MON_1), - LANGINFO(MON_2), - LANGINFO(MON_3), - LANGINFO(MON_4), - LANGINFO(MON_5), - LANGINFO(MON_6), - LANGINFO(MON_7), - LANGINFO(MON_8), - LANGINFO(MON_9), - LANGINFO(MON_10), - LANGINFO(MON_11), - LANGINFO(MON_12), - - LANGINFO(ABMON_1), - LANGINFO(ABMON_2), - LANGINFO(ABMON_3), - LANGINFO(ABMON_4), - LANGINFO(ABMON_5), - LANGINFO(ABMON_6), - LANGINFO(ABMON_7), - LANGINFO(ABMON_8), - LANGINFO(ABMON_9), - LANGINFO(ABMON_10), - LANGINFO(ABMON_11), - LANGINFO(ABMON_12), + LANGINFO(DAY_1, LC_TIME), + LANGINFO(DAY_2, LC_TIME), + LANGINFO(DAY_3, LC_TIME), + LANGINFO(DAY_4, LC_TIME), + LANGINFO(DAY_5, LC_TIME), + LANGINFO(DAY_6, LC_TIME), + LANGINFO(DAY_7, LC_TIME), + + LANGINFO(ABDAY_1, LC_TIME), + LANGINFO(ABDAY_2, LC_TIME), + LANGINFO(ABDAY_3, LC_TIME), + LANGINFO(ABDAY_4, LC_TIME), + LANGINFO(ABDAY_5, LC_TIME), + LANGINFO(ABDAY_6, LC_TIME), + LANGINFO(ABDAY_7, LC_TIME), + + LANGINFO(MON_1, LC_TIME), + LANGINFO(MON_2, LC_TIME), + LANGINFO(MON_3, LC_TIME), + LANGINFO(MON_4, LC_TIME), + LANGINFO(MON_5, LC_TIME), + LANGINFO(MON_6, LC_TIME), + LANGINFO(MON_7, LC_TIME), + LANGINFO(MON_8, LC_TIME), + LANGINFO(MON_9, LC_TIME), + LANGINFO(MON_10, LC_TIME), + LANGINFO(MON_11, LC_TIME), + LANGINFO(MON_12, LC_TIME), + + LANGINFO(ABMON_1, LC_TIME), + LANGINFO(ABMON_2, LC_TIME), + LANGINFO(ABMON_3, LC_TIME), + LANGINFO(ABMON_4, LC_TIME), + LANGINFO(ABMON_5, LC_TIME), + LANGINFO(ABMON_6, LC_TIME), + LANGINFO(ABMON_7, LC_TIME), + LANGINFO(ABMON_8, LC_TIME), + LANGINFO(ABMON_9, LC_TIME), + LANGINFO(ABMON_10, LC_TIME), + LANGINFO(ABMON_11, LC_TIME), + LANGINFO(ABMON_12, LC_TIME), #ifdef RADIXCHAR /* The following are not available with glibc 2.0 */ - LANGINFO(RADIXCHAR), - LANGINFO(THOUSEP), + LANGINFO(RADIXCHAR, LC_NUMERIC), + LANGINFO(THOUSEP, LC_NUMERIC), /* YESSTR and NOSTR are deprecated in glibc, since they are a special case of message translation, which should be rather done using gettext. So we don't expose it to Python in the first place. - LANGINFO(YESSTR), - LANGINFO(NOSTR), + LANGINFO(YESSTR, LC_MESSAGES), + LANGINFO(NOSTR, LC_MESSAGES), */ - LANGINFO(CRNCYSTR), + LANGINFO(CRNCYSTR, LC_MONETARY), #endif - LANGINFO(D_T_FMT), - LANGINFO(D_FMT), - LANGINFO(T_FMT), - LANGINFO(AM_STR), - LANGINFO(PM_STR), + LANGINFO(D_T_FMT, LC_TIME), + LANGINFO(D_FMT, LC_TIME), + LANGINFO(T_FMT, LC_TIME), + LANGINFO(AM_STR, LC_TIME), + LANGINFO(PM_STR, LC_TIME), /* The following constants are available only with XPG4, but... OpenBSD doesn't have CODESET but has T_FMT_AMPM, and doesn't have a few of the others. Solution: ifdef-test them all. */ #ifdef CODESET - LANGINFO(CODESET), + LANGINFO(CODESET, LC_CTYPE), #endif #ifdef T_FMT_AMPM - LANGINFO(T_FMT_AMPM), + LANGINFO(T_FMT_AMPM, LC_TIME), #endif #ifdef ERA - LANGINFO(ERA), + LANGINFO(ERA, LC_TIME), #endif #ifdef ERA_D_FMT - LANGINFO(ERA_D_FMT), + LANGINFO(ERA_D_FMT, LC_TIME), #endif #ifdef ERA_D_T_FMT - LANGINFO(ERA_D_T_FMT), + LANGINFO(ERA_D_T_FMT, LC_TIME), #endif #ifdef ERA_T_FMT - LANGINFO(ERA_T_FMT), + LANGINFO(ERA_T_FMT, LC_TIME), #endif #ifdef ALT_DIGITS - LANGINFO(ALT_DIGITS), + LANGINFO(ALT_DIGITS, LC_TIME), #endif #ifdef YESEXPR - LANGINFO(YESEXPR), + LANGINFO(YESEXPR, LC_MESSAGES), #endif #ifdef NOEXPR - LANGINFO(NOEXPR), + LANGINFO(NOEXPR, LC_MESSAGES), #endif #ifdef _DATE_FMT /* This is not available in all glibc versions that have CODESET. */ - LANGINFO(_DATE_FMT), + LANGINFO(_DATE_FMT, LC_TIME), #endif - {0, 0} + {0, 0, 0} }; +/* Temporary make the LC_CTYPE locale to be the same as + * the locale of the specified category. */ +static int +change_locale(int category, char **oldloc) +{ + /* Keep a copy of the LC_CTYPE locale */ + *oldloc = setlocale(LC_CTYPE, NULL); + if (!*oldloc) { + PyErr_SetString(PyExc_RuntimeError, "faild to get LC_CTYPE locale"); + return -1; + } + *oldloc = _PyMem_Strdup(*oldloc); + if (!*oldloc) { + PyErr_NoMemory(); + return -1; + } + + /* Set a new locale if it is different. */ + char *loc = setlocale(category, NULL); + if (loc == NULL || strcmp(loc, *oldloc) == 0) { + PyMem_Free(*oldloc); + *oldloc = NULL; + return 0; + } + + setlocale(LC_CTYPE, loc); + return 1; +} + +/* Restore the old LC_CTYPE locale. */ +static void +restore_locale(char *oldloc) +{ + if (oldloc != NULL) { + setlocale(LC_CTYPE, oldloc); + PyMem_Free(oldloc); + } +} + /*[clinic input] _locale.nl_langinfo @@ -602,14 +653,24 @@ _locale_nl_langinfo_impl(PyObject *module, int item) /* Check whether this is a supported constant. GNU libc sometimes returns numeric values in the char* return value, which would crash PyUnicode_FromString. */ - for (i = 0; langinfo_constants[i].name; i++) + for (i = 0; langinfo_constants[i].name; i++) { if (langinfo_constants[i].value == item) { /* Check NULL as a workaround for GNU libc's returning NULL instead of an empty string for nl_langinfo(ERA). */ const char *result = nl_langinfo(item); result = result != NULL ? result : ""; - return PyUnicode_DecodeLocale(result, NULL); + char *oldloc = NULL; + if (langinfo_constants[i].category != LC_CTYPE + && !is_all_ascii(result) + && change_locale(langinfo_constants[i].category, &oldloc) < 0) + { + return NULL; + } + PyObject *unicode = PyUnicode_DecodeLocale(result, NULL); + restore_locale(oldloc); + return unicode; } + } PyErr_SetString(PyExc_ValueError, "unsupported langinfo constant"); return NULL; } |