summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Serhiy Storchaka <storchaka@gmail.com>, 2024-11-21 11:16:08 (GMT)
committer: GitHub <noreply@github.com>, 2024-11-21 11:16:08 (GMT)
commit: 4803cd0244847f286641c85591fda08b513cea52 (patch)
tree: e6c1a444deff5366ba31b7ca6d321401bc245032
parent: eaf217108226633c03cc5c4c90f0b6e4587c8803 (diff)
download: cpython-4803cd0244847f286641c85591fda08b513cea52.zip
download: cpython-4803cd0244847f286641c85591fda08b513cea52.tar.gz
download: cpython-4803cd0244847f286641c85591fda08b513cea52.tar.bz2
gh-126727: Fix locale.nl_langinfo(locale.ERA) (GH-126730)
It now returns multiple era description segments separated by semicolons. Previously it only returned the first segment on platforms with Glibc.
-rw-r--r-- Doc/library/locale.rst 10
-rw-r--r-- Lib/test/test__locale.py 45
-rw-r--r-- Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst 3
-rw-r--r-- Modules/_localemodule.c 65
4 files changed, 95 insertions, 28 deletions
diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst
index f172a55..426e3a0 100644
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@@ -281,7 +281,8 @@ The :mod:`locale` module defines the following exception and functions:
.. data:: ERA
- Get a string that represents the era used in the current locale.
+ Get a string which describes how years are counted and displayed for
+ each era in a locale.
Most locales do not define this value. An example of a locale which does
define this value is the Japanese one. In Japan, the traditional
@@ -290,9 +291,10 @@ The :mod:`locale` module defines the following exception and functions:
Normally it should not be necessary to use this value directly. Specifying
the ``E`` modifier in their format strings causes the :func:`time.strftime`
- function to use this information. The format of the returned string is not
- specified, and therefore you should not assume knowledge of it on different
- systems.
+ function to use this information.
+ The format of the returned string is specified in *The Open Group Base
+ Specifications Issue 8*, paragraph `7.3.5.2 LC_TIME C-Language Access
+ <https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html#tag_07_03_05_02>`_.
.. data:: ERA_D_T_FMT
diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py
index 7e6e296..2c75103 100644
--- a/Lib/test/test__locale.py
+++ b/Lib/test/test__locale.py
@@ -90,6 +90,14 @@ known_alt_digits = {
'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
}
+# Expected nl_langinfo(ERA) data, keyed by locale name with everything after
+# the first '.' removed.  Each value is (count, sample): a non-zero count is
+# the minimum number of semicolon-separated era segments and sample is a
+# segment that must occur in the result; a count of 0 means the result is
+# expected to be the empty string.
+known_era = {
+ 'C': (0, ''),
+ 'en_US': (0, ''),
+ 'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'),
+ 'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'),
+ 'th_TW': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'),  # NOTE(review): 'th_TW' looks like a typo for 'th_TH' (the sample is a Thai-script era name); as written this entry only matches a locale named th_TW -- confirm
+}
+
if sys.platform == 'win32':
# ps_AF doesn't work on Windows: see bpo-38324 (msg361830)
del known_numerics['ps_AF']
@@ -230,6 +238,43 @@ class _LocaleTests(unittest.TestCase):
if not tested:
self.skipTest('no suitable locales')
+ @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
+ @unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA")
+ @unittest.skipIf(
+ support.is_emscripten or support.is_wasi,
+ "musl libc issue on Emscripten, bpo-46390"
+ )
+ def test_era_nl_langinfo(self):
+ # Test nl_langinfo(ERA).
+ # For each candidate locale that can be set for LC_TIME: the result must
+ # be a str, a non-empty result must contain 5 colons per
+ # semicolon-separated segment, and locales listed in known_era must
+ # match the expectation recorded there.
+ tested = False
+ for loc in candidate_locales:
+ with self.subTest(locale=loc):
+ try:
+ setlocale(LC_TIME, loc)
+ except Error:
+ self.skipTest(f'no locale {loc!r}')
+ continue  # NOTE(review): skipTest() raises unittest.SkipTest, so this statement looks unreachable -- confirm and drop
+
+ with self.subTest(locale=loc):
+ era = nl_langinfo(locale.ERA)
+ self.assertIsInstance(era, str)
+ if era:
+ self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era)  # number of segments is (semicolons + 1); 5 colons in each
+
+ loc1 = loc.split('.', 1)[0]  # keep only the part before the first '.'
+ if loc1 in known_era:
+ count, sample = known_era[loc1]
+ if count:
+ if not era:
+ self.skipTest(f'ERA is not set for locale {loc!r} on this platform')
+ self.assertGreaterEqual(era.count(';') + 1, count)  # at least `count` segments
+ self.assertIn(sample, era)
+ else:
+ self.assertEqual(era, '')  # count == 0: ERA is expected to be empty
+ tested = True
+ if not tested:
+ self.skipTest('no suitable locales')
+
def test_float_parsing(self):
# Bug #1391872: Test whether float parsing is okay on European
# locales.
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst
new file mode 100644
index 0000000..7bec8a6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst
@@ -0,0 +1,3 @@
+``locale.nl_langinfo(locale.ERA)`` now returns multiple era description
+segments separated by semicolons. Previously it only returned the first
+segment on platforms with Glibc.
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
index 2a789ea..876627b 100644
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -636,6 +636,37 @@ restore_locale(char *oldloc)
}
}
+#ifdef __GLIBC__
+#if defined(ALT_DIGITS) || defined(ERA)
+static PyObject *
+decode_strings(const char *result, size_t max_count)
+{
+    /* Convert a sequence of NUL-separated C strings to a Python string
+     * containing semicolon separated items.
+     *
+     * result     points at the first of several consecutive NUL-terminated
+     *            strings; scanning stops at the first empty string.
+     * max_count  upper bound on the number of strings that are read.
+     *
+     * Returns the object produced by PyUnicode_DecodeLocale(), or NULL
+     * with MemoryError set if the temporary buffer cannot be allocated. */
+    size_t i = 0;
+    size_t count = 0;
+    for (; count < max_count && result[i]; count++) {
+        i += strlen(result + i) + 1;
+    }
+    if (count == 0) {
+        /* Empty sequence (or max_count == 0): there is nothing to join.
+         * Return early, because the unsigned "--count" below would wrap
+         * around and a zero-length buffer would have no terminating NUL. */
+        return PyUnicode_DecodeLocale("", NULL);
+    }
+    /* i counts every scanned string together with its NUL terminator. */
+    char *buf = PyMem_Malloc(i);
+    if (buf == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    memcpy(buf, result, i);
+    /* Replace the NUL after each item except the last with a semicolon;
+     * the last NUL terminates the joined string. */
+    i = 0;
+    while (--count) {
+        i += strlen(buf + i);
+        buf[i++] = ';';
+    }
+    PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL);
+    PyMem_Free(buf);
+    return pyresult;
+}
+#endif
+#endif
+
/*[clinic input]
_locale.nl_langinfo
@@ -668,32 +699,18 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
}
PyObject *pyresult;
#ifdef __GLIBC__
+ /* According to the POSIX specification the result must be
+ * a sequence of semicolon-separated strings.
+ * But in Glibc they are NUL-separated. */
#ifdef ALT_DIGITS
if (item == ALT_DIGITS && *result) {
- /* According to the POSIX specification the result must be
- * a sequence of up to 100 semicolon-separated strings.
- * But in Glibc they are NUL-separated. */
- Py_ssize_t i = 0;
- int count = 0;
- for (; count < 100 && result[i]; count++) {
- i += strlen(result + i) + 1;
- }
- char *buf = PyMem_Malloc(i);
- if (buf == NULL) {
- PyErr_NoMemory();
- pyresult = NULL;
- }
- else {
- memcpy(buf, result, i);
- /* Replace all NULs with semicolons. */
- i = 0;
- while (--count) {
- i += strlen(buf + i);
- buf[i++] = ';';
- }
- pyresult = PyUnicode_DecodeLocale(buf, NULL);
- PyMem_Free(buf);
- }
+ pyresult = decode_strings(result, 100);
+ }
+ else
+#endif
+#ifdef ERA
+ if (item == ERA && *result) {
+ pyresult = decode_strings(result, SIZE_MAX);
}
else
#endif