From 7284e0ef84e53f80b2e60c3f51e3467d67a275f3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 14 Oct 2023 08:50:03 +0300 Subject: gh-110815: Support non-ASCII keyword names in PyArg_ParseTupleAndKeywords() (GH-110816) It already mostly worked, except in the case when invalid keyword argument with non-ASCII name was passed to function with non-ASCII parameter names. Then it crashed in the debug mode. --- Doc/c-api/arg.rst | 9 +++- Doc/whatsnew/3.13.rst | 4 ++ Lib/test/test_capi/test_getargs.py | 51 ++++++++++++++++++++++ .../2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst | 1 + Python/getargs.c | 2 +- 5 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index c43dd0f..62d87d8 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -416,8 +416,10 @@ API Functions .. c:function:: int PyArg_ParseTupleAndKeywords(PyObject *args, PyObject *kw, const char *format, char *keywords[], ...) Parse the parameters of a function that takes both positional and keyword - parameters into local variables. The *keywords* argument is a - ``NULL``-terminated array of keyword parameter names. Empty names denote + parameters into local variables. + The *keywords* argument is a ``NULL``-terminated array of keyword parameter + names specified as null-terminated ASCII or UTF-8 encoded C strings. + Empty names denote :ref:`positional-only parameters <positional-only_parameter>`. Returns true on success; on failure, it returns false and raises the appropriate exception. @@ -426,6 +428,9 @@ API Functions Added support for :ref:`positional-only parameters <positional-only_parameter>`. + .. versionchanged:: 3.13 + Added support for non-ASCII keyword parameter names. + ..
c:function:: int PyArg_VaParseTupleAndKeywords(PyObject *args, PyObject *kw, const char *format, char *keywords[], va_list vargs) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index dfce976..eb49e01 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1045,6 +1045,10 @@ New Features but pass event arguments as a Python :class:`tuple` object. (Contributed by Victor Stinner in :gh:`85283`.) +* :c:func:`PyArg_ParseTupleAndKeywords` now supports non-ASCII keyword + parameter names. + (Contributed by Serhiy Storchaka in :gh:`110815`.) + Porting to Python 3.13 ---------------------- diff --git a/Lib/test/test_capi/test_getargs.py b/Lib/test/test_capi/test_getargs.py index 7fc25f8..96d34ab 100644 --- a/Lib/test/test_capi/test_getargs.py +++ b/Lib/test/test_capi/test_getargs.py @@ -1235,6 +1235,57 @@ class ParseTupleAndKeywords_Test(unittest.TestCase): with self.assertRaisesRegex(SystemError, 'Empty keyword'): parse((1,), {}, 'O|OO', ['', 'a', '']) + def test_nonascii_keywords(self): + parse = _testcapi.parse_tuple_and_keywords + + for name in ('a', 'ä', 'ŷ', '㷷', '𐀀'): + with self.subTest(name=name): + self.assertEqual(parse((), {name: 1}, 'O', [name]), (1,)) + self.assertEqual(parse((), {}, '|O', [name]), (NULL,)) + with self.assertRaisesRegex(TypeError, + f"function missing required argument '{name}'"): + parse((), {}, 'O', [name]) + with self.assertRaisesRegex(TypeError, + fr"argument for function given by name \('{name}'\) " + fr"and position \(1\)"): + parse((1,), {name: 2}, 'O|O', [name, 'b']) + with self.assertRaisesRegex(TypeError, + f"'{name}' is an invalid keyword argument"): + parse((), {name: 1}, '|O', ['b']) + with self.assertRaisesRegex(TypeError, + "'b' is an invalid keyword argument"): + parse((), {'b': 1}, '|O', [name]) + + invalid = name.encode() + (name.encode()[:-1] or b'\x80') + self.assertEqual(parse((), {}, '|O', [invalid]), (NULL,)) + self.assertEqual(parse((1,), {'b': 2}, 'O|O', [invalid, 'b']), + (1, 2)) + with 
self.assertRaisesRegex(TypeError, + f"function missing required argument '{name}\ufffd'"): + parse((), {}, 'O', [invalid]) + with self.assertRaisesRegex(UnicodeDecodeError, + f"'utf-8' codec can't decode bytes? "): + parse((), {'b': 1}, '|OO', [invalid, 'b']) + with self.assertRaisesRegex(UnicodeDecodeError, + f"'utf-8' codec can't decode bytes? "): + parse((), {'b': 1}, '|O', [invalid]) + + for name2 in ('b', 'ë', 'ĉ', 'Ɐ', '𐀁'): + with self.subTest(name2=name2): + with self.assertRaisesRegex(TypeError, + f"'{name2}' is an invalid keyword argument"): + parse((), {name2: 1}, '|O', [name]) + + name2 = name.encode().decode('latin1') + if name2 != name: + with self.assertRaisesRegex(TypeError, + f"'{name2}' is an invalid keyword argument"): + parse((), {name2: 1}, '|O', [name]) + name3 = name + '3' + with self.assertRaisesRegex(TypeError, + f"'{name2}' is an invalid keyword argument"): + parse((), {name2: 1, name3: 2}, '|OO', [name, name3]) + class Test_testcapi(unittest.TestCase): locals().update((name, getattr(_testcapi, name)) diff --git a/Misc/NEWS.d/next/C API/2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst b/Misc/NEWS.d/next/C API/2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst new file mode 100644 index 0000000..216d2d2 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-10-13-14-18-06.gh-issue-110815.tEFLVl.rst @@ -0,0 +1 @@ +Support non-ASCII keyword names in :c:func:`PyArg_ParseTupleAndKeywords`. diff --git a/Python/getargs.c b/Python/getargs.c index d590e2e..a0eef2c 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -1729,7 +1729,7 @@ vgetargskeywords(PyObject *args, PyObject *kwargs, const char *format, return cleanreturn(0, &freelist); } for (i = pos; i < len; i++) { - if (_PyUnicode_EqualToASCIIString(key, kwlist[i])) { + if (PyUnicode_EqualToUTF8(key, kwlist[i])) { match = 1; break; } -- cgit v0.12