From 9ff7b4af137b8028b04b52addf003c4b0607113b Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 2 Aug 2023 14:40:23 +0200 Subject: gh-107559: Argument Clinic: complain about non-ASCII chars in param docstrings (#107560) Previously, only function docstrings were checked for non-ASCII characters. Also, improve the warn() message. Co-authored-by: Alex Waygood --- Lib/test/test_clinic.py | 19 +++++++++++++++++++ Tools/clinic/clinic.py | 8 +++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index 6bdc571..6f53036 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -1427,6 +1427,25 @@ Couldn't find existing function 'fooooooooooooooooooooooo'! actual = stdout.getvalue() self.assertEqual(actual, expected) + def test_non_ascii_character_in_docstring(self): + block = """ + module test + test.fn + a: int + á param docstring + docstring fü bár baß + """ + with support.captured_stdout() as stdout: + self.parse(block) + # The line numbers are off; this is a known limitation. + expected = dedent("""\ + Warning on line 0: + Non-ascii characters are not allowed in docstrings: 'á' + Warning on line 0: + Non-ascii characters are not allowed in docstrings: 'ü', 'á', 'ß' + """) + self.assertEqual(stdout.getvalue(), expected) + class ClinicExternalTest(TestCase): maxDiff = None diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 5f7d41e..1f46166 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -785,9 +785,6 @@ class CLanguage(Language): self, f: Function ) -> str: - if re.search(r'[^\x00-\x7F]', f.docstring): - warn("Non-ascii character appear in docstring.") - text, add, output = _text_accumulator() # turn docstring into a properly quoted C string for line in f.docstring.split('\n'): @@ -5266,6 +5263,11 @@ class DSLParser: def docstring_append(self, obj: Function | Parameter, line: str) -> None: """Add a rstripped line to the current docstring.""" + matches = re.finditer(r'[^\x00-\x7F]', line) + if offending := ", ".join([repr(m[0]) for m in matches]): + warn("Non-ascii characters are not allowed in docstrings:", + offending) + docstring = obj.docstring if docstring: docstring += "\n" -- cgit v0.12