summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Lib/pydoc.py 39
-rw-r--r-- Lib/test/test_pydoc/test_pydoc.py 77
-rw-r--r-- Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst 3
3 files changed, 104 insertions, 15 deletions
diff --git a/Lib/pydoc.py b/Lib/pydoc.py
index c863794..9e84292 100644
--- a/Lib/pydoc.py
+++ b/Lib/pydoc.py
@@ -53,6 +53,7 @@ Richard Chamberlain, for the first implementation of textdoc.
# the current directory is changed with os.chdir(), an incorrect
# path will be displayed.
+import ast
import __future__
import builtins
import importlib._bootstrap
@@ -384,21 +385,29 @@ def ispackage(path):
return False
def source_synopsis(file):
    """Return the one-line summary of a file object, if present.

    *file* is a text file object containing Python source; only its
    ``readline`` method is used.  The source is tokenized up to the end of
    its first logical line.  If that line consists solely of string
    literals (optionally wrapped in parentheses and/or implicitly
    concatenated), the literals are evaluated and the first line of the
    resulting text is returned with surrounding whitespace removed.

    Return None when the first statement is anything else (code, a bytes
    literal, an f-string, an expression such as ``"a" + "b"``), or when
    the source cannot be tokenized, decoded or evaluated.
    """
    pieces = []
    try:
        tokens = tokenize.generate_tokens(file.readline)
        for tok_type, tok_string, _, _, _ in tokens:
            if tok_type == tokenize.STRING:
                pieces.append(tok_string)
            elif tok_type == tokenize.OP and tok_string in ('(', ')'):
                # Keep parentheses so "(...)"-wrapped docstrings evaluate.
                pieces.append(tok_string)
            elif tok_type == tokenize.NEWLINE:
                # End of the first logical line: evaluate what was collected.
                try:
                    with warnings.catch_warnings():
                        # Ignore the "invalid escape sequence" warning.
                        warnings.simplefilter("ignore", SyntaxWarning)
                        docstring = ast.literal_eval(''.join(pieces))
                except ValueError:
                    # literal_eval() rejects text that parses but is not a
                    # literal, e.g. '"doc"("x")' (a call) or, on
                    # interpreters that tokenize f-strings as STRING,
                    # an f-string.  SyntaxError is handled below.
                    return None
                if not isinstance(docstring, str):
                    return None
                return docstring.strip().split('\n')[0].strip()
            elif tok_type not in (tokenize.COMMENT, tokenize.NL,
                                  tokenize.ENCODING):
                # Any other token means the file does not start with a
                # plain string literal statement.
                return None
    except (tokenize.TokenError, UnicodeDecodeError, SyntaxError):
        return None
    return None
def synopsis(filename, cache={}):
"""Get the one-line summary out of a module file."""
diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py
index c798b11..cec18aa 100644
--- a/Lib/test/test_pydoc/test_pydoc.py
+++ b/Lib/test/test_pydoc/test_pydoc.py
@@ -4,6 +4,7 @@ import sys
import contextlib
import importlib.util
import inspect
+import io
import pydoc
import py_compile
import keyword
@@ -899,6 +900,82 @@ class PydocDocTest(unittest.TestCase):
synopsis = pydoc.synopsis(TESTFN, {})
self.assertEqual(synopsis, 'line 1: h\xe9')
def test_source_synopsis(self):
    # Table of (source, expected synopsis, encoding).  ``encoding`` is only
    # consulted for bytes sources, which are decoded through a
    # TextIOWrapper before being handed to pydoc.source_synopsis().
    cases = [
        ('"""Single line docstring."""',
         'Single line docstring.', None),
        ('"""First line of docstring.\nSecond line.\nThird line."""',
         'First line of docstring.', None),
        ('"""First line of docstring.\\nSecond line.\\nThird line."""',
         'First line of docstring.', None),
        ('""" Whitespace around docstring. """',
         'Whitespace around docstring.', None),
        ('import sys\n"""No docstring"""',
         None, None),
        (' \n"""Docstring after empty line."""',
         'Docstring after empty line.', None),
        ('# Comment\n"""Docstring after comment."""',
         'Docstring after comment.', None),
        (' # Indented comment\n"""Docstring after comment."""',
         'Docstring after comment.', None),
        ('""""""',  # Empty docstring
         '', None),
        ('',  # Empty file
         None, None),
        ('"""Embedded\0null byte"""',
         None, None),
        ('"""Embedded null byte"""\0',
         None, None),
        ('"""Café and résumé."""',
         'Café and résumé.', None),
        ("'''Triple single quotes'''",
         'Triple single quotes', None),
        ('"Single double quotes"',
         'Single double quotes', None),
        ("'Single single quotes'",
         'Single single quotes', None),
        ('"""split\\\nline"""',
         'splitline', None),
        ('"""Unrecognized escape \\sequence"""',
         'Unrecognized escape \\sequence', None),
        ('"""Invalid escape seq\\uence"""',
         None, None),
        ('r"""Raw \\stri\\ng"""',
         'Raw \\stri\\ng', None),
        ('b"""Bytes literal"""',
         None, None),
        ('f"""f-string"""',
         None, None),
        ('"""Concatenated""" \\\n"string" \'literals\'',
         'Concatenatedstringliterals', None),
        ('"""String""" + """expression"""',
         None, None),
        ('("""In parentheses""")',
         'In parentheses', None),
        ('("""Multiple lines """\n"""in parentheses""")',
         'Multiple lines in parentheses', None),
        ('()',  # tuple
         None, None),
        (b'# coding: iso-8859-15\n"""\xa4uro sign"""',
         '€uro sign', 'iso-8859-15'),
        (b'"""\xa4"""',  # Decoding error
         None, 'utf-8'),
    ]

    for source, expected, encoding in cases:
        if not isinstance(source, str):
            stream = io.TextIOWrapper(io.BytesIO(source), encoding=encoding)
        else:
            stream = StringIO(source)
        with stream:
            self.assertEqual(pydoc.source_synopsis(stream), expected)

    # The same extraction must also work on a real file on disk.
    with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8') as real_file:
        real_file.write('"""Real file test."""\n')
        real_file.flush()
        real_file.seek(0)
        synopsis = pydoc.source_synopsis(real_file)
        self.assertEqual(synopsis, "Real file test.")
+
@requires_docstrings
def test_synopsis_sourceless(self):
os = import_helper.import_fresh_module('os')
diff --git a/Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst b/Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst
new file mode 100644
index 0000000..b807dcb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-12-17-15-23-40.gh-issue-41872.31LjKY.rst
@@ -0,0 +1,3 @@
+Fix quick extraction of module docstrings from a file in :mod:`pydoc`.
+It now supports docstrings with single quotes, escape sequences,
+raw string literals, and other Python syntax.