diff options
author | Tian Gao <gaogaotiantian@hotmail.com> | 2023-04-24 05:03:49 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-24 05:03:49 (GMT) |
commit | 36860134a9eda8df5af5a38d6c7533437c594c2f (patch) | |
tree | 6b024114fd9f35c525f59add5502bdef45f90051 /Lib/ast.py | |
parent | f0ed293f6aec1c2ed22725301b77d6ccedc2d486 (diff) | |
download | cpython-36860134a9eda8df5af5a38d6c7533437c594c2f.zip cpython-36860134a9eda8df5af5a38d6c7533437c594c2f.tar.gz cpython-36860134a9eda8df5af5a38d6c7533437c594c2f.tar.bz2 |
gh-103285: Rewrite _splitlines_no_ff to improve performance (#103307)
Diffstat (limited to 'Lib/ast.py')
-rw-r--r-- | Lib/ast.py | 26 |
1 file changed, 8 insertions, 18 deletions
```diff
@@ -25,6 +25,7 @@
     :license: Python License.
 """
 import sys
+import re
 from _ast import *
 from contextlib import contextmanager, nullcontext
 from enum import IntEnum, auto, _simple_enum
@@ -305,28 +306,17 @@ def get_docstring(node, clean=True):
     return text
 
 
-def _splitlines_no_ff(source):
+_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))")
+def _splitlines_no_ff(source, maxlines=None):
     """Split a string into lines ignoring form feed and other chars.
 
     This mimics how the Python parser splits source code.
     """
-    idx = 0
     lines = []
-    next_line = ''
-    while idx < len(source):
-        c = source[idx]
-        next_line += c
-        idx += 1
-        # Keep \r\n together
-        if c == '\r' and idx < len(source) and source[idx] == '\n':
-            next_line += '\n'
-            idx += 1
-        if c in '\r\n':
-            lines.append(next_line)
-            next_line = ''
-
-    if next_line:
-        lines.append(next_line)
+    for lineno, match in enumerate(_line_pattern.finditer(source), 1):
+        if maxlines is not None and lineno > maxlines:
+            break
+        lines.append(match[0])
     return lines
 
 
@@ -360,7 +350,7 @@ def get_source_segment(source, node, *, padded=False):
     except AttributeError:
         return None
 
-    lines = _splitlines_no_ff(source)
+    lines = _splitlines_no_ff(source, maxlines=end_lineno+1)
     if end_lineno == lineno:
         return lines[lineno].encode()[col_offset:end_col_offset].decode()
 
```