diff options
-rw-r--r-- | .github/dependabot.yml | 2 | ||||
-rw-r--r-- | .github/workflows/mypy.yml | 14 | ||||
-rw-r--r-- | Tools/cases_generator/flags.py | 17 | ||||
-rw-r--r-- | Tools/cases_generator/formatting.py | 11 | ||||
-rw-r--r-- | Tools/cases_generator/generate_cases.py | 97 | ||||
-rw-r--r-- | Tools/cases_generator/lexer.py | 329 | ||||
-rw-r--r-- | Tools/cases_generator/mypy.ini | 14 | ||||
-rw-r--r-- | Tools/cases_generator/parsing.py | 4 | ||||
-rw-r--r-- | Tools/cases_generator/stacking.py | 16 | ||||
-rw-r--r-- | Tools/clinic/requirements-dev.txt | 2 | ||||
-rw-r--r-- | Tools/requirements-dev.txt | 3 |
11 files changed, 314 insertions, 195 deletions
diff --git a/.github/dependabot.yml b/.github/dependabot.yml index f026b0f..c8a3165 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -13,7 +13,7 @@ updates: - "version-update:semver-minor" - "version-update:semver-patch" - package-ecosystem: "pip" - directory: "/Tools/clinic/" + directory: "/Tools/" schedule: interval: "monthly" labels: diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 1315bb5..a83a90c 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -8,6 +8,7 @@ on: pull_request: paths: - "Tools/clinic/**" + - "Tools/cases_generator/**" - ".github/workflows/mypy.yml" workflow_dispatch: @@ -25,15 +26,18 @@ concurrency: jobs: mypy: - name: Run mypy on Tools/clinic/ + strategy: + matrix: + target: ["Tools/cases_generator", "Tools/clinic"] + name: Run mypy on ${{ matrix.target }} runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: "3.x" + python-version: "3.11" cache: pip - cache-dependency-path: Tools/clinic/requirements-dev.txt - - run: pip install -r Tools/clinic/requirements-dev.txt - - run: mypy --config-file Tools/clinic/mypy.ini + cache-dependency-path: Tools/requirements-dev.txt + - run: pip install -r Tools/requirements-dev.txt + - run: mypy --config-file ${{ matrix.target }}/mypy.ini diff --git a/Tools/cases_generator/flags.py b/Tools/cases_generator/flags.py index 962f003..536f093 100644 --- a/Tools/cases_generator/flags.py +++ b/Tools/cases_generator/flags.py @@ -16,12 +16,11 @@ class InstructionFlags: HAS_FREE_FLAG: bool HAS_LOCAL_FLAG: bool - def __post_init__(self): + def __post_init__(self) -> None: self.bitmask = {name: (1 << i) for i, name in enumerate(self.names())} @staticmethod - def fromInstruction(instr: parsing.Node): - + def fromInstruction(instr: parsing.Node) -> "InstructionFlags": has_free = ( variable_used(instr, "PyCell_New") or variable_used(instr, "PyCell_GET") @@ -41,7 +40,7 @@ class InstructionFlags: ) @staticmethod - def newEmpty(): + def newEmpty() -> "InstructionFlags": return InstructionFlags(False, False, False, False, False, False) def add(self, other: "InstructionFlags") -> None: @@ -49,7 +48,7 @@ class InstructionFlags: if value: setattr(self, name, value) - def names(self, value=None) -> list[str]: + def names(self, value: bool | None = None) -> list[str]: if value is None: return list(dataclasses.asdict(self).keys()) return [n for n, v in dataclasses.asdict(self).items() if v == value] @@ -62,7 +61,7 @@ class InstructionFlags: return flags @classmethod - def emit_macros(cls, out: Formatter): + def emit_macros(cls, out: Formatter) -> None: flags = cls.newEmpty() for name, value in flags.bitmask.items(): out.emit(f"#define {name} ({value})") @@ -90,9 +89,9 @@ def variable_used_unspecialized(node: parsing.Node, name: str) -> bool: text = "".join(token.text.split()) # TODO: Handle nested #if if text == "#if": - if ( - i + 1 < len(node.tokens) - and node.tokens[i + 1].text in ("ENABLE_SPECIALIZATION", "TIER_ONE") + if i + 1 < len(node.tokens) and node.tokens[i + 1].text in ( + "ENABLE_SPECIALIZATION", + "TIER_ONE", ): skipping = True elif text in ("#else", "#endif"): diff --git a/Tools/cases_generator/formatting.py b/Tools/cases_generator/formatting.py index 5894751..4fd9172 100644 --- a/Tools/cases_generator/formatting.py +++ b/Tools/cases_generator/formatting.py @@ -1,6 +1,7 @@ import contextlib import re import typing +from collections.abc import Iterator from parsing import StackEffect, Family @@ -58,13 +59,13 @@ class Formatter: self.set_lineno(self.lineno + 1, self.filename) @contextlib.contextmanager - def indent(self): + def indent(self) -> Iterator[None]: self.prefix += " " yield self.prefix = self.prefix[:-4] @contextlib.contextmanager - def block(self, head: str, tail: str = ""): + def block(self, head: str, tail: str = "") -> Iterator[None]: if head: self.emit(head + " {") else: @@ -77,7 +78,7 @@ class Formatter: self, input_effects: list[StackEffect], output_effects: list[StackEffect], - ): + ) -> None: shrink, isym = list_effect_size(input_effects) grow, osym = list_effect_size(output_effects) diff = grow - shrink @@ -90,7 +91,7 @@ class Formatter: if osym and osym != isym: self.emit(f"STACK_GROW({osym});") - def declare(self, dst: StackEffect, src: StackEffect | None): + def declare(self, dst: StackEffect, src: StackEffect | None) -> None: if dst.name == UNUSED or dst.cond == "0": return typ = f"{dst.type}" if dst.type else "PyObject *" @@ -107,7 +108,7 @@ class Formatter: sepa = "" if typ.endswith("*") else " " self.emit(f"{typ}{sepa}{dst.name}{init};") - def assign(self, dst: StackEffect, src: StackEffect): + def assign(self, dst: StackEffect, src: StackEffect) -> None: if src.name == UNUSED or dst.name == UNUSED: return cast = self.cast(dst, src) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index f31b665..de31129 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -10,6 +10,7 @@ import os import posixpath import sys import typing +from collections.abc import Iterator import stacking # Early import to avoid circular import from analysis import Analyzer @@ -23,7 +24,6 @@ from instructions import ( MacroInstruction, MacroParts, PseudoInstruction, - StackEffect, OverriddenInstructionPlaceHolder, TIER_ONE, TIER_TWO, @@ -80,12 +80,10 @@ SPECIALLY_HANDLED_ABSTRACT_INSTR = { "STORE_FAST", "STORE_FAST_MAYBE_NULL", "COPY", - # Arithmetic "_BINARY_OP_MULTIPLY_INT", "_BINARY_OP_ADD_INT", "_BINARY_OP_SUBTRACT_INT", - } arg_parser = argparse.ArgumentParser( @@ -144,6 +142,7 @@ arg_parser.add_argument( default=DEFAULT_ABSTRACT_INTERPRETER_OUTPUT, ) + class Generator(Analyzer): def get_stack_effect_info( self, thing: parsing.InstDef | parsing.Macro | parsing.Pseudo @@ -183,7 +182,8 @@ class Generator(Analyzer): assert target_instr target_popped = effect_str(target_instr.input_effects) target_pushed = effect_str(target_instr.output_effects) - if popped is None and pushed is None: + if pushed is None: + assert popped is None popped, pushed = target_popped, target_pushed else: assert popped == target_popped @@ -193,7 +193,7 @@ class Generator(Analyzer): return instr, popped, pushed @contextlib.contextmanager - def metadata_item(self, signature, open, close): + def metadata_item(self, signature: str, open: str, close: str) -> Iterator[None]: self.out.emit("") self.out.emit(f"extern {signature};") self.out.emit("#ifdef NEED_OPCODE_METADATA") @@ -216,9 +216,10 @@ class Generator(Analyzer): def write_function( direction: str, data: list[tuple[AnyInstruction, str]] ) -> None: - with self.metadata_item( - f"int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump)", "", "" + f"int _PyOpcode_num_{direction}(int opcode, int oparg, bool jump)", + "", + "", ): with self.out.block("switch(opcode)"): for instr, effect in data: @@ -243,23 +244,24 @@ class Generator(Analyzer): paths = f"\n{self.out.comment} ".join(filenames) return f"{self.out.comment} from:\n{self.out.comment} {paths}\n" - def write_provenance_header(self): + def write_provenance_header(self) -> None: self.out.write_raw(f"{self.out.comment} This file is generated by {THIS}\n") self.out.write_raw(self.from_source_files()) self.out.write_raw(f"{self.out.comment} Do not edit!\n") - def assign_opcode_ids(self): + def assign_opcode_ids(self) -> None: """Assign IDs to opcodes""" - ops: list[(bool, str)] = [] # (has_arg, name) for each opcode + ops: list[tuple[bool, str]] = [] # (has_arg, name) for each opcode instrumented_ops: list[str] = [] for instr in itertools.chain( [instr for instr in self.instrs.values() if instr.kind != "op"], - self.macro_instrs.values()): - + self.macro_instrs.values(), + ): + assert isinstance(instr, (Instruction, MacroInstruction, PseudoInstruction)) name = instr.name - if name.startswith('INSTRUMENTED_'): + if name.startswith("INSTRUMENTED_"): instrumented_ops.append(name) else: ops.append((instr.instr_flags.HAS_ARG_FLAG, name)) @@ -268,33 +270,32 @@ class Generator(Analyzer): # rather than bytecodes.c, so we need to add it explicitly # here (at least until we add something to bytecodes.c to # declare external instructions). - instrumented_ops.append('INSTRUMENTED_LINE') + instrumented_ops.append("INSTRUMENTED_LINE") # assert lists are unique assert len(set(ops)) == len(ops) assert len(set(instrumented_ops)) == len(instrumented_ops) - opname: list[str or None] = [None] * 512 - opmap: dict = {} - markers: dict = {} + opname: list[str | None] = [None] * 512 + opmap: dict[str, int] = {} + markers: dict[str, int] = {} - def map_op(op, name): + def map_op(op: int, name: str) -> None: assert op < len(opname) assert opname[op] is None assert name not in opmap opname[op] = name opmap[name] = op - # 0 is reserved for cache entries. This helps debugging. - map_op(0, 'CACHE') + map_op(0, "CACHE") # 17 is reserved as it is the initial value for the specializing counter. # This helps catch cases where we attempt to execute a cache. - map_op(17, 'RESERVED') + map_op(17, "RESERVED") # 166 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py - map_op(166, 'RESUME') + map_op(166, "RESUME") next_opcode = 1 @@ -306,13 +307,13 @@ class Generator(Analyzer): assert next_opcode < 255 map_op(next_opcode, name) - if has_arg and 'HAVE_ARGUMENT' not in markers: - markers['HAVE_ARGUMENT'] = next_opcode + if has_arg and "HAVE_ARGUMENT" not in markers: + markers["HAVE_ARGUMENT"] = next_opcode # Instrumented opcodes are at the end of the valid range min_instrumented = 254 - (len(instrumented_ops) - 1) assert next_opcode <= min_instrumented - markers['MIN_INSTRUMENTED_OPCODE'] = min_instrumented + markers["MIN_INSTRUMENTED_OPCODE"] = min_instrumented for i, op in enumerate(instrumented_ops): map_op(min_instrumented + i, op) @@ -320,11 +321,13 @@ class Generator(Analyzer): for i, op in enumerate(sorted(self.pseudos)): map_op(256 + i, op) - assert 255 not in opmap # 255 is reserved + assert 255 not in opmap.values() # 255 is reserved self.opmap = opmap self.markers = markers - def write_opcode_ids(self, opcode_ids_h_filename, opcode_targets_filename): + def write_opcode_ids( + self, opcode_ids_h_filename: str, opcode_targets_filename: str + ) -> None: """Write header file that defined the opcode IDs""" with open(opcode_ids_h_filename, "w") as f: @@ -337,15 +340,15 @@ class Generator(Analyzer): self.out.emit("#ifndef Py_OPCODE_IDS_H") self.out.emit("#define Py_OPCODE_IDS_H") self.out.emit("#ifdef __cplusplus") - self.out.emit("extern \"C\" {") + self.out.emit('extern "C" {') self.out.emit("#endif") self.out.emit("") self.out.emit("/* Instruction opcodes for compiled code */") - def define(name, opcode): + def define(name: str, opcode: int) -> None: self.out.emit(f"#define {name:<38} {opcode:>3}") - all_pairs = [] + all_pairs: list[tuple[int, int, str]] = [] # the second item in the tuple sorts the markers before the ops all_pairs.extend((i, 1, name) for (name, i) in self.markers.items()) all_pairs.extend((i, 2, name) for (name, i) in self.opmap.items()) @@ -370,7 +373,6 @@ class Generator(Analyzer): targets[op] = f"TARGET_{name}" f.write(",\n".join([f" &&{s}" for s in targets])) - def write_metadata(self, metadata_filename: str, pymetadata_filename: str) -> None: """Write instruction metadata to output file.""" @@ -469,7 +471,7 @@ class Generator(Analyzer): "const struct opcode_metadata " "_PyOpcode_opcode_metadata[OPCODE_METADATA_SIZE]", "=", - ";" + ";", ): # Write metadata for each instruction for thing in self.everything: @@ -482,7 +484,9 @@ class Generator(Analyzer): case parsing.Macro(): self.write_metadata_for_macro(self.macro_instrs[thing.name]) case parsing.Pseudo(): - self.write_metadata_for_pseudo(self.pseudo_instrs[thing.name]) + self.write_metadata_for_pseudo( + self.pseudo_instrs[thing.name] + ) case _: typing.assert_never(thing) @@ -490,7 +494,7 @@ class Generator(Analyzer): "const struct opcode_macro_expansion " "_PyOpcode_macro_expansion[OPCODE_MACRO_EXPANSION_SIZE]", "=", - ";" + ";", ): # Write macro expansion for each non-pseudo instruction for thing in self.everything: @@ -529,7 +533,9 @@ class Generator(Analyzer): self.write_uop_items(lambda name, counter: f'[{name}] = "{name}",') with self.metadata_item( - f"const char *const _PyOpcode_OpName[{1 + max(self.opmap.values())}]", "=", ";" + f"const char *const _PyOpcode_OpName[{1 + max(self.opmap.values())}]", + "=", + ";", ): for name in self.opmap: self.out.emit(f'[{name}] = "{name}",') @@ -542,11 +548,9 @@ class Generator(Analyzer): for m in family.members: deoptcodes[m] = name # special case: - deoptcodes['BINARY_OP_INPLACE_ADD_UNICODE'] = 'BINARY_OP' + deoptcodes["BINARY_OP_INPLACE_ADD_UNICODE"] = "BINARY_OP" - with self.metadata_item( - f"const uint8_t _PyOpcode_Deopt[256]", "=", ";" - ): + with self.metadata_item(f"const uint8_t _PyOpcode_Deopt[256]", "=", ";"): for opt, deopt in sorted(deoptcodes.items()): self.out.emit(f"[{opt}] = {deopt},") @@ -604,10 +608,9 @@ class Generator(Analyzer): if name not in specialized_ops: self.out.emit(f"'{name}': {op},") - for name in ['MIN_INSTRUMENTED_OPCODE', 'HAVE_ARGUMENT']: + for name in ["MIN_INSTRUMENTED_OPCODE", "HAVE_ARGUMENT"]: self.out.emit(f"{name} = {self.markers[name]}") - def write_pseudo_instrs(self) -> None: """Write the IS_PSEUDO_INSTR macro""" self.out.emit("\n\n#define IS_PSEUDO_INSTR(OP) ( \\") @@ -834,7 +837,10 @@ class Generator(Analyzer): pass case parsing.InstDef(): instr = AbstractInstruction(self.instrs[thing.name].inst) - if instr.is_viable_uop() and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR: + if ( + instr.is_viable_uop() + and instr.name not in SPECIALLY_HANDLED_ABSTRACT_INSTR + ): self.out.emit("") with self.out.block(f"case {thing.name}:"): instr.write(self.out, tier=TIER_TWO) @@ -878,7 +884,7 @@ class Generator(Analyzer): self.out.emit(f"DISPATCH();") -def main(): +def main() -> None: """Parse command line, parse input, analyze, write output.""" args = arg_parser.parse_args() # Prints message and sys.exit(2) on error if len(args.input) == 0: @@ -899,8 +905,9 @@ def main(): a.write_opcode_ids(args.opcode_ids_h, args.opcode_targets_h) a.write_metadata(args.metadata, args.pymetadata) a.write_executor_instructions(args.executor_cases, args.emit_line_directives) - a.write_abstract_interpreter_instructions(args.abstract_interpreter_cases, - args.emit_line_directives) + a.write_abstract_interpreter_instructions( + args.abstract_interpreter_cases, args.emit_line_directives + ) if __name__ == "__main__": diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index fe9c05e..a60f6c1 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -4,132 +4,221 @@ import re from dataclasses import dataclass +from collections.abc import Iterator -def choice(*opts): + +def choice(*opts: str) -> str: return "|".join("(%s)" % opt for opt in opts) + # Regexes # Longer operators must go before shorter ones. -PLUSPLUS = r'\+\+' -MINUSMINUS = r'--' +PLUSPLUS = r"\+\+" +MINUSMINUS = r"--" # -> -ARROW = r'->' -ELLIPSIS = r'\.\.\.' +ARROW = r"->" +ELLIPSIS = r"\.\.\." # Assignment operators -TIMESEQUAL = r'\*=' -DIVEQUAL = r'/=' -MODEQUAL = r'%=' -PLUSEQUAL = r'\+=' -MINUSEQUAL = r'-=' -LSHIFTEQUAL = r'<<=' -RSHIFTEQUAL = r'>>=' -ANDEQUAL = r'&=' -OREQUAL = r'\|=' -XOREQUAL = r'\^=' +TIMESEQUAL = r"\*=" +DIVEQUAL = r"/=" +MODEQUAL = r"%=" +PLUSEQUAL = r"\+=" +MINUSEQUAL = r"-=" +LSHIFTEQUAL = r"<<=" +RSHIFTEQUAL = r">>=" +ANDEQUAL = r"&=" +OREQUAL = r"\|=" +XOREQUAL = r"\^=" # Operators -PLUS = r'\+' -MINUS = r'-' -TIMES = r'\*' -DIVIDE = r'/' -MOD = r'%' -NOT = r'~' -XOR = r'\^' -LOR = r'\|\|' -LAND = r'&&' -LSHIFT = r'<<' -RSHIFT = r'>>' -LE = r'<=' -GE = r'>=' -EQ = r'==' -NE = r'!=' -LT = r'<' -GT = r'>' -LNOT = r'!' -OR = r'\|' -AND = r'&' -EQUALS = r'=' +PLUS = r"\+" +MINUS = r"-" +TIMES = r"\*" +DIVIDE = r"/" +MOD = r"%" +NOT = r"~" +XOR = r"\^" +LOR = r"\|\|" +LAND = r"&&" +LSHIFT = r"<<" +RSHIFT = r">>" +LE = r"<=" +GE = r">=" +EQ = r"==" +NE = r"!=" +LT = r"<" +GT = r">" +LNOT = r"!" +OR = r"\|" +AND = r"&" +EQUALS = r"=" # ? -CONDOP = r'\?' +CONDOP = r"\?" # Delimiters -LPAREN = r'\(' -RPAREN = r'\)' -LBRACKET = r'\[' -RBRACKET = r'\]' -LBRACE = r'\{' -RBRACE = r'\}' -COMMA = r',' -PERIOD = r'\.' -SEMI = r';' -COLON = r':' -BACKSLASH = r'\\' - -operators = { op: pattern for op, pattern in globals().items() if op == op.upper() } +LPAREN = r"\(" +RPAREN = r"\)" +LBRACKET = r"\[" +RBRACKET = r"\]" +LBRACE = r"\{" +RBRACE = r"\}" +COMMA = r"," +PERIOD = r"\." +SEMI = r";" +COLON = r":" +BACKSLASH = r"\\" + +operators = {op: pattern for op, pattern in globals().items() if op == op.upper()} for op in operators: globals()[op] = op -opmap = { pattern.replace("\\", "") or '\\' : op for op, pattern in operators.items() } +opmap = {pattern.replace("\\", "") or "\\": op for op, pattern in operators.items()} # Macros -macro = r'# *(ifdef|ifndef|undef|define|error|endif|if|else|include|#)' -MACRO = 'MACRO' +macro = r"# *(ifdef|ifndef|undef|define|error|endif|if|else|include|#)" +MACRO = "MACRO" -id_re = r'[a-zA-Z_][0-9a-zA-Z_]*' -IDENTIFIER = 'IDENTIFIER' +id_re = r"[a-zA-Z_][0-9a-zA-Z_]*" +IDENTIFIER = "IDENTIFIER" -suffix = r'([uU]?[lL]?[lL]?)' -octal = r'0[0-7]+' + suffix -hex = r'0[xX][0-9a-fA-F]+' -decimal_digits = r'(0|[1-9][0-9]*)' +suffix = r"([uU]?[lL]?[lL]?)" +octal = r"0[0-7]+" + suffix +hex = r"0[xX][0-9a-fA-F]+" +decimal_digits = r"(0|[1-9][0-9]*)" decimal = decimal_digits + suffix exponent = r"""([eE][-+]?[0-9]+)""" fraction = r"""([0-9]*\.[0-9]+)|([0-9]+\.)""" -float = '(((('+fraction+')'+exponent+'?)|([0-9]+'+exponent+'))[FfLl]?)' +float = "((((" + fraction + ")" + exponent + "?)|([0-9]+" + exponent + "))[FfLl]?)" number_re = choice(octal, hex, float, decimal) -NUMBER = 'NUMBER' +NUMBER = "NUMBER" simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])""" decimal_escape = r"""(\d+)""" hex_escape = r"""(x[0-9a-fA-F]+)""" -escape_sequence = r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))' -string_char = r"""([^"\\\n]|"""+escape_sequence+')' -str_re = '"'+string_char+'*"' -STRING = 'STRING' -char = r'\'.\'' # TODO: escape sequence -CHARACTER = 'CHARACTER' +escape_sequence = ( + r"""(\\(""" + simple_escape + "|" + decimal_escape + "|" + hex_escape + "))" +) +string_char = r"""([^"\\\n]|""" + escape_sequence + ")" +str_re = '"' + string_char + '*"' +STRING = "STRING" +char = r"\'.\'" # TODO: escape sequence +CHARACTER = "CHARACTER" -comment_re = r'//.*|/\*([^*]|\*[^/])*\*/' -COMMENT = 'COMMENT' +comment_re = r"//.*|/\*([^*]|\*[^/])*\*/" +COMMENT = "COMMENT" newline = r"\n" -invalid = r"\S" # A single non-space character that's not caught by any of the other patterns -matcher = re.compile(choice(id_re, number_re, str_re, char, newline, macro, comment_re, *operators.values(), invalid)) -letter = re.compile(r'[a-zA-Z_]') - -kwds = ( - 'AUTO', 'BREAK', 'CASE', 'CHAR', 'CONST', - 'CONTINUE', 'DEFAULT', 'DO', 'DOUBLE', 'ELSE', 'ENUM', 'EXTERN', - 'FLOAT', 'FOR', 'GOTO', 'IF', 'INLINE', 'INT', 'LONG', 'OVERRIDE', - 'REGISTER', 'OFFSETOF', - 'RESTRICT', 'RETURN', 'SHORT', 'SIGNED', 'SIZEOF', 'STATIC', 'STRUCT', - 'SWITCH', 'TYPEDEF', 'UNION', 'UNSIGNED', 'VOID', - 'VOLATILE', 'WHILE' +invalid = ( + r"\S" # A single non-space character that's not caught by any of the other patterns ) -for name in kwds: - globals()[name] = name -keywords = { name.lower() : name for name in kwds } +matcher = re.compile( + choice( + id_re, + number_re, + str_re, + char, + newline, + macro, + comment_re, + *operators.values(), + invalid, + ) +) +letter = re.compile(r"[a-zA-Z_]") + + +kwds = [] +AUTO = "AUTO" +kwds.append(AUTO) +BREAK = "BREAK" +kwds.append(BREAK) +CASE = "CASE" +kwds.append(CASE) +CHAR = "CHAR" +kwds.append(CHAR) +CONST = "CONST" +kwds.append(CONST) +CONTINUE = "CONTINUE" +kwds.append(CONTINUE) +DEFAULT = "DEFAULT" +kwds.append(DEFAULT) +DO = "DO" +kwds.append(DO) +DOUBLE = "DOUBLE" +kwds.append(DOUBLE) +ELSE = "ELSE" +kwds.append(ELSE) +ENUM = "ENUM" +kwds.append(ENUM) +EXTERN = "EXTERN" +kwds.append(EXTERN) +FLOAT = "FLOAT" +kwds.append(FLOAT) +FOR = "FOR" +kwds.append(FOR) +GOTO = "GOTO" +kwds.append(GOTO) +IF = "IF" +kwds.append(IF) +INLINE = "INLINE" +kwds.append(INLINE) +INT = "INT" +kwds.append(INT) +LONG = "LONG" +kwds.append(LONG) +OVERRIDE = "OVERRIDE" +kwds.append(OVERRIDE) +REGISTER = "REGISTER" +kwds.append(REGISTER) +OFFSETOF = "OFFSETOF" +kwds.append(OFFSETOF) +RESTRICT = "RESTRICT" +kwds.append(RESTRICT) +RETURN = "RETURN" +kwds.append(RETURN) +SHORT = "SHORT" +kwds.append(SHORT) +SIGNED = "SIGNED" +kwds.append(SIGNED) +SIZEOF = "SIZEOF" +kwds.append(SIZEOF) +STATIC = "STATIC" +kwds.append(STATIC) +STRUCT = "STRUCT" +kwds.append(STRUCT) +SWITCH = "SWITCH" +kwds.append(SWITCH) +TYPEDEF = "TYPEDEF" +kwds.append(TYPEDEF) +UNION = "UNION" +kwds.append(UNION) +UNSIGNED = "UNSIGNED" +kwds.append(UNSIGNED) +VOID = "VOID" +kwds.append(VOID) +VOLATILE = "VOLATILE" +kwds.append(VOLATILE) +WHILE = "WHILE" +kwds.append(WHILE) +keywords = {name.lower(): name for name in kwds} + +__all__ = [] +__all__.extend(kwds) def make_syntax_error( - message: str, filename: str, line: int, column: int, line_text: str, + message: str, + filename: str | None, + line: int, + column: int, + line_text: str, ) -> SyntaxError: return SyntaxError(message, (filename, line, column, line_text)) @@ -142,30 +231,30 @@ class Token: end: tuple[int, int] @property - def line(self): + def line(self) -> int: return self.begin[0] @property - def column(self): + def column(self) -> int: return self.begin[1] @property - def end_line(self): + def end_line(self) -> int: return self.end[0] @property - def end_column(self): + def end_column(self) -> int: return self.end[1] @property - def width(self): + def width(self) -> int: return self.end[1] - self.begin[1] - def replaceText(self, txt): + def replaceText(self, txt: str) -> "Token": assert isinstance(txt, str) return Token(self.kind, txt, self.begin, self.end) - def __repr__(self): + def __repr__(self) -> str: b0, b1 = self.begin e0, e1 = self.end if b0 == e0: @@ -174,7 +263,7 @@ class Token: return f"{self.kind}({self.text!r}, {b0}:{b1}, {e0}:{e1})" -def tokenize(src, line=1, filename=None): +def tokenize(src: str, line: int = 1, filename: str | None = None) -> Iterator[Token]: linestart = -1 for m in matcher.finditer(src): start, end = m.span() @@ -183,73 +272,75 @@ def tokenize(src, line=1, filename=None): kind = keywords[text] elif letter.match(text): kind = IDENTIFIER - elif text == '...': + elif text == "...": kind = ELLIPSIS - elif text == '.': + elif text == ".": kind = PERIOD - elif text[0] in '0123456789.': + elif text[0] in "0123456789.": kind = NUMBER elif text[0] == '"': kind = STRING elif text in opmap: kind = opmap[text] - elif text == '\n': + elif text == "\n": linestart = start line += 1 - kind = '\n' + kind = "\n" elif text[0] == "'": kind = CHARACTER - elif text[0] == '#': + elif text[0] == "#": kind = MACRO - elif text[0] == '/' and text[1] in '/*': + elif text[0] == "/" and text[1] in "/*": kind = COMMENT else: lineend = src.find("\n", start) if lineend == -1: lineend = len(src) - raise make_syntax_error(f"Bad token: {text}", - filename, line, start-linestart+1, src[linestart:lineend]) + raise make_syntax_error( + f"Bad token: {text}", + filename, + line, + start - linestart + 1, + src[linestart:lineend], + ) if kind == COMMENT: - begin = line, start-linestart - newlines = text.count('\n') + begin = line, start - linestart + newlines = text.count("\n") if newlines: - linestart = start + text.rfind('\n') + linestart = start + text.rfind("\n") line += newlines else: - begin = line, start-linestart + begin = line, start - linestart if kind != "\n": - yield Token(kind, text, begin, (line, start-linestart+len(text))) - - -__all__ = [] -__all__.extend([kind for kind in globals() if kind.upper() == kind]) + yield Token(kind, text, begin, (line, start - linestart + len(text))) def to_text(tkns: list[Token], dedent: int = 0) -> str: res: list[str] = [] - line, col = -1, 1+dedent + line, col = -1, 1 + dedent for tkn in tkns: if line == -1: line, _ = tkn.begin l, c = tkn.begin - #assert(l >= line), (line, txt, start, end) + # assert(l >= line), (line, txt, start, end) while l > line: line += 1 - res.append('\n') - col = 1+dedent - res.append(' '*(c-col)) + res.append("\n") + col = 1 + dedent + res.append(" " * (c - col)) text = tkn.text - if dedent != 0 and tkn.kind == 'COMMENT' and '\n' in text: + if dedent != 0 and tkn.kind == "COMMENT" and "\n" in text: if dedent < 0: - text = text.replace('\n', '\n' + ' '*-dedent) + text = text.replace("\n", "\n" + " " * -dedent) # TODO: dedent > 0 res.append(text) line, col = tkn.end - return ''.join(res) + return "".join(res) if __name__ == "__main__": import sys + filename = sys.argv[1] if filename == "-c": src = sys.argv[2] diff --git a/Tools/cases_generator/mypy.ini b/Tools/cases_generator/mypy.ini new file mode 100644 index 0000000..7480841 --- /dev/null +++ b/Tools/cases_generator/mypy.ini @@ -0,0 +1,14 @@ +[mypy] +files = Tools/cases_generator/ +pretty = True + +python_version = 3.10 + +# Be strict: +strict = True +strict_concatenate = True +enable_error_code = ignore-without-code,redundant-expr,truthy-bool + +# Don't enable this one yet; +# it has a lot of false positives on `cases_generator` +warn_unreachable = False diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index cdd20d7..25de3a5 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -32,7 +32,7 @@ class Context(NamedTuple): end: int owner: PLexer - def __repr__(self): + def __repr__(self) -> str: return f"<{self.owner.filename}: {self.begin}-{self.end}>" @@ -75,7 +75,7 @@ class StackEffect(Node): size: str = "" # Optional `[size]` # Note: size cannot be combined with type or cond - def __repr__(self): + def __repr__(self) -> str: items = [self.name, self.type, self.cond, self.size] while items and items[-1] == "": del items[-1] diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 632298a..1e117f1 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -413,22 +413,22 @@ def write_components( return next_instr_is_set -def write_single_instr_for_abstract_interp( - instr: Instruction, out: Formatter -): +def write_single_instr_for_abstract_interp(instr: Instruction, out: Formatter) -> None: try: _write_components_for_abstract_interp( [Component(instr, instr.active_caches)], out, ) except AssertionError as err: - raise AssertionError(f"Error writing abstract instruction {instr.name}") from err + raise AssertionError( + f"Error writing abstract instruction {instr.name}" + ) from err def _write_components_for_abstract_interp( parts: list[Component], out: Formatter, -): +) -> None: managers = get_managers(parts) for mgr in managers: if mgr is managers[-1]: @@ -438,5 +438,7 @@ def _write_components_for_abstract_interp( # NULL out the output stack effects for poke in mgr.pokes: if not poke.effect.size and poke.effect.name not in mgr.instr.unmoved_names: - out.emit(f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)" - f"PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);") + out.emit( + f"PARTITIONNODE_OVERWRITE((_Py_PARTITIONNODE_t *)" + f"PARTITIONNODE_NULLROOT, PEEK(-({poke.offset.as_index()})), true);" + ) diff --git a/Tools/clinic/requirements-dev.txt b/Tools/clinic/requirements-dev.txt deleted file mode 100644 index e9529f3..0000000 --- a/Tools/clinic/requirements-dev.txt +++ /dev/null @@ -1,2 +0,0 @@ -# Requirements file for external linters and checks we run on Tools/clinic/ in CI -mypy==1.4.1 diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt new file mode 100644 index 0000000..111773f --- /dev/null +++ b/Tools/requirements-dev.txt @@ -0,0 +1,3 @@ +# Requirements file for external linters and checks we run on +# Tools/clinic and Tools/cases_generator/ in CI +mypy==1.5.1 |