From 88941d665fc6b345f38b9147a7321e40019964d5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 24 Aug 2023 12:10:51 -0700 Subject: gh-106581: Fix two bugs in the code generator's copy optimization (#108380) I was comparing the last preceding poke with the *last* peek, rather than the *first* peek. Unfortunately this bug obscured another bug: When the last preceding poke is UNUSED, the first peek disappears, leaving the variable unassigned. This is how I fixed it: - Rename CopyEffect to CopyItem. - Change CopyItem to contain StackItems instead of StackEffects. - Update those StackItems when adjusting the manager higher or lower. - Assert that those StackItems' offsets are equivalent. - Other clever things. --------- Co-authored-by: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> --- Python/generated_cases.c.h | 3 -- Tools/cases_generator/stacking.py | 79 ++++++++++++++++++++++++++++++--------- 2 files changed, 62 insertions(+), 20 deletions(-) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f6322df..1724d11 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3833,7 +3833,6 @@ DEOPT_IF(code->co_argcount != oparg + (self_or_null != NULL), CALL); } // _CHECK_STACK_SPACE - callable = stack_pointer[-2 - oparg]; { PyFunctionObject *func = (PyFunctionObject *)callable; PyCodeObject *code = (PyCodeObject *)func->func_code; @@ -3841,8 +3840,6 @@ } // _INIT_CALL_PY_EXACT_ARGS args = stack_pointer - oparg; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; { int argcount = oparg; if (self_or_null != NULL) { diff --git a/Tools/cases_generator/stacking.py b/Tools/cases_generator/stacking.py index 1e117f1..2d006ba 100644 --- a/Tools/cases_generator/stacking.py +++ b/Tools/cases_generator/stacking.py @@ -83,6 +83,27 @@ class StackOffset: terms = self.as_terms() return make_index(terms) + def equivalent_to(self, other: "StackOffset") -> bool: + if self.deep == other.deep and self.high == other.high: + return True + deep = list(self.deep) + for x in other.deep: + try: + deep.remove(x) + except ValueError: + return False + if deep: + return False + high = list(self.high) + for x in other.high: + try: + high.remove(x) + except ValueError: + return False + if high: + return False + return True + def make_index(terms: list[tuple[str, str]]) -> str: # Produce an index expression from the terms honoring PEP 8, @@ -131,9 +152,9 @@ class StackItem: @dataclasses.dataclass -class CopyEffect: - src: StackEffect - dst: StackEffect +class CopyItem: + src: StackItem + dst: StackItem class EffectManager: @@ -143,7 +164,7 @@ class EffectManager: active_caches: list[ActiveCacheEffect] peeks: list[StackItem] pokes: list[StackItem] - copies: list[CopyEffect] # See merge() + copies: list[CopyItem] # See merge() # Track offsets from stack pointer min_offset: StackOffset final_offset: StackOffset @@ -179,16 +200,18 @@ class EffectManager: while ( pred.pokes and self.peeks - and pred.pokes[-1].effect == self.peeks[-1].effect + and pred.pokes[-1].effect == self.peeks[0].effect ): - src = pred.pokes.pop(-1).effect - dst = self.peeks.pop(0).effect - pred.final_offset.deeper(src) - if dst.name != UNUSED: - destinations.add(dst.name) - if dst.name != src.name: - sources.add(src.name) - self.copies.append(CopyEffect(src, dst)) + src = pred.pokes.pop(-1) + dst = self.peeks.pop(0) + assert src.offset.equivalent_to(dst.offset), (src, dst) + pred.final_offset.deeper(src.effect) + if dst.effect.name != src.effect.name: + if dst.effect.name != UNUSED: + destinations.add(dst.effect.name) + if src.effect.name != UNUSED: + sources.add(src.effect.name) + self.copies.append(CopyItem(src, dst)) # TODO: Turn this into an error (pass an Analyzer instance?) assert sources & destinations == set(), ( pred.instr.name, @@ -202,11 +225,27 @@ class EffectManager: else: pred = None # Break + # Fix up patterns of copies through UNUSED, + # e.g. cp(a, UNUSED) + cp(UNUSED, b) -> cp(a, b). + if any(copy.src.effect.name == UNUSED for copy in self.copies): + pred = self.pred + while pred is not None: + for copy in self.copies: + if copy.src.effect.name == UNUSED: + for pred_copy in pred.copies: + if pred_copy.dst == copy.src: + copy.src = pred_copy.src + break + pred = pred.pred + def adjust_deeper(self, eff: StackEffect) -> None: for peek in self.peeks: peek.offset.deeper(eff) for poke in self.pokes: poke.offset.deeper(eff) + for copy in self.copies: + copy.src.offset.deeper(eff) + copy.dst.offset.deeper(eff) self.min_offset.deeper(eff) self.final_offset.deeper(eff) @@ -215,6 +254,9 @@ class EffectManager: peek.offset.higher(eff) for poke in self.pokes: poke.offset.higher(eff) + for copy in self.copies: + copy.src.offset.higher(eff) + copy.dst.offset.higher(eff) self.min_offset.higher(eff) self.final_offset.higher(eff) @@ -248,8 +290,8 @@ class EffectManager: vars[eff.name] = eff for copy in self.copies: - add(copy.src) - add(copy.dst) + add(copy.src.effect) + add(copy.dst.effect) for peek in self.peeks: add(peek.effect) for poke in self.pokes: @@ -365,8 +407,11 @@ def write_components( out.emit(f"// {mgr.instr.name}") for copy in mgr.copies: - if copy.src.name != copy.dst.name: - out.assign(copy.dst, copy.src) + copy_src_effect = copy.src.effect + if copy_src_effect.name != copy.dst.effect.name: + if copy_src_effect.name == UNUSED: + copy_src_effect = copy.src.as_stack_effect() + out.assign(copy.dst.effect, copy_src_effect) for peek in mgr.peeks: out.assign( peek.effect, -- cgit v0.12