From c7e5bbaee88a71dc6e633e3cd451ed1798436382 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Mon, 1 Aug 2022 11:02:56 -0700 Subject: GH-95150: Use position and exception tables for code hashing and equality (GH-95509) --- Lib/test/test_code.py | 21 +++++++++++++++++ Lib/test/test_compile.py | 27 +++++++++++++++++++--- Lib/test/test_syntax.py | 3 ++- .../2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst | 3 +++ Objects/codeobject.c | 19 ++++++++++++++- 5 files changed, 68 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index fd68f6d..2386cf6 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -428,6 +428,27 @@ class CodeTest(unittest.TestCase): self.assertIsNone(line) self.assertEqual(end_line, new_code.co_firstlineno + 1) + def test_code_equality(self): + def f(): + try: + a() + except: + b() + else: + c() + finally: + d() + code_a = f.__code__ + code_b = code_a.replace(co_linetable=b"") + code_c = code_a.replace(co_exceptiontable=b"") + code_d = code_b.replace(co_exceptiontable=b"") + self.assertNotEqual(code_a, code_b) + self.assertNotEqual(code_a, code_c) + self.assertNotEqual(code_a, code_d) + self.assertNotEqual(code_b, code_c) + self.assertNotEqual(code_b, code_d) + self.assertNotEqual(code_c, code_d) + def isinterned(s): return s is sys.intern(('_' + s + '_')[1:-1]) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index e619446..c64e4e5 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -615,7 +615,7 @@ if 1: exec(code, ns) f1 = ns['f1'] f2 = ns['f2'] - self.assertIs(f1.__code__, f2.__code__) + self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, const) self.assertEqual(repr(f1()), repr(const)) @@ -628,7 +628,7 @@ if 1: # Note: "lambda: ..." emits "LOAD_CONST Ellipsis", # whereas "lambda: Ellipsis" emits "LOAD_GLOBAL Ellipsis" f1, f2 = lambda: ..., lambda: ... - self.assertIs(f1.__code__, f2.__code__) + self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, Ellipsis) self.assertEqual(repr(f1()), repr(Ellipsis)) @@ -643,7 +643,7 @@ if 1: # {0} is converted to a constant frozenset({0}) by the peephole # optimizer f1, f2 = lambda x: x in {0}, lambda x: x in {0} - self.assertIs(f1.__code__, f2.__code__) + self.assertIs(f1.__code__.co_consts, f2.__code__.co_consts) self.check_constant(f1, frozenset({0})) self.assertTrue(f1(0)) @@ -1302,6 +1302,27 @@ f( self.assertIsNotNone(end_column) self.assertLessEqual((line, column), (end_line, end_column)) + @support.cpython_only + def test_column_offset_deduplication(self): + # GH-95150: Code with different column offsets shouldn't be merged! + for source in [ + "lambda: a", + "(a for b in c)", + "[a for b in c]", + "{a for b in c}", + "{a: b for c in d}", + ]: + with self.subTest(source): + code = compile(f"{source}, {source}", "", "eval") + self.assertEqual(len(code.co_consts), 2) + self.assertIsInstance(code.co_consts[0], types.CodeType) + self.assertIsInstance(code.co_consts[1], types.CodeType) + self.assertNotEqual(code.co_consts[0], code.co_consts[1]) + self.assertNotEqual( + list(code.co_consts[0].co_positions()), + list(code.co_consts[1].co_positions()), + ) + class TestExpressionStackSize(unittest.TestCase): # These tests check that the computed stack size for a code object diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index b22a96b..ae10669 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -2012,7 +2012,8 @@ def fib(n): a, b = 0, 1 """ try: - self.assertEqual(compile(s1, '', 'exec'), compile(s2, '', 'exec')) + compile(s1, '', 'exec') + compile(s2, '', 'exec') except SyntaxError: self.fail("Indented statement over multiple lines is valid") diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst new file mode 100644 index 0000000..c3db471 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-13-23-12.gh-issue-95150.67FXVo.rst @@ -0,0 +1,3 @@ +Update code object hashing and equality to consider all debugging and +exception handling tables. This fixes an issue where certain non-identical +code objects could be "deduplicated" during compilation. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 2f757c4..7ebbfdb 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1695,6 +1695,15 @@ code_richcompare(PyObject *self, PyObject *other, int op) eq = PyObject_RichCompareBool(co->co_localsplusnames, cp->co_localsplusnames, Py_EQ); if (eq <= 0) goto unequal; + eq = PyObject_RichCompareBool(co->co_linetable, cp->co_linetable, Py_EQ); + if (eq <= 0) { + goto unequal; + } + eq = PyObject_RichCompareBool(co->co_exceptiontable, + cp->co_exceptiontable, Py_EQ); + if (eq <= 0) { + goto unequal; + } if (op == Py_EQ) res = Py_True; @@ -1727,7 +1736,15 @@ code_hash(PyCodeObject *co) if (h2 == -1) return -1; h3 = PyObject_Hash(co->co_localsplusnames); if (h3 == -1) return -1; - h = h0 ^ h1 ^ h2 ^ h3 ^ + Py_hash_t h4 = PyObject_Hash(co->co_linetable); + if (h4 == -1) { + return -1; + } + Py_hash_t h5 = PyObject_Hash(co->co_exceptiontable); + if (h5 == -1) { + return -1; + } + h = h0 ^ h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ co->co_argcount ^ co->co_posonlyargcount ^ co->co_kwonlyargcount ^ co->co_flags; if (h == -1) h = -2; -- cgit v0.12