From ad4f909e0e7890e027c4ae7fea74586667242ad3 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 15 Feb 2024 08:37:54 -0500 Subject: gh-115432: Add critical section variant that handles a NULL object (#115433) This adds `Py_XBEGIN_CRITICAL_SECTION` and `Py_XEND_CRITICAL_SECTION`, which accept a possibly NULL object as an argument. If the argument is NULL, then nothing is locked or unlocked. Otherwise, they behave like `Py_BEGIN/END_CRITICAL_SECTION`. --- Include/internal/pycore_critical_section.h | 29 ++++++++++++++++++++++ Modules/_testinternalcapi/test_critical_sections.c | 9 +++++++ 2 files changed, 38 insertions(+) diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 38ed8cd..30820a2 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -96,6 +96,15 @@ extern "C" { _PyCriticalSection_End(&_cs); \ } +# define Py_XBEGIN_CRITICAL_SECTION(op) \ + { \ + _PyCriticalSection _cs_opt = {0}; \ + _PyCriticalSection_XBegin(&_cs_opt, _PyObject_CAST(op)) + +# define Py_XEND_CRITICAL_SECTION() \ + _PyCriticalSection_XEnd(&_cs_opt); \ + } + # define Py_BEGIN_CRITICAL_SECTION2(a, b) \ { \ _PyCriticalSection2 _cs2; \ @@ -131,6 +140,8 @@ extern "C" { // The critical section APIs are no-ops with the GIL. # define Py_BEGIN_CRITICAL_SECTION(op) # define Py_END_CRITICAL_SECTION() +# define Py_XBEGIN_CRITICAL_SECTION(op) +# define Py_XEND_CRITICAL_SECTION() # define Py_BEGIN_CRITICAL_SECTION2(a, b) # define Py_END_CRITICAL_SECTION2() # define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) @@ -187,6 +198,16 @@ _PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m) } } +static inline void +_PyCriticalSection_XBegin(_PyCriticalSection *c, PyObject *op) +{ +#ifdef Py_GIL_DISABLED + if (op != NULL) { + _PyCriticalSection_Begin(c, &_PyObject_CAST(op)->ob_mutex); + } +#endif +} + // Removes the top-most critical section from the thread's stack of critical // sections. If the new top-most critical section is inactive, then it is // resumed. @@ -210,6 +231,14 @@ _PyCriticalSection_End(_PyCriticalSection *c) } static inline void +_PyCriticalSection_XEnd(_PyCriticalSection *c) +{ + if (c->mutex) { + _PyCriticalSection_End(c); + } +} + +static inline void _PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) { if (m1 == m2) { diff --git a/Modules/_testinternalcapi/test_critical_sections.c b/Modules/_testinternalcapi/test_critical_sections.c index 1f7e311..94da046 100644 --- a/Modules/_testinternalcapi/test_critical_sections.c +++ b/Modules/_testinternalcapi/test_critical_sections.c @@ -49,6 +49,15 @@ test_critical_sections(PyObject *self, PyObject *Py_UNUSED(args)) Py_END_CRITICAL_SECTION2(); assert_nogil(!PyMutex_IsLocked(&d2->ob_mutex)); + // Optional variant behaves the same if the object is non-NULL + Py_XBEGIN_CRITICAL_SECTION(d1); + assert_nogil(PyMutex_IsLocked(&d1->ob_mutex)); + Py_XEND_CRITICAL_SECTION(); + + // No-op + Py_XBEGIN_CRITICAL_SECTION(NULL); + Py_XEND_CRITICAL_SECTION(); + Py_DECREF(d2); Py_DECREF(d1); Py_RETURN_NONE; -- cgit v0.12 From 732faf17a618d65d264ffe775fa6db4508e3d9ff Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 15 Feb 2024 14:20:19 +0000 Subject: gh-115347: avoid emitting redundant NOP for the docstring with -OO (#115494) --- Lib/test/test_compile.py | 26 +++++++++++++++ .../2024-02-14-23-50-43.gh-issue-115347.VkHvQC.rst | 2 ++ Python/compile.c | 38 ++++++++++++---------- 3 files changed, 48 insertions(+), 18 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-02-14-23-50-43.gh-issue-115347.VkHvQC.rst diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index ebb479f..4d8647b 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -1,4 +1,6 @@ +import contextlib import dis +import io import math import os import unittest @@ -812,6 +814,30 @@ class TestSpecifics(unittest.TestCase): 'RETURN_CONST', list(dis.get_instructions(unused_code_at_end))[-1].opname) + @support.cpython_only + def test_docstring_omitted(self): + # See gh-115347 + src = textwrap.dedent(""" + def f(): + "docstring1" + def h(): + "docstring2" + return 42 + + class C: + "docstring3" + pass + + return h + """) + for opt in [-1, 0, 1, 2]: + with self.subTest(opt=opt): + code = compile(src, "", "exec", optimize=opt) + output = io.StringIO() + with contextlib.redirect_stdout(output): + dis.dis(code) + self.assertNotIn('NOP' , output.getvalue()) + def test_dont_merge_constants(self): # Issue #25843: compile() must not merge constants which are equal # but have a different type. diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-02-14-23-50-43.gh-issue-115347.VkHvQC.rst b/Misc/NEWS.d/next/Core and Builtins/2024-02-14-23-50-43.gh-issue-115347.VkHvQC.rst new file mode 100644 index 0000000..16f357a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-02-14-23-50-43.gh-issue-115347.VkHvQC.rst @@ -0,0 +1,2 @@ +Fix bug where docstring was replaced by a redundant NOP when Python is run +with ``-OO``. diff --git a/Python/compile.c b/Python/compile.c index 15e5cf3..f95e3cd 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1692,16 +1692,13 @@ compiler_unwind_fblock_stack(struct compiler *c, location *ploc, static int compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts) { - int i = 0; - stmt_ty st; - PyObject *docstring; /* Set current line number to the line number of first statement. This way line number for SETUP_ANNOTATIONS will always coincide with the line number of first "real" statement in module. If body is empty, then lineno will be set later in optimize_and_assemble. */ if (c->u->u_scope_type == COMPILER_SCOPE_MODULE && asdl_seq_LEN(stmts)) { - st = (stmt_ty)asdl_seq_GET(stmts, 0); + stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); loc = LOC(st); } /* Every annotated class and module should have __annotations__. */ @@ -1711,16 +1708,17 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts) if (!asdl_seq_LEN(stmts)) { return SUCCESS; } - /* if not -OO mode, set docstring */ - if (c->c_optimize < 2) { - docstring = _PyAST_GetDocString(stmts); - if (docstring) { + Py_ssize_t first_instr = 0; + PyObject *docstring = _PyAST_GetDocString(stmts); + if (docstring) { + first_instr = 1; + /* if not -OO mode, set docstring */ + if (c->c_optimize < 2) { PyObject *cleandoc = _PyCompile_CleanDoc(docstring); if (cleandoc == NULL) { return ERROR; } - i = 1; - st = (stmt_ty)asdl_seq_GET(stmts, 0); + stmt_ty st = (stmt_ty)asdl_seq_GET(stmts, 0); assert(st->kind == Expr_kind); location loc = LOC(st->v.Expr.value); ADDOP_LOAD_CONST(c, loc, cleandoc); @@ -1728,7 +1726,7 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts) RETURN_IF_ERROR(compiler_nameop(c, NO_LOCATION, &_Py_ID(__doc__), Store)); } } - for (; i < asdl_seq_LEN(stmts); i++) { + for (Py_ssize_t i = first_instr; i < asdl_seq_LEN(stmts); i++) { VISIT(c, stmt, (stmt_ty)asdl_seq_GET(stmts, i)); } return SUCCESS; @@ -2239,7 +2237,6 @@ static int compiler_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t funcflags, int firstlineno) { - PyObject *docstring = NULL; arguments_ty args; identifier name; asdl_stmt_seq *body; @@ -2266,28 +2263,33 @@ compiler_function_body(struct compiler *c, stmt_ty s, int is_async, Py_ssize_t f RETURN_IF_ERROR( compiler_enter_scope(c, name, scope_type, (void *)s, firstlineno)); - /* if not -OO mode, add docstring */ - if (c->c_optimize < 2) { - docstring = _PyAST_GetDocString(body); - if (docstring) { + Py_ssize_t first_instr = 0; + PyObject *docstring = _PyAST_GetDocString(body); + if (docstring) { + first_instr = 1; + /* if not -OO mode, add docstring */ + if (c->c_optimize < 2) { docstring = _PyCompile_CleanDoc(docstring); if (docstring == NULL) { compiler_exit_scope(c); return ERROR; } } + else { + docstring = NULL; + } } if (compiler_add_const(c->c_const_cache, c->u, docstring ? docstring : Py_None) < 0) { Py_XDECREF(docstring); compiler_exit_scope(c); return ERROR; } - Py_XDECREF(docstring); + Py_CLEAR(docstring); c->u->u_metadata.u_argcount = asdl_seq_LEN(args->args); c->u->u_metadata.u_posonlyargcount = asdl_seq_LEN(args->posonlyargs); c->u->u_metadata.u_kwonlyargcount = asdl_seq_LEN(args->kwonlyargs); - for (Py_ssize_t i = docstring ? 1 : 0; i < asdl_seq_LEN(body); i++) { + for (Py_ssize_t i = first_instr; i < asdl_seq_LEN(body); i++) { VISIT_IN_SCOPE(c, stmt, (stmt_ty)asdl_seq_GET(body, i)); } if (c->u->u_ste->ste_coroutine || c->u->u_ste->ste_generator) { -- cgit v0.12 From 94f1334e52fbc1550feba4f433b16589d55255b9 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 15 Feb 2024 15:29:42 +0100 Subject: gh-115124: Use _PyObject_ASSERT() in gc.c (#115125) Replace assert() with _PyObject_ASSERT() in gc.c to dump the object when an assertion fails. --- Python/gc.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 4664676..c6831f4 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -394,16 +394,17 @@ update_refs(PyGC_Head *containers) while (gc != containers) { next = GC_NEXT(gc); + PyObject *op = FROM_GC(gc); /* Move any object that might have become immortal to the * permanent generation as the reference count is not accurately * reflecting the actual number of live references to this object */ - if (_Py_IsImmortal(FROM_GC(gc))) { + if (_Py_IsImmortal(op)) { gc_list_move(gc, &get_gc_state()->permanent_generation.head); gc = next; continue; } - gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc))); + gc_reset_refs(gc, Py_REFCNT(op)); /* Python's cyclic gc should never see an incoming refcount * of 0: if something decref'ed to 0, it should have been * deallocated immediately at that time. @@ -422,7 +423,7 @@ update_refs(PyGC_Head *containers) * so serious that maybe this should be a release-build * check instead of an assert? */ - _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0); + _PyObject_ASSERT(op, gc_get_refs(gc) != 0); gc = next; } } @@ -488,7 +489,7 @@ visit_reachable(PyObject *op, void *arg) } // It would be a logic error elsewhere if the collecting flag were set on // an untracked object. - assert(gc->_gc_next != 0); + _PyObject_ASSERT(op, gc->_gc_next != 0); if (gc->_gc_next & NEXT_MASK_UNREACHABLE) { /* This had gc_refs = 0 when move_unreachable got @@ -660,7 +661,9 @@ static void move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) { PyGC_Head *gc, *next; - assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); + _PyObject_ASSERT( + FROM_GC(unreachable), + (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); /* March over unreachable. Move objects with finalizers into * `finalizers`. @@ -683,10 +686,14 @@ static inline void clear_unreachable_mask(PyGC_Head *unreachable) { /* Check that the list head does not have the unreachable bit set */ - assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0); + _PyObject_ASSERT( + FROM_GC(unreachable), + ((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0); + _PyObject_ASSERT( + FROM_GC(unreachable), + (unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); PyGC_Head *gc, *next; - assert((unreachable->_gc_next & NEXT_MASK_UNREACHABLE) == 0); for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE); gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; @@ -840,7 +847,7 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) */ if (gc_is_collecting(AS_GC((PyObject *)wr))) { /* it should already have been cleared above */ - assert(wr->wr_object == Py_None); + _PyObject_ASSERT((PyObject*)wr, wr->wr_object == Py_None); continue; } @@ -851,9 +858,8 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) /* Move wr to wrcb_to_call, for the next pass. */ wrasgc = AS_GC((PyObject *)wr); - assert(wrasgc != next); /* wrasgc is reachable, but - next isn't, so they can't - be the same */ + // wrasgc is reachable, but next isn't, so they can't be the same + _PyObject_ASSERT((PyObject *)wr, wrasgc != next); gc_list_move(wrasgc, &wrcb_to_call); } } @@ -1773,13 +1779,14 @@ _Py_ScheduleGC(PyInterpreterState *interp) void _PyObject_GC_Link(PyObject *op) { - PyGC_Head *g = AS_GC(op); - assert(((uintptr_t)g & (sizeof(uintptr_t)-1)) == 0); // g must be correctly aligned + PyGC_Head *gc = AS_GC(op); + // gc must be correctly aligned + _PyObject_ASSERT(op, ((uintptr_t)gc & (sizeof(uintptr_t)-1)) == 0); PyThreadState *tstate = _PyThreadState_GET(); GCState *gcstate = &tstate->interp->gc; - g->_gc_next = 0; - g->_gc_prev = 0; + gc->_gc_next = 0; + gc->_gc_prev = 0; gcstate->generations[0].count++; /* number of allocated GC objects */ if (gcstate->generations[0].count > gcstate->generations[0].threshold && gcstate->enabled && -- cgit v0.12 From 3a9e67a9fdb4fad13bf42df6eb91126ab2ef45a1 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 15 Feb 2024 14:32:21 +0000 Subject: gh-115376: fix segfault in _testinternalcapi.compiler_codegen on bad input (#115379) --- Lib/test/test_compiler_codegen.py | 7 +++- .../2024-02-12-22-35-01.gh-issue-115376.n9vubZ.rst | 1 + Python/compile.c | 42 ++++++++++++++-------- 3 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-02-12-22-35-01.gh-issue-115376.n9vubZ.rst diff --git a/Lib/test/test_compiler_codegen.py b/Lib/test/test_compiler_codegen.py index dbeadd9..166294a 100644 --- a/Lib/test/test_compiler_codegen.py +++ b/Lib/test/test_compiler_codegen.py @@ -8,7 +8,7 @@ class IsolatedCodeGenTests(CodegenTestCase): def codegen_test(self, snippet, expected_insts): import ast - a = ast.parse(snippet, "my_file.py", "exec"); + a = ast.parse(snippet, "my_file.py", "exec") insts = self.generate_code(a) self.assertInstructionsMatch(insts, expected_insts) @@ -54,3 +54,8 @@ class IsolatedCodeGenTests(CodegenTestCase): ('RETURN_VALUE', None), ] self.codegen_test(snippet, expected) + + def test_syntax_error__return_not_in_function(self): + snippet = "return 42" + with self.assertRaisesRegex(SyntaxError, "'return' outside function"): + self.codegen_test(snippet, None) diff --git a/Misc/NEWS.d/next/Tests/2024-02-12-22-35-01.gh-issue-115376.n9vubZ.rst b/Misc/NEWS.d/next/Tests/2024-02-12-22-35-01.gh-issue-115376.n9vubZ.rst new file mode 100644 index 0000000..e09d78a --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-02-12-22-35-01.gh-issue-115376.n9vubZ.rst @@ -0,0 +1 @@ +Fix segfault in ``_testinternalcapi.compiler_codegen`` on bad input. diff --git a/Python/compile.c b/Python/compile.c index f95e3cd..d857239 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1735,16 +1735,10 @@ compiler_body(struct compiler *c, location loc, asdl_stmt_seq *stmts) static int compiler_codegen(struct compiler *c, mod_ty mod) { - _Py_DECLARE_STR(anon_module, ""); - RETURN_IF_ERROR( - compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE, - mod, 1)); - location loc = LOCATION(1, 1, 0, 0); switch (mod->kind) { case Module_kind: if (compiler_body(c, loc, mod->v.Module.body) < 0) { - compiler_exit_scope(c); return ERROR; } break; @@ -1753,10 +1747,10 @@ compiler_codegen(struct compiler *c, mod_ty mod) ADDOP(c, loc, SETUP_ANNOTATIONS); } c->c_interactive = 1; - VISIT_SEQ_IN_SCOPE(c, stmt, mod->v.Interactive.body); + VISIT_SEQ(c, stmt, mod->v.Interactive.body); break; case Expression_kind: - VISIT_IN_SCOPE(c, expr, mod->v.Expression.body); + VISIT(c, expr, mod->v.Expression.body); break; default: PyErr_Format(PyExc_SystemError, @@ -1767,14 +1761,29 @@ compiler_codegen(struct compiler *c, mod_ty mod) return SUCCESS; } +static int +compiler_enter_anonymous_scope(struct compiler* c, mod_ty mod) +{ + _Py_DECLARE_STR(anon_module, ""); + RETURN_IF_ERROR( + compiler_enter_scope(c, &_Py_STR(anon_module), COMPILER_SCOPE_MODULE, + mod, 1)); + return SUCCESS; +} + static PyCodeObject * compiler_mod(struct compiler *c, mod_ty mod) { + PyCodeObject *co = NULL; int addNone = mod->kind != Expression_kind; - if (compiler_codegen(c, mod) < 0) { + if (compiler_enter_anonymous_scope(c, mod) < 0) { return NULL; } - PyCodeObject *co = optimize_and_assemble(c, addNone); + if (compiler_codegen(c, mod) < 0) { + goto finally; + } + co = optimize_and_assemble(c, addNone); +finally: compiler_exit_scope(c); return co; } @@ -7920,15 +7929,20 @@ _PyCompile_CodeGen(PyObject *ast, PyObject *filename, PyCompilerFlags *pflags, return NULL; } + metadata = PyDict_New(); + if (metadata == NULL) { + return NULL; + } + + if (compiler_enter_anonymous_scope(c, mod) < 0) { + return NULL; + } if (compiler_codegen(c, mod) < 0) { goto finally; } _PyCompile_CodeUnitMetadata *umd = &c->u->u_metadata; - metadata = PyDict_New(); - if (metadata == NULL) { - goto finally; - } + #define SET_MATADATA_ITEM(key, value) \ if (value != NULL) { \ if (PyDict_SetItemString(metadata, key, value) < 0) goto finally; \ -- cgit v0.12 From f42e112fd86edb5507a38a2eb850d0ebc6bc27a2 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 15 Feb 2024 14:32:52 +0000 Subject: gh-115420: Fix translation of exception hander targets by _testinternalcapi.optimize_cfg. (#115425) --- Lib/test/test_peepholer.py | 16 ++++++++++++++++ .../Tests/2024-02-13-18-24-04.gh-issue-115420.-dlzfI.rst | 2 ++ Python/flowgraph.c | 2 +- 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-02-13-18-24-04.gh-issue-115420.-dlzfI.rst diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 2ea186c..dffedd0 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -1065,6 +1065,22 @@ class DirectCfgOptimizerTests(CfgOptimizationTestCase): ] self.cfg_optimization_test(insts, expected_insts, consts=list(range(5))) + def test_except_handler_label(self): + insts = [ + ('SETUP_FINALLY', handler := self.Label(), 10), + ('POP_BLOCK', 0, -1), + ('RETURN_CONST', 1, 11), + handler, + ('RETURN_CONST', 2, 12), + ] + expected_insts = [ + ('SETUP_FINALLY', handler := self.Label(), 10), + ('RETURN_CONST', 1, 11), + handler, + ('RETURN_CONST', 2, 12), + ] + self.cfg_optimization_test(insts, expected_insts, consts=list(range(5))) + def test_no_unsafe_static_swap(self): # We can't change order of two stores to the same location insts = [ diff --git a/Misc/NEWS.d/next/Tests/2024-02-13-18-24-04.gh-issue-115420.-dlzfI.rst b/Misc/NEWS.d/next/Tests/2024-02-13-18-24-04.gh-issue-115420.-dlzfI.rst new file mode 100644 index 0000000..1442ada --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-02-13-18-24-04.gh-issue-115420.-dlzfI.rst @@ -0,0 +1,2 @@ +Fix translation of exception hander targets by +``_testinternalcapi.optimize_cfg``. diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 1a648ed..4d9ba9e 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2729,7 +2729,7 @@ _PyCfg_ToInstructionSequence(cfg_builder *g, _PyCompile_InstructionSequence *seq RETURN_IF_ERROR(_PyCompile_InstructionSequence_UseLabel(seq, b->b_label.id)); for (int i = 0; i < b->b_iused; i++) { cfg_instr *instr = &b->b_instr[i]; - if (OPCODE_HAS_JUMP(instr->i_opcode)) { + if (OPCODE_HAS_JUMP(instr->i_opcode) || is_block_push(instr)) { instr->i_oparg = instr->i_target->b_label.id; } RETURN_IF_ERROR( -- cgit v0.12 From 298bcdc185d1a9709271e61a4cc529d33483add4 Mon Sep 17 00:00:00 2001 From: monkeyman192 Date: Fri, 16 Feb 2024 01:40:20 +1100 Subject: gh-112433: Add optional _align_ attribute to ctypes.Structure (GH-113790) --- Doc/library/ctypes.rst | 10 + .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_runtime_init_generated.h | 1 + Include/internal/pycore_unicodeobject_generated.h | 3 + Lib/test/test_ctypes/test_aligned_structures.py | 286 +++++++++++++++++++++ .../2024-01-28-02-46-12.gh-issue-112433.FUX-nT.rst | 1 + Modules/_ctypes/stgdict.c | 26 +- 8 files changed, 328 insertions(+), 1 deletion(-) create mode 100644 Lib/test/test_ctypes/test_aligned_structures.py create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-28-02-46-12.gh-issue-112433.FUX-nT.rst diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index ef3a9a0..7377954 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -670,6 +670,10 @@ compiler does it. It is possible to override this behavior by specifying a :attr:`~Structure._pack_` class attribute in the subclass definition. This must be set to a positive integer and specifies the maximum alignment for the fields. This is what ``#pragma pack(n)`` also does in MSVC. +It is also possible to set a minimum alignment for how the subclass itself is packed in the +same way ``#pragma align(n)`` works in MSVC. +This can be achieved by specifying a ::attr:`~Structure._align_` class attribute +in the subclass definition. :mod:`ctypes` uses the native byte order for Structures and Unions. To build structures with non-native byte order, you can use one of the @@ -2534,6 +2538,12 @@ fields, or any other data types containing pointer type fields. Setting this attribute to 0 is the same as not setting it at all. + .. attribute:: _align_ + + An optional small integer that allows overriding the alignment of + the structure when being packed or unpacked to/from memory. + Setting this attribute to 0 is the same as not setting it at all. + .. attribute:: _anonymous_ An optional sequence that lists the names of unnamed (anonymous) fields. diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 1175521..3253b52 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -742,6 +742,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_abc_impl)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_abstract_)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_active)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_align_)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_annotation)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_anonymous_)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_argtypes_)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 576ac70..8780f7e 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -231,6 +231,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(_abc_impl) STRUCT_FOR_ID(_abstract_) STRUCT_FOR_ID(_active) + STRUCT_FOR_ID(_align_) STRUCT_FOR_ID(_annotation) STRUCT_FOR_ID(_anonymous_) STRUCT_FOR_ID(_argtypes_) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index e682c97..a9d5148 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -740,6 +740,7 @@ extern "C" { INIT_ID(_abc_impl), \ INIT_ID(_abstract_), \ INIT_ID(_active), \ + INIT_ID(_align_), \ INIT_ID(_annotation), \ INIT_ID(_anonymous_), \ INIT_ID(_argtypes_), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 739af0e..f3b064e 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -534,6 +534,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(_active); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(_align_); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(_annotation); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/test/test_ctypes/test_aligned_structures.py b/Lib/test/test_ctypes/test_aligned_structures.py new file mode 100644 index 0000000..a208fb9 --- /dev/null +++ b/Lib/test/test_ctypes/test_aligned_structures.py @@ -0,0 +1,286 @@ +from ctypes import ( + c_char, c_uint32, c_uint16, c_ubyte, c_byte, alignment, sizeof, + BigEndianStructure, LittleEndianStructure, + BigEndianUnion, LittleEndianUnion, +) +import struct +import unittest + + +class TestAlignedStructures(unittest.TestCase): + def test_aligned_string(self): + for base, e in ( + (LittleEndianStructure, "<"), + (BigEndianStructure, ">"), + ): + data = bytearray(struct.pack(f"{e}i12x16s", 7, b"hello world!")) + class Aligned(base): + _align_ = 16 + _fields_ = [ + ('value', c_char * 12) + ] + + class Main(base): + _fields_ = [ + ('first', c_uint32), + ('string', Aligned), + ] + + main = Main.from_buffer(data) + self.assertEqual(main.first, 7) + self.assertEqual(main.string.value, b'hello world!') + self.assertEqual(bytes(main.string), b'hello world!\0\0\0\0') + self.assertEqual(Main.string.offset, 16) + self.assertEqual(Main.string.size, 16) + self.assertEqual(alignment(main.string), 16) + self.assertEqual(alignment(main), 16) + + def test_aligned_structures(self): + for base, data in ( + (LittleEndianStructure, bytearray(b"\1\0\0\0\1\0\0\0\7\0\0\0")), + (BigEndianStructure, bytearray(b"\1\0\0\0\1\0\0\0\7\0\0\0")), + ): + class SomeBools(base): + _align_ = 4 + _fields_ = [ + ("bool1", c_ubyte), + ("bool2", c_ubyte), + ] + class Main(base): + _fields_ = [ + ("x", c_ubyte), + ("y", SomeBools), + ("z", c_ubyte), + ] + + main = Main.from_buffer(data) + self.assertEqual(alignment(SomeBools), 4) + self.assertEqual(alignment(main), 4) + self.assertEqual(alignment(main.y), 4) + self.assertEqual(Main.x.size, 1) + self.assertEqual(Main.y.offset, 4) + self.assertEqual(Main.y.size, 4) + self.assertEqual(main.y.bool1, True) + self.assertEqual(main.y.bool2, False) + self.assertEqual(Main.z.offset, 8) + self.assertEqual(main.z, 7) + + def test_oversized_structure(self): + data = bytearray(b"\0" * 8) + for base in (LittleEndianStructure, BigEndianStructure): + class SomeBoolsTooBig(base): + _align_ = 8 + _fields_ = [ + ("bool1", c_ubyte), + ("bool2", c_ubyte), + ("bool3", c_ubyte), + ] + class Main(base): + _fields_ = [ + ("y", SomeBoolsTooBig), + ("z", c_uint32), + ] + with self.assertRaises(ValueError) as ctx: + Main.from_buffer(data) + self.assertEqual( + ctx.exception.args[0], + 'Buffer size too small (4 instead of at least 8 bytes)' + ) + + def test_aligned_subclasses(self): + for base, e in ( + (LittleEndianStructure, "<"), + (BigEndianStructure, ">"), + ): + data = bytearray(struct.pack(f"{e}4i", 1, 2, 3, 4)) + class UnalignedSub(base): + x: c_uint32 + _fields_ = [ + ("x", c_uint32), + ] + + class AlignedStruct(UnalignedSub): + _align_ = 8 + _fields_ = [ + ("y", c_uint32), + ] + + class Main(base): + _fields_ = [ + ("a", c_uint32), + ("b", AlignedStruct) + ] + + main = Main.from_buffer(data) + self.assertEqual(alignment(main.b), 8) + self.assertEqual(alignment(main), 8) + self.assertEqual(sizeof(main.b), 8) + self.assertEqual(sizeof(main), 16) + self.assertEqual(main.a, 1) + self.assertEqual(main.b.x, 3) + self.assertEqual(main.b.y, 4) + self.assertEqual(Main.b.offset, 8) + self.assertEqual(Main.b.size, 8) + + def test_aligned_union(self): + for sbase, ubase, e in ( + (LittleEndianStructure, LittleEndianUnion, "<"), + (BigEndianStructure, BigEndianUnion, ">"), + ): + data = bytearray(struct.pack(f"{e}4i", 1, 2, 3, 4)) + class AlignedUnion(ubase): + _align_ = 8 + _fields_ = [ + ("a", c_uint32), + ("b", c_ubyte * 7), + ] + + class Main(sbase): + _fields_ = [ + ("first", c_uint32), + ("union", AlignedUnion), + ] + + main = Main.from_buffer(data) + self.assertEqual(main.first, 1) + self.assertEqual(main.union.a, 3) + self.assertEqual(bytes(main.union.b), data[8:-1]) + self.assertEqual(Main.union.offset, 8) + self.assertEqual(Main.union.size, 8) + self.assertEqual(alignment(main.union), 8) + self.assertEqual(alignment(main), 8) + + def test_aligned_struct_in_union(self): + for sbase, ubase, e in ( + (LittleEndianStructure, LittleEndianUnion, "<"), + (BigEndianStructure, BigEndianUnion, ">"), + ): + data = bytearray(struct.pack(f"{e}4i", 1, 2, 3, 4)) + class Sub(sbase): + _align_ = 8 + _fields_ = [ + ("x", c_uint32), + ("y", c_uint32), + ] + + class MainUnion(ubase): + _fields_ = [ + ("a", c_uint32), + ("b", Sub), + ] + + class Main(sbase): + _fields_ = [ + ("first", c_uint32), + ("union", MainUnion), + ] + + main = Main.from_buffer(data) + self.assertEqual(Main.first.size, 4) + self.assertEqual(alignment(main.union), 8) + self.assertEqual(alignment(main), 8) + self.assertEqual(Main.union.offset, 8) + self.assertEqual(Main.union.size, 8) + self.assertEqual(main.first, 1) + self.assertEqual(main.union.a, 3) + self.assertEqual(main.union.b.x, 3) + self.assertEqual(main.union.b.y, 4) + + def test_smaller_aligned_subclassed_union(self): + for sbase, ubase, e in ( + (LittleEndianStructure, LittleEndianUnion, "<"), + (BigEndianStructure, BigEndianUnion, ">"), + ): + data = bytearray(struct.pack(f"{e}H2xI", 1, 0xD60102D7)) + class SubUnion(ubase): + _align_ = 2 + _fields_ = [ + ("unsigned", c_ubyte), + ("signed", c_byte), + ] + + class MainUnion(SubUnion): + _fields_ = [ + ("num", c_uint32) + ] + + class Main(sbase): + _fields_ = [ + ("first", c_uint16), + ("union", MainUnion), + ] + + main = Main.from_buffer(data) + self.assertEqual(main.union.num, 0xD60102D7) + self.assertEqual(main.union.unsigned, data[4]) + self.assertEqual(main.union.signed, data[4] - 256) + self.assertEqual(alignment(main), 4) + self.assertEqual(alignment(main.union), 4) + self.assertEqual(Main.union.offset, 4) + self.assertEqual(Main.union.size, 4) + self.assertEqual(Main.first.size, 2) + + def test_larger_aligned_subclassed_union(self): + for ubase, e in ( + (LittleEndianUnion, "<"), + (BigEndianUnion, ">"), + ): + data = bytearray(struct.pack(f"{e}I4x", 0xD60102D6)) + class SubUnion(ubase): + _align_ = 8 + _fields_ = [ + ("unsigned", c_ubyte), + ("signed", c_byte), + ] + + class Main(SubUnion): + _fields_ = [ + ("num", c_uint32) + ] + + main = Main.from_buffer(data) + self.assertEqual(alignment(main), 8) + self.assertEqual(sizeof(main), 8) + self.assertEqual(main.num, 0xD60102D6) + self.assertEqual(main.unsigned, 0xD6) + self.assertEqual(main.signed, -42) + + def test_aligned_packed_structures(self): + for sbase, e in ( + (LittleEndianStructure, "<"), + (BigEndianStructure, ">"), + ): + data = bytearray(struct.pack(f"{e}B2H4xB", 1, 2, 3, 4)) + + class Inner(sbase): + _align_ = 8 + _fields_ = [ + ("x", c_uint16), + ("y", c_uint16), + ] + + class Main(sbase): + _pack_ = 1 + _fields_ = [ + ("a", c_ubyte), + ("b", Inner), + ("c", c_ubyte), + ] + + main = Main.from_buffer(data) + self.assertEqual(sizeof(main), 10) + self.assertEqual(Main.b.offset, 1) + # Alignment == 8 because _pack_ wins out. + self.assertEqual(alignment(main.b), 8) + # Size is still 8 though since inside this Structure, it will have + # effect. + self.assertEqual(sizeof(main.b), 8) + self.assertEqual(Main.c.offset, 9) + self.assertEqual(main.a, 1) + self.assertEqual(main.b.x, 2) + self.assertEqual(main.b.y, 3) + self.assertEqual(main.c, 4) + + +if __name__ == '__main__': + unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-28-02-46-12.gh-issue-112433.FUX-nT.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-28-02-46-12.gh-issue-112433.FUX-nT.rst new file mode 100644 index 0000000..fdd11bd --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-28-02-46-12.gh-issue-112433.FUX-nT.rst @@ -0,0 +1 @@ +Add ability to force alignment of :mod:`ctypes.Structure` by way of the new ``_align_`` attribute on the class. diff --git a/Modules/_ctypes/stgdict.c b/Modules/_ctypes/stgdict.c index deafa69..32ee414 100644 --- a/Modules/_ctypes/stgdict.c +++ b/Modules/_ctypes/stgdict.c @@ -384,6 +384,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct int bitofs; PyObject *tmp; int pack; + int forced_alignment = 1; Py_ssize_t ffi_ofs; int big_endian; int arrays_seen = 0; @@ -424,6 +425,28 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct pack = 0; } + if (PyObject_GetOptionalAttr(type, &_Py_ID(_align_), &tmp) < 0) { + return -1; + } + if (tmp) { + forced_alignment = PyLong_AsInt(tmp); + Py_DECREF(tmp); + if (forced_alignment < 0) { + if (!PyErr_Occurred() || + PyErr_ExceptionMatches(PyExc_TypeError) || + PyErr_ExceptionMatches(PyExc_OverflowError)) + { + PyErr_SetString(PyExc_ValueError, + "_align_ must be a non-negative integer"); + } + return -1; + } + } + else { + /* Setting `_align_ = 0` amounts to using the default alignment */ + forced_alignment = 1; + } + len = PySequence_Size(fields); if (len == -1) { if (PyErr_ExceptionMatches(PyExc_TypeError)) { @@ -469,6 +492,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct align = basedict->align; union_size = 0; total_align = align ? align : 1; + total_align = max(total_align, forced_alignment); stgdict->ffi_type_pointer.type = FFI_TYPE_STRUCT; stgdict->ffi_type_pointer.elements = PyMem_New(ffi_type *, basedict->length + len + 1); if (stgdict->ffi_type_pointer.elements == NULL) { @@ -488,7 +512,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct size = 0; align = 0; union_size = 0; - total_align = 1; + total_align = forced_alignment; stgdict->ffi_type_pointer.type = FFI_TYPE_STRUCT; stgdict->ffi_type_pointer.elements = PyMem_New(ffi_type *, len + 1); if (stgdict->ffi_type_pointer.elements == NULL) { -- cgit v0.12 From cfb26401f60a428b3674eb5d9eecf315eb55acab Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 15 Feb 2024 17:32:33 +0200 Subject: gh-100734: What's New in 3.x: Add missing detail from 3.x branch (#114689) --- Doc/whatsnew/2.6.rst | 36 ++++++++++++++++++++ Doc/whatsnew/3.1.rst | 22 +++++++++++++ Doc/whatsnew/3.10.rst | 47 ++++++++++++++++++++++++++ Doc/whatsnew/3.12.rst | 2 ++ Doc/whatsnew/3.6.rst | 33 +++++++++++++++++++ Doc/whatsnew/3.7.rst | 44 +++++++++++++++++++++++++ Doc/whatsnew/3.8.rst | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++ Doc/whatsnew/3.9.rst | 52 +++++++++++++++++++++++++++++ 8 files changed, 327 insertions(+) diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index 05c21d3..e4ade5e 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -2992,6 +2992,33 @@ Changes to Python's build process and to the C API include: architectures (x86, PowerPC), 64-bit (x86-64 and PPC-64), or both. (Contributed by Ronald Oussoren.) +* A new function added in Python 2.6.6, :c:func:`!PySys_SetArgvEx`, sets + the value of ``sys.argv`` and can optionally update ``sys.path`` to + include the directory containing the script named by ``sys.argv[0]`` + depending on the value of an *updatepath* parameter. + + This function was added to close a security hole for applications + that embed Python. The old function, :c:func:`!PySys_SetArgv`, would + always update ``sys.path``, and sometimes it would add the current + directory. This meant that, if you ran an application embedding + Python in a directory controlled by someone else, attackers could + put a Trojan-horse module in the directory (say, a file named + :file:`os.py`) that your application would then import and run. + + If you maintain a C/C++ application that embeds Python, check + whether you're calling :c:func:`!PySys_SetArgv` and carefully consider + whether the application should be using :c:func:`!PySys_SetArgvEx` + with *updatepath* set to false. Note that using this function will + break compatibility with Python versions 2.6.5 and earlier; if you + have to continue working with earlier versions, you can leave + the call to :c:func:`!PySys_SetArgv` alone and call + ``PyRun_SimpleString("sys.path.pop(0)\n")`` afterwards to discard + the first ``sys.path`` component. + + Security issue reported as `CVE-2008-5983 + `_; + discussed in :gh:`50003`, and fixed by Antoine Pitrou. + * The BerkeleyDB module now has a C API object, available as ``bsddb.db.api``. This object can be used by other C extensions that wish to use the :mod:`bsddb` module for their own purposes. @@ -3294,6 +3321,15 @@ that may require changes to your code: scoping rules, also cause warnings because such comparisons are forbidden entirely in 3.0. +For applications that embed Python: + +* The :c:func:`!PySys_SetArgvEx` function was added in Python 2.6.6, + letting applications close a security hole when the existing + :c:func:`!PySys_SetArgv` function was used. Check whether you're + calling :c:func:`!PySys_SetArgv` and carefully consider whether the + application should be using :c:func:`!PySys_SetArgvEx` with + *updatepath* set to false. + .. ====================================================================== diff --git a/Doc/whatsnew/3.1.rst b/Doc/whatsnew/3.1.rst index c912a92..b7dd8f2 100644 --- a/Doc/whatsnew/3.1.rst +++ b/Doc/whatsnew/3.1.rst @@ -80,6 +80,28 @@ Support was also added for third-party tools like `PyYAML ` PEP written by Armin Ronacher and Raymond Hettinger. Implementation written by Raymond Hettinger. +Since an ordered dictionary remembers its insertion order, it can be used +in conjuction with sorting to make a sorted dictionary:: + + >>> # regular unsorted dictionary + >>> d = {'banana': 3, 'apple':4, 'pear': 1, 'orange': 2} + + >>> # dictionary sorted by key + >>> OrderedDict(sorted(d.items(), key=lambda t: t[0])) + OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)]) + + >>> # dictionary sorted by value + >>> OrderedDict(sorted(d.items(), key=lambda t: t[1])) + OrderedDict([('pear', 1), ('orange', 2), ('banana', 3), ('apple', 4)]) + + >>> # dictionary sorted by length of the key string + >>> OrderedDict(sorted(d.items(), key=lambda t: len(t[0]))) + OrderedDict([('pear', 1), ('apple', 4), ('orange', 2), ('banana', 3)]) + +The new sorted dictionaries maintain their sort order when entries +are deleted. But when new keys are added, the keys are appended +to the end and the sort is not maintained. + PEP 378: Format Specifier for Thousands Separator ================================================= diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 7dc06e9..9770b12 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -1517,6 +1517,13 @@ functions internally. For more details, please see their respective documentation. (Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.) +The presence of newline or tab characters in parts of a URL allows for some +forms of attacks. Following the WHATWG specification that updates :rfc:`3986`, +ASCII newline ``\n``, ``\r`` and tab ``\t`` characters are stripped from the +URL by the parser in :mod:`urllib.parse` preventing such attacks. The removal +characters are controlled by a new module level variable +``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :gh:`88048`) + xml --- @@ -2315,3 +2322,43 @@ Removed * The ``PyThreadState.use_tracing`` member has been removed to optimize Python. (Contributed by Mark Shannon in :issue:`43760`.) + + +Notable security feature in 3.10.7 +================================== + +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) +now raises a :exc:`ValueError` if the number of digits in string form is +above a limit to avoid potential denial of service attacks due to the +algorithmic complexity. This is a mitigation for `CVE-2020-10735 +`_. +This limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion +length limitation ` documentation. The default limit +is 4300 digits in string form. + +Notable security feature in 3.10.8 +================================== + +The deprecated :mod:`!mailcap` module now refuses to inject unsafe text +(filenames, MIME types, parameters) into shell commands. Instead of using such +text, it will warn and act as if a match was not found (or for test commands, +as if the test failed). +(Contributed by Petr Viktorin in :gh:`98966`.) + +Notable changes in 3.10.12 +========================== + +tarfile +------- + +* The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, + have a new a *filter* argument that allows limiting tar features than may be + surprising or dangerous, such as creating files outside the destination + directory. + See :ref:`tarfile-extraction-filter` for details. + In Python 3.12, use without the *filter* argument will show a + :exc:`DeprecationWarning`. + In Python 3.14, the default will switch to ``'data'``. + (Contributed by Petr Viktorin in :pep:`706`.) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 100312a..b986e63 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -1956,6 +1956,8 @@ Build Changes :file:`!configure`. (Contributed by Christian Heimes in :gh:`89886`.) +* Windows builds and macOS installers from python.org now use OpenSSL 3.0. + C API Changes ============= diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst index 11e1d73..d62beb0 100644 --- a/Doc/whatsnew/3.6.rst +++ b/Doc/whatsnew/3.6.rst @@ -1472,6 +1472,10 @@ Server and client-side specific TLS protocols for :class:`~ssl.SSLContext` were added. (Contributed by Christian Heimes in :issue:`28085`.) +Added :attr:`ssl.SSLContext.post_handshake_auth` to enable and +:meth:`ssl.SSLSocket.verify_client_post_handshake` to initiate TLS 1.3 +post-handshake authentication. +(Contributed by Christian Heimes in :gh:`78851`.) statistics ---------- @@ -2063,6 +2067,15 @@ connected to and thus what Python interpreter will be used by the virtual environment. (Contributed by Brett Cannon in :issue:`25154`.) +xml +--- + +* As mitigation against DTD and external entity retrieval, the + :mod:`xml.dom.minidom` and :mod:`xml.sax` modules no longer process + external entities by default. + (Contributed by Christian Heimes in :gh:`61441`.) + + Deprecated functions and types of the C API ------------------------------------------- @@ -2430,9 +2443,13 @@ The :func:`locale.localeconv` function now sets temporarily the ``LC_CTYPE`` locale to the ``LC_NUMERIC`` locale in some cases. (Contributed by Victor Stinner in :issue:`31900`.) + Notable changes in Python 3.6.7 =============================== +:mod:`xml.dom.minidom` and :mod:`xml.sax` modules no longer process +external entities by default. See also :gh:`61441`. + In 3.6.7 the :mod:`tokenize` module now implicitly emits a ``NEWLINE`` token when provided with input that does not have a trailing new line. This behavior now matches what the C tokenizer does internally. @@ -2460,3 +2477,19 @@ separator key, with ``&`` as the default. This change also affects functions internally. For more details, please see their respective documentation. (Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.) + +Notable changes in Python 3.6.14 +================================ + +A security fix alters the :class:`ftplib.FTP` behavior to not trust the +IPv4 address sent from the remote server when setting up a passive data +channel. We reuse the ftp server IP address instead. For unusual code +requiring the old behavior, set a ``trust_server_pasv_ipv4_address`` +attribute on your FTP instance to ``True``. (See :gh:`87451`) + +The presence of newline or tab characters in parts of a URL allows for some +forms of attacks. Following the WHATWG specification that updates RFC 3986, +ASCII newline ``\n``, ``\r`` and tab ``\t`` characters are stripped from the +URL by the parser :func:`urllib.parse` preventing such attacks. The removal +characters are controlled by a new module level variable +``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :gh:`88048`) diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 402b15a..8122e0e 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -1380,6 +1380,10 @@ Supported protocols are indicated by several new flags, such as :data:`~ssl.HAS_TLSv1_1`. (Contributed by Christian Heimes in :issue:`32609`.) +Added :attr:`ssl.SSLContext.post_handshake_auth` to enable and +:meth:`ssl.SSLSocket.verify_client_post_handshake` to initiate TLS 1.3 +post-handshake authentication. +(Contributed by Christian Heimes in :gh:`78851`.) string ------ @@ -1599,6 +1603,15 @@ at the interactive prompt. See :ref:`whatsnew37-pep565` for details. (Contributed by Nick Coghlan in :issue:`31975`.) +xml +--- + +As mitigation against DTD and external entity retrieval, the +:mod:`xml.dom.minidom` and :mod:`xml.sax` modules no longer process +external entities by default. +(Contributed by Christian Heimes in :gh:`61441`.) + + xml.etree --------- @@ -2571,3 +2584,34 @@ separator key, with ``&`` as the default. This change also affects functions internally. For more details, please see their respective documentation. (Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.) + +Notable changes in Python 3.7.11 +================================ + +A security fix alters the :class:`ftplib.FTP` behavior to not trust the +IPv4 address sent from the remote server when setting up a passive data +channel. We reuse the ftp server IP address instead. For unusual code +requiring the old behavior, set a ``trust_server_pasv_ipv4_address`` +attribute on your FTP instance to ``True``. (See :gh:`87451`) + + +The presence of newline or tab characters in parts of a URL allows for some +forms of attacks. Following the WHATWG specification that updates RFC 3986, +ASCII newline ``\n``, ``\r`` and tab ``\t`` characters are stripped from the +URL by the parser :func:`urllib.parse` preventing such attacks. The removal +characters are controlled by a new module level variable +``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :gh:`88048`) + +Notable security feature in 3.7.14 +================================== + +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) +now raises a :exc:`ValueError` if the number of digits in string form is +above a limit to avoid potential denial of service attacks due to the +algorithmic complexity. This is a mitigation for `CVE-2020-10735 +`_. +This limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion +length limitation ` documentation. The default limit +is 4300 digits in string form. diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index b041e59..9a2652f 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -2243,6 +2243,21 @@ details, see the documentation for ``loop.create_datagram_endpoint()``. (Contributed by Kyle Stanley, Antoine Pitrou, and Yury Selivanov in :issue:`37228`.) +Notable changes in Python 3.8.2 +=============================== + +Fixed a regression with the ``ignore`` callback of :func:`shutil.copytree`. +The argument types are now str and List[str] again. +(Contributed by Manuel Barkhau and Giampaolo Rodola in :gh:`83571`.) + +Notable changes in Python 3.8.3 +=============================== + +The constant values of future flags in the :mod:`__future__` module +are updated in order to prevent collision with compiler flags. Previously +``PyCF_ALLOW_TOP_LEVEL_AWAIT`` was clashing with ``CO_FUTURE_DIVISION``. +(Contributed by Batuhan Taskaya in :gh:`83743`) + Notable changes in Python 3.8.8 =============================== @@ -2256,9 +2271,55 @@ functions internally. For more details, please see their respective documentation. (Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.) +Notable changes in Python 3.8.9 +=============================== + +A security fix alters the :class:`ftplib.FTP` behavior to not trust the +IPv4 address sent from the remote server when setting up a passive data +channel. We reuse the ftp server IP address instead. For unusual code +requiring the old behavior, set a ``trust_server_pasv_ipv4_address`` +attribute on your FTP instance to ``True``. (See :gh:`87451`) + +Notable changes in Python 3.8.10 +================================ + +macOS 11.0 (Big Sur) and Apple Silicon Mac support +-------------------------------------------------- + +As of 3.8.10, Python now supports building and running on macOS 11 +(Big Sur) and on Apple Silicon Macs (based on the ``ARM64`` architecture). +A new universal build variant, ``universal2``, is now available to natively +support both ``ARM64`` and ``Intel 64`` in one set of executables. +Note that support for "weaklinking", building binaries targeted for newer +versions of macOS that will also run correctly on older versions by +testing at runtime for missing features, is not included in this backport +from Python 3.9; to support a range of macOS versions, continue to target +for and build on the oldest version in the range. + +(Originally contributed by Ronald Oussoren and Lawrence D'Anna in :gh:`85272`, +with fixes by FX Coudert and Eli Rykoff, and backported to 3.8 by Maxime BĂ©langer +and Ned Deily) + +Notable changes in Python 3.8.10 +================================ + +urllib.parse +------------ + +The presence of newline or tab characters in parts of a URL allows for some +forms of attacks. Following the WHATWG specification that updates :rfc:`3986`, +ASCII newline ``\n``, ``\r`` and tab ``\t`` characters are stripped from the +URL by the parser in :mod:`urllib.parse` preventing such attacks. The removal +characters are controlled by a new module level variable +``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`) + + Notable changes in Python 3.8.12 ================================ +Changes in the Python API +------------------------- + Starting with Python 3.8.12 the :mod:`ipaddress` module no longer accepts any leading zeros in IPv4 address strings. Leading zeros are ambiguous and interpreted as octal notation by some libraries. For example the legacy @@ -2268,3 +2329,33 @@ any leading zeros. (Originally contributed by Christian Heimes in :issue:`36384`, and backported to 3.8 by Achraf Merzouki.) + +Notable security feature in 3.8.14 +================================== + +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) +now raises a :exc:`ValueError` if the number of digits in string form is +above a limit to avoid potential denial of service attacks due to the +algorithmic complexity. This is a mitigation for `CVE-2020-10735 +`_. +This limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion +length limitation ` documentation. The default limit +is 4300 digits in string form. + +Notable changes in 3.8.17 +========================= + +tarfile +------- + +* The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, + have a new a *filter* argument that allows limiting tar features than may be + surprising or dangerous, such as creating files outside the destination + directory. + See :ref:`tarfile-extraction-filter` for details. + In Python 3.12, use without the *filter* argument will show a + :exc:`DeprecationWarning`. + In Python 3.14, the default will switch to ``'data'``. + (Contributed by Petr Viktorin in :pep:`706`.) diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index f7ad437..b24c138 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -1562,3 +1562,55 @@ separator key, with ``&`` as the default. This change also affects functions internally. For more details, please see their respective documentation. (Contributed by Adam Goldschmidt, Senthil Kumaran and Ken Jin in :issue:`42967`.) + +Notable changes in Python 3.9.3 +=============================== + +A security fix alters the :class:`ftplib.FTP` behavior to not trust the +IPv4 address sent from the remote server when setting up a passive data +channel. We reuse the ftp server IP address instead. For unusual code +requiring the old behavior, set a ``trust_server_pasv_ipv4_address`` +attribute on your FTP instance to ``True``. (See :gh:`87451`) + +Notable changes in Python 3.9.5 +=============================== + +urllib.parse +------------ + +The presence of newline or tab characters in parts of a URL allows for some +forms of attacks. Following the WHATWG specification that updates :rfc:`3986`, +ASCII newline ``\n``, ``\r`` and tab ``\t`` characters are stripped from the +URL by the parser in :mod:`urllib.parse` preventing such attacks. The removal +characters are controlled by a new module level variable +``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :gh:`88048`) + +Notable security feature in 3.9.14 +================================== + +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) +now raises a :exc:`ValueError` if the number of digits in string form is +above a limit to avoid potential denial of service attacks due to the +algorithmic complexity. This is a mitigation for `CVE-2020-10735 +`_. +This limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion +length limitation ` documentation. The default limit +is 4300 digits in string form. + +Notable changes in 3.9.17 +========================= + +tarfile +------- + +* The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, + have a new a *filter* argument that allows limiting tar features than may be + surprising or dangerous, such as creating files outside the destination + directory. + See :ref:`tarfile-extraction-filter` for details. + In Python 3.12, use without the *filter* argument will show a + :exc:`DeprecationWarning`. + In Python 3.14, the default will switch to ``'data'``. + (Contributed by Petr Viktorin in :pep:`706`.) -- cgit v0.12 From e74fa294c9b0c67bfcbefdda5a069f0a7648f524 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Thu, 15 Feb 2024 17:03:58 +0100 Subject: gh-113317: Argument Clinic: inline required_type_for_self_for_parser() in self converter (#115522) Co-authored-by: Alex Waygood --- Tools/clinic/clinic.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 77d492a..7e65735 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -4402,14 +4402,6 @@ def correct_name_for_self( return "PyTypeObject *", "type" raise AssertionError(f"Unhandled type of function f: {f.kind!r}") -def required_type_for_self_for_parser( - f: Function -) -> str | None: - type, _ = correct_name_for_self(f) - if f.kind in (METHOD_INIT, METHOD_NEW, STATIC_METHOD, CLASS_METHOD): - return type - return None - class self_converter(CConverter): """ @@ -4474,7 +4466,10 @@ class self_converter(CConverter): @property def parser_type(self) -> str: assert self.type is not None - return required_type_for_self_for_parser(self.function) or self.type + if self.function.kind in {METHOD_INIT, METHOD_NEW, STATIC_METHOD, CLASS_METHOD}: + tp, _ = correct_name_for_self(self.function) + return tp + return self.type def render(self, parameter: Parameter, data: CRenderData) -> None: """ -- cgit v0.12 From ae460d450ab854ca66d509ef6971cfe1b6312405 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 15 Feb 2024 10:54:57 -0800 Subject: gh-113743: Make the MRO cache thread-safe in free-threaded builds (#113930) Makes _PyType_Lookup thread safe, including: Thread safety of the underlying cache. Make mutation of mro and type members thread safe Also _PyType_GetMRO and _PyType_GetBases are currently returning borrowed references which aren't safe. --- Include/cpython/pyatomic.h | 3 + Include/cpython/pyatomic_gcc.h | 3 + Include/cpython/pyatomic_msc.h | 11 + Include/cpython/pyatomic_std.h | 7 + Include/internal/pycore_critical_section.h | 3 +- Include/internal/pycore_lock.h | 33 +++ Include/internal/pycore_typeobject.h | 7 +- Modules/_abc.c | 15 +- Objects/typeobject.c | 424 ++++++++++++++++++++++++----- Python/lock.c | 71 +++++ Python/pystate.c | 3 + 11 files changed, 500 insertions(+), 80 deletions(-) diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index 5314a70..e10d482 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -469,6 +469,9 @@ _Py_atomic_store_int_release(int *obj, int value); static inline int _Py_atomic_load_int_acquire(const int *obj); +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj); + // --- _Py_atomic_fence ------------------------------------------------------ diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index 70f2b7e..4095e18 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -495,6 +495,9 @@ static inline int _Py_atomic_load_int_acquire(const int *obj) { return __atomic_load_n(obj, __ATOMIC_ACQUIRE); } +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_ACQUIRE); } // --- _Py_atomic_fence ------------------------------------------------------ diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index 601a0cf..b5c1ec9 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -938,6 +938,17 @@ _Py_atomic_load_int_acquire(const int *obj) #endif } +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(uint32_t volatile *)obj; +#elif defined(_M_ARM64) + return (int)__ldar32((uint32_t volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uint32_acquire" +#endif +} // --- _Py_atomic_fence ------------------------------------------------------ diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index a05bfae..6c934a2 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -870,6 +870,13 @@ _Py_atomic_load_int_acquire(const int *obj) memory_order_acquire); } +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uint32_t)*)obj, + memory_order_acquire); +} // --- _Py_atomic_fence ------------------------------------------------------ diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 30820a2..9163b5c 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -293,7 +293,8 @@ _PyCriticalSection_SuspendAll(PyThreadState *tstate); #ifdef Py_GIL_DISABLED static inline void -_PyCriticalSection_AssertHeld(PyMutex *mutex) { +_PyCriticalSection_AssertHeld(PyMutex *mutex) +{ #ifdef Py_DEBUG PyThreadState *tstate = _PyThreadState_GET(); uintptr_t prev = tstate->critical_section; diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index 18a8896..674a1d1 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -251,6 +251,39 @@ PyAPI_FUNC(void) _PyRWMutex_RUnlock(_PyRWMutex *rwmutex); PyAPI_FUNC(void) _PyRWMutex_Lock(_PyRWMutex *rwmutex); PyAPI_FUNC(void) _PyRWMutex_Unlock(_PyRWMutex *rwmutex); +// Similar to linux seqlock: https://en.wikipedia.org/wiki/Seqlock +// We use a sequence number to lock the writer, an even sequence means we're unlocked, an odd +// sequence means we're locked. Readers will read the sequence before attempting to read the +// underlying data and then read the sequence number again after reading the data. If the +// sequence has not changed the data is valid. +// +// Differs a little bit in that we use CAS on sequence as the lock, instead of a seperate spin lock. +// The writer can also detect that the undelering data has not changed and abandon the write +// and restore the previous sequence. +typedef struct { + uint32_t sequence; +} _PySeqLock; + +// Lock the sequence lock for the writer +PyAPI_FUNC(void) _PySeqLock_LockWrite(_PySeqLock *seqlock); + +// Unlock the sequence lock and move to the next sequence number. +PyAPI_FUNC(void) _PySeqLock_UnlockWrite(_PySeqLock *seqlock); + +// Abandon the current update indicating that no mutations have occured +// and restore the previous sequence value. +PyAPI_FUNC(void) _PySeqLock_AbandonWrite(_PySeqLock *seqlock); + +// Begin a read operation and return the current sequence number. +PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock); + +// End the read operation and confirm that the sequence number has not changed. +// Returns 1 if the read was successful or 0 if the read should be re-tried. +PyAPI_FUNC(uint32_t) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous); + +// Check if the lock was held during a fork and clear the lock. Returns 1 +// if the lock was held and any associated datat should be cleared. +PyAPI_FUNC(uint32_t) _PySeqLock_AfterFork(_PySeqLock *seqlock); #ifdef __cplusplus } diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index c03c3d7..664f6fb 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "pycore_moduleobject.h" // PyModuleObject +#include "pycore_lock.h" // PyMutex /* state */ @@ -21,6 +22,7 @@ struct _types_runtime_state { // bpo-42745: next_version_tag remains shared by all interpreters // because of static types. unsigned int next_version_tag; + PyMutex type_mutex; }; @@ -28,6 +30,9 @@ struct _types_runtime_state { // see _PyType_Lookup(). struct type_cache_entry { unsigned int version; // initialized from type->tp_version_tag +#ifdef Py_GIL_DISABLED + _PySeqLock sequence; +#endif PyObject *name; // reference to exactly a str or None PyObject *value; // borrowed reference or NULL }; @@ -74,7 +79,7 @@ struct types_state { extern PyStatus _PyTypes_InitTypes(PyInterpreterState *); extern void _PyTypes_FiniTypes(PyInterpreterState *); extern void _PyTypes_Fini(PyInterpreterState *); - +extern void _PyTypes_AfterFork(void); /* other API */ diff --git a/Modules/_abc.c b/Modules/_abc.c index 9473905..399ecbb 100644 --- a/Modules/_abc.c +++ b/Modules/_abc.c @@ -8,7 +8,6 @@ #include "pycore_object.h" // _PyType_GetSubclasses() #include "pycore_runtime.h" // _Py_ID() #include "pycore_setobject.h" // _PySet_NextEntry() -#include "pycore_typeobject.h" // _PyType_GetMRO() #include "pycore_weakref.h" // _PyWeakref_GET_REF() #include "clinic/_abc.c.h" @@ -744,18 +743,12 @@ _abc__abc_subclasscheck_impl(PyObject *module, PyObject *self, Py_DECREF(ok); /* 4. Check if it's a direct subclass. */ - PyObject *mro = _PyType_GetMRO((PyTypeObject *)subclass); - assert(PyTuple_Check(mro)); - for (pos = 0; pos < PyTuple_GET_SIZE(mro); pos++) { - PyObject *mro_item = PyTuple_GET_ITEM(mro, pos); - assert(mro_item != NULL); - if ((PyObject *)self == mro_item) { - if (_add_to_weak_set(&impl->_abc_cache, subclass) < 0) { - goto end; - } - result = Py_True; + if (PyType_IsSubtype((PyTypeObject *)subclass, (PyTypeObject *)self)) { + if (_add_to_weak_set(&impl->_abc_cache, subclass) < 0) { goto end; } + result = Py_True; + goto end; } /* 5. Check if it's a subclass of a registered class (recursive). */ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c65d0ec..e0711df 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6,6 +6,7 @@ #include "pycore_code.h" // CO_FAST_FREE #include "pycore_dict.h" // _PyDict_KeysSize() #include "pycore_frame.h" // _PyInterpreterFrame +#include "pycore_lock.h" // _PySeqLock_* #include "pycore_long.h" // _PyLong_IsNegative() #include "pycore_memoryobject.h" // _PyMemoryView_FromBufferProc() #include "pycore_modsupport.h" // _PyArg_NoKwnames() @@ -52,6 +53,34 @@ class object "PyObject *" "&PyBaseObject_Type" #define NEXT_VERSION_TAG(interp) \ (interp)->types.next_version_tag +#ifdef Py_GIL_DISABLED + +// There's a global lock for mutation of types. This avoids having to take +// additonal locks while doing various subclass processing which may result +// in odd behaviors w.r.t. running with the GIL as the outer type lock could +// be released and reacquired during a subclass update if there's contention +// on the subclass lock. +#define BEGIN_TYPE_LOCK() \ + { \ + _PyCriticalSection _cs; \ + _PyCriticalSection_Begin(&_cs, &_PyRuntime.types.type_mutex); \ + +#define END_TYPE_LOCK() \ + _PyCriticalSection_End(&_cs); \ + } + +#define ASSERT_TYPE_LOCK_HELD() \ + _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(&_PyRuntime.types.type_mutex) + +#else + +#define BEGIN_TYPE_LOCK() +#define END_TYPE_LOCK() +#define ASSERT_TYPE_LOCK_HELD() + +#endif + + typedef struct PySlot_Offset { short subslot_offset; short slot_offset; @@ -279,8 +308,14 @@ lookup_tp_bases(PyTypeObject *self) PyObject * _PyType_GetBases(PyTypeObject *self) { - /* It returns a borrowed reference. */ - return lookup_tp_bases(self); + PyObject *res; + + BEGIN_TYPE_LOCK(); + res = lookup_tp_bases(self); + Py_INCREF(res); + END_TYPE_LOCK() + + return res; } static inline void @@ -330,14 +365,19 @@ clear_tp_bases(PyTypeObject *self) static inline PyObject * lookup_tp_mro(PyTypeObject *self) { + ASSERT_TYPE_LOCK_HELD(); return self->tp_mro; } PyObject * _PyType_GetMRO(PyTypeObject *self) { - /* It returns a borrowed reference. */ - return lookup_tp_mro(self); + PyObject *mro; + BEGIN_TYPE_LOCK(); + mro = lookup_tp_mro(self); + Py_INCREF(mro); + END_TYPE_LOCK() + return mro; } static inline void @@ -646,9 +686,15 @@ type_cache_clear(struct type_cache *cache, PyObject *value) { for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { struct type_cache_entry *entry = &cache->hashtable[i]; +#ifdef Py_GIL_DISABLED + _PySeqLock_LockWrite(&entry->sequence); +#endif entry->version = 0; Py_XSETREF(entry->name, _Py_XNewRef(value)); entry->value = NULL; +#ifdef Py_GIL_DISABLED + _PySeqLock_UnlockWrite(&entry->sequence); +#endif } } @@ -760,8 +806,10 @@ PyType_Watch(int watcher_id, PyObject* obj) return -1; } // ensure we will get a callback on the next modification + BEGIN_TYPE_LOCK() assign_version_tag(interp, type); type->tp_watched |= (1 << watcher_id); + END_TYPE_LOCK() return 0; } @@ -781,8 +829,8 @@ PyType_Unwatch(int watcher_id, PyObject* obj) return 0; } -void -PyType_Modified(PyTypeObject *type) +static void +type_modified_unlocked(PyTypeObject *type) { /* Invalidate any cached data for the specified type and all subclasses. This function is called after the base @@ -848,6 +896,22 @@ PyType_Modified(PyTypeObject *type) } } +void +PyType_Modified(PyTypeObject *type) +{ + // Quick check without the lock held + if (!_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)) { + return; + } + + BEGIN_TYPE_LOCK() + type_modified_unlocked(type); + END_TYPE_LOCK() +} + +static int +is_subtype_unlocked(PyTypeObject *a, PyTypeObject *b); + static void type_mro_modified(PyTypeObject *type, PyObject *bases) { /* @@ -866,6 +930,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { int custom = !Py_IS_TYPE(type, &PyType_Type); int unbound; + ASSERT_TYPE_LOCK_HELD(); if (custom) { PyObject *mro_meth, *type_mro_meth; mro_meth = lookup_maybe_method( @@ -891,7 +956,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { PyObject *b = PyTuple_GET_ITEM(bases, i); PyTypeObject *cls = _PyType_CAST(b); - if (!PyType_IsSubtype(type, cls)) { + if (!is_subtype_unlocked(type, cls)) { goto clear; } } @@ -913,6 +978,8 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { static int assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + /* Ensure that the tp_version_tag is valid and set Py_TPFLAGS_VALID_VERSION_TAG. To respect the invariant, this must first be done on all super classes. Return 0 if this @@ -961,7 +1028,11 @@ assign_version_tag(PyInterpreterState *interp, PyTypeObject *type) int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) { PyInterpreterState *interp = _PyInterpreterState_GET(); - return assign_version_tag(interp, type); + int assigned; + BEGIN_TYPE_LOCK() + assigned = assign_version_tag(interp, type); + END_TYPE_LOCK() + return assigned; } @@ -1183,21 +1254,28 @@ type_set_abstractmethods(PyTypeObject *type, PyObject *value, void *context) static PyObject * type_get_bases(PyTypeObject *type, void *context) { - PyObject *bases = lookup_tp_bases(type); + PyObject *bases = _PyType_GetBases(type); if (bases == NULL) { Py_RETURN_NONE; } - return Py_NewRef(bases); + return bases; } static PyObject * type_get_mro(PyTypeObject *type, void *context) { - PyObject *mro = lookup_tp_mro(type); + PyObject *mro; + + BEGIN_TYPE_LOCK() + mro = lookup_tp_mro(type); if (mro == NULL) { - Py_RETURN_NONE; + mro = Py_None; + } else { + Py_INCREF(mro); } - return Py_NewRef(mro); + + END_TYPE_LOCK() + return mro; } static PyTypeObject *best_base(PyObject *); @@ -1219,6 +1297,8 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { + ASSERT_TYPE_LOCK_HELD(); + PyObject *old_mro; int res = mro_internal(type, &old_mro); if (res <= 0) { @@ -1282,7 +1362,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) } static int -type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) +type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, void *context) { // Check arguments if (!check_set_special_type_attr(type, new_bases, "__bases__")) { @@ -1313,7 +1393,7 @@ type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) } PyTypeObject *base = (PyTypeObject*)ob; - if (PyType_IsSubtype(base, type) || + if (is_subtype_unlocked(base, type) || /* In case of reentering here again through a custom mro() the above check is not enough since it relies on base->tp_mro which would gonna be updated inside @@ -1418,6 +1498,16 @@ type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) return -1; } +static int +type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) +{ + int res; + BEGIN_TYPE_LOCK(); + res = type_set_bases_unlocked(type, new_bases, context); + END_TYPE_LOCK(); + return res; +} + static PyObject * type_dict(PyTypeObject *type, void *context) { @@ -2156,11 +2246,12 @@ type_is_subtype_base_chain(PyTypeObject *a, PyTypeObject *b) return (b == &PyBaseObject_Type); } -int -PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b) +static int +is_subtype_unlocked(PyTypeObject *a, PyTypeObject *b) { PyObject *mro; + ASSERT_TYPE_LOCK_HELD(); mro = lookup_tp_mro(a); if (mro != NULL) { /* Deal with multiple inheritance without recursion @@ -2179,6 +2270,16 @@ PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b) return type_is_subtype_base_chain(a, b); } +int +PyType_IsSubtype(PyTypeObject *a, PyTypeObject *b) +{ + int res; + BEGIN_TYPE_LOCK(); + res = is_subtype_unlocked(a, b); + END_TYPE_LOCK() + return res; +} + /* Routines to do a method lookup in the type without looking in the instance dictionary (so we can't use PyObject_GetAttr) but still binding it to the instance. @@ -2538,8 +2639,10 @@ pmerge(PyObject *acc, PyObject **to_merge, Py_ssize_t to_merge_size) } static PyObject * -mro_implementation(PyTypeObject *type) +mro_implementation_unlocked(PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + if (!_PyType_IsReady(type)) { if (PyType_Ready(type) < 0) return NULL; @@ -2619,6 +2722,16 @@ mro_implementation(PyTypeObject *type) return result; } +static PyObject * +mro_implementation(PyTypeObject *type) +{ + PyObject *mro; + BEGIN_TYPE_LOCK() + mro = mro_implementation_unlocked(type); + END_TYPE_LOCK() + return mro; +} + /*[clinic input] type.mro @@ -2657,7 +2770,7 @@ mro_check(PyTypeObject *type, PyObject *mro) } PyTypeObject *base = (PyTypeObject*)obj; - if (!PyType_IsSubtype(solid, solid_base(base))) { + if (!is_subtype_unlocked(solid, solid_base(base))) { PyErr_Format( PyExc_TypeError, "mro() returned base with unsuitable layout ('%.500s')", @@ -2688,6 +2801,9 @@ mro_invoke(PyTypeObject *type) { PyObject *mro_result; PyObject *new_mro; + + ASSERT_TYPE_LOCK_HELD(); + const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { @@ -2700,7 +2816,7 @@ mro_invoke(PyTypeObject *type) Py_DECREF(mro_meth); } else { - mro_result = mro_implementation(type); + mro_result = mro_implementation_unlocked(type); } if (mro_result == NULL) return NULL; @@ -2747,8 +2863,10 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal(PyTypeObject *type, PyObject **p_old_mro) +mro_internal_unlocked(PyTypeObject *type, PyObject **p_old_mro) { + ASSERT_TYPE_LOCK_HELD(); + PyObject *new_mro, *old_mro; int reent; @@ -2793,6 +2911,16 @@ mro_internal(PyTypeObject *type, PyObject **p_old_mro) return 1; } +static int +mro_internal(PyTypeObject *type, PyObject **p_old_mro) +{ + int res; + BEGIN_TYPE_LOCK() + res = mro_internal_unlocked(type, p_old_mro); + END_TYPE_LOCK() + return res; +} + /* Calculate the best base amongst multiple base classes. This is the first one that's on the path to the "solid base". */ @@ -4631,6 +4759,9 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) { assert(PyType_Check(type)); + PyObject *res = NULL; + BEGIN_TYPE_LOCK() + PyObject *mro = lookup_tp_mro(type); // The type must be ready assert(mro != NULL); @@ -4650,15 +4781,19 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) PyHeapTypeObject *ht = (PyHeapTypeObject*)super; PyObject *module = ht->ht_module; if (module && _PyModule_GetDef(module) == def) { - return module; + res = module; + break; } } + END_TYPE_LOCK() - PyErr_Format( - PyExc_TypeError, - "PyType_GetModuleByDef: No superclass of '%s' has the given module", - type->tp_name); - return NULL; + if (res == NULL) { + PyErr_Format( + PyExc_TypeError, + "PyType_GetModuleByDef: No superclass of '%s' has the given module", + type->tp_name); + } + return res; } void * @@ -4696,6 +4831,8 @@ PyObject_GetItemData(PyObject *obj) static PyObject * find_name_in_mro(PyTypeObject *type, PyObject *name, int *error) { + ASSERT_TYPE_LOCK_HELD(); + Py_hash_t hash; if (!PyUnicode_CheckExact(name) || (hash = _PyASCIIObject_CAST(name)->hash) == -1) @@ -4765,6 +4902,62 @@ is_dunder_name(PyObject *name) return 0; } +static void +update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int version_tag, PyObject *value) +{ + entry->version = version_tag; + entry->value = value; /* borrowed */ + assert(_PyASCIIObject_CAST(name)->hash != -1); + OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != Py_None && entry->name != name); + // We're releasing this under the lock for simplicity sake because it's always a + // exact unicode object or Py_None so it's safe to do so. + Py_SETREF(entry->name, Py_NewRef(name)); +} + +#if Py_GIL_DISABLED + +#define TYPE_CACHE_IS_UPDATING(sequence) (sequence & 0x01) + +static void +update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, + unsigned int version_tag, PyObject *value) +{ + _PySeqLock_LockWrite(&entry->sequence); + + // update the entry + if (entry->name == name && + entry->value == value && + entry->version == version_tag) { + // We raced with another update, bail and restore previous sequence. + _PySeqLock_AbandonWrite(&entry->sequence); + return; + } + + update_cache(entry, name, version_tag, value); + + // Then update sequence to the next valid value + _PySeqLock_UnlockWrite(&entry->sequence); +} + +#endif + +void +_PyTypes_AfterFork() +{ +#ifdef Py_GIL_DISABLED + struct type_cache *cache = get_type_cache(); + for (Py_ssize_t i = 0; i < (1 << MCACHE_SIZE_EXP); i++) { + struct type_cache_entry *entry = &cache->hashtable[i]; + if (_PySeqLock_AfterFork(&entry->sequence)) { + // Entry was in the process of updating while forking, clear it... + entry->value = NULL; + Py_SETREF(entry->name, Py_None); + entry->version = 0; + } + } +#endif +} + /* Internal API to look for a name through the MRO. This returns a borrowed reference, and doesn't set an exception! */ PyObject * @@ -4777,6 +4970,27 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) unsigned int h = MCACHE_HASH_METHOD(type, name); struct type_cache *cache = get_type_cache(); struct type_cache_entry *entry = &cache->hashtable[h]; +#ifdef Py_GIL_DISABLED + // synchronize-with other writing threads by doing an acquire load on the sequence + while (1) { + int sequence = _PySeqLock_BeginRead(&entry->sequence); + if (_Py_atomic_load_uint32_relaxed(&entry->version) == type->tp_version_tag && + _Py_atomic_load_ptr_relaxed(&entry->name) == name) { + assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); + OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); + PyObject *value = _Py_atomic_load_ptr_relaxed(&entry->value); + + // If the sequence is still valid then we're done + if (_PySeqLock_EndRead(&entry->sequence, sequence)) { + return value; + } + } else { + // cache miss + break; + } + } +#else if (entry->version == type->tp_version_tag && entry->name == name) { assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); @@ -4784,13 +4998,27 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); return entry->value; } +#endif OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); /* We may end up clearing live exceptions below, so make sure it's ours. */ assert(!PyErr_Occurred()); + // We need to atomically do the lookup and capture the version before + // anyone else can modify our mro or mutate the type. + + int has_version = 0; + int version = 0; + BEGIN_TYPE_LOCK() res = find_name_in_mro(type, name, &error); + if (MCACHE_CACHEABLE_NAME(name)) { + has_version = assign_version_tag(interp, type); + version = type->tp_version_tag; + assert(!has_version || _PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); + } + END_TYPE_LOCK() + /* Only put NULL results into cache if there was no error. */ if (error) { /* It's not ideal to clear the error condition, @@ -4807,15 +5035,12 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) return NULL; } - if (MCACHE_CACHEABLE_NAME(name) && assign_version_tag(interp, type)) { - h = MCACHE_HASH_METHOD(type, name); - struct type_cache_entry *entry = &cache->hashtable[h]; - entry->version = type->tp_version_tag; - entry->value = res; /* borrowed */ - assert(_PyASCIIObject_CAST(name)->hash != -1); - OBJECT_STAT_INC_COND(type_cache_collisions, entry->name != Py_None && entry->name != name); - assert(_PyType_HasFeature(type, Py_TPFLAGS_VALID_VERSION_TAG)); - Py_SETREF(entry->name, Py_NewRef(name)); + if (has_version) { +#if Py_GIL_DISABLED + update_cache_gil_disabled(entry, name, version, res); +#else + update_cache(entry, name, version, res); +#endif } return res; } @@ -4978,6 +5203,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) /* Will fail in _PyObject_GenericSetAttrWithDict. */ Py_INCREF(name); } + + BEGIN_TYPE_LOCK() res = _PyObject_GenericSetAttrWithDict((PyObject *)type, name, value, NULL); if (res == 0) { /* Clear the VALID_VERSION flag of 'type' and all its @@ -4985,13 +5212,15 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) update_subclasses() recursion in update_slot(), but carefully: they each have their own conditions on which to stop recursing into subclasses. */ - PyType_Modified(type); + type_modified_unlocked(type); if (is_dunder_name(name)) { res = update_slot(type, name); } assert(_PyType_CheckConsistency(type)); } + END_TYPE_LOCK() + Py_DECREF(name); return res; } @@ -6796,28 +7025,28 @@ inherit_special(PyTypeObject *type, PyTypeObject *base) #undef COPYVAL /* Setup fast subclass flags */ - if (PyType_IsSubtype(base, (PyTypeObject*)PyExc_BaseException)) { + if (is_subtype_unlocked(base, (PyTypeObject*)PyExc_BaseException)) { type->tp_flags |= Py_TPFLAGS_BASE_EXC_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyType_Type)) { + else if (is_subtype_unlocked(base, &PyType_Type)) { type->tp_flags |= Py_TPFLAGS_TYPE_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyLong_Type)) { + else if (is_subtype_unlocked(base, &PyLong_Type)) { type->tp_flags |= Py_TPFLAGS_LONG_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyBytes_Type)) { + else if (is_subtype_unlocked(base, &PyBytes_Type)) { type->tp_flags |= Py_TPFLAGS_BYTES_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyUnicode_Type)) { + else if (is_subtype_unlocked(base, &PyUnicode_Type)) { type->tp_flags |= Py_TPFLAGS_UNICODE_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyTuple_Type)) { + else if (is_subtype_unlocked(base, &PyTuple_Type)) { type->tp_flags |= Py_TPFLAGS_TUPLE_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyList_Type)) { + else if (is_subtype_unlocked(base, &PyList_Type)) { type->tp_flags |= Py_TPFLAGS_LIST_SUBCLASS; } - else if (PyType_IsSubtype(base, &PyDict_Type)) { + else if (is_subtype_unlocked(base, &PyDict_Type)) { type->tp_flags |= Py_TPFLAGS_DICT_SUBCLASS; } @@ -7256,6 +7485,8 @@ type_ready_preheader(PyTypeObject *type) static int type_ready_mro(PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { assert(lookup_tp_mro(type) != NULL); @@ -7265,7 +7496,7 @@ type_ready_mro(PyTypeObject *type) } /* Calculate method resolution order */ - if (mro_internal(type, NULL) < 0) { + if (mro_internal_unlocked(type, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -7329,6 +7560,8 @@ inherit_patma_flags(PyTypeObject *type, PyTypeObject *base) { static int type_ready_inherit(PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + /* Inherit special flags from dominant base */ PyTypeObject *base = type->tp_base; if (base != NULL) { @@ -7521,6 +7754,8 @@ type_ready_post_checks(PyTypeObject *type) static int type_ready(PyTypeObject *type, int rerunbuiltin) { + ASSERT_TYPE_LOCK_HELD(); + _PyObject_ASSERT((PyObject *)type, !is_readying(type)); start_readying(type); @@ -7608,7 +7843,16 @@ PyType_Ready(PyTypeObject *type) type->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; } - return type_ready(type, 0); + int res; + BEGIN_TYPE_LOCK() + if (!(type->tp_flags & Py_TPFLAGS_READY)) { + res = type_ready(type, 0); + } else { + res = 0; + assert(_PyType_CheckConsistency(type)); + } + END_TYPE_LOCK() + return res; } int @@ -7638,7 +7882,10 @@ _PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) static_builtin_state_init(interp, self); - int res = type_ready(self, !ismain); + int res; + BEGIN_TYPE_LOCK(); + res = type_ready(self, !ismain); + END_TYPE_LOCK() if (res < 0) { static_builtin_state_clear(interp, self); } @@ -8040,9 +8287,12 @@ wrap_delitem(PyObject *self, PyObject *args, void *wrapped) https://mail.python.org/pipermail/python-dev/2003-April/034535.html */ static int -hackcheck(PyObject *self, setattrofunc func, const char *what) +hackcheck_unlocked(PyObject *self, setattrofunc func, const char *what) { PyTypeObject *type = Py_TYPE(self); + + ASSERT_TYPE_LOCK_HELD(); + PyObject *mro = lookup_tp_mro(type); if (!mro) { /* Probably ok not to check the call in this case. */ @@ -8084,6 +8334,20 @@ hackcheck(PyObject *self, setattrofunc func, const char *what) return 1; } +static int +hackcheck(PyObject *self, setattrofunc func, const char *what) +{ + if (!PyType_Check(self)) { + return 1; + } + + int res; + BEGIN_TYPE_LOCK(); + res = hackcheck_unlocked(self, func, what); + END_TYPE_LOCK() + return res; +} + static PyObject * wrap_setattr(PyObject *self, PyObject *args, void *wrapped) { @@ -9252,13 +9516,13 @@ fail: return -1; } -static int -releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) +static releasebufferproc +releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer) { PyTypeObject *self_type = Py_TYPE(self); PyObject *mro = lookup_tp_mro(self_type); if (mro == NULL) { - return -1; + return NULL; } assert(PyTuple_Check(mro)); @@ -9272,9 +9536,8 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) } i++; /* skip self_type */ if (i >= n) - return -1; + return NULL; - releasebufferproc base_releasebuffer = NULL; for (; i < n; i++) { PyObject *obj = PyTuple_GET_ITEM(mro, i); if (!PyType_Check(obj)) { @@ -9284,15 +9547,25 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) if (base_type->tp_as_buffer != NULL && base_type->tp_as_buffer->bf_releasebuffer != NULL && base_type->tp_as_buffer->bf_releasebuffer != slot_bf_releasebuffer) { - base_releasebuffer = base_type->tp_as_buffer->bf_releasebuffer; - break; + return base_type->tp_as_buffer->bf_releasebuffer; } } + return NULL; +} + +static void +releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) +{ + releasebufferproc base_releasebuffer; + + BEGIN_TYPE_LOCK(); + base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer); + END_TYPE_LOCK(); + if (base_releasebuffer != NULL) { base_releasebuffer(self, buffer); } - return 0; } static void @@ -9369,11 +9642,7 @@ static void slot_bf_releasebuffer(PyObject *self, Py_buffer *buffer) { releasebuffer_call_python(self, buffer); - if (releasebuffer_maybe_call_super(self, buffer) < 0) { - if (PyErr_Occurred()) { - PyErr_WriteUnraisable(self); - } - } + releasebuffer_maybe_call_super(self, buffer); } static PyObject * @@ -9828,6 +10097,8 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { + ASSERT_TYPE_LOCK_HELD(); + PyObject *descr; PyWrapperDescrObject *d; @@ -9876,7 +10147,7 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) d = (PyWrapperDescrObject *)descr; if ((specific == NULL || specific == d->d_wrapped) && d->d_base->wrapper == p->wrapper && - PyType_IsSubtype(type, PyDescr_TYPE(d))) + is_subtype_unlocked(type, PyDescr_TYPE(d))) { specific = d->d_wrapped; } @@ -9941,6 +10212,8 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) static int update_slots_callback(PyTypeObject *type, void *data) { + ASSERT_TYPE_LOCK_HELD(); + pytype_slotdef **pp = (pytype_slotdef **)data; for (; *pp; pp++) { update_one_slot(type, *pp); @@ -9957,6 +10230,7 @@ update_slot(PyTypeObject *type, PyObject *name) pytype_slotdef **pp; int offset; + ASSERT_TYPE_LOCK_HELD(); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -9990,10 +10264,17 @@ update_slot(PyTypeObject *type, PyObject *name) static void fixup_slot_dispatchers(PyTypeObject *type) { + // This lock isn't strictly necessary because the type has not been + // exposed to anyone else yet, but update_ont_slot calls find_name_in_mro + // where we'd like to assert that the tyep is locked. + BEGIN_TYPE_LOCK() + assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { p = update_one_slot(type, p); } + + END_TYPE_LOCK() } static void @@ -10001,6 +10282,8 @@ update_all_slots(PyTypeObject* type) { pytype_slotdef *p; + ASSERT_TYPE_LOCK_HELD(); + /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ PyType_Modified(type); @@ -10273,7 +10556,15 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject * PyObject *mro, *res; Py_ssize_t i, n; + BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(su_obj_type); + /* keep a strong reference to mro because su_obj_type->tp_mro can be + replaced during PyDict_GetItemRef(dict, name, &res) and because + another thread can modify it after we end the critical section + below */ + Py_XINCREF(mro); + END_TYPE_LOCK() + if (mro == NULL) return NULL; @@ -10286,12 +10577,11 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject * break; } i++; /* skip su->type (if any) */ - if (i >= n) + if (i >= n) { + Py_DECREF(mro); return NULL; + } - /* keep a strong reference to mro because su_obj_type->tp_mro can be - replaced during PyDict_GetItemRef(dict, name, &res) */ - Py_INCREF(mro); do { PyObject *obj = PyTuple_GET_ITEM(mro, i); PyObject *dict = lookup_tp_dict(_PyType_CAST(obj)); diff --git a/Python/lock.c b/Python/lock.c index f0ff117..bf01436 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -459,3 +459,74 @@ _PyRWMutex_Unlock(_PyRWMutex *rwmutex) _PyParkingLot_UnparkAll(&rwmutex->bits); } } + +#define SEQLOCK_IS_UPDATING(sequence) (sequence & 0x01) + +void _PySeqLock_LockWrite(_PySeqLock *seqlock) +{ + // lock the entry by setting by moving to an odd sequence number + uint32_t prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence); + while (1) { + if (SEQLOCK_IS_UPDATING(prev)) { + // Someone else is currently updating the cache + _Py_yield(); + prev = _Py_atomic_load_uint32_relaxed(&seqlock->sequence); + } + else if (_Py_atomic_compare_exchange_uint32(&seqlock->sequence, &prev, prev + 1)) { + // We've locked the cache + break; + } + else { + _Py_yield(); + } + } +} + +void _PySeqLock_AbandonWrite(_PySeqLock *seqlock) +{ + uint32_t new_seq = seqlock->sequence - 1; + assert(!SEQLOCK_IS_UPDATING(new_seq)); + _Py_atomic_store_uint32(&seqlock->sequence, new_seq); +} + +void _PySeqLock_UnlockWrite(_PySeqLock *seqlock) +{ + uint32_t new_seq = seqlock->sequence + 1; + assert(!SEQLOCK_IS_UPDATING(new_seq)); + _Py_atomic_store_uint32(&seqlock->sequence, new_seq); +} + +uint32_t _PySeqLock_BeginRead(_PySeqLock *seqlock) +{ + uint32_t sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence); + while (SEQLOCK_IS_UPDATING(sequence)) { + _Py_yield(); + sequence = _Py_atomic_load_uint32_acquire(&seqlock->sequence); + } + + return sequence; +} + +uint32_t _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous) +{ + // Synchronize again and validate that the entry hasn't been updated + // while we were readying the values. + if (_Py_atomic_load_uint32_acquire(&seqlock->sequence) == previous) { + return 1; + } + + _Py_yield(); + return 0; +} + +uint32_t _PySeqLock_AfterFork(_PySeqLock *seqlock) +{ + // Synchronize again and validate that the entry hasn't been updated + // while we were readying the values. + if (SEQLOCK_IS_UPDATING(seqlock->sequence)) { + seqlock->sequence = 0; + return 1; + } + + return 0; +} diff --git a/Python/pystate.c b/Python/pystate.c index 08ec586..b1d1a08 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -395,6 +395,7 @@ _Py_COMP_DIAG_POP &(runtime)->atexit.mutex, \ &(runtime)->audit_hooks.mutex, \ &(runtime)->allocators.mutex, \ + &(runtime)->types.type_mutex, \ } static void @@ -499,6 +500,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) _PyMutex_at_fork_reinit(locks[i]); } + _PyTypes_AfterFork(); + /* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does * not force the default allocator. */ if (_PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex) < 0) { -- cgit v0.12