From a5764d3d96341441d3f70fb5c96a82610a3f4842 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 30 Jun 2021 18:52:25 +0100 Subject: bpo-41180: Replace marshal code.__new__ audit event with marshal.load[s] and marshal.dumps (GH-26970) --- Doc/library/marshal.rst | 18 +++++++++++++ Lib/test/audit-tests.py | 27 +++++++++++++++++++ Lib/test/test_audit.py | 5 ++++ .../2021-06-29-23-40-22.bpo-41180.uTWHv_.rst | 5 ++++ Python/marshal.c | 30 ++++++++++++++-------- 5 files changed, 75 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst diff --git a/Doc/library/marshal.rst b/Doc/library/marshal.rst index d65afc2..24f9dc1 100644 --- a/Doc/library/marshal.rst +++ b/Doc/library/marshal.rst @@ -66,6 +66,8 @@ The module defines these functions: The *version* argument indicates the data format that ``dump`` should use (see below). + .. audit-event:: marshal.dumps value,version marshal.dump + .. function:: load(file) @@ -74,11 +76,18 @@ The module defines these functions: format), raise :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`. The file must be a readable :term:`binary file`. + .. audit-event:: marshal.load "" marshal.load + .. note:: If an object containing an unsupported type was marshalled with :func:`dump`, :func:`load` will substitute ``None`` for the unmarshallable type. + .. versionchanged:: 3.10 + + This call used to raise a ``code.__new__`` audit event for each code object. Now + it raises a single ``marshal.load`` event for the entire load operation. + .. function:: dumps(value[, version]) @@ -89,6 +98,8 @@ The module defines these functions: The *version* argument indicates the data format that ``dumps`` should use (see below). + .. audit-event:: marshal.dumps value,version marshal.dump + .. function:: loads(bytes) @@ -96,6 +107,13 @@ The module defines these functions: :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`. Extra bytes in the input are ignored. + .. 
audit-event:: marshal.loads bytes marshal.load + + .. versionchanged:: 3.10 + + This call used to raise a ``code.__new__`` audit event for each code object. Now + it raises a single ``marshal.loads`` event for the entire load operation. + In addition, the following constants are defined: diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py index 7a7de63..ccec9fe 100644 --- a/Lib/test/audit-tests.py +++ b/Lib/test/audit-tests.py @@ -6,6 +6,7 @@ module with arguments identifying each test. """ import contextlib +import os import sys @@ -106,6 +107,32 @@ def test_block_add_hook_baseexception(): pass +def test_marshal(): + import marshal + o = ("a", "b", "c", 1, 2, 3) + payload = marshal.dumps(o) + + with TestHook() as hook: + assertEqual(o, marshal.loads(marshal.dumps(o))) + + try: + with open("test-marshal.bin", "wb") as f: + marshal.dump(o, f) + with open("test-marshal.bin", "rb") as f: + assertEqual(o, marshal.load(f)) + finally: + os.unlink("test-marshal.bin") + + actual = [(a[0], a[1]) for e, a in hook.seen if e == "marshal.dumps"] + assertSequenceEqual(actual, [(o, marshal.version)] * 2) + + actual = [a[0] for e, a in hook.seen if e == "marshal.loads"] + assertSequenceEqual(actual, [payload]) + + actual = [e for e, a in hook.seen if e == "marshal.load"] + assertSequenceEqual(actual, ["marshal.load"]) + + def test_pickle(): import pickle diff --git a/Lib/test/test_audit.py b/Lib/test/test_audit.py index 25ff34b..c5ce263 100644 --- a/Lib/test/test_audit.py +++ b/Lib/test/test_audit.py @@ -54,6 +54,11 @@ class AuditTest(unittest.TestCase): def test_block_add_hook_baseexception(self): self.do_test("test_block_add_hook_baseexception") + def test_marshal(self): + import_helper.import_module("marshal") + + self.do_test("test_marshal") + def test_pickle(self): import_helper.import_module("pickle") diff --git a/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst b/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst new file mode 
100644 index 0000000..88b70c7 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst @@ -0,0 +1,5 @@ +Add auditing events to the :mod:`marshal` module, and stop raising +``code.__new__`` events for every unmarshalled code object. Directly +instantiated code objects will continue to raise an event, and audit event +handlers should inspect or collect the raw marshal data. This reduces a +significant performance overhead when loading from ``.pyc`` files. diff --git a/Python/marshal.c b/Python/marshal.c index fa4ec9e..4125240 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -596,14 +596,18 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) { char buf[BUFSIZ]; WFILE wf; + if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) { + return; /* caller must check PyErr_Occurred() */ + } memset(&wf, 0, sizeof(wf)); wf.fp = fp; wf.ptr = wf.buf = buf; wf.end = wf.ptr + sizeof(buf); wf.error = WFERR_OK; wf.version = version; - if (w_init_refs(&wf, version)) - return; /* caller mush check PyErr_Occurred() */ + if (w_init_refs(&wf, version)) { + return; /* caller must check PyErr_Occurred() */ + } w_object(x, &wf); w_clear_refs(&wf); w_flush(&wf); @@ -1371,12 +1375,6 @@ r_object(RFILE *p) if (linetable == NULL) goto code_error; - if (PySys_Audit("code.__new__", "OOOiiiiii", - code, filename, name, argcount, posonlyargcount, - kwonlyargcount, nlocals, stacksize, flags) < 0) { - goto code_error; - } - v = (PyObject *) PyCode_NewWithPosOnlyArgs( argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, @@ -1435,6 +1433,15 @@ read_object(RFILE *p) fprintf(stderr, "XXX readobject called with exception set\n"); return NULL; } + if (p->ptr && p->end) { + if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) { + return NULL; + } + } else if (p->fp || p->readable) { + if (PySys_Audit("marshal.load", NULL) < 0) { + return NULL; + } + } v = r_object(p); if (v == NULL && !PyErr_Occurred()) 
PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object"); @@ -1531,7 +1538,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp) rf.refs = PyList_New(0); if (rf.refs == NULL) return NULL; - result = r_object(&rf); + result = read_object(&rf); Py_DECREF(rf.refs); if (rf.buf != NULL) PyMem_Free(rf.buf); @@ -1552,7 +1559,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len) rf.refs = PyList_New(0); if (rf.refs == NULL) return NULL; - result = r_object(&rf); + result = read_object(&rf); Py_DECREF(rf.refs); if (rf.buf != NULL) PyMem_Free(rf.buf); @@ -1564,6 +1571,9 @@ PyMarshal_WriteObjectToString(PyObject *x, int version) { WFILE wf; + if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) { + return NULL; + } memset(&wf, 0, sizeof(wf)); wf.str = PyBytes_FromStringAndSize((char *)NULL, 50); if (wf.str == NULL) -- cgit v0.12