summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSteve Dower <steve.dower@python.org>2021-06-30 17:52:39 (GMT)
committerGitHub <noreply@github.com>2021-06-30 17:52:39 (GMT)
commit863e3d5c7e037b24b8294b041ed7686b522973d8 (patch)
tree11ac1debd39ff3d45edb6ed63094e6dd9be6ff5f
parentf790bc8084d3dfd723889740f9129ac8fcb2fa02 (diff)
downloadcpython-863e3d5c7e037b24b8294b041ed7686b522973d8.zip
cpython-863e3d5c7e037b24b8294b041ed7686b522973d8.tar.gz
cpython-863e3d5c7e037b24b8294b041ed7686b522973d8.tar.bz2
bpo-41180: Replace marshal code.__new__ audit event with marshal.load[s] and marshal.dumps (GH-26971)
-rw-r--r--Doc/library/marshal.rst18
-rw-r--r--Lib/test/audit-tests.py27
-rw-r--r--Lib/test/test_audit.py5
-rw-r--r--Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst5
-rw-r--r--Python/marshal.c30
5 files changed, 75 insertions, 10 deletions
diff --git a/Doc/library/marshal.rst b/Doc/library/marshal.rst
index d65afc2..b38ba54 100644
--- a/Doc/library/marshal.rst
+++ b/Doc/library/marshal.rst
@@ -66,6 +66,8 @@ The module defines these functions:
The *version* argument indicates the data format that ``dump`` should use
(see below).
+ .. audit-event:: marshal.dumps value,version marshal.dump
+
.. function:: load(file)
@@ -74,11 +76,18 @@ The module defines these functions:
format), raise :exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`. The
file must be a readable :term:`binary file`.
+ .. audit-event:: marshal.load "" marshal.load
+
.. note::
If an object containing an unsupported type was marshalled with :func:`dump`,
:func:`load` will substitute ``None`` for the unmarshallable type.
+ .. versionchanged:: 3.9.7
+
+ This call used to raise a ``code.__new__`` audit event for each code object. Now
+ it raises a single ``marshal.load`` event for the entire load operation.
+
.. function:: dumps(value[, version])
@@ -89,6 +98,8 @@ The module defines these functions:
The *version* argument indicates the data format that ``dumps`` should use
(see below).
+ .. audit-event:: marshal.dumps value,version marshal.dump
+
.. function:: loads(bytes)
@@ -96,6 +107,13 @@ The module defines these functions:
:exc:`EOFError`, :exc:`ValueError` or :exc:`TypeError`. Extra bytes in the
input are ignored.
+ .. audit-event:: marshal.loads bytes marshal.load
+
+ .. versionchanged:: 3.9.7
+
+ This call used to raise a ``code.__new__`` audit event for each code object. Now
+ it raises a single ``marshal.loads`` event for the entire load operation.
+
In addition, the following constants are defined:
diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py
index 8e66594..95216bc 100644
--- a/Lib/test/audit-tests.py
+++ b/Lib/test/audit-tests.py
@@ -6,6 +6,7 @@ module with arguments identifying each test.
"""
import contextlib
+import os
import sys
@@ -106,6 +107,32 @@ def test_block_add_hook_baseexception():
pass
+def test_marshal():
+ import marshal
+ o = ("a", "b", "c", 1, 2, 3)
+ payload = marshal.dumps(o)
+
+ with TestHook() as hook:
+ assertEqual(o, marshal.loads(marshal.dumps(o)))
+
+ try:
+ with open("test-marshal.bin", "wb") as f:
+ marshal.dump(o, f)
+ with open("test-marshal.bin", "rb") as f:
+ assertEqual(o, marshal.load(f))
+ finally:
+ os.unlink("test-marshal.bin")
+
+ actual = [(a[0], a[1]) for e, a in hook.seen if e == "marshal.dumps"]
+ assertSequenceEqual(actual, [(o, marshal.version)] * 2)
+
+ actual = [a[0] for e, a in hook.seen if e == "marshal.loads"]
+ assertSequenceEqual(actual, [payload])
+
+ actual = [e for e, a in hook.seen if e == "marshal.load"]
+ assertSequenceEqual(actual, ["marshal.load"])
+
+
def test_pickle():
import pickle
diff --git a/Lib/test/test_audit.py b/Lib/test/test_audit.py
index a9ac6fe..387a312 100644
--- a/Lib/test/test_audit.py
+++ b/Lib/test/test_audit.py
@@ -51,6 +51,11 @@ class AuditTest(unittest.TestCase):
def test_block_add_hook_baseexception(self):
self.do_test("test_block_add_hook_baseexception")
+ def test_marshal(self):
+ support.import_module("marshal")
+
+ self.do_test("test_marshal")
+
def test_pickle(self):
support.import_module("pickle")
diff --git a/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst b/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst
new file mode 100644
index 0000000..88b70c7
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2021-06-29-23-40-22.bpo-41180.uTWHv_.rst
@@ -0,0 +1,5 @@
+Add auditing events to the :mod:`marshal` module, and stop raising
+``code.__init__`` events for every unmarshalled code object. Directly
+instantiated code objects will continue to raise an event, and audit event
+handlers should inspect or collect the raw marshal data. This reduces a
+significant performance overhead when loading from ``.pyc`` files.
diff --git a/Python/marshal.c b/Python/marshal.c
index c4538bd..baafa3e 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -596,14 +596,18 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
{
char buf[BUFSIZ];
WFILE wf;
+ if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
+ return; /* caller must check PyErr_Occurred() */
+ }
memset(&wf, 0, sizeof(wf));
wf.fp = fp;
wf.ptr = wf.buf = buf;
wf.end = wf.ptr + sizeof(buf);
wf.error = WFERR_OK;
wf.version = version;
- if (w_init_refs(&wf, version))
- return; /* caller mush check PyErr_Occurred() */
+ if (w_init_refs(&wf, version)) {
+ return; /* caller must check PyErr_Occurred() */
+ }
w_object(x, &wf);
w_clear_refs(&wf);
w_flush(&wf);
@@ -1371,12 +1375,6 @@ r_object(RFILE *p)
if (lnotab == NULL)
goto code_error;
- if (PySys_Audit("code.__new__", "OOOiiiiii",
- code, filename, name, argcount, posonlyargcount,
- kwonlyargcount, nlocals, stacksize, flags) < 0) {
- goto code_error;
- }
-
v = (PyObject *) PyCode_NewWithPosOnlyArgs(
argcount, posonlyargcount, kwonlyargcount,
nlocals, stacksize, flags,
@@ -1435,6 +1433,15 @@ read_object(RFILE *p)
fprintf(stderr, "XXX readobject called with exception set\n");
return NULL;
}
+ if (p->ptr && p->end) {
+ if (PySys_Audit("marshal.loads", "y#", p->ptr, (Py_ssize_t)(p->end - p->ptr)) < 0) {
+ return NULL;
+ }
+ } else if (p->fp || p->readable) {
+ if (PySys_Audit("marshal.load", NULL) < 0) {
+ return NULL;
+ }
+ }
v = r_object(p);
if (v == NULL && !PyErr_Occurred())
PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
@@ -1531,7 +1538,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
rf.refs = PyList_New(0);
if (rf.refs == NULL)
return NULL;
- result = r_object(&rf);
+ result = read_object(&rf);
Py_DECREF(rf.refs);
if (rf.buf != NULL)
PyMem_FREE(rf.buf);
@@ -1552,7 +1559,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
rf.refs = PyList_New(0);
if (rf.refs == NULL)
return NULL;
- result = r_object(&rf);
+ result = read_object(&rf);
Py_DECREF(rf.refs);
if (rf.buf != NULL)
PyMem_FREE(rf.buf);
@@ -1564,6 +1571,9 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
{
WFILE wf;
+ if (PySys_Audit("marshal.dumps", "Oi", x, version) < 0) {
+ return NULL;
+ }
memset(&wf, 0, sizeof(wf));
wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
if (wf.str == NULL)