summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Lib/test/test_xxtestfuzz.py23
-rw-r--r--Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst1
-rw-r--r--Modules/_xxtestfuzz/README.rst46
-rw-r--r--Modules/_xxtestfuzz/_xxtestfuzz.c53
-rw-r--r--Modules/_xxtestfuzz/fuzz_tests.txt3
-rw-r--r--Modules/_xxtestfuzz/fuzzer.c120
-rw-r--r--setup.py6
7 files changed, 252 insertions, 0 deletions
diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py
new file mode 100644
index 0000000..532f5fe
--- /dev/null
+++ b/Lib/test/test_xxtestfuzz.py
@@ -0,0 +1,23 @@
+import faulthandler
+import test.support
+import unittest
+
+_xxtestfuzz = test.support.import_module('_xxtestfuzz')
+
+
+class TestFuzzer(unittest.TestCase):
+ """To keep our https://github.com/google/oss-fuzz API working."""
+
+ def test_sample_input_smoke_test(self):
+ """This is only a regression test: Check that it doesn't crash."""
+ _xxtestfuzz.run(b"")
+ _xxtestfuzz.run(b"\0")
+ _xxtestfuzz.run(b"{")
+ _xxtestfuzz.run(b" ")
+ _xxtestfuzz.run(b"x")
+ _xxtestfuzz.run(b"1")
+
+
+if __name__ == "__main__":
+ faulthandler.enable()
+ unittest.main()
diff --git a/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst b/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst
new file mode 100644
index 0000000..9a0fb16
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst
@@ -0,0 +1 @@
+Add fuzz tests for float(str), int(str), unicode(str); for oss-fuzz.
diff --git a/Modules/_xxtestfuzz/README.rst b/Modules/_xxtestfuzz/README.rst
new file mode 100644
index 0000000..b48f3c8
--- /dev/null
+++ b/Modules/_xxtestfuzz/README.rst
@@ -0,0 +1,46 @@
+Fuzz Tests for CPython
+======================
+
+These fuzz tests are designed to be included in Google's `oss-fuzz`_ project.
+
+oss-fuzz works against a library exposing a function of the form
+``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide
+that library (``fuzzer.c``), and include a ``_xxtestfuzz`` module for testing
+with some toy values -- no fuzzing occurs in Python's test suite.
+
+oss-fuzz will regularly pull from CPython, discover all the tests in
+``fuzz_tests.txt``, and run them -- so adding a new test here means it will
+automatically be run in oss-fuzz, while also being smoke-tested as part of
+CPython's test suite.
+
+Adding a new fuzz test
+----------------------
+
+Add the test name on a new line in ``fuzz_tests.txt``.
+
+In ``fuzzer.c``, add a function to be run::
+
+ int $test_name (const char* data, size_t size) {
+ ...
+ return 0;
+ }
+
+
+And invoke it from ``LLVMFuzzerTestOneInput``::
+
+ #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_$test_name)
+ rv |= _run_fuzz(data, size, $test_name);
+ #endif
+
+``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in
+``fuzz_tests.txt`` run separately.
+
+What makes a good fuzz test
+---------------------------
+
+Libraries written in C that might handle untrusted data are worthwhile. The
+more complex the logic (e.g. parsing), the more likely this is to be a useful
+fuzz test. See the existing examples for reference, and refer to the
+`oss-fuzz`_ docs.
+
+.. _oss-fuzz: https://github.com/google/oss-fuzz
diff --git a/Modules/_xxtestfuzz/_xxtestfuzz.c b/Modules/_xxtestfuzz/_xxtestfuzz.c
new file mode 100644
index 0000000..781dd23
--- /dev/null
+++ b/Modules/_xxtestfuzz/_xxtestfuzz.c
@@ -0,0 +1,53 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+/* Python-callable wrapper that feeds one buffer to LLVMFuzzerTestOneInput.
+
+   Returns None on success.  Returns NULL with an exception set when the
+   arguments cannot be parsed, when an exception is pending after the fuzzer
+   call, or (RuntimeError) when the fuzzer returns a nonzero code. */
+static PyObject* _fuzz_run(PyObject* self, PyObject* args) {
+    const char* input;
+    Py_ssize_t input_len;
+    int status;
+
+    if (!PyArg_ParseTuple(args, "s#", &input, &input_len)) {
+        return NULL;
+    }
+
+    status = LLVMFuzzerTestOneInput((const uint8_t*)input, input_len);
+    if (PyErr_Occurred()) {
+        /* Propagate whatever exception is pending after the call. */
+        return NULL;
+    }
+    if (status == 0) {
+        Py_RETURN_NONE;
+    }
+
+    /* Nonzero return codes are reserved for future use. */
+    PyErr_Format(
+        PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", status);
+    return NULL;
+}
+
+/* Method table for the module: exposes _fuzz_run to Python as "run", taking
+ positional arguments (METH_VARARGS) and carrying an empty docstring. */
+static PyMethodDef module_methods[] = {
+ {"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""},
+ {NULL}, /* sentinel marking the end of the table */
+};
+
+/* Module definition handed to PyModule_Create by PyInit__xxtestfuzz.
+   m_name is kept identical to the name the module is imported under
+   ("_xxtestfuzz") so that the module's reported name matches its import
+   name.  The module has no docstring, no per-module state and no slots or
+   GC hooks. */
+static struct PyModuleDef _fuzzmodule = {
+    PyModuleDef_HEAD_INIT,
+    "_xxtestfuzz",   /* m_name */
+    NULL,            /* m_doc */
+    0,               /* m_size */
+    module_methods,  /* m_methods */
+    NULL,            /* m_slots */
+    NULL,            /* m_traverse */
+    NULL,            /* m_clear */
+    NULL             /* m_free */
+};
+
+/* Module initializer: build the module object from _fuzzmodule.
+   Returns the new module, or NULL if PyModule_Create fails. */
+PyMODINIT_FUNC
+PyInit__xxtestfuzz(void)
+{
+    /* PyModule_Create already yields NULL on failure, so its result can be
+       handed straight back to the caller. */
+    return PyModule_Create(&_fuzzmodule);
+}
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
new file mode 100644
index 0000000..2e53bfd
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_tests.txt
@@ -0,0 +1,3 @@
+fuzz_builtin_float
+fuzz_builtin_int
+fuzz_builtin_unicode
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
new file mode 100644
index 0000000..36f721e
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -0,0 +1,120 @@
+/* A fuzz test for CPython.
+
+ The only exposed function is LLVMFuzzerTestOneInput, which is called by
+ fuzzers and by the _xxtestfuzz module for smoke tests.
+
+ To build exactly one fuzz test, as when running in oss-fuzz etc.,
+ build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
+ LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
+ -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
+
+ See the source code for LLVMFuzzerTestOneInput for details. */
+
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+/* Fuzz PyFloat_FromString as a proxy for float(str).
+
+   The input is wrapped in a bytes object and handed to PyFloat_FromString.
+   A pending ValueError is treated as an expected outcome and cleared; any
+   other pending exception is left set for the caller.  Always returns 0. */
+static int fuzz_builtin_float(const char* data, size_t size) {
+    PyObject* input = PyBytes_FromStringAndSize(data, size);
+    PyObject* parsed;
+
+    if (input == NULL) {
+        return 0;
+    }
+
+    parsed = PyFloat_FromString(input);
+    if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
+        PyErr_Clear();
+    }
+
+    Py_XDECREF(parsed);
+    Py_DECREF(input);
+    return 0;
+}
+
+/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str).
+
+ The input is decoded with PyUnicode_FromStringAndSize and then parsed as an
+ integer in a base derived from a hash of the input. Always returns 0. */
+static int fuzz_builtin_int(const char* data, size_t size) {
+ /* Pick a random valid base. (When the fuzzed function takes extra
+ parameters, it's somewhat normal to hash the input to generate those
+ parameters. We want to exercise all code paths, so we do so here.) */
+ int base = _Py_HashBytes(data, size) % 37;
+ if (base == 1) {
+ // 1 is the only number between 0 and 36 that is not a valid base.
+ base = 0;
+ }
+ if (base == -1) {
+ // Skip this input: negating -1 below would give 1, which is not a
+ // valid base.
+ // NOTE(review): a remainder of -1 does not by itself look like an error
+ // report from the hash -- confirm the intent of this early return.
+ return 0;
+ }
+ if (base < 0) {
+ // Any remaining negative remainder (-36..-2) is folded into 2..36.
+ base = -base;
+ }
+
+ PyObject* s = PyUnicode_FromStringAndSize(data, size);
+ if (s == NULL) {
+ // Undecodable input is an expected outcome; only that error is cleared.
+ if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+ PyErr_Clear();
+ }
+ return 0;
+ }
+ PyObject* l = PyLong_FromUnicodeObject(s, base);
+ if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+ PyErr_Clear();
+ }
+ // NOTE(review): this unconditional clear discards any pending exception,
+ // not just ValueError, which makes the check above redundant -- confirm
+ // whether other errors are really meant to be swallowed here.
+ PyErr_Clear();
+ Py_XDECREF(l);
+ Py_DECREF(s);
+ return 0;
+}
+
+/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str).
+
+   A UnicodeDecodeError from undecodable input is treated as an expected
+   outcome and cleared; any other pending exception is left set for the
+   caller.  Always returns 0. */
+static int fuzz_builtin_unicode(const char* data, size_t size) {
+    PyObject* decoded = PyUnicode_FromStringAndSize(data, size);
+
+    if (decoded != NULL) {
+        Py_DECREF(decoded);
+        return 0;
+    }
+
+    if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        PyErr_Clear();
+    }
+    return 0;
+}
+
+/* Run one fuzz test on the given input and abort the process if it leaves a
+   Python exception pending.  Returns the fuzz test's own return value. */
+static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
+    const int result = fuzzer((const char*) data, size);
+
+    if (PyErr_Occurred()) {
+        /* Fuzz tests should handle expected errors for themselves.
+           This is a last-ditch check in case they didn't. */
+        PyErr_Print();
+        abort();
+    }
+
+    /* Someday the return value might mean something, propagate it. */
+    return result;
+}
+
+/* CPython generates a lot of leak warnings for whatever reason, so leak
+ checking is opted out of: this always returns 1.
+ NOTE(review): the name matches LeakSanitizer's user-definable opt-out hook
+ -- confirm against the sanitizer interface header. */
+int __lsan_is_turned_off(void) { return 1; }
+
+/* Fuzz test interface.
+   This returns the bitwise or of all fuzz tests' return values.
+
+   All fuzz tests must return 0, as all nonzero return codes are reserved for
+   future use -- we propagate the return values for that future case.
+   (And we bitwise or when running multiple tests to verify that normally we
+   only return 0.)
+
+   By default every test is compiled in.  When built with -D _Py_FUZZ_ONE,
+   only the tests whose _Py_FUZZ_<test_name> macro is also defined are
+   compiled in.  To add a test, append another guarded block of the same
+   shape as the ones below. */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+    if (!Py_IsInitialized()) {
+        /* LLVMFuzzerTestOneInput is called repeatedly from the same process,
+           with no separate initialization phase, sadly, so we need to
+           initialize CPython ourselves on the first run. */
+        Py_InitializeEx(0);
+    }
+
+    int rv = 0;
+
+    /* The guards are spelled out rather than hidden behind a function-like
+       macro: a `defined` operator produced by macro expansion inside #if has
+       undefined behavior in C and is evaluated differently across
+       compilers. */
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
+    rv |= _run_fuzz(data, size, fuzz_builtin_float);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
+    rv |= _run_fuzz(data, size, fuzz_builtin_int);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
+    rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
+#endif
+    return rv;
+}
diff --git a/setup.py b/setup.py
index 36a52bc..3c7c904 100644
--- a/setup.py
+++ b/setup.py
@@ -715,6 +715,12 @@ class PyBuildExt(build_ext):
# syslog daemon interface
exts.append( Extension('syslog', ['syslogmodule.c']) )
+ # Fuzz tests.
+ exts.append( Extension(
+ '_xxtestfuzz',
+ ['_xxtestfuzz/_xxtestfuzz.c', '_xxtestfuzz/fuzzer.c'])
+ )
+
#
# Here ends the simple stuff. From here on, modules need certain
# libraries, are platform-specific, or present other surprises.