diff options
-rw-r--r-- | Lib/test/test_xxtestfuzz.py | 23 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst | 1 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/README.rst | 46 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/_xxtestfuzz.c | 53 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzz_tests.txt | 3 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzzer.c | 120 | ||||
-rw-r--r-- | setup.py | 6 |
7 files changed, 252 insertions, 0 deletions
diff --git a/Lib/test/test_xxtestfuzz.py b/Lib/test/test_xxtestfuzz.py new file mode 100644 index 0000000..532f5fe --- /dev/null +++ b/Lib/test/test_xxtestfuzz.py @@ -0,0 +1,23 @@ +import faulthandler +import test.support +import unittest + +_xxtestfuzz = test.support.import_module('_xxtestfuzz') + + +class TestFuzzer(unittest.TestCase): + """To keep our https://github.com/google/oss-fuzz API working.""" + + def test_sample_input_smoke_test(self): + """This is only a regression test: Check that it doesn't crash.""" + _xxtestfuzz.run(b"") + _xxtestfuzz.run(b"\0") + _xxtestfuzz.run(b"{") + _xxtestfuzz.run(b" ") + _xxtestfuzz.run(b"x") + _xxtestfuzz.run(b"1") + + +if __name__ == "__main__": + faulthandler.enable() + unittest.main() diff --git a/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst b/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst new file mode 100644 index 0000000..9a0fb16 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2017-08-23-17-02-55.bpo-29505.BL6Yt8.rst @@ -0,0 +1 @@ +Add fuzz tests for float(str), int(str), unicode(str); for oss-fuzz. diff --git a/Modules/_xxtestfuzz/README.rst b/Modules/_xxtestfuzz/README.rst new file mode 100644 index 0000000..b48f3c8 --- /dev/null +++ b/Modules/_xxtestfuzz/README.rst @@ -0,0 +1,46 @@ +Fuzz Tests for CPython +====================== + +These fuzz tests are designed to be included in Google's `oss-fuzz`_ project. + +oss-fuzz works against a library exposing a function of the form +``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide +that library (``fuzzer.c``), and include a ``_xxtestfuzz`` module for testing with +some toy values -- no fuzzing occurs in Python's test suite. + +oss-fuzz will regularly pull from CPython, discover all the tests in +``fuzz_tests.txt``, and run them -- so adding a new test here means it will +automatically be run in oss-fuzz, while also being smoke-tested as part of +CPython's test suite. 
+ +Adding a new fuzz test +---------------------- + +Add the test name on a new line in ``fuzz_tests.txt``. + +In ``fuzzer.c``, add a function to be run:: + + static int $test_name (const char* data, size_t size) { + ... + return 0; + } + + +And invoke it from ``LLVMFuzzerTestOneInput``:: + + #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float) + rv |= _run_fuzz(data, size, fuzz_builtin_float); + #endif + +``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in +``fuzz_tests.txt`` run separately. + +What makes a good fuzz test +--------------------------- + +Libraries written in C that might handle untrusted data are worthwhile. The +more complex the logic (e.g. parsing), the more likely this is to be a useful +fuzz test. See the existing examples for reference, and refer to the +`oss-fuzz`_ docs. + +.. _oss-fuzz: https://github.com/google/oss-fuzz diff --git a/Modules/_xxtestfuzz/_xxtestfuzz.c b/Modules/_xxtestfuzz/_xxtestfuzz.c new file mode 100644 index 0000000..781dd23 --- /dev/null +++ b/Modules/_xxtestfuzz/_xxtestfuzz.c @@ -0,0 +1,53 @@ +#define PY_SSIZE_T_CLEAN +#include <Python.h> +#include <stdlib.h> +#include <inttypes.h> + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); + +static PyObject* _fuzz_run(PyObject* self, PyObject* args) { + const char* buf; + Py_ssize_t size; + if (!PyArg_ParseTuple(args, "s#", &buf, &size)) { + return NULL; + } + int rv = LLVMFuzzerTestOneInput((const uint8_t*)buf, size); + if (PyErr_Occurred()) { + return NULL; + } + if (rv != 0) { + // Nonzero return codes are reserved for future use. 
+ PyErr_Format( + PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", rv); + return NULL; + } + Py_RETURN_NONE; +} + +static PyMethodDef module_methods[] = { + {"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""}, + {NULL}, +}; + +static struct PyModuleDef _fuzzmodule = { + PyModuleDef_HEAD_INIT, + "_xxtestfuzz", /* m_name: must match PyInit__xxtestfuzz and the import name, or __name__ is wrong */ + NULL, + 0, + module_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__xxtestfuzz(void) +{ + PyObject *m = NULL; + + if ((m = PyModule_Create(&_fuzzmodule)) == NULL) { + return NULL; + } + return m; +} diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt new file mode 100644 index 0000000..2e53bfd --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_tests.txt @@ -0,0 +1,3 @@ +fuzz_builtin_float +fuzz_builtin_int +fuzz_builtin_unicode diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c new file mode 100644 index 0000000..36f721e --- /dev/null +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -0,0 +1,120 @@ +/* A fuzz test for CPython. + + The only exposed function is LLVMFuzzerTestOneInput, which is called by + fuzzers and by the _xxtestfuzz module for smoke tests. + + To build exactly one fuzz test, as when running in oss-fuzz etc., + build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build + LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with + -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float. + + See the source code for LLVMFuzzerTestOneInput for details. */ + +#include <Python.h> +#include <stdlib.h> +#include <inttypes.h> + +/* Fuzz PyFloat_FromString as a proxy for float(str). */ +static int fuzz_builtin_float(const char* data, size_t size) { + PyObject* s = PyBytes_FromStringAndSize(data, size); + if (s == NULL) return 0; + PyObject* f = PyFloat_FromString(s); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + + Py_XDECREF(f); + Py_DECREF(s); + return 0; +} + +/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). 
*/ +static int fuzz_builtin_int(const char* data, size_t size) { + /* Pick a random valid base. (When the fuzzed function takes extra + parameters, it's somewhat normal to hash the input to generate those + parameters. We want to exercise all code paths, so we do so here.) */ + int base = _Py_HashBytes(data, size) % 37; + if (base == 1) { + // 1 is the only number between 0 and 36 that is not a valid base. + base = 0; + } + if (base == -1) { + return 0; // An error occurred, bail early. + } + if (base < 0) { + base = -base; + } + + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (s == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + return 0; + } + PyObject* l = PyLong_FromUnicodeObject(s, base); + if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + PyErr_Clear(); + Py_XDECREF(l); + Py_DECREF(s); + return 0; +} + +/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */ +static int fuzz_builtin_unicode(const char* data, size_t size) { + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + Py_XDECREF(s); + return 0; +} + +/* Run fuzzer and abort on failure. */ +static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { + int rv = fuzzer((const char*) data, size); + if (PyErr_Occurred()) { + /* Fuzz tests should handle expected errors for themselves. + This is last-ditch check in case they didn't. */ + PyErr_Print(); + abort(); + } + /* Someday the return value might mean something, propagate it. */ + return rv; +} + +/* CPython generates a lot of leak warnings for whatever reason. */ +int __lsan_is_turned_off(void) { return 1; } + +/* Fuzz test interface. + This returns the bitwise or of all fuzz test's return values. 
+ + All fuzz tests must return 0, as all nonzero return codes are reserved for + future use -- we propagate the return values for that future case. + (And we bitwise or when running multiple tests to verify that normally we + only return 0.) */ +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (!Py_IsInitialized()) { + /* LLVMFuzzerTestOneInput is called repeatedly from the same process, + with no separate initialization phase, sadly, so we need to + initialize CPython ourselves on the first run. */ + Py_InitializeEx(0); + } + + int rv = 0; + +#define _Py_FUZZ_YES(test_name) (defined(_Py_FUZZ_##test_name) || !defined(_Py_FUZZ_ONE)) +#if _Py_FUZZ_YES(fuzz_builtin_float) + rv |= _run_fuzz(data, size, fuzz_builtin_float); +#endif +#if _Py_FUZZ_YES(fuzz_builtin_int) + rv |= _run_fuzz(data, size, fuzz_builtin_int); +#endif +#if _Py_FUZZ_YES(fuzz_builtin_unicode) + rv |= _run_fuzz(data, size, fuzz_builtin_unicode); +#endif +#undef _Py_FUZZ_YES + return rv; +} @@ -715,6 +715,12 @@ class PyBuildExt(build_ext): # syslog daemon interface exts.append( Extension('syslog', ['syslogmodule.c']) ) + # Fuzz tests. + exts.append( Extension( + '_xxtestfuzz', + ['_xxtestfuzz/_xxtestfuzz.c', '_xxtestfuzz/fuzzer.c']) + ) + # # Here ends the simple stuff. From here on, modules need certain # libraries, are platform-specific, or present other surprises. |