diff options
Diffstat (limited to 'Modules/_xxtestfuzz/fuzzer.c')
| -rw-r--r-- | Modules/_xxtestfuzz/fuzzer.c | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c new file mode 100644 index 0000000..36f721e --- /dev/null +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -0,0 +1,120 @@ +/* A fuzz test for CPython. + + The only exposed function is LLVMFuzzerTestOneInput, which is called by + fuzzers and by the _fuzz module for smoke tests. + + To build exactly one fuzz test, as when running in oss-fuzz etc., + build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build + LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with + -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float. + + See the source code for LLVMFuzzerTestOneInput for details. */ + +#include <Python.h> +#include <stdlib.h> +#include <inttypes.h> + +/* Fuzz PyFloat_FromString as a proxy for float(str). */ +static int fuzz_builtin_float(const char* data, size_t size) { + PyObject* s = PyBytes_FromStringAndSize(data, size); + if (s == NULL) return 0; + PyObject* f = PyFloat_FromString(s); + if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + + Py_XDECREF(f); + Py_DECREF(s); + return 0; +} + +/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */ +static int fuzz_builtin_int(const char* data, size_t size) { + /* Pick a random valid base. (When the fuzzed function takes extra + parameters, it's somewhat normal to hash the input to generate those + parameters. We want to exercise all code paths, so we do so here.) */ + int base = _Py_HashBytes(data, size) % 37; + if (base == 1) { + // 1 is the only number between 0 and 36 that is not a valid base. + base = 0; + } + if (base == -1) { + return 0; // An error occurred, bail early. + } + if (base < 0) { + base = -base; + } + + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (s == NULL) { + if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + return 0; + } + PyObject* l = PyLong_FromUnicodeObject(s, base); + if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { + PyErr_Clear(); + } + PyErr_Clear(); + Py_XDECREF(l); + Py_DECREF(s); + return 0; +} + +/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */ +static int fuzz_builtin_unicode(const char* data, size_t size) { + PyObject* s = PyUnicode_FromStringAndSize(data, size); + if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + PyErr_Clear(); + } + Py_XDECREF(s); + return 0; +} + +/* Run fuzzer and abort on failure. */ +static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { + int rv = fuzzer((const char*) data, size); + if (PyErr_Occurred()) { + /* Fuzz tests should handle expected errors for themselves. + This is last-ditch check in case they didn't. */ + PyErr_Print(); + abort(); + } + /* Someday the return value might mean something, propagate it. */ + return rv; +} + +/* CPython generates a lot of leak warnings for whatever reason. */ +int __lsan_is_turned_off(void) { return 1; } + +/* Fuzz test interface. + This returns the bitwise or of all fuzz test's return values. + + All fuzz tests must return 0, as all nonzero return codes are reserved for + future use -- we propagate the return values for that future case. + (And we bitwise or when running multiple tests to verify that normally we + only return 0.) */ +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (!Py_IsInitialized()) { + /* LLVMFuzzerTestOneInput is called repeatedly from the same process, + with no separate initialization phase, sadly, so we need to + initialize CPython ourselves on the first run. */ + Py_InitializeEx(0); + } + + int rv = 0; + +#define _Py_FUZZ_YES(test_name) (defined(_Py_FUZZ_##test_name) || !defined(_Py_FUZZ_ONE)) +#if _Py_FUZZ_YES(fuzz_builtin_float) + rv |= _run_fuzz(data, size, fuzz_builtin_float); +#endif +#if _Py_FUZZ_YES(fuzz_builtin_int) + rv |= _run_fuzz(data, size, fuzz_builtin_int); +#endif +#if _Py_FUZZ_YES(fuzz_builtin_unicode) + rv |= _run_fuzz(data, size, fuzz_builtin_unicode); +#endif +#undef _Py_FUZZ_YES + return rv; +} |
