1 files changed, 120 insertions, 0 deletions
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
new file mode 100644
index 0000000..36f721e
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -0,0 +1,120 @@
+/* A fuzz test for CPython.
+
+  The only exposed function is LLVMFuzzerTestOneInput, which is called by
+  fuzzers and by the _fuzz module for smoke tests.
+
+  To build exactly one fuzz test, as when running in oss-fuzz etc.,
+  build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
+  LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
+      -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
+
+  See the source code for LLVMFuzzerTestOneInput for details. */
+
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+/* Fuzz PyFloat_FromString (proxy for float(bytes)); ValueError is expected. */
+static int fuzz_builtin_float(const char* data, size_t size) {
+    PyObject* input = PyBytes_FromStringAndSize(data, size);
+    if (input != NULL) {
+        PyObject* parsed = PyFloat_FromString(input);
+        if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
+            PyErr_Clear();  /* Other errors stay pending for the caller. */
+        }
+        Py_XDECREF(parsed);
+        Py_DECREF(input);
+    }
+    return 0;
+}
+
+/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str), parsing the input
+   with a base derived from the input's hash.  Always returns 0. */
+static int fuzz_builtin_int(const char* data, size_t size) {
+    /* Pick a pseudo-random valid base. (When the fuzzed function takes extra
+       parameters, it's somewhat normal to hash the input to generate those
+       parameters. We want to exercise all code paths, so we do so here.)
+       The remainder lies in [-36, 36]: fold it onto [0, 36] first and only
+       then replace 1, so that a remainder of -1 also yields a valid base
+       instead of silently skipping the input. */
+    int base = _Py_HashBytes(data, size) % 37;
+    if (base < 0) {
+        base = -base;
+    }
+    if (base == 1) {
+        // 1 is the only number between 0 and 36 that is not a valid base.
+        base = 0;
+    }
+
+    PyObject* s = PyUnicode_FromStringAndSize(data, size);
+    if (s == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+            PyErr_Clear();  /* Undecodable input is expected. */
+        }
+        return 0;  /* Any other error stays pending for the caller. */
+    }
+    PyObject* l = PyLong_FromUnicodeObject(s, base);
+    /* Parse failures (normally ValueError) are expected.  NOTE: this clears
+       every pending error, not only ValueError, so none reaches the caller. */
+    PyErr_Clear();
+    Py_XDECREF(l);
+    Py_DECREF(s);
+    return 0;
+}
+
+/* Fuzz PyUnicode_FromStringAndSize by decoding the raw input bytes. */
+static int fuzz_builtin_unicode(const char* data, size_t size) {
+    PyObject* text = PyUnicode_FromStringAndSize(data, size);
+    if (!text && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        PyErr_Clear();  /* Undecodable input is expected, not a failure. */
+    }
+    Py_XDECREF(text);
+    return 0;
+}
+
+/* Run one fuzz test and abort if it leaves a Python exception pending. */
+static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
+    int status = fuzzer((const char*) data, size);
+    if (PyErr_Occurred() == NULL) {
+        /* Someday the return value might mean something, propagate it. */
+        return status;
+    }
+    /* Fuzz tests should clear the errors they expect themselves; anything
+       still pending here is a last-ditch catch: report it and die. */
+    PyErr_Print();
+    abort();
+}
+
+/* Turn LeakSanitizer off: CPython generates a lot of leak warnings. */
+int __lsan_is_turned_off(void) { enum { LSAN_OFF = 1 }; return LSAN_OFF; }
+
+/* Fuzz test interface.
+   Returns the bitwise or of every enabled fuzz test's return value.  All
+   fuzz tests must return 0: nonzero codes are reserved for future use, and
+   or-ing the results lets us verify that normally we only return 0.
+
+   Every test is compiled in unless _Py_FUZZ_ONE is defined; then only tests
+   whose _Py_FUZZ_<test_name> macro is also defined run.  Each condition is
+   written out in full because a `defined` operator produced by macro
+   expansion inside #if has undefined behavior in C (C11 6.10.1p4), so a
+   helper macro wrapping `defined` is not portable. */
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+    if (!Py_IsInitialized()) {
+        /* The fuzzer calls this repeatedly from the same process, with no
+           separate initialization phase, so CPython has to be initialized
+           here on the first call. */
+        Py_InitializeEx(0);
+    }
+
+    int rv = 0;
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
+    rv |= _run_fuzz(data, size, fuzz_builtin_float);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
+    rv |= _run_fuzz(data, size, fuzz_builtin_int);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
+    rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
+#endif
+    return rv;
+}