summaryrefslogtreecommitdiffstats
path: root/Modules/_xxtestfuzz
diff options
context:
space:
mode:
Diffstat (limited to 'Modules/_xxtestfuzz')
-rw-r--r--Modules/_xxtestfuzz/README.rst56
-rw-r--r--Modules/_xxtestfuzz/_xxtestfuzz.c48
-rw-r--r--Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict40
-rw-r--r--Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict219
-rw-r--r--Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csvbin118 -> 0 bytes
-rw-r--r--Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json1
-rw-r--r--Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json1
-rw-r--r--Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json58
-rw-r--r--Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json1
-rw-r--r--Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json6
-rw-r--r--Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json1
-rw-r--r--Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links1
-rw-r--r--Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters1
-rw-r--r--Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn1
-rw-r--r--Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number1
-rw-r--r--Modules/_xxtestfuzz/fuzz_tests.txt7
-rw-r--r--Modules/_xxtestfuzz/fuzzer.c426
17 files changed, 0 insertions, 868 deletions
diff --git a/Modules/_xxtestfuzz/README.rst b/Modules/_xxtestfuzz/README.rst
deleted file mode 100644
index 42bd02a..0000000
--- a/Modules/_xxtestfuzz/README.rst
+++ /dev/null
@@ -1,56 +0,0 @@
-Fuzz Tests for CPython
-======================
-
-These fuzz tests are designed to be included in Google's `oss-fuzz`_ project.
-
-oss-fuzz works against a library exposing a function of the form
-``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide
-that library (``fuzzer.c``), and include a ``_fuzz`` module for testing with
-some toy values -- no fuzzing occurs in Python's test suite.
-
-oss-fuzz will regularly pull from CPython, discover all the tests in
-``fuzz_tests.txt``, and run them -- so adding a new test here means it will
-automatically be run in oss-fuzz, while also being smoke-tested as part of
-CPython's test suite.
-
-Adding a new fuzz test
-----------------------
-
-Add the test name on a new line in ``fuzz_tests.txt``.
-
-In ``fuzzer.c``, add a function to be run::
-
- int $test_name (const char* data, size_t size) {
- ...
- return 0;
- }
-
-
-And invoke it from ``LLVMFuzzerTestOneInput``::
-
- #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
- rv |= _run_fuzz(data, size, fuzz_builtin_float);
- #endif
-
-``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in
-``fuzz_tests.txt`` run separately.
-
-Seed data (corpus) for the test can be provided in a subfolder called
-``<test_name>_corpus`` such as ``fuzz_json_loads_corpus``. A wide variety
-of good input samples allows the fuzzer to more easily explore a diverse
-set of paths and provides a better base to find buggy input from.
-
-Dictionaries of tokens (see oss-fuzz documentation for more details) can
-be placed in the ``dictionaries`` folder with the name of the test.
-For example, ``dictionaries/fuzz_json_loads.dict`` contains JSON tokens
-to guide the fuzzer.
-
-What makes a good fuzz test
----------------------------
-
-Libraries written in C that might handle untrusted data are worthwhile. The
-more complex the logic (e.g. parsing), the more likely this is to be a useful
-fuzz test. See the existing examples for reference, and refer to the
-`oss-fuzz`_ docs.
-
-.. _oss-fuzz: https://github.com/google/oss-fuzz
diff --git a/Modules/_xxtestfuzz/_xxtestfuzz.c b/Modules/_xxtestfuzz/_xxtestfuzz.c
deleted file mode 100644
index e0694de..0000000
--- a/Modules/_xxtestfuzz/_xxtestfuzz.c
+++ /dev/null
@@ -1,48 +0,0 @@
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
-
-static PyObject* _fuzz_run(PyObject* self, PyObject* args) {
- const char* buf;
- Py_ssize_t size;
- if (!PyArg_ParseTuple(args, "s#", &buf, &size)) {
- return NULL;
- }
- int rv = LLVMFuzzerTestOneInput((const uint8_t*)buf, size);
- if (PyErr_Occurred()) {
- return NULL;
- }
- if (rv != 0) {
- // Nonzero return codes are reserved for future use.
- PyErr_Format(
- PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", rv);
- return NULL;
- }
- Py_RETURN_NONE;
-}
-
-static PyMethodDef module_methods[] = {
- {"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""},
- {NULL},
-};
-
-static struct PyModuleDef _fuzzmodule = {
- PyModuleDef_HEAD_INIT,
- "_fuzz",
- NULL,
- 0,
- module_methods,
- NULL,
- NULL,
- NULL,
- NULL
-};
-
-PyMODINIT_FUNC
-PyInit__xxtestfuzz(void)
-{
- return PyModule_Create(&_fuzzmodule);
-}
diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict
deleted file mode 100644
index ad64917..0000000
--- a/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict
+++ /dev/null
@@ -1,40 +0,0 @@
-"0"
-",0"
-":0"
-"0:"
-"-1.2e+3"
-
-"true"
-"false"
-"null"
-
-"\"\""
-",\"\""
-":\"\""
-"\"\":"
-
-"{}"
-",{}"
-":{}"
-"{\"\":0}"
-"{{}}"
-
-"[]"
-",[]"
-":[]"
-"[0]"
-"[[]]"
-
-"''"
-"\\"
-"\\b"
-"\\f"
-"\\n"
-"\\r"
-"\\t"
-"\\u0000"
-"\\x00"
-"\\0"
-"\\uD800\\uDC00"
-"\\uDBFF\\uDFFF"
-
diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
deleted file mode 100644
index 961306a..0000000
--- a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict
+++ /dev/null
@@ -1,219 +0,0 @@
-"?"
-"abc"
-"()"
-"[]"
-"abc|def"
-"abc|def|ghi"
-"^xxx$"
-"ab\\b\\d\\bcd"
-"\\w|\\d"
-"a*?"
-"abc+"
-"abc+?"
-"xyz?"
-"xyz??"
-"xyz{0,1}"
-"xyz{0,1}?"
-"xyz{93}"
-"xyz{1,32}"
-"xyz{1,32}?"
-"xyz{1,}"
-"xyz{1,}?"
-"a\\fb\\nc\\rd\\te\\vf"
-"a\\nb\\bc"
-"(?:foo)"
-"(?: foo )"
-"foo|(bar|baz)|quux"
-"foo(?=bar)baz"
-"foo(?!bar)baz"
-"foo(?<=bar)baz"
-"foo(?<!bar)baz"
-"()"
-"(?=)"
-"[]"
-"[x]"
-"[xyz]"
-"[a-zA-Z0-9]"
-"[-123]"
-"[^123]"
-"]"
-"}"
-"[a-b-c]"
-"[x\\dz]"
-"[\\d-z]"
-"[\\d-\\d]"
-"[z-\\d]"
-"\\cj\\cJ\\ci\\cI\\ck\\cK"
-"\\c!"
-"\\c_"
-"\\c~"
-"[\\c!]"
-"[\\c_]"
-"[\\c~]"
-"[\\ca]"
-"[\\cz]"
-"[\\cA]"
-"[\\cZ]"
-"[\\c1]"
-"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ "
-"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]"
-"\\8"
-"\\9"
-"\\11"
-"\\11a"
-"\\011"
-"\\118"
-"\\111"
-"\\1111"
-"(x)(x)(x)\\1"
-"(x)(x)(x)\\2"
-"(x)(x)(x)\\3"
-"(x)(x)(x)\\4"
-"(x)(x)(x)\\1*"
-"(x)(x)(x)\\3*"
-"(x)(x)(x)\\4*"
-"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10"
-"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11"
-"(a)\\1"
-"(a\\1)"
-"(\\1a)"
-"(\\2)(\\1)"
-"(?=a){0,10}a"
-"(?=a){1,10}a"
-"(?=a){9,10}a"
-"(?!a)?a"
-"\\1(a)"
-"(?!(a))\\1"
-"(?!\\1(a\\1)\\1)\\1"
-"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1"
-"[\\0]"
-"[\\11]"
-"[\\11a]"
-"[\\011]"
-"[\\00011]"
-"[\\118]"
-"[\\111]"
-"[\\1111]"
-"\\x60"
-"\\x3z"
-"\\c"
-"\\u0034"
-"\\u003z"
-"foo[z]*"
-"\\u{12345}"
-"\\u{12345}\\u{23456}"
-"\\u{12345}{3}"
-"\\u{12345}*"
-"\\ud808\\udf45*"
-"[\\ud808\\udf45-\\ud809\\udccc]"
-"a"
-"a|b"
-"a\\n"
-"a$"
-"a\\b!"
-"a\\Bb"
-"a*?"
-"a?"
-"a??"
-"a{0,1}?"
-"a{1,2}?"
-"a+?"
-"(a)"
-"(a)\\1"
-"(\\1a)"
-"\\1(a)"
-"a\\s"
-"a\\S"
-"a\\D"
-"a\\w"
-"a\\W"
-"a."
-"a\\q"
-"a[a]"
-"a[^a]"
-"a[a-z]"
-"a(?:b)"
-"a(?=b)"
-"a(?!b)"
-"\\x60"
-"\\u0060"
-"\\cA"
-"\\q"
-"\\1112"
-"(a)\\1"
-"(?!a)?a\\1"
-"(?:(?=a))a\\1"
-"a{}"
-"a{,}"
-"a{"
-"a{z}"
-"a{12z}"
-"a{12,"
-"a{12,3b"
-"{}"
-"{,}"
-"{"
-"{z}"
-"{1z}"
-"{12,"
-"{12,3b"
-"a"
-"abc"
-"a[bc]d"
-"a|bc"
-"ab|c"
-"a||bc"
-"(?:ab)"
-"(?:ab|cde)"
-"(?:ab)|cde"
-"(ab)"
-"(ab|cde)"
-"(ab)\\1"
-"(ab|cde)\\1"
-"(?:ab)?"
-"(?:ab)+"
-"a?"
-"a+"
-"a??"
-"a*?"
-"a+?"
-"(?:a?)?"
-"(?:a+)?"
-"(?:a?)+"
-"(?:a*)+"
-"(?:a+)+"
-"(?:a?)*"
-"(?:a*)*"
-"(?:a+)*"
-"a{0}"
-"(?:a+){0,0}"
-"a*b"
-"a+b"
-"a*b|c"
-"a+b|c"
-"(?:a{5,1000000}){3,1000000}"
-"(?:ab){4,7}"
-"a\\bc"
-"a\\sc"
-"a\\Sc"
-"a(?=b)c"
-"a(?=bbb|bb)c"
-"a(?!bbb|bb)c"
-"\xe2\x81\xa3"
-"[\xe2\x81\xa3]"
-"\xed\xb0\x80"
-"\xed\xa0\x80"
-"(\xed\xb0\x80)\x01"
-"((\xed\xa0\x80))\x02"
-"\xf0\x9f\x92\xa9"
-"\x01"
-"\x0f"
-"[-\xf0\x9f\x92\xa9]+"
-"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]"
-"(?<=)"
-"(?<=a)"
-"(?<!)"
-"(?<!a)"
-"(?<a>)"
-"(?<a>.)"
-"(?<a>.)\\k<a>"
diff --git a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
deleted file mode 100644
index 8b7887d..0000000
--- a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv
+++ /dev/null
Binary files differ
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json
deleted file mode 100644
index fe51488..0000000
--- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json
+++ /dev/null
@@ -1 +0,0 @@
-[]
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json
deleted file mode 100644
index 0967ef4..0000000
--- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json
+++ /dev/null
@@ -1 +0,0 @@
-{}
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json
deleted file mode 100644
index 70e2685..0000000
--- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json
+++ /dev/null
@@ -1,58 +0,0 @@
-[
- "JSON Test Pattern pass1",
- {"object with 1 member":["array with 1 element"]},
- {},
- [],
- -42,
- true,
- false,
- null,
- {
- "integer": 1234567890,
- "real": -9876.543210,
- "e": 0.123456789e-12,
- "E": 1.234567890E+34,
- "": 23456789012E66,
- "zero": 0,
- "one": 1,
- "space": " ",
- "quote": "\"",
- "backslash": "\\",
- "controls": "\b\f\n\r\t",
- "slash": "/ & \/",
- "alpha": "abcdefghijklmnopqrstuvwyz",
- "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
- "digit": "0123456789",
- "0123456789": "digit",
- "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
- "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
- "true": true,
- "false": false,
- "null": null,
- "array":[ ],
- "object":{ },
- "address": "50 St. James Street",
- "url": "http://www.JSON.org/",
- "comment": "// /* <!-- --",
- "# -- --> */": " ",
- " s p a c e d " :[1,2 , 3
-
-,
-
-4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7],
- "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
- "quotes": "&#34; \u0022 %22 0x22 034 &#x22;",
- "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
-: "A key can be any string"
- },
- 0.5 ,98.6
-,
-99.44
-,
-
-1066,
-1e1,
-0.1e1,
-1e-1,
-1e00,2e+00,2e-00
-,"rosebud"] \ No newline at end of file
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json
deleted file mode 100644
index d3c63c7..0000000
--- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json
+++ /dev/null
@@ -1 +0,0 @@
-[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] \ No newline at end of file
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json
deleted file mode 100644
index 4528d51..0000000
--- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json
+++ /dev/null
@@ -1,6 +0,0 @@
-{
- "JSON Test Pattern pass3": {
- "The outermost value": "must be an object or array.",
- "In this test": "It is an object."
- }
-}
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json
deleted file mode 100644
index ce1e6ec..0000000
--- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json
+++ /dev/null
@@ -1 +0,0 @@
-[1, 2, 3, "abcd", "xyz"]
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
deleted file mode 100644
index d99247c..0000000
--- a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links
+++ /dev/null
@@ -1 +0,0 @@
-XX<a\s*href=(.*?)[\s|>]
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
deleted file mode 100644
index 0c67ee7..0000000
--- a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters
+++ /dev/null
@@ -1 +0,0 @@
-XX^(Tim|Robert)\s+the\s+(Enchanter|Shrubber)$
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
deleted file mode 100644
index cce8919..0000000
--- a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn
+++ /dev/null
@@ -1 +0,0 @@
-XX/((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])/
diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
deleted file mode 100644
index 1e2efc5..0000000
--- a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number
+++ /dev/null
@@ -1 +0,0 @@
-XX(\+1|1)?[ \-\.]?\(?(?<areacode>[0-9]{3})\)?[ \-\.]?(?<prefix>[0-9]{3})[ \-\.]?(?<number>[0-9]{4})[ \.]*(ext|x)?[ \.]*(?<extension>[0-9]{0,5})
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
deleted file mode 100644
index 9d330a6..0000000
--- a/Modules/_xxtestfuzz/fuzz_tests.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-fuzz_builtin_float
-fuzz_builtin_int
-fuzz_builtin_unicode
-fuzz_json_loads
-fuzz_sre_compile
-fuzz_sre_match
-fuzz_csv_reader
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
deleted file mode 100644
index dae1eae..0000000
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ /dev/null
@@ -1,426 +0,0 @@
-/* A fuzz test for CPython.
-
- The only exposed function is LLVMFuzzerTestOneInput, which is called by
- fuzzers and by the _fuzz module for smoke tests.
-
- To build exactly one fuzz test, as when running in oss-fuzz etc.,
- build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
- LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
- -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
-
- See the source code for LLVMFuzzerTestOneInput for details. */
-
-#include <Python.h>
-#include <stdlib.h>
-#include <inttypes.h>
-
-/* Fuzz PyFloat_FromString as a proxy for float(str). */
-static int fuzz_builtin_float(const char* data, size_t size) {
- PyObject* s = PyBytes_FromStringAndSize(data, size);
- if (s == NULL) return 0;
- PyObject* f = PyFloat_FromString(s);
- if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
- PyErr_Clear();
- }
-
- Py_XDECREF(f);
- Py_DECREF(s);
- return 0;
-}
-
-#define MAX_INT_TEST_SIZE 0x10000
-
-/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
-static int fuzz_builtin_int(const char* data, size_t size) {
- /* Ignore test cases with very long ints to avoid timeouts;
- int("9" * 1000000) is not a very interesting test case. */
- if (size > MAX_INT_TEST_SIZE) {
- return 0;
- }
- /* Pick a random valid base. (When the fuzzed function takes extra
- parameters, it's somewhat normal to hash the input to generate those
- parameters. We want to exercise all code paths, so we do so here.) */
- int base = _Py_HashBytes(data, size) % 37;
- if (base == 1) {
- // 1 is the only number between 0 and 36 that is not a valid base.
- base = 0;
- }
- if (base == -1) {
- return 0; // An error occurred, bail early.
- }
- if (base < 0) {
- base = -base;
- }
-
- PyObject* s = PyUnicode_FromStringAndSize(data, size);
- if (s == NULL) {
- if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
- PyErr_Clear();
- }
- return 0;
- }
- PyObject* l = PyLong_FromUnicodeObject(s, base);
- if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
- PyErr_Clear();
- }
- PyErr_Clear();
- Py_XDECREF(l);
- Py_DECREF(s);
- return 0;
-}
-
-/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
-static int fuzz_builtin_unicode(const char* data, size_t size) {
- PyObject* s = PyUnicode_FromStringAndSize(data, size);
- if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
- PyErr_Clear();
- }
- Py_XDECREF(s);
- return 0;
-}
-
-#define MAX_JSON_TEST_SIZE 0x10000
-
-PyObject* json_loads_method = NULL;
-/* Called by LLVMFuzzerTestOneInput for initialization */
-static int init_json_loads() {
- /* Import json.loads */
- PyObject* json_module = PyImport_ImportModule("json");
- if (json_module == NULL) {
- return 0;
- }
- json_loads_method = PyObject_GetAttrString(json_module, "loads");
- return json_loads_method != NULL;
-}
-/* Fuzz json.loads(x) */
-static int fuzz_json_loads(const char* data, size_t size) {
- /* Since python supports arbitrarily large ints in JSON,
- long inputs can lead to timeouts on boring inputs like
- `json.loads("9" * 100000)` */
- if (size > MAX_JSON_TEST_SIZE) {
- return 0;
- }
- PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
- if (input_bytes == NULL) {
- return 0;
- }
- PyObject* parsed = _PyObject_CallOneArg(json_loads_method, input_bytes);
- if (parsed == NULL) {
- /* Ignore ValueError as the fuzzer will more than likely
- generate some invalid json and values */
- if (PyErr_ExceptionMatches(PyExc_ValueError) ||
- /* Ignore RecursionError as the fuzzer generates long sequences of
- arrays such as `[[[...` */
- PyErr_ExceptionMatches(PyExc_RecursionError) ||
- /* Ignore unicode errors, invalid byte sequences are common */
- PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)
- ) {
- PyErr_Clear();
- }
- }
- Py_DECREF(input_bytes);
- Py_XDECREF(parsed);
- return 0;
-}
-
-#define MAX_RE_TEST_SIZE 0x10000
-
-PyObject* sre_compile_method = NULL;
-PyObject* sre_error_exception = NULL;
-int SRE_FLAG_DEBUG = 0;
-/* Called by LLVMFuzzerTestOneInput for initialization */
-static int init_sre_compile() {
- /* Import sre_compile.compile and sre.error */
- PyObject* sre_compile_module = PyImport_ImportModule("sre_compile");
- if (sre_compile_module == NULL) {
- return 0;
- }
- sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile");
- if (sre_compile_method == NULL) {
- return 0;
- }
-
- PyObject* sre_constants = PyImport_ImportModule("sre_constants");
- if (sre_constants == NULL) {
- return 0;
- }
- sre_error_exception = PyObject_GetAttrString(sre_constants, "error");
- if (sre_error_exception == NULL) {
- return 0;
- }
- PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG");
- if (debug_flag == NULL) {
- return 0;
- }
- SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag);
- return 1;
-}
-/* Fuzz _sre.compile(x) */
-static int fuzz_sre_compile(const char* data, size_t size) {
- /* Ignore really long regex patterns that will timeout the fuzzer */
- if (size > MAX_RE_TEST_SIZE) {
- return 0;
- }
- /* We treat the first 2 bytes of the input as a number for the flags */
- if (size < 2) {
- return 0;
- }
- uint16_t flags = ((uint16_t*) data)[0];
- /* We remove the SRE_FLAG_DEBUG if present. This is because it
- prints to stdout which greatly decreases fuzzing speed */
- flags &= ~SRE_FLAG_DEBUG;
-
- /* Pull the pattern from the remaining bytes */
- PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2);
- if (pattern_bytes == NULL) {
- return 0;
- }
- PyObject* flags_obj = PyLong_FromUnsignedLong(flags);
- if (flags_obj == NULL) {
- Py_DECREF(pattern_bytes);
- return 0;
- }
-
- /* compiled = sre_compile.compile(data[2:], data[0:2]) */
- PyObject* compiled = PyObject_CallFunctionObjArgs(
- sre_compile_method, pattern_bytes, flags_obj, NULL);
- /* Ignore ValueError as the fuzzer will more than likely
- generate some invalid combination of flags */
- if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
- PyErr_Clear();
- }
- /* Ignore some common errors thrown by sre_parse:
- Overflow, Assertion and Index */
- if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) ||
- PyErr_ExceptionMatches(PyExc_AssertionError) ||
- PyErr_ExceptionMatches(PyExc_IndexError))
- ) {
- PyErr_Clear();
- }
- /* Ignore re.error */
- if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) {
- PyErr_Clear();
- }
-
- Py_DECREF(pattern_bytes);
- Py_DECREF(flags_obj);
- Py_XDECREF(compiled);
- return 0;
-}
-
-/* Some random patterns used to test re.match.
- Be careful not to add catastrophically slow regexes here; we want to
- exercise the matching code without causing timeouts.*/
-static const char* regex_patterns[] = {
- ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]",
- "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?",
- "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$",
- "(?:a*)*", "a{1,2}?"
-};
-const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]);
-PyObject** compiled_patterns = NULL;
-/* Called by LLVMFuzzerTestOneInput for initialization */
-static int init_sre_match() {
- PyObject* re_module = PyImport_ImportModule("re");
- if (re_module == NULL) {
- return 0;
- }
- compiled_patterns = (PyObject**) PyMem_RawMalloc(
- sizeof(PyObject*) * NUM_PATTERNS);
- if (compiled_patterns == NULL) {
- PyErr_NoMemory();
- return 0;
- }
-
- /* Precompile all the regex patterns on the first run for faster fuzzing */
- for (size_t i = 0; i < NUM_PATTERNS; i++) {
- PyObject* compiled = PyObject_CallMethod(
- re_module, "compile", "y", regex_patterns[i]);
- /* Bail if any of the patterns fail to compile */
- if (compiled == NULL) {
- return 0;
- }
- compiled_patterns[i] = compiled;
- }
- return 1;
-}
-/* Fuzz re.match(x) */
-static int fuzz_sre_match(const char* data, size_t size) {
- if (size < 1 || size > MAX_RE_TEST_SIZE) {
- return 0;
- }
- /* Use the first byte as a uint8_t specifying the index of the
- regex to use */
- unsigned char idx = (unsigned char) data[0];
- idx = idx % NUM_PATTERNS;
-
- /* Pull the string to match from the remaining bytes */
- PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1);
- if (to_match == NULL) {
- return 0;
- }
-
- PyObject* pattern = compiled_patterns[idx];
- PyObject* match_callable = PyObject_GetAttrString(pattern, "match");
-
- PyObject* matches = _PyObject_CallOneArg(match_callable, to_match);
-
- Py_XDECREF(matches);
- Py_DECREF(match_callable);
- Py_DECREF(to_match);
- return 0;
-}
-
-#define MAX_CSV_TEST_SIZE 0x10000
-PyObject* csv_module = NULL;
-PyObject* csv_error = NULL;
-/* Called by LLVMFuzzerTestOneInput for initialization */
-static int init_csv_reader() {
- /* Import csv and csv.Error */
- csv_module = PyImport_ImportModule("csv");
- if (csv_module == NULL) {
- return 0;
- }
- csv_error = PyObject_GetAttrString(csv_module, "Error");
- return csv_error != NULL;
-}
-/* Fuzz csv.reader([x]) */
-static int fuzz_csv_reader(const char* data, size_t size) {
- if (size < 1 || size > MAX_CSV_TEST_SIZE) {
- return 0;
- }
- /* Ignore non null-terminated strings since _csv can't handle
- embedded nulls */
- if (memchr(data, '\0', size) == NULL) {
- return 0;
- }
-
- PyObject* s = PyUnicode_FromString(data);
- /* Ignore exceptions until we have a valid string */
- if (s == NULL) {
- PyErr_Clear();
- return 0;
- }
-
- /* Split on \n so we can test multiple lines */
- PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n");
- if (lines == NULL) {
- Py_DECREF(s);
- return 0;
- }
-
- PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines);
- if (reader) {
- /* Consume all of the reader as an iterator */
- PyObject* parsed_line;
- while ((parsed_line = PyIter_Next(reader))) {
- Py_DECREF(parsed_line);
- }
- }
-
- /* Ignore csv.Error because we're probably going to generate
- some bad files (embedded newlines, unterminated quotes etc) */
- if (PyErr_ExceptionMatches(csv_error)) {
- PyErr_Clear();
- }
-
- Py_XDECREF(reader);
- Py_DECREF(s);
- return 0;
-}
-
-/* Run fuzzer and abort on failure. */
-static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
- int rv = fuzzer((const char*) data, size);
- if (PyErr_Occurred()) {
- /* Fuzz tests should handle expected errors for themselves.
- This is a last-ditch check in case they didn't. */
- PyErr_Print();
- abort();
- }
- /* Someday the return value might mean something, propagate it. */
- return rv;
-}
-
-/* CPython generates a lot of leak warnings for whatever reason. */
-int __lsan_is_turned_off(void) { return 1; }
-
-
-int LLVMFuzzerInitialize(int *argc, char ***argv) {
- wchar_t* wide_program_name = Py_DecodeLocale(*argv[0], NULL);
- Py_SetProgramName(wide_program_name);
- return 0;
-}
-
-/* Fuzz test interface.
- This returns the bitwise or of all fuzz tests' return values.
-
- All fuzz tests must return 0, as all nonzero return codes are reserved for
- future use -- we propagate the return values for that future case.
- (And we bitwise or when running multiple tests to verify that normally we
- only return 0.) */
-int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
- if (!Py_IsInitialized()) {
- /* LLVMFuzzerTestOneInput is called repeatedly from the same process,
- with no separate initialization phase, sadly, so we need to
- initialize CPython ourselves on the first run. */
- Py_InitializeEx(0);
- }
-
- int rv = 0;
-
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
- rv |= _run_fuzz(data, size, fuzz_builtin_float);
-#endif
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
- rv |= _run_fuzz(data, size, fuzz_builtin_int);
-#endif
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
- rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
-#endif
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
- static int JSON_LOADS_INITIALIZED = 0;
- if (!JSON_LOADS_INITIALIZED && !init_json_loads()) {
- PyErr_Print();
- abort();
- } else {
- JSON_LOADS_INITIALIZED = 1;
- }
-
- rv |= _run_fuzz(data, size, fuzz_json_loads);
-#endif
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile)
- static int SRE_COMPILE_INITIALIZED = 0;
- if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) {
- PyErr_Print();
- abort();
- } else {
- SRE_COMPILE_INITIALIZED = 1;
- }
-
- rv |= _run_fuzz(data, size, fuzz_sre_compile);
-#endif
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match)
- static int SRE_MATCH_INITIALIZED = 0;
- if (!SRE_MATCH_INITIALIZED && !init_sre_match()) {
- PyErr_Print();
- abort();
- } else {
- SRE_MATCH_INITIALIZED = 1;
- }
-
- rv |= _run_fuzz(data, size, fuzz_sre_match);
-#endif
-#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader)
- static int CSV_READER_INITIALIZED = 0;
- if (!CSV_READER_INITIALIZED && !init_csv_reader()) {
- PyErr_Print();
- abort();
- } else {
- CSV_READER_INITIALIZED = 1;
- }
-
- rv |= _run_fuzz(data, size, fuzz_csv_reader);
-#endif
- return rv;
-}