diff options
Diffstat (limited to 'Modules/_xxtestfuzz')
17 files changed, 0 insertions, 868 deletions
diff --git a/Modules/_xxtestfuzz/README.rst b/Modules/_xxtestfuzz/README.rst deleted file mode 100644 index 42bd02a..0000000 --- a/Modules/_xxtestfuzz/README.rst +++ /dev/null @@ -1,56 +0,0 @@ -Fuzz Tests for CPython -====================== - -These fuzz tests are designed to be included in Google's `oss-fuzz`_ project. - -oss-fuzz works against a library exposing a function of the form -``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide -that library (``fuzzer.c``), and include a ``_fuzz`` module for testing with -some toy values -- no fuzzing occurs in Python's test suite. - -oss-fuzz will regularly pull from CPython, discover all the tests in -``fuzz_tests.txt``, and run them -- so adding a new test here means it will -automatically be run in oss-fuzz, while also being smoke-tested as part of -CPython's test suite. - -Adding a new fuzz test ----------------------- - -Add the test name on a new line in ``fuzz_tests.txt``. - -In ``fuzzer.c``, add a function to be run:: - - int $test_name (const char* data, size_t size) { - ... - return 0; - } - - -And invoke it from ``LLVMFuzzerTestOneInput``:: - - #if _Py_FUZZ_YES(fuzz_builtin_float) - rv |= _run_fuzz(data, size, fuzz_builtin_float); - #endif - -``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in -``fuzz_tests.txt`` run separately. - -Seed data (corpus) for the test can be provided in a subfolder called -``<test_name>_corpus`` such as ``fuzz_json_loads_corpus``. A wide variety -of good input samples allows the fuzzer to more easily explore a diverse -set of paths and provides a better base to find buggy input from. - -Dictionaries of tokens (see oss-fuzz documentation for more details) can -be placed in the ``dictionaries`` folder with the name of the test. -For example, ``dictionaries/fuzz_json_loads.dict`` contains JSON tokens -to guide the fuzzer. - -What makes a good fuzz test ---------------------------- - -Libraries written in C that might handle untrusted data are worthwhile. The -more complex the logic (e.g. parsing), the more likely this is to be a useful -fuzz test. See the existing examples for reference, and refer to the -`oss-fuzz`_ docs. - -.. _oss-fuzz: https://github.com/google/oss-fuzz diff --git a/Modules/_xxtestfuzz/_xxtestfuzz.c b/Modules/_xxtestfuzz/_xxtestfuzz.c deleted file mode 100644 index e0694de..0000000 --- a/Modules/_xxtestfuzz/_xxtestfuzz.c +++ /dev/null @@ -1,48 +0,0 @@ -#define PY_SSIZE_T_CLEAN -#include <Python.h> -#include <stdlib.h> -#include <inttypes.h> - -int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); - -static PyObject* _fuzz_run(PyObject* self, PyObject* args) { - const char* buf; - Py_ssize_t size; - if (!PyArg_ParseTuple(args, "s#", &buf, &size)) { - return NULL; - } - int rv = LLVMFuzzerTestOneInput((const uint8_t*)buf, size); - if (PyErr_Occurred()) { - return NULL; - } - if (rv != 0) { - // Nonzero return codes are reserved for future use. - PyErr_Format( - PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", rv); - return NULL; - } - Py_RETURN_NONE; -} - -static PyMethodDef module_methods[] = { - {"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""}, - {NULL}, -}; - -static struct PyModuleDef _fuzzmodule = { - PyModuleDef_HEAD_INIT, - "_fuzz", - NULL, - 0, - module_methods, - NULL, - NULL, - NULL, - NULL -}; - -PyMODINIT_FUNC -PyInit__xxtestfuzz(void) -{ - return PyModule_Create(&_fuzzmodule); -} diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict deleted file mode 100644 index ad64917..0000000 --- a/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict +++ /dev/null @@ -1,40 +0,0 @@ -"0" -",0" -":0" -"0:" -"-1.2e+3" - -"true" -"false" -"null" - -"\"\"" -",\"\"" -":\"\"" -"\"\":" - -"{}" -",{}" -":{}" -"{\"\":0}" -"{{}}" - -"[]" -",[]" -":[]" -"[0]" -"[[]]" - -"''" -"\\" -"\\b" -"\\f" -"\\n" -"\\r" -"\\t" -"\\u0000" -"\\x00" -"\\0" -"\\uD800\\uDC00" -"\\uDBFF\\uDFFF" - diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict deleted file mode 100644 index 961306a..0000000 --- a/Modules/_xxtestfuzz/dictionaries/fuzz_sre_compile.dict +++ /dev/null @@ -1,219 +0,0 @@ -"?" -"abc" -"()" -"[]" -"abc|def" -"abc|def|ghi" -"^xxx$" -"ab\\b\\d\\bcd" -"\\w|\\d" -"a*?" -"abc+" -"abc+?" -"xyz?" -"xyz??" -"xyz{0,1}" -"xyz{0,1}?" -"xyz{93}" -"xyz{1,32}" -"xyz{1,32}?" -"xyz{1,}" -"xyz{1,}?" -"a\\fb\\nc\\rd\\te\\vf" -"a\\nb\\bc" -"(?:foo)" -"(?: foo )" -"foo|(bar|baz)|quux" -"foo(?=bar)baz" -"foo(?!bar)baz" -"foo(?<=bar)baz" -"foo(?<!bar)baz" -"()" -"(?=)" -"[]" -"[x]" -"[xyz]" -"[a-zA-Z0-9]" -"[-123]" -"[^123]" -"]" -"}" -"[a-b-c]" -"[x\\dz]" -"[\\d-z]" -"[\\d-\\d]" -"[z-\\d]" -"\\cj\\cJ\\ci\\cI\\ck\\cK" -"\\c!" -"\\c_" -"\\c~" -"[\\c!]" -"[\\c_]" -"[\\c~]" -"[\\ca]" -"[\\cz]" -"[\\cA]" -"[\\cZ]" -"[\\c1]" -"\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ " -"[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]" -"\\8" -"\\9" -"\\11" -"\\11a" -"\\011" -"\\118" -"\\111" -"\\1111" -"(x)(x)(x)\\1" -"(x)(x)(x)\\2" -"(x)(x)(x)\\3" -"(x)(x)(x)\\4" -"(x)(x)(x)\\1*" -"(x)(x)(x)\\3*" -"(x)(x)(x)\\4*" -"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10" -"(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11" -"(a)\\1" -"(a\\1)" -"(\\1a)" -"(\\2)(\\1)" -"(?=a){0,10}a" -"(?=a){1,10}a" -"(?=a){9,10}a" -"(?!a)?a" -"\\1(a)" -"(?!(a))\\1" -"(?!\\1(a\\1)\\1)\\1" -"\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1" -"[\\0]" -"[\\11]" -"[\\11a]" -"[\\011]" -"[\\00011]" -"[\\118]" -"[\\111]" -"[\\1111]" -"\\x60" -"\\x3z" -"\\c" -"\\u0034" -"\\u003z" -"foo[z]*" -"\\u{12345}" -"\\u{12345}\\u{23456}" -"\\u{12345}{3}" -"\\u{12345}*" -"\\ud808\\udf45*" -"[\\ud808\\udf45-\\ud809\\udccc]" -"a" -"a|b" -"a\\n" -"a$" -"a\\b!" -"a\\Bb" -"a*?" -"a?" -"a??" -"a{0,1}?" -"a{1,2}?" -"a+?" -"(a)" -"(a)\\1" -"(\\1a)" -"\\1(a)" -"a\\s" -"a\\S" -"a\\D" -"a\\w" -"a\\W" -"a." -"a\\q" -"a[a]" -"a[^a]" -"a[a-z]" -"a(?:b)" -"a(?=b)" -"a(?!b)" -"\\x60" -"\\u0060" -"\\cA" -"\\q" -"\\1112" -"(a)\\1" -"(?!a)?a\\1" -"(?:(?=a))a\\1" -"a{}" -"a{,}" -"a{" -"a{z}" -"a{12z}" -"a{12," -"a{12,3b" -"{}" -"{,}" -"{" -"{z}" -"{1z}" -"{12," -"{12,3b" -"a" -"abc" -"a[bc]d" -"a|bc" -"ab|c" -"a||bc" -"(?:ab)" -"(?:ab|cde)" -"(?:ab)|cde" -"(ab)" -"(ab|cde)" -"(ab)\\1" -"(ab|cde)\\1" -"(?:ab)?" -"(?:ab)+" -"a?" -"a+" -"a??" -"a*?" -"a+?" -"(?:a?)?" -"(?:a+)?" -"(?:a?)+" -"(?:a*)+" -"(?:a+)+" -"(?:a?)*" -"(?:a*)*" -"(?:a+)*" -"a{0}" -"(?:a+){0,0}" -"a*b" -"a+b" -"a*b|c" -"a+b|c" -"(?:a{5,1000000}){3,1000000}" -"(?:ab){4,7}" -"a\\bc" -"a\\sc" -"a\\Sc" -"a(?=b)c" -"a(?=bbb|bb)c" -"a(?!bbb|bb)c" -"\xe2\x81\xa3" -"[\xe2\x81\xa3]" -"\xed\xb0\x80" -"\xed\xa0\x80" -"(\xed\xb0\x80)\x01" -"((\xed\xa0\x80))\x02" -"\xf0\x9f\x92\xa9" -"\x01" -"\x0f" -"[-\xf0\x9f\x92\xa9]+" -"[\xf0\x9f\x92\xa9-\xf4\x8f\xbf\xbf]" -"(?<=)" -"(?<=a)" -"(?<!)" -"(?<!a)" -"(?<a>)" -"(?<a>.)" -"(?<a>.)\\k<a>" diff --git a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv b/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv Binary files differdeleted file mode 100644 index 8b7887d..0000000 --- a/Modules/_xxtestfuzz/fuzz_csv_reader_corpus/test.csv +++ /dev/null diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json deleted file mode 100644 index fe51488..0000000 --- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json +++ /dev/null @@ -1 +0,0 @@ -[] diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json deleted file mode 100644 index 0967ef4..0000000 --- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json deleted file mode 100644 index 70e2685..0000000 --- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json +++ /dev/null @@ -1,58 +0,0 @@ -[ - "JSON Test Pattern pass1", - {"object with 1 member":["array with 1 element"]}, - {}, - [], - -42, - true, - false, - null, - { - "integer": 1234567890, - "real": -9876.543210, - "e": 0.123456789e-12, - "E": 1.234567890E+34, - "": 23456789012E66, - "zero": 0, - "one": 1, - "space": " ", - "quote": "\"", - "backslash": "\\", - "controls": "\b\f\n\r\t", - "slash": "/ & \/", - "alpha": "abcdefghijklmnopqrstuvwyz", - "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", - "digit": "0123456789", - "0123456789": "digit", - "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?", - "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", - "true": true, - "false": false, - "null": null, - "array":[ ], - "object":{ }, - "address": "50 St. James Street", - "url": "http://www.JSON.org/", - "comment": "// /* <!-- --", - "# -- --> */": " ", - " s p a c e d " :[1,2 , 3 - -, - -4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7], - "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", - "quotes": "" \u0022 %22 0x22 034 "", - "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" -: "A key can be any string" - }, - 0.5 ,98.6 -, -99.44 -, - -1066, -1e1, -0.1e1, -1e-1, -1e00,2e+00,2e-00 -,"rosebud"]
\ No newline at end of file diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json deleted file mode 100644 index d3c63c7..0000000 --- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json +++ /dev/null @@ -1 +0,0 @@ -[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
\ No newline at end of file diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json deleted file mode 100644 index 4528d51..0000000 --- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "JSON Test Pattern pass3": { - "The outermost value": "must be an object or array.", - "In this test": "It is an object." - } -} diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json deleted file mode 100644 index ce1e6ec..0000000 --- a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json +++ /dev/null @@ -1 +0,0 @@ -[1, 2, 3, "abcd", "xyz"] diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links deleted file mode 100644 index d99247c..0000000 --- a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/anchor_links +++ /dev/null @@ -1 +0,0 @@ -XX<a\s*href=(.*?)[\s|>] diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters deleted file mode 100644 index 0c67ee7..0000000 --- a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/characters +++ /dev/null @@ -1 +0,0 @@ -XX^(Tim|Robert)\s+the\s+(Enchanter|Shrubber)$ diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn deleted file mode 100644 index cce8919..0000000 --- a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/isbn +++ /dev/null @@ -1 +0,0 @@ -XX/((978[\--– ])?[0-9][0-9\--– ]{10}[\--– ][0-9xX])|((978)?[0-9]{9}[0-9Xx])/ diff --git a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number b/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number deleted file mode 100644 index 1e2efc5..0000000 --- a/Modules/_xxtestfuzz/fuzz_sre_compile_corpus/phone_number +++ /dev/null @@ -1 +0,0 @@ -XX(\+1|1)?[ \-\.]?\(?(?<areacode>[0-9]{3})\)?[ \-\.]?(?<prefix>[0-9]{3})[ \-\.]?(?<number>[0-9]{4})[ \.]*(ext|x)?[ \.]*(?<extension>[0-9]{0,5}) diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt deleted file mode 100644 index 9d330a6..0000000 --- a/Modules/_xxtestfuzz/fuzz_tests.txt +++ /dev/null @@ -1,7 +0,0 @@ -fuzz_builtin_float -fuzz_builtin_int -fuzz_builtin_unicode -fuzz_json_loads -fuzz_sre_compile -fuzz_sre_match -fuzz_csv_reader diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c deleted file mode 100644 index dae1eae..0000000 --- a/Modules/_xxtestfuzz/fuzzer.c +++ /dev/null @@ -1,426 +0,0 @@ -/* A fuzz test for CPython. - - The only exposed function is LLVMFuzzerTestOneInput, which is called by - fuzzers and by the _fuzz module for smoke tests. - - To build exactly one fuzz test, as when running in oss-fuzz etc., - build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build - LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with - -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float. - - See the source code for LLVMFuzzerTestOneInput for details. */ - -#include <Python.h> -#include <stdlib.h> -#include <inttypes.h> - -/* Fuzz PyFloat_FromString as a proxy for float(str). */ -static int fuzz_builtin_float(const char* data, size_t size) { - PyObject* s = PyBytes_FromStringAndSize(data, size); - if (s == NULL) return 0; - PyObject* f = PyFloat_FromString(s); - if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) { - PyErr_Clear(); - } - - Py_XDECREF(f); - Py_DECREF(s); - return 0; -} - -#define MAX_INT_TEST_SIZE 0x10000 - -/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */ -static int fuzz_builtin_int(const char* data, size_t size) { - /* Ignore test cases with very long ints to avoid timeouts - int("9" * 1000000) is not a very interesting test caase */ - if (size > MAX_INT_TEST_SIZE) { - return 0; - } - /* Pick a random valid base. (When the fuzzed function takes extra - parameters, it's somewhat normal to hash the input to generate those - parameters. We want to exercise all code paths, so we do so here.) */ - int base = _Py_HashBytes(data, size) % 37; - if (base == 1) { - // 1 is the only number between 0 and 36 that is not a valid base. - base = 0; - } - if (base == -1) { - return 0; // An error occurred, bail early. - } - if (base < 0) { - base = -base; - } - - PyObject* s = PyUnicode_FromStringAndSize(data, size); - if (s == NULL) { - if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { - PyErr_Clear(); - } - return 0; - } - PyObject* l = PyLong_FromUnicodeObject(s, base); - if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { - PyErr_Clear(); - } - PyErr_Clear(); - Py_XDECREF(l); - Py_DECREF(s); - return 0; -} - -/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */ -static int fuzz_builtin_unicode(const char* data, size_t size) { - PyObject* s = PyUnicode_FromStringAndSize(data, size); - if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { - PyErr_Clear(); - } - Py_XDECREF(s); - return 0; -} - -#define MAX_JSON_TEST_SIZE 0x10000 - -PyObject* json_loads_method = NULL; -/* Called by LLVMFuzzerTestOneInput for initialization */ -static int init_json_loads() { - /* Import json.loads */ - PyObject* json_module = PyImport_ImportModule("json"); - if (json_module == NULL) { - return 0; - } - json_loads_method = PyObject_GetAttrString(json_module, "loads"); - return json_loads_method != NULL; -} -/* Fuzz json.loads(x) */ -static int fuzz_json_loads(const char* data, size_t size) { - /* Since python supports arbitrarily large ints in JSON, - long inputs can lead to timeouts on boring inputs like - `json.loads("9" * 100000)` */ - if (size > MAX_JSON_TEST_SIZE) { - return 0; - } - PyObject* input_bytes = PyBytes_FromStringAndSize(data, size); - if (input_bytes == NULL) { - return 0; - } - PyObject* parsed = _PyObject_CallOneArg(json_loads_method, input_bytes); - if (parsed == NULL) { - /* Ignore ValueError as the fuzzer will more than likely - generate some invalid json and values */ - if (PyErr_ExceptionMatches(PyExc_ValueError) || - /* Ignore RecursionError as the fuzzer generates long sequences of - arrays such as `[[[...` */ - PyErr_ExceptionMatches(PyExc_RecursionError) || - /* Ignore unicode errors, invalid byte sequences are common */ - PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) - ) { - PyErr_Clear(); - } - } - Py_DECREF(input_bytes); - Py_XDECREF(parsed); - return 0; -} - -#define MAX_RE_TEST_SIZE 0x10000 - -PyObject* sre_compile_method = NULL; -PyObject* sre_error_exception = NULL; -int SRE_FLAG_DEBUG = 0; -/* Called by LLVMFuzzerTestOneInput for initialization */ -static int init_sre_compile() { - /* Import sre_compile.compile and sre.error */ - PyObject* sre_compile_module = PyImport_ImportModule("sre_compile"); - if (sre_compile_module == NULL) { - return 0; - } - sre_compile_method = PyObject_GetAttrString(sre_compile_module, "compile"); - if (sre_compile_method == NULL) { - return 0; - } - - PyObject* sre_constants = PyImport_ImportModule("sre_constants"); - if (sre_constants == NULL) { - return 0; - } - sre_error_exception = PyObject_GetAttrString(sre_constants, "error"); - if (sre_error_exception == NULL) { - return 0; - } - PyObject* debug_flag = PyObject_GetAttrString(sre_constants, "SRE_FLAG_DEBUG"); - if (debug_flag == NULL) { - return 0; - } - SRE_FLAG_DEBUG = PyLong_AsLong(debug_flag); - return 1; -} -/* Fuzz _sre.compile(x) */ -static int fuzz_sre_compile(const char* data, size_t size) { - /* Ignore really long regex patterns that will timeout the fuzzer */ - if (size > MAX_RE_TEST_SIZE) { - return 0; - } - /* We treat the first 2 bytes of the input as a number for the flags */ - if (size < 2) { - return 0; - } - uint16_t flags = ((uint16_t*) data)[0]; - /* We remove the SRE_FLAG_DEBUG if present. This is because it - prints to stdout which greatly decreases fuzzing speed */ - flags &= ~SRE_FLAG_DEBUG; - - /* Pull the pattern from the remaining bytes */ - PyObject* pattern_bytes = PyBytes_FromStringAndSize(data + 2, size - 2); - if (pattern_bytes == NULL) { - return 0; - } - PyObject* flags_obj = PyLong_FromUnsignedLong(flags); - if (flags_obj == NULL) { - Py_DECREF(pattern_bytes); - return 0; - } - - /* compiled = _sre.compile(data[2:], data[0:2] */ - PyObject* compiled = PyObject_CallFunctionObjArgs( - sre_compile_method, pattern_bytes, flags_obj, NULL); - /* Ignore ValueError as the fuzzer will more than likely - generate some invalid combination of flags */ - if (compiled == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) { - PyErr_Clear(); - } - /* Ignore some common errors thrown by sre_parse: - Overflow, Assertion and Index */ - if (compiled == NULL && (PyErr_ExceptionMatches(PyExc_OverflowError) || - PyErr_ExceptionMatches(PyExc_AssertionError) || - PyErr_ExceptionMatches(PyExc_IndexError)) - ) { - PyErr_Clear(); - } - /* Ignore re.error */ - if (compiled == NULL && PyErr_ExceptionMatches(sre_error_exception)) { - PyErr_Clear(); - } - - Py_DECREF(pattern_bytes); - Py_DECREF(flags_obj); - Py_XDECREF(compiled); - return 0; -} - -/* Some random patterns used to test re.match. - Be careful not to add catostraphically slow regexes here, we want to - exercise the matching code without causing timeouts.*/ -static const char* regex_patterns[] = { - ".", "^", "abc", "abc|def", "^xxx$", "\\b", "()", "[a-zA-Z0-9]", - "abc+", "[^A-Z]", "[x]", "(?=)", "a{z}", "a+b", "a*?", "a??", "a+?", - "{}", "a{,}", "{", "}", "^\\(*\\d{3}\\)*( |-)*\\d{3}( |-)*\\d{4}$", - "(?:a*)*", "a{1,2}?" -}; -const size_t NUM_PATTERNS = sizeof(regex_patterns) / sizeof(regex_patterns[0]); -PyObject** compiled_patterns = NULL; -/* Called by LLVMFuzzerTestOneInput for initialization */ -static int init_sre_match() { - PyObject* re_module = PyImport_ImportModule("re"); - if (re_module == NULL) { - return 0; - } - compiled_patterns = (PyObject**) PyMem_RawMalloc( - sizeof(PyObject*) * NUM_PATTERNS); - if (compiled_patterns == NULL) { - PyErr_NoMemory(); - return 0; - } - - /* Precompile all the regex patterns on the first run for faster fuzzing */ - for (size_t i = 0; i < NUM_PATTERNS; i++) { - PyObject* compiled = PyObject_CallMethod( - re_module, "compile", "y", regex_patterns[i]); - /* Bail if any of the patterns fail to compile */ - if (compiled == NULL) { - return 0; - } - compiled_patterns[i] = compiled; - } - return 1; -} -/* Fuzz re.match(x) */ -static int fuzz_sre_match(const char* data, size_t size) { - if (size < 1 || size > MAX_RE_TEST_SIZE) { - return 0; - } - /* Use the first byte as a uint8_t specifying the index of the - regex to use */ - unsigned char idx = (unsigned char) data[0]; - idx = idx % NUM_PATTERNS; - - /* Pull the string to match from the remaining bytes */ - PyObject* to_match = PyBytes_FromStringAndSize(data + 1, size - 1); - if (to_match == NULL) { - return 0; - } - - PyObject* pattern = compiled_patterns[idx]; - PyObject* match_callable = PyObject_GetAttrString(pattern, "match"); - - PyObject* matches = _PyObject_CallOneArg(match_callable, to_match); - - Py_XDECREF(matches); - Py_DECREF(match_callable); - Py_DECREF(to_match); - return 0; -} - -#define MAX_CSV_TEST_SIZE 0x10000 -PyObject* csv_module = NULL; -PyObject* csv_error = NULL; -/* Called by LLVMFuzzerTestOneInput for initialization */ -static int init_csv_reader() { - /* Import csv and csv.Error */ - csv_module = PyImport_ImportModule("csv"); - if (csv_module == NULL) { - return 0; - } - csv_error = PyObject_GetAttrString(csv_module, "Error"); - return csv_error != NULL; -} -/* Fuzz csv.reader([x]) */ -static int fuzz_csv_reader(const char* data, size_t size) { - if (size < 1 || size > MAX_CSV_TEST_SIZE) { - return 0; - } - /* Ignore non null-terminated strings since _csv can't handle - embeded nulls */ - if (memchr(data, '\0', size) == NULL) { - return 0; - } - - PyObject* s = PyUnicode_FromString(data); - /* Ignore exceptions until we have a valid string */ - if (s == NULL) { - PyErr_Clear(); - return 0; - } - - /* Split on \n so we can test multiple lines */ - PyObject* lines = PyObject_CallMethod(s, "split", "s", "\n"); - if (lines == NULL) { - Py_DECREF(s); - return 0; - } - - PyObject* reader = PyObject_CallMethod(csv_module, "reader", "N", lines); - if (reader) { - /* Consume all of the reader as an iterator */ - PyObject* parsed_line; - while ((parsed_line = PyIter_Next(reader))) { - Py_DECREF(parsed_line); - } - } - - /* Ignore csv.Error because we're probably going to generate - some bad files (embeded new-lines, unterminated quotes etc) */ - if (PyErr_ExceptionMatches(csv_error)) { - PyErr_Clear(); - } - - Py_XDECREF(reader); - Py_DECREF(s); - return 0; -} - -/* Run fuzzer and abort on failure. */ -static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { - int rv = fuzzer((const char*) data, size); - if (PyErr_Occurred()) { - /* Fuzz tests should handle expected errors for themselves. - This is last-ditch check in case they didn't. */ - PyErr_Print(); - abort(); - } - /* Someday the return value might mean something, propagate it. */ - return rv; -} - -/* CPython generates a lot of leak warnings for whatever reason. */ -int __lsan_is_turned_off(void) { return 1; } - - -int LLVMFuzzerInitialize(int *argc, char ***argv) { - wchar_t* wide_program_name = Py_DecodeLocale(*argv[0], NULL); - Py_SetProgramName(wide_program_name); - return 0; -} - -/* Fuzz test interface. - This returns the bitwise or of all fuzz test's return values. - - All fuzz tests must return 0, as all nonzero return codes are reserved for - future use -- we propagate the return values for that future case. - (And we bitwise or when running multiple tests to verify that normally we - only return 0.) */ -int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - if (!Py_IsInitialized()) { - /* LLVMFuzzerTestOneInput is called repeatedly from the same process, - with no separate initialization phase, sadly, so we need to - initialize CPython ourselves on the first run. */ - Py_InitializeEx(0); - } - - int rv = 0; - -#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float) - rv |= _run_fuzz(data, size, fuzz_builtin_float); -#endif -#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int) - rv |= _run_fuzz(data, size, fuzz_builtin_int); -#endif -#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode) - rv |= _run_fuzz(data, size, fuzz_builtin_unicode); -#endif -#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads) - static int JSON_LOADS_INITIALIZED = 0; - if (!JSON_LOADS_INITIALIZED && !init_json_loads()) { - PyErr_Print(); - abort(); - } else { - JSON_LOADS_INITIALIZED = 1; - } - - rv |= _run_fuzz(data, size, fuzz_json_loads); -#endif -#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_compile) - static int SRE_COMPILE_INITIALIZED = 0; - if (!SRE_COMPILE_INITIALIZED && !init_sre_compile()) { - PyErr_Print(); - abort(); - } else { - SRE_COMPILE_INITIALIZED = 1; - } - - rv |= _run_fuzz(data, size, fuzz_sre_compile); -#endif -#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_sre_match) - static int SRE_MATCH_INITIALIZED = 0; - if (!SRE_MATCH_INITIALIZED && !init_sre_match()) { - PyErr_Print(); - abort(); - } else { - SRE_MATCH_INITIALIZED = 1; - } - - rv |= _run_fuzz(data, size, fuzz_sre_match); -#endif -#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_csv_reader) - static int CSV_READER_INITIALIZED = 0; - if (!CSV_READER_INITIALIZED && !init_csv_reader()) { - PyErr_Print(); - abort(); - } else { - CSV_READER_INITIALIZED = 1; - } - - rv |= _run_fuzz(data, size, fuzz_csv_reader); -#endif - return rv; -} |