diff options
author | Brad Larsen <brad@bradfordlarsen.com> | 2023-12-10 17:16:15 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-10 17:16:15 (GMT) |
commit | eb27c9a99edb6bf3be1c93579d885edd0f403901 (patch) | |
tree | de3a93c37b4c36f9dd95ca32d037bab9187073e0 /Modules/_xxtestfuzz | |
parent | 1f9cd3c1e5410e45ade4362713229fa445ea6962 (diff) | |
download | cpython-eb27c9a99edb6bf3be1c93579d885edd0f403901.zip cpython-eb27c9a99edb6bf3be1c93579d885edd0f403901.tar.gz cpython-eb27c9a99edb6bf3be1c93579d885edd0f403901.tar.bz2 |
Add a fuzzer for `Py_CompileStringExFlags` (#111721)
Diffstat (limited to 'Modules/_xxtestfuzz')
-rw-r--r-- | Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict | 165 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py | 7 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py | 5 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py | 6 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py | 3 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py | 7 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py | 8 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzz_tests.txt | 1 | ||||
-rw-r--r-- | Modules/_xxtestfuzz/fuzzer.c | 60 |
9 files changed, 262 insertions, 0 deletions
diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict new file mode 100644 index 0000000..c6a44d9 --- /dev/null +++ b/Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict @@ -0,0 +1,165 @@ +# bits of syntax +"( " +") " +"[ " +"] " +": " +", " +"; " +"{ " +"} " + +# operators +"+ " +"- " +"* " +"** " +"/ " +"// " +"| " +"& " +"< " +"> " +"= " +". " +"% " +"` " +"^ " +"~ " +"@ " +"== " +"!= " +"<> " +"<< " +"<= " +">= " +">> " +"+= " +"-= " +"*= " +"** " +"/= " +"//= " +"|= " +"%= " +"&= " +"^= " +"<<= " +">>= " +"**= " +":= " +"@= " + +# whitespace +" " +":\\n " + +# type signatures and functions +"-> " +": List[int]" +": Dict[int, str]" + +"# type:" +"# type: List[int]" +"# type: Dict[int, str]" + +", *" +", /" +", *args" +", **kwargs" +", x=42" + + +# literals +"0x0a" +"0b0000" +"42" +"0o70" +"42j" +"42.01" +"-5" +"+42e-3" +"0_0_0" +"1e1_0" +".1_4" + +"{}" + +# variable names +"x" +"y" + +# strings +"r'x'" + +"b'x'" + +"rb\"x\"" + +"br\"x\"" + +"f'{x + 5}'" +"f\"{x + 5}\"" + +"'''" +"\"\"\"" + +"\\u" +"\\x" + +# keywords +"def " +"del " +"pass " +"break " +"continue " +"return " +"raise " +"from " +"import " +".. " +"... " +"__future__ " +"as " +"global " +"nonlocal " +"assert " +"print " +"if " +"elif " +"else: " +"while " +"try: " +"except " +"finally: " +"with " +"lambda " +"or " +"and " +"not " +"None " +"__peg_parser__" +"True " +"False " +"yield " +"async " +"await " +"for " +"in " +"is " +"class " + +# shebangs and encodings +"#!" 
+"# coding:" +"# coding=" +"# coding: latin-1" +"# coding=latin-1" +"# coding: utf-8" +"# coding=utf-8" +"# coding: ascii" +"# coding=ascii" +"# coding: cp860" +"# coding=cp860" +"# coding: gbk" +"# coding=gbk" diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py new file mode 100644 index 0000000..c43994d --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +def test() -> None: + x: list[int] = [] + x: dict[int, str] = {} + x: set[bytes] = {} + print(5 + 42 * 3, x) diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py new file mode 100644 index 0000000..7be326e --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py @@ -0,0 +1,5 @@ +class Foo(metaclass=42): + __slots__ = ['x'] + pass + +foo = Foo() diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py new file mode 100644 index 0000000..9bc3a45 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py @@ -0,0 +1,6 @@ +def evens(): + i = 0 + while True: + i += 1 + if i % 2 == 0: + yield i diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py new file mode 100644 index 0000000..490de90 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py @@ -0,0 +1,3 @@ +async def hello(name: str): + await name + print(name) diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py new file mode 100644 index 0000000..4cfcfe5 --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py @@ -0,0 +1,7 @@ +try: + eval('importer exporter... 
 really long matches') +except SyntaxError: + print("nothing to see here") +finally: + print("all done here") + raise diff --git a/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py new file mode 100644 index 0000000..d8e59ad --- /dev/null +++ b/Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py @@ -0,0 +1,8 @@ +"""Some module docstring""" +import sys + +def main(): + print("Hello world!", file=sys.stderr) + +if __name__ == '__main__': + main() diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt index 40aa221..ea6f982 100644 --- a/Modules/_xxtestfuzz/fuzz_tests.txt +++ b/Modules/_xxtestfuzz/fuzz_tests.txt @@ -8,3 +8,4 @@ fuzz_csv_reader fuzz_struct_unpack fuzz_ast_literal_eval fuzz_elementtree_parsewhole +fuzz_pycompile diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c index 77d29ce..e133b4d 100644 --- a/Modules/_xxtestfuzz/fuzzer.c +++ b/Modules/_xxtestfuzz/fuzzer.c @@ -501,6 +501,63 @@ static int fuzz_elementtree_parsewhole(const char* data, size_t size) { return 0; } +#define MAX_PYCOMPILE_TEST_SIZE 16384 +static char pycompile_scratch[MAX_PYCOMPILE_TEST_SIZE]; + +static const int start_vals[] = {Py_eval_input, Py_single_input, Py_file_input}; +const size_t NUM_START_VALS = sizeof(start_vals) / sizeof(start_vals[0]); + +static const int optimize_vals[] = {-1, 0, 1, 2}; +const size_t NUM_OPTIMIZE_VALS = sizeof(optimize_vals) / sizeof(optimize_vals[0]); + +/* Fuzz `Py_CompileStringExFlags` using a variety of input parameters. 
+ * That function is essentially behind the `compile` builtin */ +static int fuzz_pycompile(const char* data, size_t size) { + // Ignore overly-large inputs, and account for a NUL terminator + if (size > MAX_PYCOMPILE_TEST_SIZE - 1) { + return 0; + } + + // Need 2 bytes for parameter selection + if (size < 2) { + return 0; + } + + // Use first byte to determine element of `start_vals` to use + unsigned char start_idx = (unsigned char) data[0]; + int start = start_vals[start_idx % NUM_START_VALS]; + + // Use second byte to determine element of `optimize_vals` to use + unsigned char optimize_idx = (unsigned char) data[1]; + int optimize = optimize_vals[optimize_idx % NUM_OPTIMIZE_VALS]; + + // Create a NUL-terminated C string from the remaining input + memcpy(pycompile_scratch, data + 2, size - 2); + // Put a NUL terminator just after the copied data. (Space was reserved already.) + pycompile_scratch[size - 2] = '\0'; + + // XXX: instead of always using NULL for the `flags` value to + // `Py_CompileStringExFlags`, there are many flags that conditionally + // change parser behavior: + // + // #define PyCF_TYPE_COMMENTS 0x1000 + // #define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000 + // #define PyCF_ONLY_AST 0x0400 + // + // It would be good to test various combinations of these, too. + PyCompilerFlags *flags = NULL; + + PyObject *result = Py_CompileStringExFlags(pycompile_scratch, "<fuzz input>", start, flags, optimize); + if (result == NULL) { + /* compilation failed, most likely from a syntax error */ + PyErr_Clear(); + } else { + Py_DECREF(result); + } + + return 0; +} + /* Run fuzzer and abort on failure. 
*/ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) { int rv = fuzzer((const char*) data, size); @@ -643,5 +700,8 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { rv |= _run_fuzz(data, size, fuzz_elementtree_parsewhole); #endif +#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_pycompile) + rv |= _run_fuzz(data, size, fuzz_pycompile); +#endif return rv; } |