From 725cb0aafdf78b550c52618fe5cea1fadd278881 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 12 Jul 2019 18:36:28 -0700 Subject: [lz4] Fix bugs in partial decoding * Partial decoding could read a few bytes beyond the end of the input * Partial decoding returned an error with an empty output buffer --- lib/lz4.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 38ad6ab..7a4c6aa 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1642,7 +1642,11 @@ LZ4_decompress_generic( /* Special cases */ assert(lowPrefix <= op); - if ((endOnInput) && (unlikely(outputSize==0))) { return ((srcSize==1) && (*ip==0)) ? 0 : -1; } /* Empty output buffer */ + if ((endOnInput) && (unlikely(outputSize==0))) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); } if ((endOnInput) && unlikely(srcSize==0)) { return -1; } @@ -1850,21 +1854,50 @@ LZ4_decompress_generic( if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) ) || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { + /* We've either hit the input parsing restriction or the output parsing restriction. + * If we've hit the input parsing condition then this must be the last sequence. + * If we've hit the output parsing condition then we are either using partialDecoding + * or we've hit the output parsing condition. + */ if (partialDecoding) { - if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); } /* Partial decoding : stop in the middle of literal segment */ - if ((endOnInput) && (ip+length > iend)) { goto _output_error; } /* Error : read attempt beyond end of input buffer */ + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. + */ + assert(endOnInput); + /* If we're in this block because of the input parsing condition, then we must be on the + * last sequence (or invalid), so we must check that we exactly consume the input. + */ + if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; } + assert(ip+length <= iend); + /* We are finishing in the middle of a literals segment. + * Break after the copy. + */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + assert(ip+length <= iend); } else { - if ((!endOnInput) && (cpy != oend)) { goto _output_error; } /* Error : block decoding must stop exactly there */ - if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; } /* Error : input must be consumed */ + /* We must be on the last sequence because of the parsing limitations so check + * that we exactly regenerate the original size (must be exact when !endOnInput). + */ + if ((!endOnInput) && (cpy != oend)) { goto _output_error; } + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; } } memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */ ip += length; op += length; - if (!partialDecoding || (cpy == oend)) { - /* Necessarily EOF, due to parsing restrictions */ + /* Necessarily EOF when !partialDecoding. When partialDecoding + * it is EOF if we've either filled the output buffer or hit + * the input parsing restriction. + */ + if (!partialDecoding || (cpy == oend) || (ip == iend)) { break; } - } else { LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */ ip += length; op = cpy; -- cgit v0.12 From 3c40db8d258716b9efcfb46fa6dc29de6e43e616 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 12 Jul 2019 19:27:00 -0700 Subject: [ossfuzz] Improve the fuzzers * Run more decompression variants * Round trip the compression fuzzer and do partial decompression as well * Add a compression fuzzer that compresses into a smaller output buffer and test the destSize variant These fuzzers caught 2 bugs that were fixed in the previous commit. * Input buffer over-read in partial decompress * Partial decompress fails if output size is 0 --- ossfuzz/Makefile | 1 + ossfuzz/compress_fuzzer.c | 52 +++++++++++++++++++------- ossfuzz/decompress_fuzzer.c | 60 ++++++++++++++++++++++-------- ossfuzz/fuzz.h | 48 ++++++++++++++++++++++++ ossfuzz/fuzz_helpers.h | 89 +++++++++++++++++++++++++++++++++++++++++++++ ossfuzz/round_trip_fuzzer.c | 50 +++++++++++++++++++++++++ ossfuzz/standaloneengine.c | 2 +- ossfuzz/testinput.h | 15 -------- 8 files changed, 273 insertions(+), 44 deletions(-) create mode 100644 ossfuzz/fuzz.h create mode 100644 ossfuzz/fuzz_helpers.h create mode 100644 ossfuzz/round_trip_fuzzer.c delete mode 100644 ossfuzz/testinput.h diff --git a/ossfuzz/Makefile b/ossfuzz/Makefile index bd01123..4d24944 100644 --- a/ossfuzz/Makefile +++ b/ossfuzz/Makefile @@ -57,3 +57,4 @@ $(LZ4DIR)/liblz4.a: .PHONY: clean clean: compress_fuzzer_clean decompress_fuzzer_clean + $(MAKE) -C $(LZ4DIR) clean diff --git a/ossfuzz/compress_fuzzer.c b/ossfuzz/compress_fuzzer.c index 3908534..7021624 100644 --- a/ossfuzz/compress_fuzzer.c +++ b/ossfuzz/compress_fuzzer.c @@ -1,25 +1,51 @@ +/** + * This fuzz target attempts to compress the fuzzed data with the simple + * compression function with an output buffer that may be too small to + * ensure that the compressor never crashes. + */ + #include #include #include -#include "lz4.h" +#include -#define CHECK(COND) if (!(COND)) { abort(); } +#include "fuzz_helpers.h" +#include "lz4.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - size_t const compressed_dest_size = LZ4_compressBound(size); - char *const dest_buffer = (char *)malloc(compressed_dest_size); + uint32_t seed = FUZZ_seed(&data, &size); + size_t const dstCapacity = FUZZ_rand32(&seed, 0, LZ4_compressBound(size)); + char* const dst = (char*)malloc(dstCapacity); + char* const rt = (char*)malloc(size); + + FUZZ_ASSERT(dst); + FUZZ_ASSERT(rt); - CHECK(dest_buffer != NULL); + /* If compression succeeds it must round trip correctly. */ + { + int const dstSize = LZ4_compress_default((const char*)data, dst, + size, dstCapacity); + if (dstSize > 0) { + int const rtSize = LZ4_decompress_safe(dst, rt, dstSize, size); + FUZZ_ASSERT_MSG(rtSize == size, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(data, rt, size), "Corruption!"); + } + } - // Allocation succeeded, try compressing the incoming data. - int result = LZ4_compress_default((const char*)data, - dest_buffer, - size, - compressed_dest_size); - CHECK(result != 0); + if (dstCapacity > 0) { + /* Compression succeeds and must round trip correctly. */ + int compressedSize = size; + int const dstSize = LZ4_compress_destSize((const char*)data, dst, + &compressedSize, dstCapacity); + FUZZ_ASSERT(dstSize > 0); + int const rtSize = LZ4_decompress_safe(dst, rt, dstSize, size); + FUZZ_ASSERT_MSG(rtSize == compressedSize, "Incorrect regenerated size"); + FUZZ_ASSERT_MSG(!memcmp(data, rt, compressedSize), "Corruption!"); + } - free(dest_buffer); + free(dst); + free(rt); - return 0; + return 0; } diff --git a/ossfuzz/decompress_fuzzer.c b/ossfuzz/decompress_fuzzer.c index e6e14c4..0267c93 100644 --- a/ossfuzz/decompress_fuzzer.c +++ b/ossfuzz/decompress_fuzzer.c @@ -1,28 +1,58 @@ +/** + * This fuzz target attempts to decompress the fuzzed data with the simple + * decompression function to ensure the decompressor never crashes. + */ + #include #include #include -#include "lz4.h" +#include -#define CHECK(COND) if (!(COND)) { abort(); } +#include "fuzz_helpers.h" +#include "lz4.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - // TODO: Size input buffer pseudo-randomly based on seed extracted from input - size_t const buffer_size = 10 * 1024 * 1024; - char *const dest_buffer = (char *)malloc(buffer_size); - CHECK(dest_buffer != NULL); + uint32_t seed = FUZZ_seed(&data, &size); + size_t const dstCapacity = FUZZ_rand32(&seed, 0, 4 * size); + size_t const smallDictSize = size + 1; + size_t const largeDictSize = 64 * 1024 - 1; + size_t const dictSize = MAX(smallDictSize, largeDictSize); + char* const dst = (char*)malloc(dstCapacity); + char* const dict = (char*)malloc(dictSize + size); + char* const largeDict = dict; + char* const dataAfterDict = dict + dictSize; + char* const smallDict = dataAfterDict - smallDictSize; - // Allocation succeeded, try decompressing the incoming data. - int result = LZ4_decompress_safe((const char*)data, - dest_buffer, - size, - buffer_size); + FUZZ_ASSERT(dst); + FUZZ_ASSERT(dict); - // Ignore the result of decompression. - (void)result; + /* Prepare the dictionary. The data doesn't matter for decompression. */ + memset(dict, 0, dictSize); + memcpy(dataAfterDict, data, size); - free(dest_buffer); + /* Decompress using each possible dictionary configuration. */ + /* No dictionary. */ + LZ4_decompress_safe_usingDict((char const*)data, dst, size, + dstCapacity, NULL, 0); + /* Small external dictonary. */ + LZ4_decompress_safe_usingDict((char const*)data, dst, size, + dstCapacity, smallDict, smallDictSize); + /* Large external dictionary. */ + LZ4_decompress_safe_usingDict((char const*)data, dst, size, + dstCapacity, largeDict, largeDictSize); + /* Small prefix. */ + LZ4_decompress_safe_usingDict((char const*)dataAfterDict, dst, size, + dstCapacity, smallDict, smallDictSize); + /* Large prefix. */ + LZ4_decompress_safe_usingDict((char const*)data, dst, size, + dstCapacity, largeDict, largeDictSize); + /* Partial decompression. */ + LZ4_decompress_safe_partial((char const*)data, dst, size, + dstCapacity, dstCapacity); + free(dst); + free(dict); - return 0; + return 0; } diff --git a/ossfuzz/fuzz.h b/ossfuzz/fuzz.h new file mode 100644 index 0000000..eefac63 --- /dev/null +++ b/ossfuzz/fuzz.h @@ -0,0 +1,48 @@ +/** + * Fuzz target interface. + * Fuzz targets have some common parameters passed as macros during compilation. + * Check the documentation for each individual fuzzer for more parameters. + * + * @param FUZZ_RNG_SEED_SIZE: + * The number of bytes of the source to look at when constructing a seed + * for the deterministic RNG. These bytes are discarded before passing + * the data to lz4 functions. Every fuzzer initializes the RNG exactly + * once before doing anything else, even if it is unused. + * Default: 4. + * @param LZ4_DEBUG: + * This is a parameter for the lz4 library. Defining `LZ4_DEBUG=1` + * enables assert() statements in the lz4 library. Higher levels enable + * logging, so aren't recommended. Defining `LZ4_DEBUG=1` is + * recommended. + * @param LZ4_FORCE_MEMORY_ACCESS: + * This flag controls how the zstd library accesses unaligned memory. + * It can be undefined, or 0 through 2. If it is undefined, it selects + * the method to use based on the compiler. If testing with UBSAN set + * MEM_FORCE_MEMORY_ACCESS=0 to use the standard compliant method. + * @param FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + * This is the canonical flag to enable deterministic builds for fuzzing. + * Changes to zstd for fuzzing are gated behind this define. + * It is recommended to define this when building zstd for fuzzing. + */ + +#ifndef FUZZ_H +#define FUZZ_H + +#ifndef FUZZ_RNG_SEED_SIZE +# define FUZZ_RNG_SEED_SIZE 4 +#endif + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/ossfuzz/fuzz_helpers.h b/ossfuzz/fuzz_helpers.h new file mode 100644 index 0000000..626f209 --- /dev/null +++ b/ossfuzz/fuzz_helpers.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016-present, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + */ + +/** + * Helper functions for fuzzing. + */ + +#ifndef FUZZ_HELPERS_H +#define FUZZ_HELPERS_H + +#include "fuzz.h" +#include "xxhash.h" +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +#define FUZZ_QUOTE_IMPL(str) #str +#define FUZZ_QUOTE(str) FUZZ_QUOTE_IMPL(str) + +/** + * Asserts for fuzzing that are always enabled. + */ +#define FUZZ_ASSERT_MSG(cond, msg) \ + ((cond) ? (void)0 \ + : (fprintf(stderr, "%s: %u: Assertion: `%s' failed. %s\n", __FILE__, \ + __LINE__, FUZZ_QUOTE(cond), (msg)), \ + abort())) +#define FUZZ_ASSERT(cond) FUZZ_ASSERT_MSG((cond), ""); + +#if defined(__GNUC__) +#define FUZZ_STATIC static __inline __attribute__((unused)) +#elif defined(__cplusplus) || \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +#define FUZZ_STATIC static inline +#elif defined(_MSC_VER) +#define FUZZ_STATIC static __inline +#else +#define FUZZ_STATIC static +#endif + +/** + * Deterministically constructs a seed based on the fuzz input. + * Consumes up to the first FUZZ_RNG_SEED_SIZE bytes of the input. + */ +FUZZ_STATIC uint32_t FUZZ_seed(uint8_t const **src, size_t* size) { + uint8_t const *data = *src; + size_t const toHash = MIN(FUZZ_RNG_SEED_SIZE, *size); + *size -= toHash; + *src += toHash; + return XXH32(data, toHash, 0); +} + +#define FUZZ_rotl32(x, r) (((x) << (r)) | ((x) >> (32 - (r)))) + +FUZZ_STATIC uint32_t FUZZ_rand(uint32_t *state) { + static const uint32_t prime1 = 2654435761U; + static const uint32_t prime2 = 2246822519U; + uint32_t rand32 = *state; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZZ_rotl32(rand32, 13); + *state = rand32; + return rand32 >> 5; +} + +/* Returns a random numer in the range [min, max]. */ +FUZZ_STATIC uint32_t FUZZ_rand32(uint32_t *state, uint32_t min, uint32_t max) { + uint32_t random = FUZZ_rand(state); + return min + (random % (max - min + 1)); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/ossfuzz/round_trip_fuzzer.c b/ossfuzz/round_trip_fuzzer.c new file mode 100644 index 0000000..3a66e80 --- /dev/null +++ b/ossfuzz/round_trip_fuzzer.c @@ -0,0 +1,50 @@ +/** + * This fuzz target performs a lz4 round-trip test (compress & decompress), + * compares the result with the original, and calls abort() on corruption. + */ + +#include +#include +#include +#include + +#include "fuzz_helpers.h" +#include "lz4.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + uint32_t seed = FUZZ_seed(&data, &size); + size_t const dstCapacity = LZ4_compressBound(size); + char* const dst = (char*)malloc(dstCapacity); + char* const rt = (char*)malloc(size); + + FUZZ_ASSERT(dst); + FUZZ_ASSERT(rt); + + /* Compression must succeed and round trip correctly. */ + int const dstSize = LZ4_compress_default((const char*)data, dst, + size, dstCapacity); + FUZZ_ASSERT(dstSize > 0); + + int const rtSize = LZ4_decompress_safe(dst, rt, dstSize, size); + FUZZ_ASSERT_MSG(rtSize == size, "Incorrect size"); + FUZZ_ASSERT_MSG(!memcmp(data, rt, size), "Corruption!"); + + /* Partial decompression must succeed. */ + { + size_t const partialCapacity = FUZZ_rand32(&seed, 0, size); + char* const partial = (char*)malloc(partialCapacity); + FUZZ_ASSERT(partial); + int const partialSize = LZ4_decompress_safe_partial( + dst, partial, dstSize, partialCapacity, partialCapacity); + FUZZ_ASSERT(partialSize >= 0); + FUZZ_ASSERT_MSG(partialSize == partialCapacity, "Incorrect size"); + FUZZ_ASSERT_MSG(!memcmp(data, partial, partialSize), "Corruption!"); + free(partial); + } + + free(dst); + free(rt); + + return 0; +} diff --git a/ossfuzz/standaloneengine.c b/ossfuzz/standaloneengine.c index 175360e..6afeffd 100644 --- a/ossfuzz/standaloneengine.c +++ b/ossfuzz/standaloneengine.c @@ -2,7 +2,7 @@ #include #include -#include "testinput.h" +#include "fuzz.h" /** * Main procedure for standalone fuzzing engine. diff --git a/ossfuzz/testinput.h b/ossfuzz/testinput.h deleted file mode 100644 index 0e50a3c..0000000 --- a/ossfuzz/testinput.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef TESTINPUT_H_INCLUDED -#define TESTINPUT_H_INCLUDED - -#include - -#if defined (__cplusplus) -extern "C" { -#endif - -int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); - -#if defined(__cplusplus) -} -#endif -#endif -- cgit v0.12