From 16c09428225f466a2ee13e060d290e90663e776a Mon Sep 17 00:00:00 2001 From: "yann.collet.73@gmail.com" Date: Mon, 10 Jun 2013 17:29:13 +0000 Subject: lz4.c no longer depends on lz4_decoder.h (removed) Decompression speed improved under GCC Improved speed of LZ4_decompress_safe_partial() Added new utility : fullbench Modified x64 detection macro, as suggested by David Karner Improved Fuzzer tool Updated xxHash to r30 git-svn-id: https://lz4.googlecode.com/svn/trunk@97 650e7d94-2a16-8b24-b05c-7c0b3f6821cd --- Makefile | 7 +- bench.c | 16 +- fullbench.c | 622 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fuzzer.c | 109 +++++----- lz4.c | 247 +++++++++++++++-------- lz4.h | 10 +- lz4_decoder.h | 233 ---------------------- lz4hc.c | 13 +- xxhash.c | 160 +++++++++++---- xxhash.h | 37 ++-- 10 files changed, 1021 insertions(+), 433 deletions(-) create mode 100644 fullbench.c delete mode 100644 lz4_decoder.h diff --git a/Makefile b/Makefile index ea60d11..53ea0f4 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ endif default: lz4c -all: lz4c lz4c32 fuzzer +all: lz4c lz4c32 fuzzer fullbench lz4c: lz4.c lz4hc.c bench.c xxhash.c lz4c.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) @@ -21,5 +21,8 @@ lz4c32: lz4.c lz4hc.c bench.c xxhash.c lz4c.c fuzzer : lz4.c lz4hc.c fuzzer.c $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) +fullbench : lz4.c lz4hc.c xxhash.c fullbench.c + $(CC) -O3 $(CFLAGS) $^ -o $@$(EXT) + clean: - rm -f core *.o lz4c$(EXT) lz4c32$(EXT) fuzzer$(EXT) + rm -f core *.o lz4c$(EXT) lz4c32$(EXT) fuzzer$(EXT) fullbench$(EXT) diff --git a/bench.c b/bench.c index eef5cdb..f605249 100644 --- a/bench.c +++ b/bench.c @@ -65,7 +65,6 @@ #endif #include "lz4.h" -//int LZ4_compress_stack(const char* in, char* out, int size); #define COMPRESSOR0 LZ4_compress #include "lz4hc.h" #define COMPRESSOR1 LZ4_compressHC @@ -209,7 +208,7 @@ static size_t BMK_findMaxMem(U64 requiredMem) while (!testmem) { requiredMem -= step; - testmem = malloc ((size_t)requiredMem); + testmem = (BYTE*) malloc ((size_t)requiredMem); } free (testmem); @@ -294,11 +293,11 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) // Alloc chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters)); - orig_buff = malloc((size_t )benchedSize); + orig_buff = (char*)malloc((size_t )benchedSize); nbChunks = (int) (benchedSize / chunkSize) + 1; maxCChunkSize = LZ4_compressBound(chunkSize); compressed_buff_size = nbChunks * maxCChunkSize; - compressed_buff = malloc((size_t )compressed_buff_size); + compressed_buff = (char*)malloc((size_t )compressed_buff_size); if(!orig_buff || !compressed_buff) @@ -386,10 +385,11 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) while(BMK_GetMilliSpan(milliTime) < TIMELOOP) { for (chunkNb=0; chunkNb 1) - printf("%-16.16s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (long long unsigned int)totalz, (double)totalz/(double)totals*100., (double)totals/totalc/1000., (double)totals/totald/1000.); + DISPLAY("%-16.16s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (long long unsigned int)totalz, (double)totalz/(double)totals*100., (double)totals/totalc/1000., (double)totals/totald/1000.); - if (BMK_pause) { printf("press enter...\n"); getchar(); } + if (BMK_pause) { DISPLAY("press enter...\n"); getchar(); } return 0; } diff --git a/fullbench.c b/fullbench.c new file mode 100644 index 0000000..54b46f6 --- /dev/null +++ b/fullbench.c @@ -0,0 +1,622 @@ +/* + bench.c - Demo program to benchmark open-source compression algorithm + Copyright (C) Yann Collet 2012-2013 + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + - LZ4 source repository : http://code.google.com/p/lz4/ +*/ + +//************************************** +// Compiler Options +//************************************** +// Disable some Visual warning messages +#define _CRT_SECURE_NO_WARNINGS +#define _CRT_SECURE_NO_DEPRECATE // VS2005 + +// Unix Large Files support (>4GB) +#if (defined(__sun__) && (!defined(__LP64__))) // Sun Solaris 32-bits requires specific definitions +# define _LARGEFILE_SOURCE +# define _FILE_OFFSET_BITS 64 +#elif ! defined(__LP64__) // No point defining Large file for 64 bit +# define _LARGEFILE64_SOURCE +#endif + +// S_ISREG & gettimeofday() are not supported by MSVC +#if defined(_MSC_VER) +# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) +# define BMK_LEGACY_TIMER 1 +#endif + +// GCC does not support _rotl outside of Windows +#if !defined(_WIN32) +# define _rotl(x,r) ((x << r) | (x >> (32 - r))) +#endif + + +//************************************** +// Includes +//************************************** +#include // malloc +#include // fprintf, fopen, ftello64 +#include // stat64 +#include // stat64 + +// Use ftime() if gettimeofday() is not available on your target +#if defined(BMK_LEGACY_TIMER) +# include // timeb, ftime +#else +# include // gettimeofday +#endif + +#include "lz4.h" +#define COMPRESSOR0 LZ4_compress +#include "lz4hc.h" +#define COMPRESSOR1 LZ4_compressHC +#define DEFAULTCOMPRESSOR COMPRESSOR0 + +#include "xxhash.h" + + +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + + +//**************************** +// Constants +//**************************** +#define COMPRESSOR_NAME "Full LZ4 speed analyzer" +#define COMPRESSOR_VERSION "" +#define COMPILED __DATE__ +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s %s, by %s (%s) ***\n", COMPRESSOR_NAME, COMPRESSOR_VERSION, AUTHOR, COMPILED + +#define NBLOOPS 6 +#define TIMELOOP 2500 + +#define KNUTH 2654435761U +#define MAX_MEM (1984<<20) +#define DEFAULT_CHUNKSIZE (4<<20) + + +//************************************** +// Local structures +//************************************** +struct chunkParameters +{ + U32 id; + char* origBuffer; + char* compressedBuffer; + int origSize; + int compressedSize; +}; + + +//************************************** +// MACRO +//************************************** +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) + + + +//************************************** +// Benchmark Parameters +//************************************** +static int chunkSize = DEFAULT_CHUNKSIZE; +static int nbIterations = NBLOOPS; +static int BMK_pause = 0; + +void BMK_SetBlocksize(int bsize) +{ + chunkSize = bsize; + DISPLAY("-Using Block Size of %i KB-\n", chunkSize>>10); +} + +void BMK_SetNbIterations(int nbLoops) +{ + nbIterations = nbLoops; + DISPLAY("- %i iterations -\n", nbIterations); +} + +void BMK_SetPause() +{ + BMK_pause = 1; +} + +//********************************************************* +// Private functions +//********************************************************* + +#if defined(BMK_LEGACY_TIMER) + +static int BMK_GetMilliStart() +{ + // Based on Legacy ftime() + // Rolls over every ~ 12.1 days (0x100000/24/60/60) + // Use GetMilliSpan to correct for rollover + struct timeb tb; + int nCount; + ftime( &tb ); + nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000); + return nCount; +} + +#else + +static int BMK_GetMilliStart() +{ + // Based on newer gettimeofday() + // Use GetMilliSpan to correct for rollover + struct timeval tv; + int nCount; + gettimeofday(&tv, NULL); + nCount = (int) (tv.tv_usec/1000 + (tv.tv_sec & 0xfffff) * 1000); + return nCount; +} + +#endif + + +static int BMK_GetMilliSpan( int nTimeStart ) +{ + int nSpan = BMK_GetMilliStart() - nTimeStart; + if ( nSpan < 0 ) + nSpan += 0x100000 * 1000; + return nSpan; +} + + +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t step = (64U<<20); // 64 MB + BYTE* testmem=NULL; + + requiredMem = (((requiredMem >> 25) + 1) << 26); + if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + + requiredMem += 2*step; + while (!testmem) + { + requiredMem -= step; + testmem = (BYTE*) malloc ((size_t)requiredMem); + } + + free (testmem); + return (size_t) (requiredMem - step); +} + + +static U64 BMK_GetFileSize(char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); +#else + struct stat statbuf; + r = stat(infilename, &statbuf); +#endif + if (r || !S_ISREG(statbuf.st_mode)) return 0; // No good... + return (U64)statbuf.st_size; +} + + +//********************************************************* +// Public function +//********************************************************* + +static inline int local_LZ4_compress_limitedOutput(const char* in, char* out, int inSize) +{ + return LZ4_compress_limitedOutput(in, out, inSize, LZ4_compressBound(inSize)); +} + +static inline int local_LZ4_compressHC_limitedOutput(const char* in, char* out, int inSize) +{ + return LZ4_compressHC_limitedOutput(in, out, inSize, LZ4_compressBound(inSize)); +} + +static inline int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int outSize) +{ + (void)inSize; + LZ4_decompress_fast(in, out, outSize); + return outSize; +} + +static inline int local_LZ4_decompress_fast_withPrefix64k(const char* in, char* out, int inSize, int outSize) +{ + (void)inSize; + LZ4_decompress_fast_withPrefix64k(in, out, outSize); + return outSize; +} + +static inline int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSize, int outSize) +{ + return LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); +} + +int fullSpeedBench(char** fileNamesTable, int nbFiles) +{ + int fileIdx=0; + FILE* fileIn; + char* infilename; + U64 largefilesize; + size_t benchedSize; + int nbChunks; + int maxCChunkSize; + size_t readSize; + char* orig_buff; + char* compressed_buff; int compressed_buff_size; + struct chunkParameters* chunkP; + U32 crcc, crcd=0; +# define NB_COMPRESSION_ALGORITHMS 4 + static char* compressionNames[] = { "LZ4_compress", "LZ4_compressHC", "LZ4_compressHC_limitedOutput", "LZ4_compress_limitedOutput" }; + double totalCTime[NB_COMPRESSION_ALGORITHMS] = {0}; + double totalCSize[NB_COMPRESSION_ALGORITHMS] = {0}; +# define NB_DECOMPRESSION_ALGORITHMS 5 + static char* decompressionNames[] = { "LZ4_decompress_fast", "LZ4_decompress_fast_withPrefix64k", "LZ4_decompress_safe", "LZ4_decompress_safe_withPrefix64k", "LZ4_decompress_safe_partial" }; + double totalDTime[NB_DECOMPRESSION_ALGORITHMS] = {0}; + + U64 totals = 0; + + + // Loop for each file + while (fileIdx largefilesize) benchedSize = (size_t)largefilesize; + if (benchedSize < largefilesize) + { + DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedSize>>20)); + } + + // Alloc + chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters)); + orig_buff = (char*) malloc((size_t)benchedSize); + nbChunks = (int) (benchedSize / chunkSize) + 1; + maxCChunkSize = LZ4_compressBound(chunkSize); + compressed_buff_size = nbChunks * maxCChunkSize; + compressed_buff = (char*)malloc((size_t)compressed_buff_size); + + + if(!orig_buff || !compressed_buff) + { + DISPLAY("\nError: not enough memory!\n"); + free(orig_buff); + free(compressed_buff); + fclose(fileIn); + return 12; + } + + // Init chunks data + { + int i; + size_t remaining = benchedSize; + char* in = orig_buff; + char* out = compressed_buff; + for (i=0; i chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; } + chunkP[i].compressedBuffer = out; out += maxCChunkSize; + chunkP[i].compressedSize = 0; + } + } + + // Fill input buffer + DISPLAY("Loading %s... \r", infilename); + readSize = fread(orig_buff, 1, benchedSize, fileIn); + fclose(fileIn); + + if(readSize != benchedSize) + { + DISPLAY("\nError: problem reading file '%s' !! \n", infilename); + free(orig_buff); + free(compressed_buff); + return 13; + } + + // Calculating input Checksum + crcc = XXH32(orig_buff, (unsigned int)benchedSize,0); + + + // Bench + { + int loopNb, nb_loops, chunkNb, cAlgNb, dAlgNb; + size_t cSize=0; + double ratio=0.; + + DISPLAY("\r%79s\r", ""); + DISPLAY(" %s : \n", infilename); + + // Compression Algorithms + for (cAlgNb=0; cAlgNb < NB_COMPRESSION_ALGORITHMS; cAlgNb++) + { + char* cName = compressionNames[cAlgNb]; + int (*compressionFunction)(const char*, char*, int); + double bestTime = 100000000.; + + switch(cAlgNb) + { + case 0: compressionFunction = LZ4_compress; break; + case 1: compressionFunction = LZ4_compressHC; break; + case 2: compressionFunction = local_LZ4_compressHC_limitedOutput; break; + case 3: compressionFunction = local_LZ4_compress_limitedOutput; break; + default : DISPLAY("ERROR ! Bad algorithm Id !! \n"); return 1; + } + + for (loopNb = 1; loopNb <= nbIterations; loopNb++) + { + double averageTime; + int milliTime; + + DISPLAY("%1i-%-19.19s : %9i ->\r", loopNb, cName, (int)benchedSize); + { size_t i; for (i=0; i %9i (%5.2f%%),%7.1f MB/s\r", loopNb, cName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000.); + } + + if (ratio<100.) + DISPLAY("%-21.21s : %9i -> %9i (%5.2f%%),%7.1f MB/s\n", cName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000.); + else + DISPLAY("%-21.21s : %9i -> %9i (%5.1f%%),%7.1f MB/s\n", cName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000.); + + totalCTime[cAlgNb] += bestTime; + totalCSize[cAlgNb] += cSize; + } + + { size_t i; for (i=0; i\r", loopNb, dName, (int)benchedSize); + + nb_loops = 0; + milliTime = BMK_GetMilliStart(); + while(BMK_GetMilliStart() == milliTime); + milliTime = BMK_GetMilliStart(); + while(BMK_GetMilliSpan(milliTime) < TIMELOOP) + { + for (chunkNb=0; chunkNb %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.); + } + + // CRC Checking + crcd = XXH32(orig_buff, (int)benchedSize, 0); + if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); exit(1); } + DISPLAY("%-21.21s : %9i -> %7.1f MB/s\n", dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.); + + totalDTime[dAlgNb] += bestTime; + } + + totals += benchedSize; + } + + free(orig_buff); + free(compressed_buff); + free(chunkP); + } + + if (nbFiles > 1) + { + int AlgNb; + + DISPLAY(" TOTAL : \n"); + for (AlgNb = 0; AlgNb < NB_COMPRESSION_ALGORITHMS; AlgNb ++) + { + char* cName = compressionNames[AlgNb]; + DISPLAY("%-21.21s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s\n", cName, (long long unsigned int)totals, (long long unsigned int)totalCSize[AlgNb], (double)totalCSize[AlgNb]/(double)totals*100., (double)totals/totalCTime[AlgNb]/1000.); + } + for (AlgNb = 0; AlgNb < NB_DECOMPRESSION_ALGORITHMS; AlgNb ++) + { + char* dName = decompressionNames[AlgNb]; + DISPLAY("%-21.21s :%10llu -> %6.1f MB/s\n", dName, (long long unsigned int)totals, (double)totals/totalDTime[AlgNb]/1000.); + } + } + + if (BMK_pause) { printf("press enter...\n"); getchar(); } + + return 0; +} + + +int usage(char* exename) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [arg] file1 file2 ... fileX\n", exename); + DISPLAY( "Arguments :\n"); + DISPLAY( " -H : Help (this text + advanced options)\n"); + return 0; +} + +int usage_advanced() +{ + DISPLAY( "\nAdvanced options :\n"); + DISPLAY( " -B# : Block size [4-7](default : 7)\n"); + //DISPLAY( " -BD : Block dependency (improve compression ratio)\n"); + DISPLAY( " -i# : iteration loops [1-9](default : 6)\n"); + return 0; +} + +int badusage(char* exename) +{ + DISPLAY("Wrong parameters\n"); + usage(exename); + return 0; +} + +int main(int argc, char** argv) +{ + int i, + filenamesStart=2; + char* exename=argv[0]; + char* input_filename=0; + + // Welcome message + DISPLAY( WELCOME_MESSAGE); + + if (argc<2) { badusage(exename); return 1; } + + for(i=1; i='1') && (argument[1] <='9')) + { + int iters = argument[1] - '0'; + BMK_SetNbIterations(iters); + argument++; + } + break; + + // Pause at the end (benchmark only) (hidden option) + case 'p': BMK_SetPause(); break; + + // Unrecognised command + default : badusage(exename); return 1; + } + } + continue; + } + + // first provided filename is input + if (!input_filename) { input_filename=argument; filenamesStart=i; continue; } + + } + + // No input filename ==> Error + if(!input_filename) { badusage(exename); return 1; } + + return fullSpeedBench(argv+filenamesStart, argc-filenamesStart); + +} + diff --git a/fuzzer.c b/fuzzer.c index c8bb5d9..44ca885 100644 --- a/fuzzer.c +++ b/fuzzer.c @@ -1,7 +1,7 @@ /* fuzzer.c - Fuzzer test tool for LZ4 - Copyright (C) Andrew Mahone - Yann Collet 2012-2013 - Original code by Andrew Mahone / Modified by Yann Collet + Copyright (C) Yann Collet - Andrew Mahone 2012-2013 + Code started by Andrew Mahone, modified by Yann Collet GPL v2 License This program is free software; you can redistribute it and/or modify @@ -56,25 +56,25 @@ #define PRIME3 3266489917U - //********************************************************* // Functions //********************************************************* static int FUZ_GetMilliStart() { - struct timeb tb; - int nCount; - ftime( &tb ); - nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000); - return nCount; + struct timeb tb; + int nCount; + ftime( &tb ); + nCount = (int) (tb.millitm + (tb.time & 0xfffff) * 1000); + return nCount; } + static int FUZ_GetMilliSpan( int nTimeStart ) { - int nSpan = FUZ_GetMilliStart() - nTimeStart; - if ( nSpan < 0 ) - nSpan += 0x100000 * 1000; - return nSpan; + int nSpan = FUZ_GetMilliStart() - nTimeStart; + if ( nSpan < 0 ) + nSpan += 0x100000 * 1000; + return nSpan; } @@ -85,14 +85,16 @@ unsigned int FUZ_rand(unsigned int* src) } -int test_canary(unsigned char *buf) { - int i; - for (i = 0; i < 2048; i++) - if (buf[i] != buf[i + 2048]) - return 0; - return 1; +int test_canary(unsigned char *buf) +{ + int i; + for (i = 0; i < 2048; i++) + if (buf[i] != buf[i + 2048]) + return 0; + return 1; } + int FUZ_SecurityTest() { char* output; @@ -127,9 +129,10 @@ int main() { # define FUZ_avail ROUND_PAGE(FUZ_max) const int off_full = FUZ_avail - FUZ_max; unsigned char cbuf[FUZ_avail + PAGE_SIZE]; - unsigned int seed, cur_seq=PRIME3, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart(); - int i, j, k, ret, len, lenHC; + unsigned int seed, randState, cur_seq=PRIME3, seeds[NUM_SEQ], timestamp=FUZ_GetMilliStart(); + int i, j, k, ret, len, lenHC, attemptNb; char userInput[30] = {0}; +# define FUZ_CHECKTEST(cond, message) testNb++; if (cond) { printf("Test %i : %s : seed %u, cycle %u \n", testNb, message, seed, attemptNb); goto _output_error; } printf("starting LZ4 fuzzer\n"); printf("Select an Initialisation number (default : random) : "); @@ -140,96 +143,108 @@ int main() { else seed = FUZ_GetMilliSpan(timestamp); } printf("Seed = %u\n", seed); + randState = seed; FUZ_SecurityTest(); for (i = 0; i < 2048; i++) - cbuf[FUZ_avail + i] = cbuf[FUZ_avail + 2048 + i] = FUZ_rand(&seed) >> 16; + cbuf[FUZ_avail + i] = cbuf[FUZ_avail + 2048 + i] = FUZ_rand(&randState) >> 16; - for (i = 0; i < NB_ATTEMPTS; i++) + for (attemptNb = 0; attemptNb < NB_ATTEMPTS; attemptNb++) { - printf("\r%7i /%7i\r", i, NB_ATTEMPTS); + int testNb = 0; + + printf("\r%7i /%7i\r", attemptNb, NB_ATTEMPTS); - FUZ_rand(&seed); for (j = 0; j < NUM_SEQ; j++) { - seeds[j] = FUZ_rand(&seed) << 8; - seeds[j] ^= (FUZ_rand(&seed) >> 8) & 65535; + seeds[j] = FUZ_rand(&randState) << 8; + seeds[j] ^= (FUZ_rand(&randState) >> 8) & 65535; } for (j = 0; j < LEN; j++) { - k = FUZ_rand(&seed); + k = FUZ_rand(&randState); if (j == 0 || NEW_SEQ(k)) - cur_seq = seeds[(FUZ_rand(&seed) >> 16) & SEQ_MSK]; + cur_seq = seeds[(FUZ_rand(&randState) >> 16) & SEQ_MSK]; if (MOD_SEQ(k)) { - k = (FUZ_rand(&seed) >> 16) & SEQ_MSK; - seeds[k] = FUZ_rand(&seed) << 8; - seeds[k] ^= (FUZ_rand(&seed) >> 8) & 65535; + k = (FUZ_rand(&randState) >> 16) & SEQ_MSK; + seeds[k] = FUZ_rand(&randState) << 8; + seeds[k] ^= (FUZ_rand(&randState) >> 8) & 65535; } buf[j] = FUZ_rand(&cur_seq) >> 16; } // Test compression HC ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max); - if (ret == 0) { printf("HC compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret==0, "HC compression failed despite sufficient space"); lenHC = ret; // Test compression ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[off_full], LEN, FUZ_max); - if (ret == 0) { printf("compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret==0, "compression failed despite sufficient space"); len = ret; // Test decoding with output size being exactly what's necessary => must work ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN); - if (ret<0) { printf("decompression failed despite correct space: seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret<0, "decompression failed despite correct space"); // Test decoding with one byte missing => must fail ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN-1); - if (ret>=0) { printf("decompression should have failed, due to Output Size being too small : seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to Output Size being too small"); // Test decoding with one byte too much => must fail ret = LZ4_decompress_fast((char*)&cbuf[off_full], (char*)testOut, LEN+1); - if (ret>=0) { printf("decompression should have failed, due to Output Size being too large : seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to Output Size being too large"); // Test decoding with enough output size => must work ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN+1); - if (ret<0) { printf("decompression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret<0, "decompression failed despite sufficient space"); // Test decoding with output size being exactly what's necessary => must work ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN); - if (ret<0) { printf("decompression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret<0, "decompression failed despite sufficient space"); // Test decoding with output size being one byte too short => must fail ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len, LEN-1); - if (ret>=0) { printf("decompression should have failed, due to Output Size being too small : seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short"); // Test decoding with input size being one byte too short => must fail ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len-1, LEN); - if (ret>=0) { printf("decompression should have failed, due to input size being too small : seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short"); // Test decoding with input size being one byte too large => must fail ret = LZ4_decompress_safe((char*)&cbuf[off_full], (char*)testOut, len+1, LEN); - if (ret>=0) { printf("decompression should have failed, due to input size being too large : seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret>=0, "decompression should have failed, due to input size being too large"); + //if (ret>=0) { printf("Test 10 : decompression should have failed, due to input size being too large : seed %u, len %d\n", seed, LEN); goto _output_error; } + + // Test partial decoding with target output size being max/2 => must work + ret = LZ4_decompress_safe_partial((char*)&cbuf[off_full], (char*)testOut, len, LEN/2, LEN); + FUZ_CHECKTEST(ret<0, "partial decompression failed despite sufficient space"); + + // Test partial decoding with target output size being just below max => must work + ret = LZ4_decompress_safe_partial((char*)&cbuf[off_full], (char*)testOut, len, LEN-3, LEN); + FUZ_CHECKTEST(ret<0, "partial decompression failed despite sufficient space"); // Test compression with output size being exactly what's necessary (should work) ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, len); - if (!test_canary(&cbuf[FUZ_avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d\n", seed, LEN, len); goto _output_error; } - if (ret == 0) { printf("compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer"); + FUZ_CHECKTEST(ret==0, "compression failed despite sufficient space"); // Test HC compression with output size being exactly what's necessary (should work) ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-len], LEN, lenHC); - if (ret == 0) { printf("HC compression failed despite sufficient space: seed %u, len %d\n", seed, LEN); goto _output_error; } + FUZ_CHECKTEST(ret==0, "HC compression failed despite sufficient space"); // Test compression with just one missing byte into output buffer => must fail ret = LZ4_compress_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, len-1); - if (ret) { printf("compression overran output buffer: seed %u, len %d, olen %d => ret %d", seed, LEN, len-1, ret); goto _output_error; } - if (!test_canary(&cbuf[FUZ_avail])) { printf("compression overran output buffer: seed %u, len %d, olen %d", seed, LEN, len-1); goto _output_error; } + FUZ_CHECKTEST(ret, "compression overran output buffer"); + FUZ_CHECKTEST(!test_canary(&cbuf[FUZ_avail]), "compression overran output buffer"); // Test HC compression with just one missing byte into output buffer => must fail ret = LZ4_compressHC_limitedOutput((const char*)buf, (char*)&cbuf[FUZ_avail-(len-1)], LEN, lenHC-1); - if (ret) { printf("HC compression overran output buffer: seed %u, len %d, olen %d => ret %d", seed, LEN, lenHC-1, ret); goto _output_error; } + FUZ_CHECKTEST(ret, "HC compression overran output buffer"); bytes += LEN; cbytes += len; hcbytes += lenHC; + FUZ_rand(&randState); } printf("all tests completed successfully \n"); diff --git a/lz4.c b/lz4.c index 327227e..91819ad 100644 --- a/lz4.c +++ b/lz4.c @@ -32,7 +32,7 @@ */ /* -Note : this source file requires "lz4_encoder.h" and "lz4_decoder.h" +Note : this source file requires "lz4_encoder.h" */ //************************************** @@ -64,9 +64,10 @@ Note : this source file requires "lz4_encoder.h" and "lz4_decoder.h" // CPU Feature Detection //************************************** // 32 or 64 bits ? -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) \ - || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) \ - || defined(__ia64__) ) // Detects 64 bits mode +#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ + || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ + || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ + || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) // Detects 64 bits mode # define LZ4_ARCH64 1 #else # define LZ4_ARCH64 0 @@ -82,7 +83,7 @@ Note : this source file requires "lz4_encoder.h" and "lz4_decoder.h" #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) # define LZ4_BIG_ENDIAN 1 #elif defined(__sparc) || defined(__sparc__) \ - || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ || defined(__hpux) || defined(__hppa) \ || defined(_MIPSEB) || defined(__s390__) # define LZ4_BIG_ENDIAN 1 @@ -218,7 +219,7 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; //************************************** // Architecture-specific macros //************************************** -#if LZ4_ARCH64 // 64-bit +#if LZ4_ARCH64 // 64-bit # define STEPSIZE 8 # define UARCH U64 # define AARCH A64 @@ -227,7 +228,7 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; # define LZ4_SECURECOPY(s,d,e) if (d oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too large, better decode everything + if unlikely(outputSize==0) goto _output_error; // Empty output buffer + + + // Main Loop + while (1) + { + unsigned token; + size_t length; + + // get runlength + token = *ip++; + if ((length=(token>>ML_BITS)) == RUN_MASK) + { + unsigned s=255; + while (((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-COPYLENGTH))) + { + if (partialDecoding) + { + if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer + if ((endOnInput) && (ip+length > iend)) goto _output_error; // Error : read attempt beyond end of input buffer + } + else + { + if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there, due to parsing restrictions + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; // Error : not enough place for another match (min 4) + 5 literals + } + memcpy(op, ip, length); + ip += length; + op += length; + break; // Necessarily EOF, due to parsing restrictions + } + LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; + + // get offset + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if ((prefix64k==noPrefix) && unlikely(ref < (BYTE* const)dest)) goto _output_error; // Error : offset outside destination buffer + + // get matchlength + if ((length=(token&ML_MASK)) == ML_MASK) + { + while (endOnInput ? ipoend-(COPYLENGTH)-(STEPSIZE-4)) + { + if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals + LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); + while(op= oexit) -#else -# define OUTPUTTARGET(cpy,oexit) (0) -#endif - - - - -//**************************** -// Function code -//**************************** - -int FUNCTION_NAME(const char* source, - char* dest, -#ifdef EXITCONDITION_INPUTSIZE - int inputSize, -#endif -#ifdef PARTIAL_DECODING - int targetOutputSize, -#endif - int outputSize - ) -{ - // Local Variables - const BYTE* restrict ip = (const BYTE*) source; - const BYTE* ref; -#ifdef EXITCONDITION_INPUTSIZE - const BYTE* const iend = ip + inputSize; -#endif - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + outputSize; - BYTE* cpy; -#ifdef PARTIAL_DECODING - BYTE* const oexit = op + targetOutputSize; -#endif - - size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 - size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; -#endif - - -#ifdef EXITCONDITION_INPUTSIZE - // Special case - if unlikely(!inputSize) goto _output_error; // A correctly formed null-compressed LZ4 must have at least one byte (token=0) -#endif - - // Main Loop - while (1) - { - unsigned token; - size_t length; - - // get runlength - token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) - { - unsigned s=255; - while (INPUTBUFFER_CONTROL(ip,iend) && (s==255)) - { - s=*ip++; - length += s; - } - } - - // copy literals - cpy = op+length; -#ifdef EXITCONDITION_INPUTSIZE - if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS)) || OUTPUTTARGET(cpy,oexit)) - { - if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer - if ((!OUTPUTTARGET(cpy,oexit)) && (ip+length != iend)) goto _output_error; // Error : Must consume all input at this stage, except if reaching TargetOutputSize -#else - if (cpy>oend-COPYLENGTH) - { - if (cpy != oend) goto _output_error; // Error : not enough place for another match (min 4) + 5 literals -#endif - memcpy(op, ip, length); - ip += length; - op += length; - break; // Necessarily EOF, due to parsing restrictions - } - LZ4_WILDCOPY(ip, op, cpy); ip -= (op-cpy); op = cpy; - - // get offset - LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; -#ifndef PREFIX_64K - if unlikely(ref < (BYTE* const)dest) goto _output_error; // Error : offset outside destination buffer -#endif - - // get matchlength - if ((length=(token&ML_MASK)) == ML_MASK) - { - while INPUTBUFFER_CONTROL(ip,iend-(LASTLITERALS+1)) // A minimum nb of input bytes must remain for LASTLITERALS + token - { - unsigned s = *ip++; - length += s; - if (s==255) continue; - break; - } - } - - // copy repeated sequence - if unlikely((op-ref)oend-(COPYLENGTH)-(STEPSIZE-4)) - { - if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals - LZ4_SECURECOPY(ref, op, (oend-COPYLENGTH)); - while(op // For Visual 2005 # if LZ4_ARCH64 // 64-bit # pragma intrinsic(_BitScanForward64) // For Visual 2005 @@ -205,6 +205,7 @@ typedef struct _U64_S { U64 v; } _PACKED U64_S; #define MB *(1U<<20) #define GB *(1U<<30) + //************************************** // Architecture-specific macros //************************************** diff --git a/xxhash.c b/xxhash.c index 3c5f560..6dacdcb 100644 --- a/xxhash.c +++ b/xxhash.c @@ -35,6 +35,14 @@ You can contact the author at : //************************************** // Tuning parameters //************************************** +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. +// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. +// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. +// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). +#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USE_UNALIGNED_ACCESS 1 +#endif + // XXH_ACCEPT_NULL_INPUT_POINTER : // If the input pointer is a null pointer, xxHash default behavior is to crash, since it is a bad input. // If this option is enabled, xxHash output for null input pointers will be the same as a null-length input. @@ -45,21 +53,33 @@ You can contact the author at : // XXH_FORCE_NATIVE_FORMAT : // By default, xxHash library provides endian-independant Hash values, based on little-endian convention. // Results are therefore identical for little-endian and big-endian CPU. -// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. -// Should endian-independance be of no importance to your application, you may uncomment the #define below +// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. +// Should endian-independance be of no importance for your application, you may uncomment the #define below. // It will improve speed for Big-endian CPU. // This option has no impact on Little_Endian CPU. //#define XXH_FORCE_NATIVE_FORMAT 1 +//************************************** +// Compiler Options +//************************************** +#if defined(_MSC_VER) && !defined(__cplusplus) // Visual Studio +# define inline __inline // Visual C is not C99, but supports some kind of inline +#endif + //************************************** -// Includes +// Includes & Memory related functions //************************************** -#include // for malloc(), free() -#include // for memcpy() #include "xxhash.h" - +// Modify the local functions below should you wish to use some other memory related routines +// for malloc(), free() +#include +static inline void* XXH_malloc(size_t s) { return malloc(s); } +static inline void XXH_free (void* p) { free(p); } +// for memcpy() +#include +static inline void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } //************************************** @@ -77,8 +97,8 @@ You can contact the author at : # endif #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) # define XXH_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ || defined(__hpux) || defined(__hppa) \ || defined(_MIPSEB) || defined(__s390__) # define XXH_BIG_ENDIAN 1 @@ -101,21 +121,39 @@ You can contact the author at : typedef int32_t S32; typedef uint64_t U64; #else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; #endif +#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif -//************************************** -// Compiler-specific Options & Functions -//************************************** +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(push, 1) +#endif + +typedef struct _U32_S { U32 v; } _PACKED U32_S; + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(pop) +#endif + +#define A32(x) (((U32_S *)(x))->v) + + +//*************************************** +// Compiler-specific Functions and Macros +//*************************************** #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -// Note : under GCC, it may sometimes be faster to enable the (2nd) macro definition, instead of using win32 intrinsic -#if defined(_WIN32) +// Note : although _rotl exists for minGW (GCC under windows), performance seems poor +#if defined(_MSC_VER) # define XXH_rotl32(x,r) _rotl(x,r) #else # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) @@ -147,7 +185,9 @@ static inline U32 XXH_swap32 (U32 x) { //************************************** // Macros //************************************** -#define XXH_LE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(*(U32*)(p)) : *(U32*)(p)) +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations +#define XXH_LE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(A32(p)) : A32(p)) +#define XXH_alignedLE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(*(U32*)(p)) : *(U32*)(p)) @@ -155,6 +195,53 @@ static inline U32 XXH_swap32 (U32 x) { // Simple Hash Functions //**************************** +#if !defined(XXH_USE_UNALIGNED_ACCESS) +// Specific version, for aligned 32-bits input. Useless for CPU supporting unaligned access. +static U32 XXH32_alignedInput(const void* input, int len, U32 seed) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U32 h32; + + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + do + { + v1 += XXH_alignedLE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_alignedLE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_alignedLE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_alignedLE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else { h32 = seed + PRIME32_5; } + h32 += (U32) len; + while (p<=bEnd-4) + { + h32 += XXH_alignedLE32(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + return h32; +} +#endif + U32 XXH32(const void* input, int len, U32 seed) { #if 0 @@ -172,6 +259,10 @@ U32 XXH32(const void* input, int len, U32 seed) if (p==NULL) { len=0; p=(const BYTE*)16; } #endif +#if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((U32)p) & 3) == 0) return XXH32_alignedInput(input, len, seed); // Input is aligned, let's leverage the speed advantage +#endif + if (len>=16) { const BYTE* const limit = bEnd - 16; @@ -229,21 +320,25 @@ U32 XXH32(const void* input, int len, U32 seed) struct XXH_state32_t { + U64 total_len; U32 seed; U32 v1; U32 v2; U32 v3; U32 v4; - U64 total_len; - char memory[16]; int memsize; + char memory[16]; }; -int XXH32_sizeofState() { return sizeof(struct XXH_state32_t); } +int XXH32_sizeofState() +{ + XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough + return sizeof(struct XXH_state32_t); +} -XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed) +XXH_errorcode XXH32_resetState(void* state_in, U32 seed) { struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; state->seed = seed; @@ -253,15 +348,15 @@ XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed) state->v4 = seed - PRIME32_1; state->total_len = 0; state->memsize = 0; - return OK; + return XXH_OK; } void* XXH32_init (U32 seed) { - struct XXH_state32_t * state = (struct XXH_state32_t *) malloc (sizeof(struct XXH_state32_t)); + void* state = XXH_malloc (sizeof(struct XXH_state32_t)); XXH32_resetState(state, seed); - return (void*)state; + return state; } @@ -279,14 +374,14 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len) if (state->memsize + len < 16) // fill in tmp buffer { - memcpy(state->memory + state->memsize, input, len); + XXH_memcpy(state->memory + state->memsize, input, len); state->memsize += len; - return OK; + return XXH_OK; } if (state->memsize) // some data left from previous update { - memcpy(state->memory + state->memsize, input, 16-state->memsize); + XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); { const U32* p32 = (const U32*)state->memory; state->v1 += XXH_LE32(p32) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; @@ -322,11 +417,11 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, int len) if (p < bEnd) { - memcpy(state->memory, p, bEnd-p); + XXH_memcpy(state->memory, p, bEnd-p); state->memsize = (int)(bEnd-p); } - return OK; + return XXH_OK; } @@ -337,7 +432,6 @@ U32 XXH32_intermediateDigest (void* state_in) BYTE* bEnd = (BYTE*)state->memory + state->memsize; U32 h32; - if (state->total_len >= 16) { h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); @@ -377,7 +471,7 @@ U32 XXH32_digest (void* state_in) { U32 h32 = XXH32_intermediateDigest(state_in); - free(state_in); + XXH_free(state_in); return h32; } diff --git a/xxhash.h b/xxhash.h index afdf243..8cb06d3 100644 --- a/xxhash.h +++ b/xxhash.h @@ -27,8 +27,8 @@ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - You can contact the author at : - - xxHash source repository : http://code.google.com/p/xxhash/ + You can contact the author at : + - xxHash source repository : http://code.google.com/p/xxhash/ */ /* Notice extracted from xxHash homepage : @@ -67,7 +67,7 @@ extern "C" { //**************************** // Type //**************************** -typedef enum { OK=0, XXH_ERROR } XXH_errorcode; +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; @@ -79,13 +79,13 @@ unsigned int XXH32 (const void* input, int len, unsigned int seed); /* XXH32() : - Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". + Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". The memory between input & input+len must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - This function successfully passes all SMHasher tests. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s - Note that "len" is type "int", which means it is limited to 2^31-1. - If your data is larger, use the advanced functions below. + "seed" can be used to alter the result predictably. + This function successfully passes all SMHasher tests. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s + Note that "len" is type "int", which means it is limited to 2^31-1. + If your data is larger, use the advanced functions below. */ @@ -122,14 +122,19 @@ Memory will be freed by XXH32_digest(). int XXH32_sizeofState(); -XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed); +XXH_errorcode XXH32_resetState(void* state, unsigned int seed); + +#define XXH32_SIZEOFSTATE 48 +typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; /* -These functions are the basic elements of XXH32_init(); -The objective is to allow user application to make its own allocation. +These functions allow user application to make its own allocation for state. + +XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state. +Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. +This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state. -XXH32_sizeofState() is used to know how much space must be allocated by the application. -This space must be referenced by a void* pointer. -This pointer must be provided as 'state_in' into XXH32_resetState(), which initializes the state. +For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), +use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. */ @@ -138,7 +143,7 @@ unsigned int XXH32_intermediateDigest (void* state); This function does the same as XXH32_digest(), generating a 32-bit hash, but preserve memory context. This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update(). -To free memory context, use XXH32_digest(). +To free memory context, use XXH32_digest(), or free(). */ -- cgit v0.12