From 94917c9a04ce08fcdb6b465b4aff38d2d82053aa Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Wed, 9 Nov 2016 16:20:47 -0800 Subject: Add dictionary random access example --- examples/.gitignore | 1 + examples/Makefile | 16 ++- examples/README.md | 1 + examples/dictionaryRandomAccess.c | 280 +++++++++++++++++++++++++++++++++++++ examples/dictionaryRandomAccess.md | 67 +++++++++ 5 files changed, 359 insertions(+), 6 deletions(-) create mode 100644 examples/dictionaryRandomAccess.c create mode 100644 examples/dictionaryRandomAccess.md diff --git a/examples/.gitignore b/examples/.gitignore index 4893866..3ceb90d 100644 --- a/examples/.gitignore +++ b/examples/.gitignore @@ -1,6 +1,7 @@ /Makefile.lz4* /printVersion /doubleBuffer +/dictionaryRandomAccess /ringBuffer /ringBufferHC /lineCompress diff --git a/examples/Makefile b/examples/Makefile index c8caf24..aad713b 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -48,7 +48,7 @@ endif default: all -all: printVersion doubleBuffer ringBuffer ringBufferHC lineCompress frameCompress +all: printVersion doubleBuffer dictionaryRandomAccess ringBuffer ringBufferHC lineCompress frameCompress printVersion: $(LZ4DIR)/lz4.c printVersion.c $(CC) $(FLAGS) $^ -o $@$(EXT) @@ -56,6 +56,9 @@ printVersion: $(LZ4DIR)/lz4.c printVersion.c doubleBuffer: $(LZ4DIR)/lz4.c blockStreaming_doubleBuffer.c $(CC) $(FLAGS) $^ -o $@$(EXT) +dictionaryRandomAccess: $(LZ4DIR)/lz4.c dictionaryRandomAccess.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + ringBuffer : $(LZ4DIR)/lz4.c blockStreaming_ringBuffer.c $(CC) $(FLAGS) $^ -o $@$(EXT) @@ -66,7 +69,7 @@ lineCompress: $(LZ4DIR)/lz4.c blockStreaming_lineByLine.c $(CC) $(FLAGS) $^ -o $@$(EXT) frameCompress: frameCompress.c - $(CC) $(FLAGS) $^ -o $@$(EXT) -L$(LZ4DIR) -llz4 + $(CC) $(FLAGS) $^ -o $@$(EXT) $(LZ4DIR)/liblz4.a compressFunctions: $(LZ4DIR)/lz4.c compress_functions.c $(CC) $(FLAGS) $^ -o $@$(EXT) -lrt @@ -77,15 +80,16 @@ simpleBuffer: $(LZ4DIR)/lz4.c simple_buffer.c test : all ./printVersion$(EXT) ./doubleBuffer$(EXT) $(TESTFILE) + ./dictionaryRandomAccess$(EXT) $(TESTFILE) $(TESTFILE) 1100 1400 ./ringBuffer$(EXT) $(TESTFILE) ./ringBufferHC$(EXT) $(TESTFILE) ./lineCompress$(EXT) $(TESTFILE) - LD_LIBRARY_PATH=$(LZ4DIR) ./frameCompress$(EXT) $(TESTFILE) + ./frameCompress$(EXT) $(TESTFILE) $(LZ4) -vt $(TESTFILE).lz4 clean: @rm -f core *.o *.dec *-0 *-9 *-8192 *.lz4s *.lz4 \ - printVersion$(EXT) doubleBuffer$(EXT) ringBuffer$(EXT) ringBufferHC$(EXT) \ - lineCompress$(EXT) frameCompress$(EXT) compressFunctions$(EXT) simpleBuffer$(EXT) + printVersion$(EXT) doubleBuffer$(EXT) dictionaryRandomAccess$(EXT) \ + ringBuffer$(EXT) ringBufferHC$(EXT) lineCompress$(EXT) frameCompress$(EXT) \ + compressFunctions$(EXT) simpleBuffer$(EXT) @echo Cleaning completed - diff --git a/examples/README.md b/examples/README.md index 74527d4..e6839e9 100644 --- a/examples/README.md +++ b/examples/README.md @@ -8,3 +8,4 @@ All examples are GPL-v2 licensed. - Examples - [Double Buffer](blockStreaming_doubleBuffer.md) - [Line by Line Text Compression](blockStreaming_lineByLine.md) + - [Dictionary Random Access](dictionaryRandomAccess.md) diff --git a/examples/dictionaryRandomAccess.c b/examples/dictionaryRandomAccess.c new file mode 100644 index 0000000..6acf99b --- /dev/null +++ b/examples/dictionaryRandomAccess.c @@ -0,0 +1,280 @@ +// LZ4 API example : Dictionary Random Access + +#ifdef _MSC_VER /* Visual Studio */ +# define _CRT_SECURE_NO_WARNINGS +# define snprintf sprintf_s +#endif +#include "lz4.h" + +#include +#include +#include +#include + +#define MIN(x, y) (x) < (y) ? (x) : (y) + +enum { + BLOCK_BYTES = 1024, /* 1 KiB of uncompressed data in a block */ + DICTIONARY_BYTES = 1024, /* Load a 1 KiB dictionary */ + MAX_BLOCKS = 1024 /* For simplicity of implementation */ +}; + +/** + * Magic bytes for this test case. + * This is not a great magic number because it is a common word in ASCII. + * However, it is important to have some versioning system in your format. + */ +const char kTestMagic[] = { 'T', 'E', 'S', 'T' }; + + +void write_int(FILE* fp, int i) { + size_t written = fwrite(&i, sizeof(i), 1, fp); + if (written != 1) { exit(10); } +} + +void write_bin(FILE* fp, const void* array, size_t arrayBytes) { + size_t written = fwrite(array, 1, arrayBytes, fp); + if (written != arrayBytes) { exit(11); } +} + +void read_int(FILE* fp, int* i) { + size_t read = fread(i, sizeof(*i), 1, fp); + if (read != 1) { exit(12); } +} + +size_t read_bin(FILE* fp, void* array, size_t arrayBytes) { + size_t read = fread(array, 1, arrayBytes, fp); + if (ferror(fp)) { exit(12); } + return read; +} + +void seek_bin(FILE* fp, long offset, int origin) { + if (fseek(fp, offset, origin)) { exit(14); } +} + + +void test_compress(FILE* outFp, FILE* inpFp, void *dict, int dictSize) +{ + LZ4_stream_t lz4Stream_body; + LZ4_stream_t* lz4Stream = &lz4Stream_body; + + char inpBuf[BLOCK_BYTES]; + int offsets[MAX_BLOCKS]; + int *offsetsEnd = offsets; + + + LZ4_resetStream(lz4Stream); + + /* Write header magic */ + write_bin(outFp, kTestMagic, sizeof(kTestMagic)); + + *offsetsEnd++ = sizeof(kTestMagic); + /* Write compressed data blocks. Each block contains BLOCK_BYTES of plain + data except possibly the last. */ + for(;;) { + const int inpBytes = (int) read_bin(inpFp, inpBuf, BLOCK_BYTES); + if(0 == inpBytes) { + break; + } + + /* Forget previously compressed data and load the dictionary */ + LZ4_loadDict(lz4Stream, dict, dictSize); + { + char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)]; + const int cmpBytes = LZ4_compress_fast_continue( + lz4Stream, inpBuf, cmpBuf, inpBytes, sizeof(cmpBuf), 1); + if(cmpBytes <= 0) { exit(1); } + write_bin(outFp, cmpBuf, (size_t)cmpBytes); + /* Keep track of the offsets */ + *offsetsEnd = *(offsetsEnd - 1) + cmpBytes; + ++offsetsEnd; + } + if (offsetsEnd - offsets > MAX_BLOCKS) { exit(2); } + } + /* Write the tailing jump table */ + { + int *ptr = offsets; + while (ptr != offsetsEnd) { + write_int(outFp, *ptr++); + } + write_int(outFp, offsetsEnd - offsets); + } +} + + +void test_decompress(FILE* outFp, FILE* inpFp, void *dict, int dictSize, int offset, int length) +{ + LZ4_streamDecode_t lz4StreamDecode_body; + LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body; + + /* The blocks [currentBlock, endBlock) contain the data we want */ + int currentBlock = offset / BLOCK_BYTES; + int endBlock = ((offset + length - 1) / BLOCK_BYTES) + 1; + + char decBuf[BLOCK_BYTES]; + int offsets[MAX_BLOCKS]; + + /* Special cases */ + if (length == 0) { return; } + + /* Read the magic bytes */ + { + char magic[sizeof(kTestMagic)]; + size_t read = read_bin(inpFp, magic, sizeof(magic)); + if (read != sizeof(magic)) { exit(1); } + if (memcmp(kTestMagic, magic, sizeof(magic))) { exit(2); } + } + + /* Read the offsets tail */ + { + int numOffsets; + int block; + int *offsetsPtr = offsets; + seek_bin(inpFp, -4, SEEK_END); + read_int(inpFp, &numOffsets); + if (numOffsets <= endBlock) { exit(3); } + seek_bin(inpFp, -4 * (numOffsets + 1), SEEK_END); + for (block = 0; block <= endBlock; ++block) { + read_int(inpFp, offsetsPtr++); + } + } + /* Seek to the first block to read */ + seek_bin(inpFp, offsets[currentBlock], SEEK_SET); + offset = offset % BLOCK_BYTES; + + /* Start decoding */ + for(; currentBlock < endBlock; ++currentBlock) { + char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)]; + /* The difference in offsets is the size of the block */ + int cmpBytes = offsets[currentBlock + 1] - offsets[currentBlock]; + { + const size_t read = read_bin(inpFp, cmpBuf, (size_t)cmpBytes); + if(read != (size_t)cmpBytes) { exit(4); } + } + + /* Load the dictionary */ + LZ4_setStreamDecode(lz4StreamDecode, dict, dictSize); + { + const int decBytes = LZ4_decompress_safe_continue( + lz4StreamDecode, cmpBuf, decBuf, cmpBytes, BLOCK_BYTES); + if(decBytes <= 0) { exit(5); } + { + /* Write out the part of the data we care about */ + int blockLength = MIN(length, (decBytes - offset)); + write_bin(outFp, decBuf + offset, (size_t)blockLength); + offset = 0; + length -= blockLength; + } + } + } +} + + +int compare(FILE* fp0, FILE* fp1, int length) +{ + int result = 0; + + while(0 == result) { + char b0[4096]; + char b1[4096]; + const size_t r0 = read_bin(fp0, b0, MIN(length, (int)sizeof(b0))); + const size_t r1 = read_bin(fp1, b1, MIN(length, (int)sizeof(b1))); + + result = (int) r0 - (int) r1; + + if(0 == r0 || 0 == r1) { + break; + } + if(0 == result) { + result = memcmp(b0, b1, r0); + } + length -= r0; + } + + return result; +} + + +int main(int argc, char* argv[]) +{ + char inpFilename[256] = { 0 }; + char lz4Filename[256] = { 0 }; + char decFilename[256] = { 0 }; + char dictFilename[256] = { 0 }; + int offset; + int length; + char dict[DICTIONARY_BYTES]; + int dictSize; + + if(argc < 5) { + printf("Usage: %s input dictionary offset length", argv[0]); + return 0; + } + + snprintf(inpFilename, 256, "%s", argv[1]); + snprintf(lz4Filename, 256, "%s.lz4s-%d", argv[1], BLOCK_BYTES); + snprintf(decFilename, 256, "%s.lz4s-%d.dec", argv[1], BLOCK_BYTES); + snprintf(dictFilename, 256, "%s", argv[2]); + offset = atoi(argv[3]); + length = atoi(argv[4]); + + printf("inp = [%s]\n", inpFilename); + printf("lz4 = [%s]\n", lz4Filename); + printf("dec = [%s]\n", decFilename); + printf("dict = [%s]\n", dictFilename); + printf("offset = [%d]\n", offset); + printf("length = [%d]\n", length); + + /* Load dictionary */ + { + FILE* dictFp = fopen(dictFilename, "rb"); + dictSize = (int)read_bin(dictFp, dict, DICTIONARY_BYTES); + fclose(dictFp); + } + + /* compress */ + { + FILE* inpFp = fopen(inpFilename, "rb"); + FILE* outFp = fopen(lz4Filename, "wb"); + + printf("compress : %s -> %s\n", inpFilename, lz4Filename); + test_compress(outFp, inpFp, dict, dictSize); + printf("compress : done\n"); + + fclose(outFp); + fclose(inpFp); + } + + /* decompress */ + { + FILE* inpFp = fopen(lz4Filename, "rb"); + FILE* outFp = fopen(decFilename, "wb"); + + printf("decompress : %s -> %s\n", lz4Filename, decFilename); + test_decompress(outFp, inpFp, dict, DICTIONARY_BYTES, offset, length); + printf("decompress : done\n"); + + fclose(outFp); + fclose(inpFp); + } + + /* verify */ + { + FILE* inpFp = fopen(inpFilename, "rb"); + FILE* decFp = fopen(decFilename, "rb"); + seek_bin(inpFp, offset, SEEK_SET); + + printf("verify : %s <-> %s\n", inpFilename, decFilename); + const int cmp = compare(inpFp, decFp, length); + if(0 == cmp) { + printf("verify : OK\n"); + } else { + printf("verify : NG\n"); + } + + fclose(decFp); + fclose(inpFp); + } + + return 0; +} diff --git a/examples/dictionaryRandomAccess.md b/examples/dictionaryRandomAccess.md new file mode 100644 index 0000000..53d825d --- /dev/null +++ b/examples/dictionaryRandomAccess.md @@ -0,0 +1,67 @@ +# LZ4 API Example : Dictionary Random Access + +`dictionaryRandomAccess.c` is LZ4 API example which implements dictionary compression and random access decompression. + +Please note that the output file is not compatible with lz4frame and is platform dependent. + + +## What's the point of this example ? + + - Dictionary based compression for homogenous files. + - Random access to compressed blocks. + + +## How the compression works + +Reads the dictionary from a file, and uses it as the history for each block. +This allows each block to be independent, but maintains compression ratio. + +``` + Dictionary + + + | + v + +---------+ + | Block#1 | + +----+----+ + | + v + {Out#1} + + + Dictionary + + + | + v + +---------+ + | Block#2 | + +----+----+ + | + v + {Out#2} +``` + +After writing the magic bytes `TEST` and then the compressed blocks, write out the jump table. +The last 4 bytes is an integer containing the number of blocks in the stream. +If there are `N` blocks, then just before the last 4 bytes is `N + 1` 4 byte integers containing the offsets at the beginning and end of each block. +Let `Offset#K` be the total number of bytes written after writing out `Block#K` *including* the magic bytes for simplicity. + +``` ++------+---------+ +---------+---+----------+ +----------+-----+ +| TEST | Block#1 | ... | Block#N | 4 | Offset#1 | ... | Offset#N | N+1 | ++------+---------+ +---------+---+----------+ +----------+-----+ +``` + +## How the decompression works + +Decompression will do reverse order. + + - Seek to the last 4 bytes of the file and read the number of offsets. + - Read each offset into an array. + - Seek to the first block containing data we want to read. + We know where to look because we know each block contains a fixed amount of uncompressed data, except possibly the last. + - Decompress it and write what data we need from it to the file. + - Read the next block. + - Decompress it and write that page to the file. + +Continue these procedure until all the required data has been read. -- cgit v0.12 From 5e13a6ec469e25c7dd425c7da4518b4ad2a8862d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 Nov 2016 12:22:37 -0800 Subject: improved man on compression level --- NEWS | 5 ++++- programs/lz4.1 | 14 ++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index 3257fd5..ebf8e1c 100644 --- a/NEWS +++ b/NEWS @@ -9,11 +9,14 @@ Improved cmake build script, by Evan Nemerson New liblz4-dll project, by Przemyslaw Skibinki Makefile: Generates object files (*.o) for faster (re)compilation on low power systems cli : new : --rm command -cli : new : file attributes are preserved, by Przemyslaw Skibinki +cli : new : preserved file attributes, by Przemyslaw Skibinki cli : fix : crash on some invalid inputs cli : fix : -t correctly validates lz4-compressed files, by Nick Terrell cli : fix : detects and reports fread() errors, thanks to Hiroshi Fujishima report #243 cli : bench : new : -r recursive mode +lz4cat : can cat multiple files in a single command line (#184) +Added : doc/lz4_manual.html, by Przemyslaw Skibinski +Added : dictionary compression example, by Nick Terrell Added : Debianization, by Evgeniy Polyakov r131 diff --git a/programs/lz4.1 b/programs/lz4.1 index fcc7980..1d9238d 100644 --- a/programs/lz4.1 +++ b/programs/lz4.1 @@ -141,11 +141,13 @@ Benchmark mode, using # compression level. . .SS "Operation modifiers" .TP -.B \-1 - fast compression (default) -.TP -.B \-9 - high compression +.B \-# + compression level, with # being any value from 1 to 16. + Higher values trade compression speed for compression ratio. + Values above 16 are considered the same as 16. + Recommended values are 1 for fast compression (default), and 9 for high compression. + Speed/compression trade-off will vary depending on data to compress. + Decompression speed remains fast at all settings. .TP .BR \-f ", " --[no-]force @@ -180,7 +182,7 @@ for files that have not been compressed with By default, the second filename is used as the destination filename for the compressed file. With .B -m -, you can specify any number of input filenames. Each of them will be compressed +, it is possible to specify any number of input filenames. Each of them will be compressed independently, and the resulting name of each compressed file will be .B filename.lz4 . -- cgit v0.12 From 1f246a98992fa7035823bedf6a0b7612fc290592 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 Nov 2016 15:31:59 -0800 Subject: Fixed #178 fullbench on small input --- lib/lz4.c | 11 +++++++++-- tests/fullbench.c | 4 +++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/lz4.c b/lib/lz4.c index 53b503c..f86e6f8 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -1066,10 +1066,17 @@ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* } +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). + * Note : you don't need to call LZ4_loadDict() afterwards, + * dictionary is immediately usable, you can therefore call LZ4_compress_fast_continue(). + * Return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) { - LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; - const BYTE* previousDictEnd = dict->dictionary + dict->dictSize; + LZ4_stream_t_internal* const dict = (LZ4_stream_t_internal*) LZ4_dict; + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */ if ((U32)dictSize > dict->dictSize) dictSize = dict->dictSize; diff --git a/tests/fullbench.c b/tests/fullbench.c index 975cf8a..fa37fa6 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -386,7 +386,7 @@ static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSi /* frame functions */ static int local_LZ4F_compressFrame(const char* in, char* out, int inSize) { - return (int)LZ4F_compressFrame(out, 2*inSize + 16, in, inSize, NULL); + return (int)LZ4F_compressFrame(out, LZ4F_compressFrameBound(inSize, NULL), in, inSize, NULL); } static LZ4F_decompressionContext_t g_dCtx; @@ -532,9 +532,11 @@ int fullSpeedBench(const char** fileNamesTable, int nbFiles) chunkP[0].origSize = (int)benchedSize; nbChunks=1; break; case 40: compressionFunction = local_LZ4_saveDict; compressorName = "LZ4_saveDict"; + if (chunkP[0].origSize < 8) { DISPLAY(" cannot bench %s with less then 8 bytes \n", compressorName); continue; } LZ4_loadDict(&LZ4_stream, chunkP[0].origBuffer, chunkP[0].origSize); break; case 41: compressionFunction = local_LZ4_saveDictHC; compressorName = "LZ4_saveDictHC"; + if (chunkP[0].origSize < 8) { DISPLAY(" cannot bench %s with less then 8 bytes \n", compressorName); continue; } LZ4_loadDictHC(&LZ4_streamHC, chunkP[0].origBuffer, chunkP[0].origSize); break; case 60: DISPLAY("Obsolete compression functions : \n"); continue; -- cgit v0.12 From 5b37837e6e872bb57c8c7d07d6d4ff94176b8c59 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Thu, 10 Nov 2016 17:27:56 -0800 Subject: Fix license and remove references to zstd --- contrib/gen_manual/Makefile | 34 ++++++++++++++++++++++++++------ contrib/gen_manual/README.md | 6 +++--- contrib/gen_manual/gen_manual.cpp | 41 +++++++++++++++++++++++++++++++-------- lib/README.md | 2 +- programs/lz4io.c | 8 ++++---- programs/util.h | 2 +- 6 files changed, 70 insertions(+), 23 deletions(-) diff --git a/contrib/gen_manual/Makefile b/contrib/gen_manual/Makefile index 49616ee..adbcca2 100644 --- a/contrib/gen_manual/Makefile +++ b/contrib/gen_manual/Makefile @@ -1,11 +1,33 @@ -# ########################################################################## -# Copyright (c) 2016-present, Przemyslaw Skibinski +# ################################################################ +# Copyright (C) Przemyslaw Skibinski 2016-present # All rights reserved. # -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. An additional grant -# of patent rights can be found in the PATENTS file in the same directory. -# ########################################################################## +# BSD license +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, this +# list of conditions and the following disclaimer in the documentation and/or +# other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# You can contact the author at : +# - LZ4 source repository : https://github.com/Cyan4973/lz4 +# - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c +# ################################################################ CFLAGS ?= -O3 diff --git a/contrib/gen_manual/README.md b/contrib/gen_manual/README.md index 3d146d4..853e623 100644 --- a/contrib/gen_manual/README.md +++ b/contrib/gen_manual/README.md @@ -1,5 +1,5 @@ -gen_manual - a program for automatic generation of zstd manual -============================================================ +gen_manual - a program for automatic generation of lz4 manual +============================================================= #### Introduction @@ -27,5 +27,5 @@ gen_manual [lz4_version] [input_file] [output_html] To compile program and generate lz4 manual we have used: ``` make -./gen_manual.exe 1.7.3 ../../lib/lz4.h zstd_manual.html +./gen_manual.exe 1.7.3 ../../lib/lz4.h lz4_manual.html ``` diff --git a/contrib/gen_manual/gen_manual.cpp b/contrib/gen_manual/gen_manual.cpp index caf03fc..2df081d 100644 --- a/contrib/gen_manual/gen_manual.cpp +++ b/contrib/gen_manual/gen_manual.cpp @@ -1,11 +1,36 @@ /* - * Copyright (c) 2016-present, Przemyslaw Skibinski - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. An additional grant - * of patent rights can be found in the PATENTS file in the same directory. - */ +Copyright (c) 2016-present, Przemyslaw Skibinski +All rights reserved. + +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- LZ4 homepage : http://www.lz4.org +- LZ4 source repository : https://github.com/lz4/lz4 +*/ #include #include @@ -213,4 +238,4 @@ int main(int argc, char *argv[]) { ostream << "" << endl << "" << endl; return 0; -} \ No newline at end of file +} diff --git a/lib/README.md b/lib/README.md index 051ef54..3ca2776 100644 --- a/lib/README.md +++ b/lib/README.md @@ -10,7 +10,7 @@ The minimum required is **`lz4.c`** and **`lz4.h`**, which will provide the fast compression and decompression algorithm. -#### The High Compression variant od LZ4 +#### The High Compression variant of LZ4 For more compression at the cost of compression speed, the High Compression variant **lz4hc** is available. diff --git a/programs/lz4io.c b/programs/lz4io.c index 725d3a8..00576c2 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -258,7 +258,7 @@ static FILE* LZ4IO_openSrcFile(const char* srcFileName) SET_BINARY_MODE(stdin); } else { f = fopen(srcFileName, "rb"); - if ( f==NULL ) DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); + if ( f==NULL ) DISPLAYLEVEL(1, "%s: %s \n", srcFileName, strerror(errno)); } return f; @@ -285,10 +285,10 @@ static FILE* LZ4IO_openDstFile(const char* dstFileName) if (f != NULL) { /* dest exists, prompt for overwrite authorization */ fclose(f); if (g_displayLevel <= 1) { /* No interaction possible */ - DISPLAY("zstd: %s already exists; not overwritten \n", dstFileName); + DISPLAY("%s already exists; not overwritten \n", dstFileName); return NULL; } - DISPLAY("zstd: %s already exists; do you wish to overwrite (y/N) ? ", dstFileName); + DISPLAY("%s already exists; do you wish to overwrite (y/N) ? ", dstFileName); { int ch = getchar(); if ((ch!='Y') && (ch!='y')) { DISPLAY(" not overwritten \n"); @@ -297,7 +297,7 @@ static FILE* LZ4IO_openDstFile(const char* dstFileName) while ((ch!=EOF) && (ch!='\n')) ch = getchar(); /* flush rest of input line */ } } } f = fopen( dstFileName, "wb" ); - if (f==NULL) DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno)); + if (f==NULL) DISPLAYLEVEL(1, "%s: %s\n", dstFileName, strerror(errno)); } /* sparse file */ diff --git a/programs/util.h b/programs/util.h index d76ce6a..1ad61bc 100644 --- a/programs/util.h +++ b/programs/util.h @@ -427,7 +427,7 @@ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_ UTIL_STATIC int UTIL_prepareFileList(const char *dirName, char** bufStart, size_t* pos, char** bufEnd) { (void)bufStart; (void)bufEnd; (void)pos; - fprintf(stderr, "Directory %s ignored (zstd compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName); + fprintf(stderr, "Directory %s ignored (lz4 compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName); return 0; } -- cgit v0.12 From c72d2f5b10d7654bb481f060c04ecdfd5951700f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 Nov 2016 18:02:56 -0800 Subject: minor typo --- contrib/gen_manual/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/contrib/gen_manual/README.md b/contrib/gen_manual/README.md index 853e623..7664ac6 100644 --- a/contrib/gen_manual/README.md +++ b/contrib/gen_manual/README.md @@ -1,5 +1,5 @@ -gen_manual - a program for automatic generation of lz4 manual -============================================================= +gen_manual - a program for automatic generation of manual from source code +========================================================================== #### Introduction @@ -24,7 +24,7 @@ The program requires 3 parameters: gen_manual [lz4_version] [input_file] [output_html] ``` -To compile program and generate lz4 manual we have used: +To compile program and generate lz4 manual we have used: ``` make ./gen_manual.exe 1.7.3 ../../lib/lz4.h lz4_manual.html -- cgit v0.12 From 4f9db1383a749e503e210ec6a712be67f31cfb1d Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Thu, 10 Nov 2016 18:34:49 -0800 Subject: updated xxhash lib --- lib/xxhash.c | 559 +++++++++++++++++++++++++++++++---------------------------- lib/xxhash.h | 142 ++++++++------- 2 files changed, 359 insertions(+), 342 deletions(-) diff --git a/lib/xxhash.c b/lib/xxhash.c index 156b484..e9ff2d4 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -46,7 +46,7 @@ * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. * It can generate buggy code on targets which do not support unaligned memory accesses. * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See https://stackoverflow.com/a/32095106/646947 for details. + * See http://stackoverflow.com/a/32095106/646947 for details. * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ @@ -66,10 +66,10 @@ /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ /*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. * Results are therefore identical for little-endian and big-endian CPU. * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independance be of no importance for your application, you may set the #define below to 1, + * Should endian-independence be of no importance for your application, you may set the #define below to 1, * to improve speed for Big-endian CPU. * This option has no impact on Little_Endian CPU. */ @@ -95,12 +95,12 @@ /* ************************************* * Includes & Memory related functions ***************************************/ -/* Modify the local functions below should you wish to use some other memory routines */ -/* for malloc(), free() */ +/*! Modify the local functions below should you wish to use some other memory routines +* for malloc(), free() */ #include static void* XXH_malloc(size_t s) { return malloc(s); } static void XXH_free (void* p) { free(p); } -/* for memcpy() */ +/*! and for memcpy() */ #include static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } @@ -115,7 +115,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ # define FORCE_INLINE static __forceinline #else -# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ # ifdef __GNUC__ # define FORCE_INLINE static inline __attribute__((always_inline)) # else @@ -131,45 +131,37 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp * Basic Types ***************************************/ #ifndef MEM_MODULE -# define MEM_MODULE -# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) # include typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; typedef int32_t S32; - typedef uint64_t U64; -# else +# else typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; typedef signed int S32; - typedef unsigned long long U64; -# endif +# endif #endif - #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; - +typedef union { U32 u32; } __attribute__((packed)) unalign; static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } #else /* portable and safe solution. Generally efficient. - * see : https://stackoverflow.com/a/32095106/646947 + * see : http://stackoverflow.com/a/32095106/646947 */ - static U32 XXH_read32(const void* memPtr) { U32 val; @@ -177,20 +169,13 @@ static U32 XXH_read32(const void* memPtr) return val; } -static U64 XXH_read64(const void* memPtr) -{ - U64 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ /* **************************************** * Compiler-specific Functions and Macros ******************************************/ -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ #if defined(_MSC_VER) @@ -203,10 +188,8 @@ static U64 XXH_read64(const void* memPtr) #if defined(_MSC_VER) /* Visual Studio */ # define XXH_swap32 _byteswap_ulong -# define XXH_swap64 _byteswap_uint64 -#elif GCC_VERSION >= 403 +#elif XXH_GCC_VERSION >= 403 # define XXH_swap32 __builtin_bswap32 -# define XXH_swap64 __builtin_bswap64 #else static U32 XXH_swap32 (U32 x) { @@ -215,17 +198,6 @@ static U32 XXH_swap32 (U32 x) ((x >> 8) & 0x0000ff00 ) | ((x >> 24) & 0x000000ff ); } -static U64 XXH_swap64 (U64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} #endif @@ -264,67 +236,23 @@ static U32 XXH_readBE32(const void* ptr) return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); } -FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); - else - return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); -} - -FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); -} - -static U64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); -} - /* ************************************* * Macros ***************************************/ #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } -/* ************************************* -* Constants -***************************************/ +/* ******************************************************************* +* 32-bits hash functions +*********************************************************************/ static const U32 PRIME32_1 = 2654435761U; static const U32 PRIME32_2 = 2246822519U; static const U32 PRIME32_3 = 3266489917U; static const U32 PRIME32_4 = 668265263U; static const U32 PRIME32_5 = 374761393U; -static const U64 PRIME64_1 = 11400714785074694791ULL; -static const U64 PRIME64_2 = 14029467366897019727ULL; -static const U64 PRIME64_3 = 1609587929392839161ULL; -static const U64 PRIME64_4 = 9650029242287828579ULL; -static const U64 PRIME64_5 = 2870177450012600261ULL; - -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - - -/* ************************** -* Utils -****************************/ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - - -/* *************************** -* Simple Hash Functions -*****************************/ - static U32 XXH32_round(U32 seed, U32 input) { seed += input * PRIME32_2; @@ -394,10 +322,10 @@ XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int s { #if 0 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_CREATESTATE_STATIC(state); - XXH32_reset(state, seed); - XXH32_update(state, input, len); - return XXH32_digest(state); + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, input, len); + return XXH32_digest(&state); #else XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -417,121 +345,8 @@ XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int s } -static U64 XXH64_round(U64 acc, U64 input) -{ - acc += input * PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= PRIME64_1; - return acc; -} - -static U64 XXH64_mergeRound(U64 acc, U64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * PRIME64_1 + PRIME64_4; - return acc; -} - -FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif - - if (len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; - - do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - - } else { - h64 = seed + PRIME64_5; - } - - h64 += (U64) len; - - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; -} - - -XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_CREATESTATE_STATIC(state); - XXH64_reset(state, seed); - XXH64_update(state, input, len); - return XXH64_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - - -/* ************************************************** -* Advanced Hash Functions -****************************************************/ +/*====== Hash streaming ======*/ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) { @@ -543,24 +358,15 @@ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) return XXH_OK; } -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) { - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; + memcpy(dstState, srcState, sizeof(*dstState)); } - -/*** Hash feed ***/ - XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)); - state.seed = seed; + memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ state.v1 = seed + PRIME32_1 + PRIME32_2; state.v2 = seed + PRIME32_2; state.v3 = seed + 0; @@ -570,20 +376,6 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int s } -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) -{ - XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)); - state.seed = seed; - state.v1 = seed + PRIME64_1 + PRIME64_2; - state.v2 = seed + PRIME64_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} - - FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { const BYTE* p = (const BYTE*)input; @@ -593,11 +385,12 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void if (input==NULL) return XXH_ERROR; #endif - state->total_len += len; + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); if (state->memsize + len < 16) { /* fill in tmp buffer */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (U32)len; + state->memsize += (unsigned)len; return XXH_OK; } @@ -634,8 +427,8 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void } if (p < bEnd) { - XXH_memcpy(state->mem32, p, bEnd-p); - state->memsize = (int)(bEnd-p); + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); } return XXH_OK; @@ -659,13 +452,13 @@ FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; U32 h32; - if (state->total_len >= 16) { + if (state->large_len) { h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); } else { - h32 = state->seed + PRIME32_5; + h32 = state->v3 /* == seed */ + PRIME32_5; } - h32 += (U32) state->total_len; + h32 += state->total_len_32; while (p+4<=bEnd) { h32 += XXH_readLE32(p, endian) * PRIME32_3; @@ -700,8 +493,257 @@ XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) } +/*====== Canonical representation ======*/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG -/* **** XXH64 **** */ +/* ******************************************************************* +* 64-bits hash functions +*********************************************************************/ + +/*====== Memory access ======*/ + +#ifndef MEM_MODULE +# define MEM_MODULE +# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include + typedef uint64_t U64; +# else + typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ +# endif +#endif + + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64; +static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap64 _byteswap_uint64 +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap64 __builtin_bswap64 +#else +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + +FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/*====== xxh64 ======*/ + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_state_t state; + XXH64_reset(&state, seed); + XXH64_update(&state, input, len); + return XXH64_digest(&state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +/*====== Hash Streaming ======*/ + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) { @@ -751,8 +793,8 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void } if (p < bEnd) { - XXH_memcpy(state->mem64, p, bEnd-p); - state->memsize = (int)(bEnd-p); + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); } return XXH_OK; @@ -768,8 +810,6 @@ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* return XXH64_update_endian(state_in, input, len, XXH_bigEndian); } - - FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { const BYTE * p = (const BYTE*)state->mem64; @@ -788,7 +828,7 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess h64 = XXH64_mergeRound(h64, v3); h64 = XXH64_mergeRound(h64, v4); } else { - h64 = state->seed + PRIME64_5; + h64 = state->v3 + PRIME64_5; } h64 += (U64) state->total_len; @@ -821,7 +861,6 @@ FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess return h64; } - XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -833,22 +872,7 @@ XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) } -/* ************************** -* Canonical representation -****************************/ - -/*! Default XXH result types are basic unsigned 32 and 64 bits. -* The canonical representation follows human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. -*/ - -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); -} +/*====== Canonical representation ======*/ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) { @@ -857,12 +881,9 @@ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t memcpy(dst, &hash, sizeof(*dst)); } -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} - XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) { return XXH_readBE64(src); } + +#endif /* XXH_NO_LONG_LONG */ diff --git a/lib/xxhash.h b/lib/xxhash.h index 4d7feff..870a6d9 100644 --- a/lib/xxhash.h +++ b/lib/xxhash.h @@ -83,20 +83,20 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; * API modifier ******************************/ /** XXH_PRIVATE_API -* This is useful if you want to include xxhash functions in `static` mode +* This is useful to include xxhash functions in `static` mode * in order to inline them, and remove their symbol from the public list. * Methodology : * #define XXH_PRIVATE_API * #include "xxhash.h" -* `xxhash.c` is automatically included, so the file is still needed, -* but it's not useful to compile and link it anymore. +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module. */ #ifdef XXH_PRIVATE_API # ifndef XXH_STATIC_LINKING_ONLY # define XXH_STATIC_LINKING_ONLY # endif # if defined(__GNUC__) -# define XXH_PUBLIC_API static __attribute__((unused)) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # define XXH_PUBLIC_API static inline # elif defined(_MSC_VER) @@ -111,10 +111,10 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; /*!XXH_NAMESPACE, aka Namespace Emulation : If you want to include _and expose_ xxHash functions from within your own library, -but also want to avoid symbol collisions with another library which also includes xxHash, +but also want to avoid symbol collisions with other libraries which may also include xxHash, you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library -with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). +with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values). Note that no change is required within the calling program as long as it includes `xxhash.h` : regular symbol name will be automatically translated by this header. @@ -122,21 +122,25 @@ regular symbol name will be automatically translated by this header. #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) #endif @@ -145,58 +149,33 @@ regular symbol name will be automatically translated by this header. ***************************************/ #define XXH_VERSION_MAJOR 0 #define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 1 +#define XXH_VERSION_RELEASE 2 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) XXH_PUBLIC_API unsigned XXH_versionNumber (void); -/* **************************** -* Simple Hash Functions -******************************/ +/*-********************************************************************** +* 32-bits hash +************************************************************************/ typedef unsigned int XXH32_hash_t; -typedef unsigned long long XXH64_hash_t; - -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); -/*! -XXH32() : +/*! XXH32() : Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s -XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". - "seed" can be used to alter the result predictably. - This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). -*/ - + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); -/* **************************** -* Streaming Hash Functions -******************************/ +/*====== Streaming ======*/ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ - -/*! State allocation, compatible with dynamic libraries */ - XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); - - -/* hash streaming */ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state); XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); - /* These functions generate the xxHash of an input provided in multiple segments. Note that, for small input, they are slower than single-call functions, due to state management. @@ -219,29 +198,11 @@ and generate some new hashes later on, by calling again XXH*_digest(). When done, free XXH state space if it was allocated dynamically. */ +/*====== Canonical representation ======*/ -/* ************************** -* Utils -****************************/ -#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ -# define restrict /* disable restrict */ -#endif - -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); - - -/* ************************** -* Canonical representation -****************************/ typedef struct { unsigned char digest[4]; } XXH32_canonical_t; -typedef struct { unsigned char digest[8]; } XXH64_canonical_t; - XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); - XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); /* Default result type for XXH functions are primitive unsigned 32 and 64 bits. * The canonical representation uses human-readable write convention, aka big-endian (large digits first). @@ -250,41 +211,76 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src */ +#ifndef XXH_NO_LONG_LONG +/*-********************************************************************** +* 64-bits hash +************************************************************************/ +typedef unsigned long long XXH64_hash_t; + +/*! XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*====== Streaming ======*/ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/*====== Canonical representation ======*/ +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); +#endif /* XXH_NO_LONG_LONG */ + + #ifdef XXH_STATIC_LINKING_ONLY /* ================================================================================================ This section contains definitions which are not guaranteed to remain stable. - They could change in a future version, becoming incompatible with a different version of the library. + They may change in future versions, becoming incompatible with a different version of the library. They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! =================================================================================================== */ -/* These definitions allow allocating XXH state statically (on stack) */ +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ struct XXH32_state_s { - unsigned long long total_len; - unsigned seed; + unsigned total_len_32; + unsigned large_len; unsigned v1; unsigned v2; unsigned v3; unsigned v4; unsigned mem32[4]; /* buffer defined as U32 for alignment */ unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ }; /* typedef'd to XXH32_state_t */ +#ifndef XXH_NO_LONG_LONG struct XXH64_state_s { unsigned long long total_len; - unsigned long long seed; unsigned long long v1; unsigned long long v2; unsigned long long v3; unsigned long long v4; unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ }; /* typedef'd to XXH64_state_t */ - +#endif # ifdef XXH_PRIVATE_API -# include "xxhash.c" /* include xxhash functions as `static`, for inlining */ +# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */ # endif #endif /* XXH_STATIC_LINKING_ONLY */ -- cgit v0.12