From 91cce757f521b99fe9bb558d80e76c9419eec6a1 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Wed, 29 Jun 2016 21:48:27 +0200 Subject: Updated xxhash library to v0.6.1 --- lib/lz4frame.c | 329 ++++++++-------------- lib/xxhash.c | 764 ++++++++++++++++++++++----------------------------- lib/xxhash.h | 227 +++++++++++---- programs/frametest.c | 9 +- programs/fuzzer.c | 11 +- 5 files changed, 631 insertions(+), 709 deletions(-) diff --git a/lib/lz4frame.c b/lib/lz4frame.c index e5458bb..27b86b4 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -1,6 +1,6 @@ /* LZ4 auto-framing library -Copyright (C) 2011-2015, Yann Collet. +Copyright (C) 2011-2016, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -28,8 +28,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : +- LZ4 homepage : http://www.lz4.org - LZ4 source repository : https://github.com/Cyan4973/lz4 -- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* LZ4F is a stand-alone API to create LZ4-compressed Frames @@ -38,7 +38,7 @@ You can contact the author at : * */ -/************************************** +/*-************************************ * Compiler Options **************************************/ #ifdef _MSC_VER /* Visual Studio */ @@ -46,7 +46,7 @@ You can contact the author at : #endif -/************************************** +/*-************************************ * Memory routines **************************************/ #include /* malloc, calloc, free */ @@ -56,16 +56,17 @@ You can contact the author at : #define MEM_INIT memset -/************************************** +/*-************************************ * Includes **************************************/ #include "lz4frame_static.h" #include "lz4.h" #include "lz4hc.h" +#define XXH_STATIC_LINKING_ONLY #include "xxhash.h" -/************************************** +/*-************************************ * Basic Types **************************************/ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ @@ -84,7 +85,7 @@ typedef unsigned long long U64; #endif -/************************************** +/*-************************************ * Constants **************************************/ #define KB *(1<<10) @@ -108,7 +109,7 @@ static const size_t BHSize = 4; static const int minHClevel = 3; -/************************************** +/*-************************************ * Structures and local types **************************************/ typedef struct LZ4F_cctx_s @@ -150,7 +151,7 @@ typedef struct LZ4F_dctx_s } LZ4F_dctx_t; -/************************************** +/*-************************************ * Error management **************************************/ #define LZ4F_GENERATE_STRING(STRING) #STRING, @@ -170,7 +171,7 @@ const char* LZ4F_getErrorName(LZ4F_errorCode_t code) } -/************************************** +/*-************************************ * Private functions **************************************/ static size_t LZ4F_getBlockSize(unsigned blockSizeID) @@ -235,7 +236,7 @@ static BYTE LZ4F_headerChecksum (const void* header, size_t length) } -/************************************** +/*-************************************ * Simple compression functions **************************************/ static LZ4F_blockSizeID_t LZ4F_optimalBSID(const LZ4F_blockSizeID_t requestedBSID, const size_t srcSize) @@ -272,7 +273,7 @@ size_t 
LZ4F_compressFrameBound(size_t srcSize, const LZ4F_preferences_t* prefere } -/* LZ4F_compressFrame() +/*! LZ4F_compressFrame() : * Compress an entire srcBuffer into a valid LZ4 frame, as defined by specification v1.5.0, in a single step. * The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case. * You can get the minimum value of dstMaxSize by using LZ4F_compressFrameBound() @@ -305,8 +306,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf if (prefs.frameInfo.contentSize != 0) prefs.frameInfo.contentSize = (U64)srcSize; /* auto-correct content size if selected (!=0) */ - if (prefs.compressionLevel < (int)minHClevel) - { + if (prefs.compressionLevel < (int)minHClevel) { cctxI.lz4CtxPtr = &lz4ctx; cctxI.lz4CtxLevel = 1; } @@ -340,7 +340,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf } -/*********************************** +/*-********************************* * Advanced compression functions ***********************************/ @@ -372,8 +372,7 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_comp { LZ4F_cctx_t* cctxPtr = (LZ4F_cctx_t*)LZ4F_compressionContext; - if (cctxPtr != NULL) /* null pointers can be safely provided to this function, like free() */ - { + if (cctxPtr != NULL) { /* null pointers can be safely provided to this function, like free() */ FREEMEM(cctxPtr->lz4CtxPtr); FREEMEM(cctxPtr->tmpBuff); FREEMEM(LZ4F_compressionContext); @@ -383,7 +382,7 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_comp } -/* LZ4F_compressBegin() : +/*! LZ4F_compressBegin() : * will write the frame header into dstBuffer. * dstBuffer must be large enough to accommodate a header (dstMaxSize). Maximum header size is LZ4F_MAXHEADERFRAME_SIZE bytes. * The result of the function is the number of bytes written into dstBuffer for the header @@ -405,10 +404,8 @@ size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* ds cctxPtr->prefs = *preferencesPtr; /* ctx Management */ - { - U32 tableID = (cctxPtr->prefs.compressionLevel < minHClevel) ? 1 : 2; /* 0:nothing ; 1:LZ4 table ; 2:HC tables */ - if (cctxPtr->lz4CtxLevel < tableID) - { + { U32 const tableID = (cctxPtr->prefs.compressionLevel < minHClevel) ? 
1 : 2; /* 0:nothing ; 1:LZ4 table ; 2:HC tables */ + if (cctxPtr->lz4CtxLevel < tableID) { FREEMEM(cctxPtr->lz4CtxPtr); if (cctxPtr->prefs.compressionLevel < minHClevel) cctxPtr->lz4CtxPtr = (void*)LZ4_createStream(); @@ -426,8 +423,7 @@ size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* ds if (preferencesPtr->autoFlush) requiredBuffSize = (cctxPtr->prefs.frameInfo.blockMode == LZ4F_blockLinked) * 64 KB; /* just needs dict */ - if (cctxPtr->maxBufferSize < requiredBuffSize) - { + if (cctxPtr->maxBufferSize < requiredBuffSize) { cctxPtr->maxBufferSize = requiredBuffSize; FREEMEM(cctxPtr->tmpBuff); cctxPtr->tmpBuff = (BYTE*)ALLOCATOR(requiredBuffSize); @@ -454,8 +450,7 @@ size_t LZ4F_compressBegin(LZ4F_compressionContext_t compressionContext, void* ds /* BD Byte */ *dstPtr++ = (BYTE)((cctxPtr->prefs.frameInfo.blockSizeID & _3BITS) << 4); /* Optional Frame content size field */ - if (cctxPtr->prefs.frameInfo.contentSize) - { + if (cctxPtr->prefs.frameInfo.contentSize) { LZ4F_writeLE64(dstPtr, cctxPtr->prefs.frameInfo.contentSize); dstPtr += 8; cctxPtr->totalInSize = 0; @@ -479,8 +474,7 @@ size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesP LZ4F_preferences_t prefsNull; memset(&prefsNull, 0, sizeof(prefsNull)); prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* worst case */ - { - const LZ4F_preferences_t* prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr; + { const LZ4F_preferences_t* prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr; LZ4F_blockSizeID_t bid = prefsPtr->frameInfo.blockSizeID; size_t blockSize = LZ4F_getBlockSize(bid); unsigned nbBlocks = (unsigned)(srcSize / blockSize) + 1; @@ -502,8 +496,7 @@ static size_t LZ4F_compressBlock(void* dst, const void* src, size_t srcSize, com U32 cSize; cSize = (U32)compress(lz4ctx, (const char*)src, (char*)(cSizePtr+4), (int)(srcSize), (int)(srcSize-1), level); LZ4F_writeLE32(cSizePtr, cSize); - if (cSize == 0) /* compression failed */ - { + if (cSize == 0) { /* compression failed */ cSize = (U32)srcSize; LZ4F_writeLE32(cSizePtr, cSize + LZ4F_BLOCKUNCOMPRESSED_FLAG); memcpy(cSizePtr+4, src, srcSize); @@ -532,8 +525,7 @@ static int LZ4F_localLZ4_compressHC_limitedOutput_continue(void* ctx, const char static compressFunc_t LZ4F_selectCompression(LZ4F_blockMode_t blockMode, int level) { - if (level < minHClevel) - { + if (level < minHClevel) { if (blockMode == LZ4F_blockIndependent) return LZ4F_localLZ4_compress_limitedOutput_withState; return LZ4F_localLZ4_compress_limitedOutput_continue; } @@ -550,7 +542,7 @@ static int LZ4F_localSaveDict(LZ4F_cctx_t* cctxPtr) typedef enum { notDone, fromTmpBuffer, fromSrcBuffer } LZ4F_lastBlockStatus; -/* LZ4F_compressUpdate() +/*! LZ4F_compressUpdate() : * LZ4F_compressUpdate() can be called repetitively to compress as much data as necessary. * The most important rule is that dstBuffer MUST be large enough (dstMaxSize) to ensure compression completion even in worst case. 
* If this condition is not respected, LZ4F_compress() will fail (result is an errorCode) @@ -581,19 +573,15 @@ size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* d compress = LZ4F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel); /* complete tmp buffer */ - if (cctxPtr->tmpInSize > 0) /* some data already within tmp buffer */ - { + if (cctxPtr->tmpInSize > 0) { /* some data already within tmp buffer */ size_t sizeToCopy = blockSize - cctxPtr->tmpInSize; - if (sizeToCopy > srcSize) - { + if (sizeToCopy > srcSize) { /* add src to tmpIn buffer */ memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, srcSize); srcPtr = srcEnd; cctxPtr->tmpInSize += srcSize; /* still needs some CRC */ - } - else - { + } else { /* complete tmpIn block and then compress it */ lastBlockCompressed = fromTmpBuffer; memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy); @@ -606,16 +594,14 @@ size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* d } } - while ((size_t)(srcEnd - srcPtr) >= blockSize) - { + while ((size_t)(srcEnd - srcPtr) >= blockSize) { /* compress full block */ lastBlockCompressed = fromSrcBuffer; dstPtr += LZ4F_compressBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel); srcPtr += blockSize; } - if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) - { + if ((cctxPtr->prefs.autoFlush) && (srcPtr < srcEnd)) { /* compress remaining input < blockSize */ lastBlockCompressed = fromSrcBuffer; dstPtr += LZ4F_compressBlock(dstPtr, srcPtr, srcEnd - srcPtr, compress, cctxPtr->lz4CtxPtr, cctxPtr->prefs.compressionLevel); @@ -623,14 +609,10 @@ size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* d } /* preserve dictionary if necessary */ - if ((cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) && (lastBlockCompressed==fromSrcBuffer)) - { - if (compressOptionsPtr->stableSrc) - { + if ((cctxPtr->prefs.frameInfo.blockMode==LZ4F_blockLinked) && (lastBlockCompressed==fromSrcBuffer)) { + if (compressOptionsPtr->stableSrc) { cctxPtr->tmpIn = cctxPtr->tmpBuff; - } - else - { + } else { int realDictSize = LZ4F_localSaveDict(cctxPtr); if (realDictSize==0) return (size_t)-LZ4F_ERROR_GENERIC; cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize; @@ -646,8 +628,7 @@ size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* d } /* some input data left, necessarily < blockSize */ - if (srcPtr < srcEnd) - { + if (srcPtr < srcEnd) { /* fill tmp buffer */ size_t sizeToCopy = srcEnd - srcPtr; memcpy(cctxPtr->tmpIn, srcPtr, sizeToCopy); @@ -662,7 +643,7 @@ size_t LZ4F_compressUpdate(LZ4F_compressionContext_t compressionContext, void* d } -/* LZ4F_flush() +/*! LZ4F_flush() : * Should you need to create compressed data immediately, without waiting for a block to be filled, * you can call LZ4_flush(), which will immediately compress any remaining data stored within compressionContext. 
* The result of the function is the number of bytes written into dstBuffer @@ -692,8 +673,7 @@ size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, cctxPtr->tmpInSize = 0; /* keep tmpIn within limits */ - if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) /* necessarily LZ4F_blockLinked */ - { + if ((cctxPtr->tmpIn + cctxPtr->maxBlockSize) > (cctxPtr->tmpBuff + cctxPtr->maxBufferSize)) { /* necessarily LZ4F_blockLinked */ int realDictSize = LZ4F_localSaveDict(cctxPtr); cctxPtr->tmpIn = cctxPtr->tmpBuff + realDictSize; } @@ -702,7 +682,7 @@ size_t LZ4F_flush(LZ4F_compressionContext_t compressionContext, void* dstBuffer, } -/* LZ4F_compressEnd() +/*! LZ4F_compressEnd() : * When you want to properly finish the compressed frame, just call LZ4F_compressEnd(). * It will flush whatever data remained within compressionContext (like LZ4_flush()) * but also properly finalize the frame, with an endMark and a checksum. @@ -725,8 +705,7 @@ size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstB LZ4F_writeLE32(dstPtr, 0); dstPtr+=4; /* endMark */ - if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) - { + if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) { U32 xxh = XXH32_digest(&(cctxPtr->xxh)); LZ4F_writeLE32(dstPtr, xxh); dstPtr+=4; /* content Checksum */ @@ -734,8 +713,7 @@ size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstB cctxPtr->cStage = 0; /* state is now re-usable (with identical preferences) */ - if (cctxPtr->prefs.frameInfo.contentSize) - { + if (cctxPtr->prefs.frameInfo.contentSize) { if (cctxPtr->prefs.frameInfo.contentSize != cctxPtr->totalInSize) return (size_t)-LZ4F_ERROR_frameSize_wrong; } @@ -744,7 +722,7 @@ size_t LZ4F_compressEnd(LZ4F_compressionContext_t compressionContext, void* dstB } -/********************************** +/*-******************************* * Decompression functions **********************************/ @@ -773,8 +751,7 @@ LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_decompressionContext_t LZ4F_ { LZ4F_errorCode_t result = LZ4F_OK_NoError; LZ4F_dctx_t* dctxPtr = (LZ4F_dctx_t*)LZ4F_decompressionContext; - if (dctxPtr != NULL) /* can accept NULL input, like free() */ - { + if (dctxPtr != NULL) { /* can accept NULL input, like free() */ result = (LZ4F_errorCode_t)dctxPtr->dStage; FREEMEM(dctxPtr->tmpIn); FREEMEM(dctxPtr->tmpOutBuffer); @@ -800,7 +777,7 @@ typedef enum { dstage_getHeader=0, dstage_storeHeader, } dStage_t; -/* LZ4F_decodeHeader +/*! 
LZ4F_decodeHeader() : return : nb Bytes read from srcVoidPtr (necessarily <= srcSize) or an error code (testable with LZ4F_isError()) output : set internal values of dctx, such as @@ -820,18 +797,14 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_t* dctxPtr, const void* srcVoidPtr, si memset(&(dctxPtr->frameInfo), 0, sizeof(dctxPtr->frameInfo)); /* special case : skippable frames */ - if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) - { + if ((LZ4F_readLE32(srcPtr) & 0xFFFFFFF0U) == LZ4F_MAGIC_SKIPPABLE_START) { dctxPtr->frameInfo.frameType = LZ4F_skippableFrame; - if (srcVoidPtr == (void*)(dctxPtr->header)) - { + if (srcVoidPtr == (void*)(dctxPtr->header)) { dctxPtr->tmpInSize = srcSize; dctxPtr->tmpInTarget = 8; dctxPtr->dStage = dstage_storeSFrameSize; return srcSize; - } - else - { + } else { dctxPtr->dStage = dstage_getSFrameSize; return 4; } @@ -852,8 +825,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_t* dctxPtr, const void* srcVoidPtr, si /* Frame Header Size */ frameHeaderSize = contentSizeFlag ? maxFHSize : minFHSize; - if (srcSize < frameHeaderSize) - { + if (srcSize < frameHeaderSize) { /* not enough input to fully decode frame header */ if (srcPtr != dctxPtr->header) memcpy(dctxPtr->header, srcPtr, srcSize); @@ -891,8 +863,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_t* dctxPtr, const void* srcVoidPtr, si /* alloc */ bufferNeeded = dctxPtr->maxBlockSize + ((dctxPtr->frameInfo.blockMode==LZ4F_blockLinked) * 128 KB); - if (bufferNeeded > dctxPtr->maxBufferSize) /* tmp buffers too small */ - { + if (bufferNeeded > dctxPtr->maxBufferSize) { /* tmp buffers too small */ FREEMEM(dctxPtr->tmpIn); FREEMEM(dctxPtr->tmpOutBuffer); dctxPtr->maxBufferSize = bufferNeeded; @@ -915,7 +886,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_t* dctxPtr, const void* srcVoidPtr, si } -/* LZ4F_getFrameInfo() +/*! LZ4F_getFrameInfo() : * This function decodes frame header information, such as blockSize. * It is optional : you could start by calling directly LZ4F_decompress() instead. * The objective is to extract header information without starting decompression, typically for allocation purposes. 
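The comment above describes the intended use of LZ4F_getFrameInfo() : decode only the frame header, so the caller can size buffers before starting decompression. A minimal sketch of that flow, under stated assumptions (it is not part of this patch; the blockSizeFromID() helper, the 64 KB input chunk, and the file handling are illustrative, and error handling is shortened), could look like this :

/* Sketch only : learn the frame's block size with LZ4F_getFrameInfo(),
 * allocate accordingly, then stream the rest through LZ4F_decompress(). */
#include <stdio.h>
#include <stdlib.h>
#include "lz4frame.h"

static size_t blockSizeFromID(LZ4F_blockSizeID_t id)
{
    /* illustrative mapping : LZ4F_getBlockSize() itself is private to lz4frame.c */
    switch (id) {
    case LZ4F_max64KB:  return  64 * 1024;
    case LZ4F_max256KB: return 256 * 1024;
    case LZ4F_max1MB:   return 1024 * 1024;
    default:            return 4 * 1024 * 1024;   /* LZ4F_default or LZ4F_max4MB */
    }
}

int decompress_file(FILE* fin, FILE* fout)
{
    LZ4F_decompressionContext_t dctx;
    LZ4F_frameInfo_t info;
    char srcBuf[64 * 1024];                       /* illustrative input chunk size */
    size_t filled = fread(srcBuf, 1, sizeof(srcBuf), fin);
    size_t consumed = filled;
    char* dst = NULL;
    size_t dstCapacity;
    size_t hint = 1;
    int ret = 1;

    if (LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION))) return 1;

    /* decode just the frame header; on return, 'consumed' is the nb of header bytes read */
    if (LZ4F_isError(LZ4F_getFrameInfo(dctx, &info, srcBuf, &consumed))) goto _end;

    dstCapacity = blockSizeFromID(info.blockSizeID);
    dst = (char*)malloc(dstCapacity);
    if (dst == NULL) goto _end;

    while (hint != 0) {
        /* decompress whatever is currently in srcBuf, starting after the consumed part */
        while (consumed < filled) {
            size_t srcSize = filled - consumed;
            size_t dstSize = dstCapacity;
            hint = LZ4F_decompress(dctx, dst, &dstSize, srcBuf + consumed, &srcSize, NULL);
            if (LZ4F_isError(hint)) goto _end;
            if (dstSize) fwrite(dst, 1, dstSize, fout);
            consumed += srcSize;                  /* srcSize was updated to nb of bytes read */
            if (hint == 0) break;                 /* frame fully decoded */
        }
        if (hint == 0) break;
        filled = fread(srcBuf, 1, sizeof(srcBuf), fin);   /* refill input */
        consumed = 0;
        if (filled == 0) goto _end;               /* truncated input */
    }
    ret = 0;
_end:
    free(dst);
    LZ4F_freeDecompressionContext(dctx);
    return ret;
}

The in/out convention is the one documented in this file : *srcSizePtr / *dstSizePtr are capacities on entry and bytes consumed / produced on return, and the function result is a size hint for the next call (0 once the frame is complete).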
@@ -930,16 +901,13 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_decompressionContext_t dCtx, LZ4F_frameI { LZ4F_dctx_t* dctxPtr = (LZ4F_dctx_t*)dCtx; - if (dctxPtr->dStage > dstage_storeHeader) /* note : requires dstage_* header related to be at beginning of enum */ - { + if (dctxPtr->dStage > dstage_storeHeader) { /* note : requires dstage_* header related to be at beginning of enum */ size_t o=0, i=0; /* frameInfo already decoded */ *srcSizePtr = 0; *frameInfoPtr = dctxPtr->frameInfo; return LZ4F_decompress(dCtx, NULL, &o, NULL, &i, NULL); - } - else - { + } else { size_t o=0; size_t nextSrcSize = LZ4F_decompress(dCtx, NULL, &o, srcBuffer, srcSizePtr, NULL); if (dctxPtr->dStage <= dstage_storeHeader) /* note : requires dstage_* header related to be at beginning of enum */ @@ -963,28 +931,24 @@ static void LZ4F_updateDict(LZ4F_dctx_t* dctxPtr, const BYTE* dstPtr, size_t dst if (dctxPtr->dictSize==0) dctxPtr->dict = (const BYTE*)dstPtr; /* priority to dictionary continuity */ - if (dctxPtr->dict + dctxPtr->dictSize == dstPtr) /* dictionary continuity */ - { + if (dctxPtr->dict + dctxPtr->dictSize == dstPtr) { /* dictionary continuity */ dctxPtr->dictSize += dstSize; return; } - if (dstPtr - dstPtr0 + dstSize >= 64 KB) /* dstBuffer large enough to become dictionary */ - { + if (dstPtr - dstPtr0 + dstSize >= 64 KB) { /* dstBuffer large enough to become dictionary */ dctxPtr->dict = (const BYTE*)dstPtr0; dctxPtr->dictSize = dstPtr - dstPtr0 + dstSize; return; } - if ((withinTmp) && (dctxPtr->dict == dctxPtr->tmpOutBuffer)) - { + if ((withinTmp) && (dctxPtr->dict == dctxPtr->tmpOutBuffer)) { /* assumption : dctxPtr->dict + dctxPtr->dictSize == dctxPtr->tmpOut + dctxPtr->tmpOutStart */ dctxPtr->dictSize += dstSize; return; } - if (withinTmp) /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */ - { + if (withinTmp) { /* copy relevant dict portion in front of tmpOut within tmpOutBuffer */ size_t preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer; size_t copySize = 64 KB - dctxPtr->tmpOutSize; const BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart; @@ -998,10 +962,8 @@ static void LZ4F_updateDict(LZ4F_dctx_t* dctxPtr, const BYTE* dstPtr, size_t dst return; } - if (dctxPtr->dict == dctxPtr->tmpOutBuffer) /* copy dst into tmp to complete dict */ - { - if (dctxPtr->dictSize + dstSize > dctxPtr->maxBufferSize) /* tmp buffer not large enough */ - { + if (dctxPtr->dict == dctxPtr->tmpOutBuffer) { /* copy dst into tmp to complete dict */ + if (dctxPtr->dictSize + dstSize > dctxPtr->maxBufferSize) { /* tmp buffer not large enough */ size_t preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */ memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveSize, preserveSize); dctxPtr->dictSize = preserveSize; @@ -1012,8 +974,7 @@ static void LZ4F_updateDict(LZ4F_dctx_t* dctxPtr, const BYTE* dstPtr, size_t dst } /* join dict & dest into tmp */ - { - size_t preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */ + { size_t preserveSize = 64 KB - dstSize; /* note : dstSize < 64 KB */ if (preserveSize > dctxPtr->dictSize) preserveSize = dctxPtr->dictSize; memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - preserveSize, preserveSize); memcpy(dctxPtr->tmpOutBuffer + preserveSize, dstPtr, dstSize); @@ -1024,7 +985,7 @@ static void LZ4F_updateDict(LZ4F_dctx_t* dctxPtr, const BYTE* dstPtr, size_t dst -/* LZ4F_decompress() +/*! LZ4F_decompress() : * Call this function repetitively to regenerate data compressed within srcBuffer. 
* The function will attempt to decode *srcSizePtr from srcBuffer, into dstBuffer of maximum size *dstSizePtr. * @@ -1065,23 +1026,19 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, *dstSizePtr = 0; /* expect to continue decoding src buffer where it left previously */ - if (dctxPtr->srcExpect != NULL) - { + if (dctxPtr->srcExpect != NULL) { if (srcStart != dctxPtr->srcExpect) return (size_t)-LZ4F_ERROR_srcPtr_wrong; } /* programmed as a state machine */ - while (doAnotherStage) - { + while (doAnotherStage) { switch(dctxPtr->dStage) { case dstage_getHeader: - { - if ((size_t)(srcEnd-srcPtr) >= maxFHSize) /* enough to decode - shortcut */ - { + { if ((size_t)(srcEnd-srcPtr) >= maxFHSize) { /* enough to decode - shortcut */ LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, srcPtr, srcEnd-srcPtr); if (LZ4F_isError(errorCode)) return errorCode; srcPtr += errorCode; @@ -1093,38 +1050,30 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case dstage_storeHeader: - { - size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize; + { size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize; if (sizeToCopy > (size_t)(srcEnd - srcPtr)) sizeToCopy = srcEnd - srcPtr; memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy); dctxPtr->tmpInSize += sizeToCopy; srcPtr += sizeToCopy; - if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) - { + if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) { nextSrcSizeHint = (dctxPtr->tmpInTarget - dctxPtr->tmpInSize) + BHSize; /* rest of header + nextBlockHeader */ doAnotherStage = 0; /* not enough src data, ask for some more */ break; } - { - LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, dctxPtr->header, dctxPtr->tmpInTarget); + { LZ4F_errorCode_t errorCode = LZ4F_decodeHeader(dctxPtr, dctxPtr->header, dctxPtr->tmpInTarget); if (LZ4F_isError(errorCode)) return errorCode; } break; } case dstage_getCBlockSize: - { - if ((size_t)(srcEnd - srcPtr) >= BHSize) - { - selectedIn = srcPtr; - srcPtr += BHSize; - } - else - { + if ((size_t)(srcEnd - srcPtr) >= BHSize) { + selectedIn = srcPtr; + srcPtr += BHSize; + } else { /* not enough input to read cBlockSize field */ - dctxPtr->tmpInSize = 0; - dctxPtr->dStage = dstage_storeCBlockSize; - } + dctxPtr->tmpInSize = 0; + dctxPtr->dStage = dstage_storeCBlockSize; } if (dctxPtr->dStage == dstage_storeCBlockSize) @@ -1135,8 +1084,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy); srcPtr += sizeToCopy; dctxPtr->tmpInSize += sizeToCopy; - if (dctxPtr->tmpInSize < BHSize) /* not enough input to get full cBlockSize; wait for more */ - { + if (dctxPtr->tmpInSize < BHSize) { /* not enough input to get full cBlockSize; wait for more */ nextSrcSizeHint = BHSize - dctxPtr->tmpInSize; doAnotherStage = 0; break; @@ -1145,23 +1093,19 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } /* case dstage_decodeCBlockSize: */ /* no more direct access, to prevent scan-build warning */ - { - size_t nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU; - if (nextCBlockSize==0) /* frameEnd signal, no more CBlock */ - { + { size_t nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU; + if (nextCBlockSize==0) { /* frameEnd signal, no more CBlock */ dctxPtr->dStage = dstage_getSuffix; break; } if (nextCBlockSize > dctxPtr->maxBlockSize) return (size_t)-LZ4F_ERROR_GENERIC; /* invalid cBlockSize */ dctxPtr->tmpInTarget = nextCBlockSize; - if 
(LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG) - { + if (LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG) { dctxPtr->dStage = dstage_copyDirect; break; } dctxPtr->dStage = dstage_getCBlock; - if (dstPtr==dstEnd) - { + if (dstPtr==dstEnd) { nextSrcSizeHint = nextCBlockSize + BHSize; doAnotherStage = 0; } @@ -1169,8 +1113,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case dstage_copyDirect: /* uncompressed block */ - { - size_t sizeToCopy = dctxPtr->tmpInTarget; + { size_t sizeToCopy = dctxPtr->tmpInTarget; if ((size_t)(srcEnd-srcPtr) < sizeToCopy) sizeToCopy = srcEnd - srcPtr; /* not enough input to read full block */ if ((size_t)(dstEnd-dstPtr) < sizeToCopy) sizeToCopy = dstEnd - dstPtr; memcpy(dstPtr, srcPtr, sizeToCopy); @@ -1183,8 +1126,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, srcPtr += sizeToCopy; dstPtr += sizeToCopy; - if (sizeToCopy == dctxPtr->tmpInTarget) /* all copied */ - { + if (sizeToCopy == dctxPtr->tmpInTarget) { /* all copied */ dctxPtr->dStage = dstage_getCBlockSize; break; } @@ -1195,28 +1137,24 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case dstage_getCBlock: /* entry from dstage_decodeCBlockSize */ + if ((size_t)(srcEnd-srcPtr) < dctxPtr->tmpInTarget) { - if ((size_t)(srcEnd-srcPtr) < dctxPtr->tmpInTarget) - { - dctxPtr->tmpInSize = 0; - dctxPtr->dStage = dstage_storeCBlock; - break; - } - selectedIn = srcPtr; - srcPtr += dctxPtr->tmpInTarget; - dctxPtr->dStage = dstage_decodeCBlock; + dctxPtr->tmpInSize = 0; + dctxPtr->dStage = dstage_storeCBlock; break; } + selectedIn = srcPtr; + srcPtr += dctxPtr->tmpInTarget; + dctxPtr->dStage = dstage_decodeCBlock; + break; case dstage_storeCBlock: - { - size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize; + { size_t sizeToCopy = dctxPtr->tmpInTarget - dctxPtr->tmpInSize; if (sizeToCopy > (size_t)(srcEnd-srcPtr)) sizeToCopy = srcEnd-srcPtr; memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy); dctxPtr->tmpInSize += sizeToCopy; srcPtr += sizeToCopy; - if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) /* need more input */ - { + if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) { /* need more input */ nextSrcSizeHint = (dctxPtr->tmpInTarget - dctxPtr->tmpInSize) + BHSize; doAnotherStage=0; break; @@ -1227,17 +1165,14 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case dstage_decodeCBlock: - { - if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize) /* not enough place into dst : decode into tmpOut */ - dctxPtr->dStage = dstage_decodeCBlock_intoTmp; - else - dctxPtr->dStage = dstage_decodeCBlock_intoDst; - break; - } + if ((size_t)(dstEnd-dstPtr) < dctxPtr->maxBlockSize) /* not enough place into dst : decode into tmpOut */ + dctxPtr->dStage = dstage_decodeCBlock_intoTmp; + else + dctxPtr->dStage = dstage_decodeCBlock_intoDst; + break; case dstage_decodeCBlock_intoDst: - { - int (*decoder)(const char*, char*, int, int, const char*, int); + { int (*decoder)(const char*, char*, int, int, const char*, int); int decodedSize; if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked) @@ -1260,9 +1195,8 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case dstage_decodeCBlock_intoTmp: - { - /* not enough place into dst : decode into tmpOut */ - int (*decoder)(const char*, char*, int, int, const char*, int); + /* not enough place into dst : decode into tmpOut */ + { int (*decoder)(const char*, char*, int, int, const char*, int); int decodedSize; if 
(dctxPtr->frameInfo.blockMode == LZ4F_blockLinked) @@ -1271,19 +1205,14 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, decoder = LZ4F_decompress_safe; /* ensure enough place for tmpOut */ - if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked) - { - if (dctxPtr->dict == dctxPtr->tmpOutBuffer) - { - if (dctxPtr->dictSize > 128 KB) - { + if (dctxPtr->frameInfo.blockMode == LZ4F_blockLinked) { + if (dctxPtr->dict == dctxPtr->tmpOutBuffer) { + if (dctxPtr->dictSize > 128 KB) { memcpy(dctxPtr->tmpOutBuffer, dctxPtr->dict + dctxPtr->dictSize - 64 KB, 64 KB); dctxPtr->dictSize = 64 KB; } dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + dctxPtr->dictSize; - } - else /* dict not within tmp */ - { + } else { /* dict not within tmp */ size_t reservedDictSpace = dctxPtr->dictSize; if (reservedDictSpace > 64 KB) reservedDictSpace = 64 KB; dctxPtr->tmpOut = dctxPtr->tmpOutBuffer + reservedDictSpace; @@ -1302,8 +1231,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case dstage_flushOut: /* flush decoded data from tmpOut to dstBuffer */ - { - size_t sizeToCopy = dctxPtr->tmpOutSize - dctxPtr->tmpOutStart; + { size_t sizeToCopy = dctxPtr->tmpOutSize - dctxPtr->tmpOutStart; if (sizeToCopy > (size_t)(dstEnd-dstPtr)) sizeToCopy = dstEnd-dstPtr; memcpy(dstPtr, dctxPtr->tmpOut + dctxPtr->tmpOutStart, sizeToCopy); @@ -1315,8 +1243,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, dstPtr += sizeToCopy; /* end of flush ? */ - if (dctxPtr->tmpOutStart == dctxPtr->tmpOutSize) - { + if (dctxPtr->tmpOutStart == dctxPtr->tmpOutSize) { dctxPtr->dStage = dstage_getCBlockSize; break; } @@ -1326,23 +1253,18 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case dstage_getSuffix: - { - size_t suffixSize = dctxPtr->frameInfo.contentChecksumFlag * 4; + { size_t suffixSize = dctxPtr->frameInfo.contentChecksumFlag * 4; if (dctxPtr->frameRemainingSize) return (size_t)-LZ4F_ERROR_frameSize_wrong; /* incorrect frame size decoded */ - if (suffixSize == 0) /* frame completed */ - { + if (suffixSize == 0) { /* frame completed */ nextSrcSizeHint = 0; dctxPtr->dStage = dstage_getHeader; doAnotherStage = 0; break; } - if ((srcEnd - srcPtr) < 4) /* not enough size for entire CRC */ - { + if ((srcEnd - srcPtr) < 4) { /* not enough size for entire CRC */ dctxPtr->tmpInSize = 0; dctxPtr->dStage = dstage_storeSuffix; - } - else - { + } else { selectedIn = srcPtr; srcPtr += 4; } @@ -1356,8 +1278,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, memcpy(dctxPtr->tmpIn + dctxPtr->tmpInSize, srcPtr, sizeToCopy); srcPtr += sizeToCopy; dctxPtr->tmpInSize += sizeToCopy; - if (dctxPtr->tmpInSize < 4) /* not enough input to read complete suffix */ - { + if (dctxPtr->tmpInSize < 4) { /* not enough input to read complete suffix */ nextSrcSizeHint = 4 - dctxPtr->tmpInSize; doAnotherStage=0; break; @@ -1366,9 +1287,8 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } /* case dstage_checkSuffix: */ /* no direct call, to avoid scan-build warning */ - { - U32 readCRC = LZ4F_readLE32(selectedIn); - U32 resultCRC = XXH32_digest(&(dctxPtr->xxh)); + { U32 const readCRC = LZ4F_readLE32(selectedIn); + U32 const resultCRC = XXH32_digest(&(dctxPtr->xxh)); if (readCRC != resultCRC) return (size_t)-LZ4F_ERROR_contentChecksum_invalid; nextSrcSizeHint = 0; dctxPtr->dStage = dstage_getHeader; @@ -1377,19 +1297,14 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case 
dstage_getSFrameSize: - { - if ((srcEnd - srcPtr) >= 4) - { - selectedIn = srcPtr; - srcPtr += 4; - } - else - { + if ((srcEnd - srcPtr) >= 4) { + selectedIn = srcPtr; + srcPtr += 4; + } else { /* not enough input to read cBlockSize field */ - dctxPtr->tmpInSize = 4; - dctxPtr->tmpInTarget = 8; - dctxPtr->dStage = dstage_storeSFrameSize; - } + dctxPtr->tmpInSize = 4; + dctxPtr->tmpInTarget = 8; + dctxPtr->dStage = dstage_storeSFrameSize; } if (dctxPtr->dStage == dstage_storeSFrameSize) @@ -1400,8 +1315,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, memcpy(dctxPtr->header + dctxPtr->tmpInSize, srcPtr, sizeToCopy); srcPtr += sizeToCopy; dctxPtr->tmpInSize += sizeToCopy; - if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) /* not enough input to get full sBlockSize; wait for more */ - { + if (dctxPtr->tmpInSize < dctxPtr->tmpInTarget) { /* not enough input to get full sBlockSize; wait for more */ nextSrcSizeHint = dctxPtr->tmpInTarget - dctxPtr->tmpInSize; doAnotherStage = 0; break; @@ -1410,8 +1324,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } /* case dstage_decodeSFrameSize: */ /* no direct access */ - { - size_t SFrameSize = LZ4F_readLE32(selectedIn); + { size_t const SFrameSize = LZ4F_readLE32(selectedIn); dctxPtr->frameInfo.contentSize = SFrameSize; dctxPtr->tmpInTarget = SFrameSize; dctxPtr->dStage = dstage_skipSkippable; @@ -1419,8 +1332,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, } case dstage_skipSkippable: - { - size_t skipSize = dctxPtr->tmpInTarget; + { size_t skipSize = dctxPtr->tmpInTarget; if (skipSize > (size_t)(srcEnd-srcPtr)) skipSize = srcEnd-srcPtr; srcPtr += skipSize; dctxPtr->tmpInTarget -= skipSize; @@ -1440,8 +1352,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, &&((unsigned)(dctxPtr->dStage-1) < (unsigned)(dstage_getSuffix-1)) ) { - if (dctxPtr->dStage == dstage_flushOut) - { + if (dctxPtr->dStage == dstage_flushOut) { size_t preserveSize = dctxPtr->tmpOut - dctxPtr->tmpOutBuffer; size_t copySize = 64 KB - dctxPtr->tmpOutSize; const BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize - dctxPtr->tmpOutStart; @@ -1452,9 +1363,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, dctxPtr->dict = dctxPtr->tmpOutBuffer; dctxPtr->dictSize = preserveSize + dctxPtr->tmpOutStart; - } - else - { + } else { size_t newDictSize = dctxPtr->dictSize; const BYTE* oldDictEnd = dctxPtr->dict + dctxPtr->dictSize; if ((newDictSize) > 64 KB) newDictSize = 64 KB; diff --git a/lib/xxhash.c b/lib/xxhash.c index 511d994..9238b15 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -1,49 +1,50 @@ /* -xxHash - Fast Hash algorithm -Copyright (C) 2012-2015, Yann Collet - -BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -You can contact the author at : -- xxHash source repository : https://github.com/Cyan4973/xxHash +* xxHash - Fast Hash algorithm +* Copyright (C) 2012-2016, Yann Collet +* +* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above +* copyright notice, this list of conditions and the following disclaimer +* in the documentation and/or other materials provided with the +* distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +* You can contact the author at : +* - xxHash homepage: http://www.xxhash.com +* - xxHash source repository : https://github.com/Cyan4973/xxHash */ -/************************************** +/* ************************************* * Tuning parameters -**************************************/ -/* XXH_FORCE_MEMORY_ACCESS +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. * The below switch allow to select different access method for improved performance. * Method 0 (default) : use `memcpy()`. Safe and portable. * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets which generate assembly depending on alignment. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. 
+ * It can generate buggy code on targets which do not support unaligned memory accesses. * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) * See http://stackoverflow.com/a/32095106/646947 for details. * Prefer these methods in priority order (0 > 1 > 2) @@ -57,14 +58,14 @@ You can contact the author at : # endif #endif -/* XXH_ACCEPT_NULL_INPUT_POINTER : +/*!XXH_ACCEPT_NULL_INPUT_POINTER : * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. * By default, this option is disabled. To enable it, uncomment below define : */ /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ -/* XXH_FORCE_NATIVE_FORMAT : +/*!XXH_FORCE_NATIVE_FORMAT : * By default, xxHash library provides endian-independant Hash values, based on little-endian convention. * Results are therefore identical for little-endian and big-endian CPU. * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. @@ -72,19 +73,42 @@ You can contact the author at : * to improve speed for Big-endian CPU. * This option has no impact on Little_Endian CPU. */ -#define XXH_FORCE_NATIVE_FORMAT 0 +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif -/* XXH_USELESS_ALIGN_BRANCH : +/*!XXH_FORCE_ALIGN_CHECK : * This is a minor performance trick, only useful with lots of very small keys. - * It means : don't make a test between aligned/unaligned, because performance will be the same. - * It saves one initial branch per hash. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. 
*/ -#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_USELESS_ALIGN_BRANCH 1 +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif #endif -/************************************** +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for malloc(), free() */ +#include +static void* XXH_malloc(size_t s) { return malloc(s); } +static void XXH_free (void* p) { free(p); } +/* for memcpy() */ +#include +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + + +/* ************************************* * Compiler Specific Options ***************************************/ #ifdef _MSC_VER /* Visual Studio */ @@ -103,36 +127,25 @@ You can contact the author at : #endif -/************************************** -* Includes & Memory related functions -***************************************/ -#include "xxhash.h" -/* Modify the local functions below should you wish to use some other memory routines */ -/* for malloc(), free() */ -#include -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -/* for memcpy() */ -#include -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - - -/************************************** +/* ************************************* * Basic Types ***************************************/ -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; +#ifndef MEM_MODULE +# define MEM_MODULE +# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +# else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +# endif #endif @@ -171,10 +184,10 @@ static U64 XXH_read64(const void* memPtr) return val; } -#endif // XXH_FORCE_DIRECT_MEMORY_ACCESS +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ -/****************************************** +/* **************************************** * Compiler-specific Functions and Macros ******************************************/ #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) @@ -216,19 +229,19 @@ static U64 XXH_swap64 (U64 x) #endif -/*************************************** +/* ************************************* * Architecture Macros ***************************************/ typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example one the compiler command line */ +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ #ifndef XXH_CPU_LITTLE_ENDIAN - static const int one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&one)) + 
static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) #endif -/***************************** +/* *************************** * Memory reads *****************************/ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; @@ -246,6 +259,11 @@ FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) return XXH_readLE32_align(ptr, endian, XXH_unaligned); } +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) @@ -259,32 +277,62 @@ FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) return XXH_readLE64_align(ptr, endian, XXH_unaligned); } +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + -/*************************************** +/* ************************************* * Macros ***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } /* use only *after* variable declarations */ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ -/*************************************** +/* ************************************* * Constants ***************************************/ -#define PRIME32_1 2654435761U -#define PRIME32_2 2246822519U -#define PRIME32_3 3266489917U -#define PRIME32_4 668265263U -#define PRIME32_5 374761393U +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; -#define PRIME64_1 11400714785074694791ULL -#define PRIME64_2 14029467366897019727ULL -#define PRIME64_3 1609587929392839161ULL -#define PRIME64_4 9650029242287828579ULL -#define PRIME64_5 2870177450012600261ULL +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } -/***************************** +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** * Simple Hash Functions *****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) { const BYTE* p = (const BYTE*)input; @@ -293,60 +341,40 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) #ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) - { + if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } #endif - if (len>=16) - { + if (len>=16) { const 
BYTE* const limit = bEnd - 16; U32 v1 = seed + PRIME32_1 + PRIME32_2; U32 v2 = seed + PRIME32_2; U32 v3 = seed + 0; U32 v4 = seed - PRIME32_1; - do - { - v1 += XXH_get32bits(p) * PRIME32_2; - v1 = XXH_rotl32(v1, 13); - v1 *= PRIME32_1; - p+=4; - v2 += XXH_get32bits(p) * PRIME32_2; - v2 = XXH_rotl32(v2, 13); - v2 *= PRIME32_1; - p+=4; - v3 += XXH_get32bits(p) * PRIME32_2; - v3 = XXH_rotl32(v3, 13); - v3 *= PRIME32_1; - p+=4; - v4 += XXH_get32bits(p) * PRIME32_2; - v4 = XXH_rotl32(v4, 13); - v4 *= PRIME32_1; - p+=4; - } - while (p<=limit); + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } - else - { + } else { h32 = seed + PRIME32_5; } h32 += (U32) len; - while (p+4<=bEnd) - { + while (p+4<=bEnd) { h32 += XXH_get32bits(p) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; p+=4; } - while (p=32) - { + if (len>=32) { const BYTE* const limit = bEnd - 32; U64 v1 = seed + PRIME64_1 + PRIME64_2; U64 v2 = seed + PRIME64_2; U64 v3 = seed + 0; U64 v4 = seed - PRIME64_1; - do - { - v1 += XXH_get64bits(p) * PRIME64_2; - p+=8; - v1 = XXH_rotl64(v1, 31); - v1 *= PRIME64_1; - v2 += XXH_get64bits(p) * PRIME64_2; - p+=8; - v2 = XXH_rotl64(v2, 31); - v2 *= PRIME64_1; - v3 += XXH_get64bits(p) * PRIME64_2; - p+=8; - v3 = XXH_rotl64(v3, 31); - v3 *= PRIME64_1; - v4 += XXH_get64bits(p) * PRIME64_2; - p+=8; - v4 = XXH_rotl64(v4, 31); - v4 *= PRIME64_1; - } - while (p<=limit); + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); - v1 *= PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= PRIME64_1; - h64 ^= v1; - h64 = h64 * PRIME64_1 + PRIME64_4; - - v2 *= PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= PRIME64_1; - h64 ^= v2; - h64 = h64 * PRIME64_1 + PRIME64_4; - - v3 *= PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= PRIME64_1; - h64 ^= v3; - h64 = h64 * PRIME64_1 + PRIME64_4; - - v4 *= PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= PRIME64_1; - h64 ^= v4; - h64 = h64 * PRIME64_1 + PRIME64_4; - } - else - { + } else { h64 = seed + PRIME64_5; } h64 += (U64) len; - while (p+8<=bEnd) - { - U64 k1 = XXH_get64bits(p); - k1 *= PRIME64_2; - k1 = XXH_rotl64(k1,31); - k1 *= PRIME64_1; + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; p+=8; } - if (p+4<=bEnd) - { + if (p+4<=bEnd) { h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } - while (p= sizeof(XXH_istate32_t)); /* A compilation error here means XXH32_state_t is not large enough */ return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); } -XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; } -XXH64_state_t* XXH64_createState(void) +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) { - XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= 
sizeof(XXH_istate64_t)); /* A compilation error here means XXH64_state_t is not large enough */ return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); } -XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { XXH_free(statePtr); return XXH_OK; @@ -585,36 +556,36 @@ XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) /*** Hash feed ***/ -XXH_errorcode XXH32_reset(XXH32_state_t* state_in, unsigned int seed) +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) { - XXH_istate32_t* state = (XXH_istate32_t*) state_in; - state->seed = seed; - state->v1 = seed + PRIME32_1 + PRIME32_2; - state->v2 = seed + PRIME32_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME32_1; - state->total_len = 0; - state->memsize = 0; + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.seed = seed; + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + memcpy(statePtr, &state, sizeof(state)); return XXH_OK; } -XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed) + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) { - XXH_istate64_t* state = (XXH_istate64_t*) state_in; - state->seed = seed; - state->v1 = seed + PRIME64_1 + PRIME64_2; - state->v2 = seed + PRIME64_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME64_1; - state->total_len = 0; - state->memsize = 0; + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + memset(&state, 0, sizeof(state)); + state.seed = seed; + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + memcpy(statePtr, &state, sizeof(state)); return XXH_OK; } -FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian) +FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) { - XXH_istate32_t* state = (XXH_istate32_t *) state_in; const BYTE* p = (const BYTE*)input; const BYTE* const bEnd = p + len; @@ -624,67 +595,37 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const v state->total_len += len; - if (state->memsize + len < 16) /* fill in tmp buffer */ - { + if (state->memsize + len < 16) { /* fill in tmp buffer */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); state->memsize += (U32)len; return XXH_OK; } - if (state->memsize) /* some data left from previous update */ - { + if (state->memsize) { /* some data left from previous update */ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { - const U32* p32 = state->mem32; - state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; - state->v1 = XXH_rotl32(state->v1, 13); - state->v1 *= PRIME32_1; - p32++; - state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; - state->v2 = XXH_rotl32(state->v2, 13); - state->v2 *= PRIME32_1; - p32++; - state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; - state->v3 = XXH_rotl32(state->v3, 13); - state->v3 *= PRIME32_1; - p32++; - state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; - state->v4 = XXH_rotl32(state->v4, 13); - state->v4 *= PRIME32_1; - p32++; + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, 
endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; } p += 16-state->memsize; state->memsize = 0; } - if (p <= bEnd-16) - { + if (p <= bEnd-16) { const BYTE* const limit = bEnd - 16; U32 v1 = state->v1; U32 v2 = state->v2; U32 v3 = state->v3; U32 v4 = state->v4; - do - { - v1 += XXH_readLE32(p, endian) * PRIME32_2; - v1 = XXH_rotl32(v1, 13); - v1 *= PRIME32_1; - p+=4; - v2 += XXH_readLE32(p, endian) * PRIME32_2; - v2 = XXH_rotl32(v2, 13); - v2 *= PRIME32_1; - p+=4; - v3 += XXH_readLE32(p, endian) * PRIME32_2; - v3 = XXH_rotl32(v3, 13); - v3 *= PRIME32_1; - p+=4; - v4 += XXH_readLE32(p, endian) * PRIME32_2; - v4 = XXH_rotl32(v4, 13); - v4 *= PRIME32_1; - p+=4; - } - while (p<=limit); + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); state->v1 = v1; state->v2 = v2; @@ -692,8 +633,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const v state->v4 = v4; } - if (p < bEnd) - { + if (p < bEnd) { XXH_memcpy(state->mem32, p, bEnd-p); state->memsize = (int)(bEnd-p); } @@ -701,7 +641,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const v return XXH_OK; } -XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -713,35 +653,29 @@ XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t l -FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian) +FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) { - const XXH_istate32_t* state = (const XXH_istate32_t*) state_in; const BYTE * p = (const BYTE*)state->mem32; - const BYTE* bEnd = (const BYTE*)(state->mem32) + state->memsize; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; U32 h32; - if (state->total_len >= 16) - { + if (state->total_len >= 16) { h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } - else - { - h32 = state->seed + PRIME32_5; + } else { + h32 = state->seed + PRIME32_5; } h32 += (U32) state->total_len; - while (p+4<=bEnd) - { + while (p+4<=bEnd) { h32 += XXH_readLE32(p, endian) * PRIME32_3; h32 = XXH_rotl32(h32, 17) * PRIME32_4; p+=4; } - while (ptotal_len += len; - if (state->memsize + len < 32) /* fill in tmp buffer */ - { + if (state->memsize + len < 32) { /* fill in tmp buffer */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); state->memsize += (U32)len; return XXH_OK; } - if (state->memsize) /* some data left from previous update */ - { + if (state->memsize) { /* tmp buffer is full */ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - { - const U64* p64 = state->mem64; - state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; - state->v1 = XXH_rotl64(state->v1, 31); - state->v1 *= PRIME64_1; - p64++; - state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; - state->v2 = XXH_rotl64(state->v2, 31); - state->v2 *= PRIME64_1; - p64++; - state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; - 
state->v3 = XXH_rotl64(state->v3, 31); - state->v3 *= PRIME64_1; - p64++; - state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; - state->v4 = XXH_rotl64(state->v4, 31); - state->v4 *= PRIME64_1; - p64++; - } + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); p += 32-state->memsize; state->memsize = 0; } - if (p+32 <= bEnd) - { + if (p+32 <= bEnd) { const BYTE* const limit = bEnd - 32; U64 v1 = state->v1; U64 v2 = state->v2; U64 v3 = state->v3; U64 v4 = state->v4; - do - { - v1 += XXH_readLE64(p, endian) * PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= PRIME64_1; - p+=8; - v2 += XXH_readLE64(p, endian) * PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= PRIME64_1; - p+=8; - v3 += XXH_readLE64(p, endian) * PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= PRIME64_1; - p+=8; - v4 += XXH_readLE64(p, endian) * PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= PRIME64_1; - p+=8; - } - while (p<=limit); + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); state->v1 = v1; state->v2 = v2; @@ -846,8 +750,7 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const v state->v4 = v4; } - if (p < bEnd) - { + if (p < bEnd) { XXH_memcpy(state->mem64, p, bEnd-p); state->memsize = (int)(bEnd-p); } @@ -855,7 +758,7 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const v return XXH_OK; } -XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) { XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; @@ -867,75 +770,45 @@ XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t l -FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian) +FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) { - const XXH_istate64_t * state = (const XXH_istate64_t *) state_in; const BYTE * p = (const BYTE*)state->mem64; - const BYTE* bEnd = (const BYTE*)state->mem64 + state->memsize; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; U64 h64; - if (state->total_len >= 32) - { - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - - v1 *= PRIME64_2; - v1 = XXH_rotl64(v1, 31); - v1 *= PRIME64_1; - h64 ^= v1; - h64 = h64*PRIME64_1 + PRIME64_4; - - v2 *= PRIME64_2; - v2 = XXH_rotl64(v2, 31); - v2 *= PRIME64_1; - h64 ^= v2; - h64 = h64*PRIME64_1 + PRIME64_4; - - v3 *= PRIME64_2; - v3 = XXH_rotl64(v3, 31); - v3 *= PRIME64_1; - h64 ^= v3; - h64 = h64*PRIME64_1 + PRIME64_4; - - v4 *= PRIME64_2; - v4 = XXH_rotl64(v4, 31); - v4 *= PRIME64_1; - h64 ^= v4; - h64 = h64*PRIME64_1 + PRIME64_4; - } - else - { + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else 
{ h64 = state->seed + PRIME64_5; } h64 += (U64) state->total_len; - while (p+8<=bEnd) - { - U64 k1 = XXH_readLE64(p, endian); - k1 *= PRIME64_2; - k1 = XXH_rotl64(k1,31); - k1 *= PRIME64_1; + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; p+=8; } - if (p+4<=bEnd) - { + if (p+4<=bEnd) { h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; p+=4; } - while (p /* size_t */ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; -/***************************** -* Namespace Emulation -*****************************/ -/* Motivations : - -If you need to include xxHash into your library, -but wish to avoid xxHash symbols to be present on your library interface -in an effort to avoid potential name collision if another library also includes xxHash, - -you can use XXH_NAMESPACE, which will automatically prefix any symbol from xxHash -with the value of XXH_NAMESPACE (so avoid to keep it NULL, and avoid numeric values). - -Note that no change is required within the calling program : -it can still call xxHash functions using their regular name. -They will be automatically translated by this header. +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included, so the file is still needed, +* but it's not useful to compile and link it anymore. +*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. */ #ifdef XXH_NAMESPACE # define XXH_CAT(A,B) A##B # define XXH_NAME2(A,B) XXH_CAT(A,B) # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) @@ -109,84 +135,163 @@ They will be automatically translated by this header. 
# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) #endif -/***************************** +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 1 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** * Simple Hash Functions -*****************************/ +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; -unsigned int XXH32 (const void* input, size_t length, unsigned seed); -unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed); +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); -/* +/*! XXH32() : Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". The memory between input & input+length must be valid (allocated and read-accessible). "seed" can be used to alter the result predictably. - This function successfully passes all SMHasher tests. Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s XXH64() : Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". - Faster on 64-bits systems. Slower on 32-bits systems. + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). */ +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ -/***************************** -* Advanced Hash Functions -*****************************/ -typedef struct { long long ll[ 6]; } XXH32_state_t; -typedef struct { long long ll[11]; } XXH64_state_t; +/*! State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); /* -These structures allow static allocation of XXH states. -States must then be initialized using XXHnn_reset() before first use. +These functions generate the xxHash of an input provided in multiple segments. 
+Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). -If you prefer dynamic allocation, please refer to functions below. +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. */ -XXH32_state_t* XXH32_createState(void); -XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); -XXH64_state_t* XXH64_createState(void); -XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif -/* -These functions create and release memory for XXH state. -States must then be initialized using XXHnn_reset() before first use. +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. */ -XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed); -XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); -unsigned int XXH32_digest (const XXH32_state_t* statePtr); +#ifdef XXH_STATIC_LINKING_ONLY -XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); -XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); -unsigned long long XXH64_digest (const XXH64_state_t* statePtr); +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They could change in a future version, becoming incompatible with a different version of the library. + They shall only be used with static linking. 
+=================================================================================================== */ -/* -These functions calculate the xxHash of an input provided in multiple smaller packets, -as opposed to an input provided as a single block. +/* These definitions allow allocating XXH state statically (on stack) */ -XXH state space must first be allocated, using either static or dynamic method provided above. + struct XXH32_state_s { + unsigned long long total_len; + unsigned seed; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + }; /* typedef'd to XXH32_state_t */ -Start a new hash by initializing state with a seed, using XXHnn_reset(). + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long seed; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + }; /* typedef'd to XXH64_state_t */ -Then, feed the hash state by calling XXHnn_update() as many times as necessary. -Obviously, input must be valid, meaning allocated and read accessible. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -Finally, you can produce a hash anytime, by using XXHnn_digest(). -This function returns the final nn-bits hash. -You can nonetheless continue feeding the hash state with more input, -and therefore get some new hashes, by calling again XXHnn_digest(). +# ifdef XXH_PRIVATE_API +# include "xxhash.c" /* include xxhash functions as `static`, for inlining */ +# endif -When you are done, don't forget to free XXH state space, using typically XXHnn_freeState(). -*/ +#endif /* XXH_STATIC_LINKING_ONLY */ #if defined (__cplusplus) } #endif + +#endif /* XXHASH_H_5627135585666179 */ diff --git a/programs/frametest.c b/programs/frametest.c index 46ec030..6f9ee39 100644 --- a/programs/frametest.c +++ b/programs/frametest.c @@ -1,6 +1,6 @@ /* frameTest - test tool for lz4frame - Copyright (C) Yann Collet 2014-2015 + Copyright (C) Yann Collet 2014-2016 GPL v2 License @@ -19,11 +19,11 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : + - LZ4 homepage : http://www.lz4.org - LZ4 source repository : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ -/************************************** +/*-************************************ * Compiler specific **************************************/ #ifdef _MSC_VER /* Visual Studio */ @@ -37,13 +37,14 @@ #endif -/************************************** +/*-************************************ * Includes **************************************/ #include /* malloc, free */ #include /* fprintf */ #include /* strcmp */ #include "lz4frame_static.h" +#define XXH_STATIC_LINKING_ONLY #include "xxhash.h" /* XXH64 */ /* Use ftime() if gettimeofday() is not available on your target */ diff --git a/programs/fuzzer.c b/programs/fuzzer.c index f1da1a8..60fd64f 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -1,6 +1,6 @@ /* fuzzer.c - Fuzzer test tool for LZ4 - Copyright (C) Yann Collet 2012-2015 + Copyright (C) Yann Collet 2012-2016 GPL v2 License @@ -19,11 +19,11 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
You can contact the author at : - - LZ4 source mirror : https://github.com/Cyan4973/lz4 - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c + - LZ4 homepage : http://www.lz4.org + - LZ4 source repo : https://github.com/Cyan4973/lz4 */ -/************************************** +/*-************************************ * Compiler options **************************************/ #ifdef _MSC_VER /* Visual Studio */ @@ -39,7 +39,7 @@ #endif -/************************************** +/*-************************************ * Includes **************************************/ #include @@ -47,6 +47,7 @@ #include /* strcmp */ #include "lz4.h" #include "lz4hc.h" +#define XXH_STATIC_LINKING_ONLY #include "xxhash.h" /* Use ftime() if gettimeofday() is not available on your target */ -- cgit v0.12
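
For reference, below is a minimal usage sketch of the streaming and canonical-representation API that this patch declares in the updated xxhash.h. It is not part of the patch itself: the buffer `data`, the chunk sizes and the seed value 0 are hypothetical, and only prototypes actually introduced by the patch are used (XXH64_createState, XXH64_reset, XXH64_update, XXH64_digest, XXH64_canonicalFromHash, XXH64_freeState, XXH64_canonical_t).

    /* Illustrative only, not part of the patch : hash a buffer in two
     * segments with the new streaming API, then convert the result to
     * the canonical (big-endian) byte representation for storage. */
    #include <stdio.h>
    #include <string.h>
    #include "xxhash.h"

    int main(void)
    {
        char data[1024];                                   /* hypothetical input */
        memset(data, 'x', sizeof(data));

        {   XXH64_state_t* const state = XXH64_createState();
            if (state == NULL) return 1;

            if (XXH64_reset(state, 0) != XXH_OK) return 1;                 /* seed = 0 */
            if (XXH64_update(state, data, 512) != XXH_OK) return 1;        /* first segment */
            if (XXH64_update(state, data + 512, sizeof(data) - 512) != XXH_OK) return 1;  /* second segment */

            {   unsigned long long const hash = XXH64_digest(state);      /* can be called at any point */
                XXH64_canonical_t canonical;
                XXH64_canonicalFromHash(&canonical, hash);                 /* portable big-endian form */
                printf("XXH64 = %016llx\n", hash);
                (void)canonical;   /* e.g. could be written to a file as-is */
            }

            XXH64_freeState(state);
        }
        return 0;
    }

As the patch does in lz4frame.c, frametest.c and fuzzer.c, a caller that needs the state structures exposed (for example to allocate them on the stack) must define XXH_STATIC_LINKING_ONLY before including xxhash.h; the new XXH_PRIVATE_API macro additionally pulls xxhash.c in as static functions for inlining, per the comments added in the header.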