1 files changed, 274 insertions, 40 deletions
diff --git a/lz4hc.c b/lz4hc.c
index 8f2b422..79fd3f7 100644
--- a/lz4hc.c
+++ b/lz4hc.c
@@ -31,11 +31,6 @@
    - LZ4 source repository : http://code.google.com/p/lz4/
 */
 
-/*
-Note : this source file requires "lz4hc_encoder.h"
-*/
-
-
 //**************************************
 // Memory routines
 //**************************************
@@ -100,7 +95,7 @@ Note : this source file requires "lz4hc_encoder.h"
 #endif
 
 #ifdef _MSC_VER    // Visual Studio
-#  define forceinline static __forceinline
+#  define FORCE_INLINE static __forceinline
 #  include <intrin.h>                    // For Visual 2005
 #  if LZ4_ARCH64   // 64-bits
 #    pragma intrinsic(_BitScanForward64) // For Visual 2005
@@ -113,9 +108,9 @@ Note : this source file requires "lz4hc_encoder.h"
 #  pragma warning(disable : 4701)        // disable: C4701: potentially uninitialized local variable used
 #else 
 #  ifdef __GNUC__
-#    define forceinline static inline __attribute__((always_inline))
+#    define FORCE_INLINE static inline __attribute__((always_inline))
 #  else
-#    define forceinline static inline
+#    define FORCE_INLINE static inline
 #  endif
 #endif
 
@@ -273,7 +268,7 @@ typedef struct
 //**************************************
 #if LZ4_ARCH64
 
-forceinline int LZ4_NbCommonBytes (register U64 val)
+FORCE_INLINE int LZ4_NbCommonBytes (register U64 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -305,7 +300,7 @@ forceinline int LZ4_NbCommonBytes (register U64 val)
 
 #else
 
-forceinline int LZ4_NbCommonBytes (register U32 val)
+FORCE_INLINE int LZ4_NbCommonBytes (register U32 val)
 {
 #if defined(LZ4_BIG_ENDIAN)
 #  if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
@@ -337,7 +332,7 @@ forceinline int LZ4_NbCommonBytes (register U32 val)
 #endif
 
 
-forceinline int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
+FORCE_INLINE int LZ4_InitHC (LZ4HC_Data_Structure* hc4, const BYTE* base)
 {
     MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable));
     MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
@@ -365,7 +360,7 @@ int LZ4_freeHC (void* LZ4HC_Data)
 
 
 // Update chains up to ip (excluded)
-forceinline void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
+FORCE_INLINE void LZ4HC_Insert (LZ4HC_Data_Structure* hc4, const BYTE* ip)
 {
     U16*   chainTable = hc4->chainTable;
     HTYPE* HashTable  = hc4->hashTable;
@@ -403,7 +398,7 @@ char* LZ4_slideInputBufferHC(void* LZ4HC_Data)
 }
 
 
-forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
+FORCE_INLINE size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYTE* const matchlimit)
 {
     const BYTE* p1t = p1;
 
@@ -421,7 +416,7 @@ forceinline size_t LZ4HC_CommonLength (const BYTE* p1, const BYTE* p2, const BYT
 }
 
 
-forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
+FORCE_INLINE int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* const matchlimit, const BYTE** matchpos)
 {
     U16* const chainTable = hc4->chainTable;
     HTYPE* const HashTable = hc4->hashTable;
@@ -489,7 +484,7 @@ forceinline int LZ4HC_InsertAndFindBestMatch (LZ4HC_Data_Structure* hc4, const B
 }
 
 
-forceinline int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
+FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (LZ4HC_Data_Structure* hc4, const BYTE* ip, const BYTE* startLimit, const BYTE* matchlimit, int longest, const BYTE** matchpos, const BYTE** startpos)
 {
     U16* const  chainTable = hc4->chainTable;
     HTYPE* const HashTable = hc4->hashTable;
@@ -548,37 +543,276 @@ _endCount:
 }
 
 
+typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
 
-//**************************************
-// Compression functions
-//**************************************
+FORCE_INLINE int LZ4HC_encodeSequence (
+                       const BYTE** ip,
+                       BYTE** op,
+                       const BYTE** anchor,
+                       int matchLength,
+                       const BYTE* ref,
+                       limitedOutput_directive limitedOutputBuffer,
+                       BYTE* oend)
+{
+    int length;
+    BYTE* token;
 
-/*
-int LZ4_compressHC(
-                 const char* source,
-                 char* dest,
-                 int inputSize)
+    // Encode Literal length
+    length = (int)(*ip - *anchor);
+    token = (*op)++;
+    if ((limitedOutputBuffer) && ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend)) return 1;   // Check output limit
+    if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255;  *(*op)++ = (BYTE)len; } 
+    else *token = (BYTE)(length<<ML_BITS);
 
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest'.
-Destination buffer must be already allocated, and sized at a minimum of LZ4_compressBound(inputSize).
-return : the number of bytes written in buffer 'dest'
-*/
-#define FUNCTION_NAME LZ4_compressHC
-#include "lz4hc_encoder.h"
+    // Copy Literals
+    LZ4_BLINDCOPY(*anchor, *op, length);
 
+    // Encode Offset
+    LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
 
-/*
-int LZ4_compressHC_limitedOutput(
-                 const char* source,
+    // Encode MatchLength
+    length = (int)(matchLength-MINMATCH);
+    if ((limitedOutputBuffer) && (*op + (1 + LASTLITERALS) + (length>>8) > oend)) return 1;   // Check output limit
+    if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } 
+    else *token += (BYTE)(length);	
+
+    // Prepare next loop
+    *ip += matchLength;
+    *anchor = *ip; 
+
+    return 0;
+}
+
+
+static int LZ4HC_compress_generic (
+                 void* ctxvoid,
+                 const char* source, 
                  char* dest,
                  int inputSize,
-                 int maxOutputSize)
+                 int maxOutputSize,
+                 limitedOutput_directive limit
+                )
+{
+    LZ4HC_Data_Structure* ctx = (LZ4HC_Data_Structure*) ctxvoid;
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* op = (BYTE*) dest;
+    BYTE* const oend = op + maxOutputSize;
+
+    int   ml, ml2, ml3, ml0;
+    const BYTE* ref=NULL;
+    const BYTE* start2=NULL;
+    const BYTE* ref2=NULL;
+    const BYTE* start3=NULL;
+    const BYTE* ref3=NULL;
+    const BYTE* start0;
+    const BYTE* ref0;
+
+
+    // Ensure blocks follow each other
+    if (ip != ctx->end) return 0;
+    ctx->end += inputSize;
+
+    ip++;
+
+    // Main Loop
+    while (ip < mflimit)
+    {
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
+        if (!ml) { ip++; continue; }
 
-Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'.
-If it cannot achieve it, compression will stop, and result of the function will be zero.
-return : the number of bytes written in buffer 'dest', or 0 if the compression fails
-*/
-#define FUNCTION_NAME LZ4_compressHC_limitedOutput
-#define LIMITED_OUTPUT
-#include "lz4hc_encoder.h"
+        // saved, in case we would skip too much
+        start0 = ip;
+        ref0 = ref;
+        ml0 = ml;
+
+_Search2:
+        if (ip+ml < mflimit)
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
+        else ml2 = ml;
+
+        if (ml2 == ml)  // No better match
+        {
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start0 < ip)
+        {
+            if (start2 < ip + ml0)   // empirical
+            {
+                ip = start0;
+                ref = ref0;
+                ml = ml0;
+            }
+        }
+
+        // Here, start0==ip
+        if ((start2 - ip) < 3)   // First Match too small : removed
+        {
+            ml = ml2;
+            ip = start2;
+            ref =ref2;
+            goto _Search2;
+        }
+
+_Search3:
+        // Currently we have :
+        // ml2 > ml1, and
+        // ip1+3 <= ip2 (usually < ip1+ml1)
+        if ((start2 - ip) < OPTIMAL_ML)
+        {
+            int correction;
+            int new_ml = ml;
+            if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+            if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+            correction = new_ml - (int)(start2 - ip);
+            if (correction > 0)
+            {
+                start2 += correction;
+                ref2 += correction;
+                ml2 -= correction;
+            }
+        }
+        // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
+
+        if (start2 + ml2 < mflimit)
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
+        else ml3 = ml2;
+
+        if (ml3 == ml2) // No better match : 2 sequences to encode
+        {
+            // ip & ref are known; Now for ml
+            if (start2 < ip+ml)  ml = (int)(start2 - ip);
+            // Now, encode 2 sequences
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+            ip = start2;
+            if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0;
+            continue;
+        }
+
+        if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
+        {
+            if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
+            {
+                if (start2 < ip+ml)
+                {
+                    int correction = (int)(ip+ml - start2);
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                    if (ml2 < MINMATCH)
+                    {
+                        start2 = start3;
+                        ref2 = ref3;
+                        ml2 = ml3;
+                    }
+                }
+
+                if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+                ip  = start3;
+                ref = ref3;
+                ml  = ml3;
+
+                start0 = start2;
+                ref0 = ref2;
+                ml0 = ml2;
+                goto _Search2;
+            }
+
+            start2 = start3;
+            ref2 = ref3;
+            ml2 = ml3;
+            goto _Search3;
+        }
+
+        // OK, now we have 3 ascending matches; let's write at least the first one
+        // ip & ref are known; Now for ml
+        if (start2 < ip+ml)
+        {
+            if ((start2 - ip) < (int)ML_MASK)
+            {
+                int correction;
+                if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+                if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+                correction = ml - (int)(start2 - ip);
+                if (correction > 0)
+                {
+                    start2 += correction;
+                    ref2 += correction;
+                    ml2 -= correction;
+                }
+            }
+            else
+            {
+                ml = (int)(start2 - ip);
+            }
+        }
+        if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0;
+
+        ip = start2;
+        ref = ref2;
+        ml = ml2;
+
+        start2 = start3;
+        ref2 = ref3;
+        ml2 = ml3;
+
+        goto _Search3;
+
+    }
+
+    // Encode Last Literals
+    {
+        int lastRun = (int)(iend - anchor);
+        if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0;  // Check output limit
+        if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } 
+        else *op++ = (BYTE)(lastRun<<ML_BITS);
+        memcpy(op, anchor, iend - anchor);
+        op += iend-anchor;
+    } 
+
+    // End
+    return (int) (((char*)op)-dest);
+}
+
+
+int LZ4_compressHC(const char* source, char* dest, int inputSize)
+{
+    void* ctx = LZ4_createHC(source);
+    int result;
+    if (ctx==NULL) return 0;
+
+    result = LZ4HC_compress_generic (ctx, source, dest, inputSize, 0, noLimit);
+
+    LZ4_freeHC(ctx);
+    return result;
+}
+
+int LZ4_compressHC_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, 0, noLimit);
+}
+
+
+int LZ4_compressHC_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    void* ctx = LZ4_createHC(source);
+    int result;
+    if (ctx==NULL) return 0;
+
+    result = LZ4HC_compress_generic (ctx, source, dest, inputSize, maxOutputSize, limitedOutput);
+
+    LZ4_freeHC(ctx);
+    return result;
+}
+
+int LZ4_compressHC_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize)
+{
+    return LZ4HC_compress_generic (LZ4HC_Data, source, dest, inputSize, maxOutputSize, limitedOutput);
+}