3 files changed, 93 insertions, 50 deletions
diff --git a/lib/lz4frame_static.h b/lib/lz4frame_static.h
index 1899f8e..a59b94b 100644
--- a/lib/lz4frame_static.h
+++ b/lib/lz4frame_static.h
@@ -43,7 +43,15 @@ extern "C" {
 /* lz4frame_static.h should be used solely in the context of static linking.
  * It contains definitions which are not stable and may change in the future.
  * Never use it in the context of DLL linking.
+ *
+ * Defining LZ4F_PUBLISH_STATIC_FUNCTIONS allows one to override this. Use at
+ * your own risk.
  */
+#ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS
+#define LZ4FLIB_STATIC_API LZ4FLIB_API
+#else
+#define LZ4FLIB_STATIC_API
+#endif
 
 
 /* ---   Dependency   --- */
@@ -79,7 +87,7 @@ extern "C" {
 /* enum list is exposed, to handle specific errors */
 typedef enum { LZ4F_LIST_ERRORS(LZ4F_GENERATE_ENUM) } LZ4F_errorCodes;
 
-LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
+LZ4FLIB_STATIC_API LZ4F_errorCodes LZ4F_getErrorCode(size_t functionResult);
 
 
 
@@ -93,8 +101,8 @@ typedef struct LZ4F_CDict_s LZ4F_CDict;
  *  LZ4_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
  *  LZ4_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
  * `dictBuffer` can be released after LZ4_CDict creation, since its content is copied within CDict */
-LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
-void        LZ4F_freeCDict(LZ4F_CDict* CDict);
+LZ4FLIB_STATIC_API LZ4F_CDict* LZ4F_createCDict(const void* dictBuffer, size_t dictSize);
+LZ4FLIB_STATIC_API void        LZ4F_freeCDict(LZ4F_CDict* CDict);
 
 
 /*! LZ4_compressFrame_usingCDict() :
@@ -106,10 +114,11 @@ void        LZ4F_freeCDict(LZ4F_CDict* CDict);
  *  but it's not recommended, as it's the only way to provide dictID in the frame header.
  * @return : number of bytes written into dstBuffer.
  *           or an error code if it fails (can be tested using LZ4F_isError()) */
-size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize,
-                               const LZ4F_CDict* cdict,
-                               const LZ4F_preferences_t* preferencesPtr);
+LZ4FLIB_STATIC_API size_t LZ4F_compressFrame_usingCDict(
+    void* dst, size_t dstCapacity,
+    const void* src, size_t srcSize,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* preferencesPtr);
 
 
 /*! LZ4F_compressBegin_usingCDict() :
@@ -119,21 +128,23 @@ size_t LZ4F_compressFrame_usingCDict(void* dst, size_t dstCapacity,
  *  however, it's the only way to provide dictID in the frame header.
  * @return : number of bytes written into dstBuffer for the header,
  *           or an error code (which can be tested using LZ4F_isError()) */
-size_t LZ4F_compressBegin_usingCDict(LZ4F_cctx* cctx,
-                                     void* dstBuffer, size_t dstCapacity,
-                                     const LZ4F_CDict* cdict,
-                                     const LZ4F_preferences_t* prefsPtr);
+LZ4FLIB_STATIC_API size_t LZ4F_compressBegin_usingCDict(
+    LZ4F_cctx* cctx,
+    void* dstBuffer, size_t dstCapacity,
+    const LZ4F_CDict* cdict,
+    const LZ4F_preferences_t* prefsPtr);
 
 
 /*! LZ4F_decompress_usingDict() :
  *  Same as LZ4F_decompress(), using a predefined dictionary.
  *  Dictionary is used "in place", without any preprocessing.
  *  It must remain accessible throughout the entire frame decoding. */
-size_t LZ4F_decompress_usingDict(LZ4F_dctx* dctxPtr,
-                       void* dstBuffer, size_t* dstSizePtr,
-                       const void* srcBuffer, size_t* srcSizePtr,
-                       const void* dict, size_t dictSize,
-                       const LZ4F_decompressOptions_t* decompressOptionsPtr);
+LZ4FLIB_STATIC_API size_t LZ4F_decompress_usingDict(
+    LZ4F_dctx* dctxPtr,
+    void* dstBuffer, size_t* dstSizePtr,
+    const void* srcBuffer, size_t* srcSizePtr,
+    const void* dict, size_t dictSize,
+    const LZ4F_decompressOptions_t* decompressOptionsPtr);
 
 
 #if defined (__cplusplus)
diff --git a/lib/lz4hc.c b/lib/lz4hc.c
index 388eb40..f2c2566 100644
--- a/lib/lz4hc.c
+++ b/lib/lz4hc.c
@@ -191,7 +191,8 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
     int longest,
     const BYTE** matchpos,
     const BYTE** startpos,
-    const int maxNbAttempts)
+    const int maxNbAttempts,
+    const int patternAnalysis)
 {
     U16* const chainTable = hc4->chainTable;
     U32* const HashTable = hc4->hashTable;
@@ -264,7 +265,7 @@ LZ4_FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch (
 
         {   U32 const nextOffset = DELTANEXTU16(chainTable, matchIndex);
             matchIndex -= nextOffset;
-            if (nextOffset==1) {
+            if (patternAnalysis && nextOffset==1) {
                 /* may be a repeated pattern */
                 if (repeat == rep_untested) {
                     if ( ((pattern & 0xFFFF) == (pattern >> 16))
@@ -299,13 +300,14 @@ LZ4_FORCE_INLINE
 int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4,   /* Index table will be updated */
                                  const BYTE* const ip, const BYTE* const iLimit,
                                  const BYTE** matchpos,
-                                 const int maxNbAttempts)
+                                 const int maxNbAttempts,
+                                 const int patternAnalysis)
 {
     const BYTE* uselessPtr = ip;
     /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
      * but this won't be the case here, as we define iLowLimit==ip,
      * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
-    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts);
+    return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis);
 }
 
 
@@ -403,6 +405,7 @@ static int LZ4HC_compress_hashChain (
     )
 {
     const int inputSize = *srcSizePtr;
+    const int patternAnalysis = (maxNbAttempts > 64);   /* levels 8+ */
 
     const BYTE* ip = (const BYTE*) source;
     const BYTE* anchor = ip;
@@ -425,15 +428,12 @@ static int LZ4HC_compress_hashChain (
 
     /* init */
     *srcSizePtr = 0;
-    if (limit == limitedDestSize && maxOutputSize < 1) return 0;         /* Impossible to store anything */
-    if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0;              /* Unsupported input size, too large (or negative) */
-
-    if (limit == limitedDestSize) oend -= LASTLITERALS;                  /* Hack for support limitations LZ4 decompressor */
+    if (limit == limitedDestSize) oend -= LASTLITERALS;                  /* Hack for support LZ4 format restriction */
     if (inputSize < LZ4_minLength) goto _last_literals;                  /* Input too small, no compression (all literals) */
 
     /* Main Loop */
     while (ip < mflimit) {
-        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts);
+        ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis);
         if (ml<MINMATCH) { ip++; continue; }
 
         /* saved, in case we would skip too much */
@@ -443,7 +443,9 @@ static int LZ4HC_compress_hashChain (
 
 _Search2:
         if (ip+ml < mflimit)
-            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2, maxNbAttempts);
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+                            maxNbAttempts, patternAnalysis);
         else
             ml2 = ml;
 
@@ -488,7 +490,9 @@ _Search3:
         /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */
 
         if (start2 + ml2 < mflimit)
-            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts);
+            ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3,
+                            maxNbAttempts, patternAnalysis);
         else
             ml3 = ml2;
 
@@ -641,11 +645,12 @@ static int LZ4HC_compress_generic (
         { lz4opt,8192, LZ4_OPT_NUM },  /* 12==LZ4HC_CLEVEL_MAX */
     };
 
+    if (limit == limitedDestSize && dstCapacity < 1) return 0;         /* Impossible to store anything */
+    if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0;          /* Unsupported input size (too large or negative) */
+
     ctx->end += *srcSizePtr;
     if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT;   /* note : convention is different from lz4frame, maybe something to review */
     cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel);
-    if (limit == limitedDestSize)
-        cLevel = MIN(LZ4HC_CLEVEL_OPT_MIN-1, cLevel);  /* no limitedDestSize variant for lz4opt */
     assert(cLevel >= 0);
     assert(cLevel <= LZ4HC_CLEVEL_MAX);
     {   cParams_t const cParam = clTable[cLevel];
@@ -655,8 +660,8 @@ static int LZ4HC_compress_generic (
                                 cParam.nbSearches, limit);
         assert(cParam.strat == lz4opt);
         return LZ4HC_compress_optimal(ctx,
-                            src, dst, *srcSizePtr, dstCapacity, limit,
-                            cParam.nbSearches, cParam.targetLength,
+                            src, dst, srcSizePtr, dstCapacity,
+                            cParam.nbSearches, cParam.targetLength, limit,
                             cLevel == LZ4HC_CLEVEL_MAX);  /* ultra mode */
     }
 }
diff --git a/lib/lz4opt.h b/lib/lz4opt.h
index 9917851..5a8438c 100644
--- a/lib/lz4opt.h
+++ b/lib/lz4opt.h
@@ -85,7 +85,9 @@ LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
     /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
      * but this won't be the case here, as we define iLowLimit==ip,
      * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
-    int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches);
+    int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx,
+                                ip, ip, iHighLimit, minLen, &matchPtr, &ip,
+                                nbSearches, 1 /* patternAnalysis */);
     if (matchLength <= minLen) return match;
     match.len = matchLength;
     match.off = (int)(ip-matchPtr);
@@ -97,11 +99,11 @@ static int LZ4HC_compress_optimal (
     LZ4HC_CCtx_internal* ctx,
     const char* const source,
     char* dst,
-    int inputSize,
+    int* srcSizePtr,
     int dstCapacity,
-    limitedOutput_directive limit,
     int const nbSearches,
     size_t sufficient_len,
+    limitedOutput_directive limit,
     int const fullUpdate
     )
 {
@@ -110,14 +112,17 @@ static int LZ4HC_compress_optimal (
 
     const BYTE* ip = (const BYTE*) source;
     const BYTE* anchor = ip;
-    const BYTE* const iend = ip + inputSize;
+    const BYTE* const iend = ip + *srcSizePtr;
     const BYTE* const mflimit = iend - MFLIMIT;
     const BYTE* const matchlimit = iend - LASTLITERALS;
     BYTE* op = (BYTE*) dst;
-    BYTE* const oend = op + dstCapacity;
+    BYTE* opSaved = (BYTE*) dst;
+    BYTE* oend = op + dstCapacity;
 
     /* init */
     DEBUGLOG(5, "LZ4HC_compress_optimal");
+    *srcSizePtr = 0;
+    if (limit == limitedDestSize) oend -= LASTLITERALS;   /* Hack for support LZ4 format restriction */
     if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
 
     /* Main Loop */
@@ -134,8 +139,9 @@ static int LZ4HC_compress_optimal (
             /* good enough solution : immediate encoding */
             int const firstML = firstMatch.len;
             const BYTE* const matchPos = ip - firstMatch.off;
+            opSaved = op;
             if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) )   /* updates ip, op and anchor */
-                return 0;  /* error */
+                goto _dest_overflow;
             continue;
         }
 
@@ -304,26 +310,47 @@ encode: /* cur, last_match_pos, best_mlen, best_off must be set */
                 rPos += ml;
                 assert(ml >= MINMATCH);
                 assert((offset >= 1) && (offset <= MAX_DISTANCE));
+                opSaved = op;
                 if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) )   /* updates ip, op and anchor */
-                    return 0;  /* error */
+                    goto _dest_overflow;
         }   }
     }  /* while (ip < mflimit) */
 
+_last_literals:
     /* Encode Last Literals */
-    {   int lastRun = (int)(iend - anchor);
-        if ( (limit)
-          && (((char*)op - dst) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)dstCapacity))
-            return 0;  /* Check output limit */
-        if (lastRun >= (int)RUN_MASK) {
-            *op++=(RUN_MASK<<ML_BITS);
-            lastRun-=RUN_MASK;
-            for (; lastRun > 254 ; lastRun-=255) *op++ = 255;
-            *op++ = (BYTE) lastRun;
-        } else *op++ = (BYTE)(lastRun<<ML_BITS);
-        memcpy(op, anchor, iend - anchor);
-        op += iend-anchor;
+    {   size_t lastRunSize = (size_t)(iend - anchor);  /* literals */
+        size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+        size_t const totalSize = 1 + litLength + lastRunSize;
+        if (limit == limitedDestSize) oend += LASTLITERALS;  /* restore correct value */
+        if (limit && (op + totalSize > oend)) {
+            if (limit == limitedOutput) return 0;  /* Check output limit */
+            /* adapt lastRunSize to fill 'dst' */
+            lastRunSize  = (size_t)(oend - op) - 1;
+            litLength = (lastRunSize + 255 - RUN_MASK) / 255;
+            lastRunSize -= litLength;
+        }
+        ip = anchor + lastRunSize;
+
+        if (lastRunSize >= RUN_MASK) {
+            size_t accumulator = lastRunSize - RUN_MASK;
+            *op++ = (RUN_MASK << ML_BITS);
+            for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRunSize << ML_BITS);
+        }
+        memcpy(op, anchor, lastRunSize);
+        op += lastRunSize;
     }
 
     /* End */
+    *srcSizePtr = (int) (((const char*)ip) - source);
     return (int) ((char*)op-dst);
+
+_dest_overflow:
+    if (limit == limitedDestSize) {
+        op = opSaved;  /* restore correct out pointer */
+        goto _last_literals;
+    }
+    return 0;
 }