summaryrefslogtreecommitdiffstats
path: root/lz4hc_encoder.h
diff options
context:
space:
mode:
Diffstat (limited to 'lz4hc_encoder.h')
-rw-r--r--lz4hc_encoder.h341
1 files changed, 341 insertions, 0 deletions
diff --git a/lz4hc_encoder.h b/lz4hc_encoder.h
new file mode 100644
index 0000000..7efba9c
--- /dev/null
+++ b/lz4hc_encoder.h
@@ -0,0 +1,341 @@
+/*
+ LZ4 HC Encoder - Part of LZ4 HC algorithm
+ Copyright (C) 2011-2013, Yann Collet.
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ - LZ4 source repository : http://code.google.com/p/lz4/
+*/
+
+/* lz4hc_encoder.h must be included into lz4hc.c
+ The objective of this file is to create a single LZ4 compression function source
+ which will be instanciated multiple times with minor variations
+ depending on a set of #define.
+*/
+
+//****************************
+// Check required defines
+//****************************
+
+#ifndef FUNCTION_NAME
+# error "FUNTION_NAME is not defined"
+#endif
+
+
+//****************************
+// Local definitions
+//****************************
+#define COMBINED_NAME_RAW(n1,n2) n1 ## n2
+#define COMBINED_NAME(n1,n2) COMBINED_NAME_RAW(n1,n2)
+#define ENCODE_SEQUENCE_NAME COMBINED_NAME(FUNCTION_NAME,_encodeSequence)
+#ifdef LIMITED_OUTPUT
+# define ENCODE_SEQUENCE(i,o,a,m,r,d) if (ENCODE_SEQUENCE_NAME(i,o,a,m,r,d)) return 0;
+#else
+# define ENCODE_SEQUENCE(i,o,a,m,r,d) ENCODE_SEQUENCE_NAME(i,o,a,m,r)
+#endif
+
+//****************************
+// Function code
+//****************************
+
+forceinline static int ENCODE_SEQUENCE_NAME (
+ const BYTE** ip,
+ BYTE** op,
+ const BYTE** anchor,
+ int matchLength,
+ const BYTE* ref
+#ifdef LIMITED_OUTPUT
+ ,BYTE* oend
+#endif
+ )
+{
+ int length, len;
+ BYTE* token;
+
+ // Encode Literal length
+ length = (int)(*ip - *anchor);
+ token = (*op)++;
+#ifdef LIMITED_OUTPUT
+ if ((*op + length + (2 + 1 + LASTLITERALS) + (length>>8)) > oend) return 1; // Check output limit
+#endif
+ if (length>=(int)RUN_MASK) { *token=(RUN_MASK<<ML_BITS); len = length-RUN_MASK; for(; len > 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; }
+ else *token = (BYTE)(length<<ML_BITS);
+
+ // Copy Literals
+ LZ4_BLINDCOPY(*anchor, *op, length);
+
+ // Encode Offset
+ LZ4_WRITE_LITTLEENDIAN_16(*op,(U16)(*ip-ref));
+
+ // Encode MatchLength
+ length = (int)(matchLength-MINMATCH);
+#ifdef LIMITED_OUTPUT
+ if (*op + (1 + LASTLITERALS) + (length>>8) > oend) return 1; // Check output limit
+#endif
+ if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; }
+ else *token += (BYTE)length;
+
+ // Prepare next loop
+ *ip += matchLength;
+ *anchor = *ip;
+
+ return 0;
+}
+
+
+int COMBINED_NAME(FUNCTION_NAME,ctx) (
+ LZ4HC_Data_Structure* ctx,
+ const char* source,
+ char* dest,
+ int inputSize
+#ifdef LIMITED_OUTPUT
+ ,int maxOutputSize
+#endif
+ )
+{
+ const BYTE* ip = (const BYTE*) source;
+ const BYTE* anchor = ip;
+ const BYTE* const iend = ip + inputSize;
+ const BYTE* const mflimit = iend - MFLIMIT;
+ const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+ BYTE* op = (BYTE*) dest;
+#ifdef LIMITED_OUTPUT
+ BYTE* const oend = op + maxOutputSize;
+#endif
+
+ int ml, ml2, ml3, ml0;
+ const BYTE* ref=NULL;
+ const BYTE* start2=NULL;
+ const BYTE* ref2=NULL;
+ const BYTE* start3=NULL;
+ const BYTE* ref3=NULL;
+ const BYTE* start0;
+ const BYTE* ref0;
+
+ ip++;
+
+ // Main Loop
+ while (ip < mflimit)
+ {
+ ml = LZ4HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref));
+ if (!ml) { ip++; continue; }
+
+ // saved, in case we would skip too much
+ start0 = ip;
+ ref0 = ref;
+ ml0 = ml;
+
+_Search2:
+ if (ip+ml < mflimit)
+ ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2);
+ else ml2 = ml;
+
+ if (ml2 == ml) // No better match
+ {
+ ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
+ continue;
+ }
+
+ if (start0 < ip)
+ {
+ if (start2 < ip + ml0) // empirical
+ {
+ ip = start0;
+ ref = ref0;
+ ml = ml0;
+ }
+ }
+
+ // Here, start0==ip
+ if ((start2 - ip) < 3) // First Match too small : removed
+ {
+ ml = ml2;
+ ip = start2;
+ ref =ref2;
+ goto _Search2;
+ }
+
+_Search3:
+ // Currently we have :
+ // ml2 > ml1, and
+ // ip1+3 <= ip2 (usually < ip1+ml1)
+ if ((start2 - ip) < OPTIMAL_ML)
+ {
+ int correction;
+ int new_ml = ml;
+ if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML;
+ if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = new_ml - (int)(start2 - ip);
+ if (correction > 0)
+ {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ // Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18)
+
+ if (start2 + ml2 < mflimit)
+ ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3);
+ else ml3 = ml2;
+
+ if (ml3 == ml2) // No better match : 2 sequences to encode
+ {
+ // ip & ref are known; Now for ml
+ if (start2 < ip+ml) ml = (int)(start2 - ip);
+ // Now, encode 2 sequences
+ ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
+ ip = start2;
+ ENCODE_SEQUENCE(&ip, &op, &anchor, ml2, ref2, oend);
+ continue;
+ }
+
+ if (start3 < ip+ml+3) // Not enough space for match 2 : remove it
+ {
+ if (start3 >= (ip+ml)) // can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1
+ {
+ if (start2 < ip+ml)
+ {
+ int correction = (int)(ip+ml - start2);
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ if (ml2 < MINMATCH)
+ {
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ }
+ }
+
+ ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
+ ip = start3;
+ ref = ref3;
+ ml = ml3;
+
+ start0 = start2;
+ ref0 = ref2;
+ ml0 = ml2;
+ goto _Search2;
+ }
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ goto _Search3;
+ }
+
+ // OK, now we have 3 ascending matches; let's write at least the first one
+ // ip & ref are known; Now for ml
+ if (start2 < ip+ml)
+ {
+ if ((start2 - ip) < (int)ML_MASK)
+ {
+ int correction;
+ if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
+ if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = ml - (int)(start2 - ip);
+ if (correction > 0)
+ {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ else
+ {
+ ml = (int)(start2 - ip);
+ }
+ }
+ ENCODE_SEQUENCE(&ip, &op, &anchor, ml, ref, oend);
+
+ ip = start2;
+ ref = ref2;
+ ml = ml2;
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+
+ goto _Search3;
+
+ }
+
+ // Encode Last Literals
+ {
+ int lastRun = (int)(iend - anchor);
+#ifdef LIMITED_OUTPUT
+ if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit
+#endif
+ if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
+ else *op++ = (BYTE)(lastRun<<ML_BITS);
+ memcpy(op, anchor, iend - anchor);
+ op += iend-anchor;
+ }
+
+ // End
+ return (int) (((char*)op)-dest);
+}
+
+
+int FUNCTION_NAME (const char* source,
+ char* dest,
+ int inputSize
+#ifdef LIMITED_OUTPUT
+ ,int maxOutputSize
+#endif
+ )
+{
+ void* ctx = LZ4HC_create((const BYTE*)source);
+#ifdef LIMITED_OUTPUT
+ int result = COMBINED_NAME(FUNCTION_NAME,ctx) (ctx, source, dest, inputSize, maxOutputSize);
+#else
+ int result = COMBINED_NAME(FUNCTION_NAME,ctx) (ctx, source, dest, inputSize);
+#endif
+ LZ4HC_free (&ctx);
+
+ return result;
+}
+
+
+//****************************
+// Clean defines
+//****************************
+
+// Required defines
+#undef FUNCTION_NAME
+
+// Locally Generated
+#undef ENCODE_SEQUENCE
+
+// Optional defines
+#ifdef LIMITED_OUTPUT
+#undef LIMITED_OUTPUT
+#endif
+
+