summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- programs/Makefile 3
-rwxr-xr-x [-rw-r--r--] programs/fullbench.c 172
2 files changed, 148 insertions, 27 deletions
diff --git a/programs/Makefile b/programs/Makefile
index 543eb7c..076d3bf 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -20,7 +20,7 @@
#
# You can contact the author at :
# - LZ4 source repository : http://code.google.com/p/lz4/
-# - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c
+# - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
# ##########################################################################
# lz4 : Command Line Utility, supporting gzip-like arguments
# lz4c : CLU, supporting also legacy lz4demo arguments
@@ -143,6 +143,7 @@ test-lz4: lz4 datagen
./datagen -g16KB | ./lz4 -9 | ./lz4 -vdq > $(VOID)
./datagen | ./lz4 | ./lz4 -vdq > $(VOID)
./datagen -g6M -p100 | ./lz4 -9BD | ./lz4 -vdq > $(VOID)
+ ./datagen -g17M | ./lz4 -9v | ./lz4 -vdq > $(VOID)
./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -vdq > $(VOID)
./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID)
# test frame concatenation with null-length frame
diff --git a/programs/fullbench.c b/programs/fullbench.c
index b785924..756357a 100644..100755
--- a/programs/fullbench.c
+++ b/programs/fullbench.c
@@ -24,12 +24,12 @@
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
*/
-//**************************************
-// Compiler Options
-//**************************************
-// Disable some Visual warning messages
+/**************************************
+* Compiler Options
+**************************************/
+/* Disable some Visual warning messages */
#define _CRT_SECURE_NO_WARNINGS
-#define _CRT_SECURE_NO_DEPRECATE // VS2005
+#define _CRT_SECURE_NO_DEPRECATE /* VS2005 */
// Unix Large Files support (>4GB)
#if (defined(__sun__) && (!defined(__LP64__))) // Sun Solaris 32-bits requires specific definitions
@@ -45,9 +45,9 @@
#endif
-//**************************************
-// Includes
-//**************************************
+/**************************************
+* Includes
+**************************************/
#include <stdlib.h> // malloc
#include <stdio.h> // fprintf, fopen, ftello64
#include <sys/types.h> // stat64
@@ -68,10 +68,10 @@
#include "xxhash.h"
-//**************************************
-// Compiler Options
-//**************************************
-// S_ISREG & gettimeofday() are not supported by MSVC
+/**************************************
+* Compiler Options
+**************************************/
+/* S_ISREG & gettimeofday() are not supported by MSVC */
#if !defined(S_ISREG)
# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
#endif
@@ -82,9 +82,9 @@
#endif
-//**************************************
-// Basic Types
-//**************************************
+/**************************************
+* Basic Types
+**************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
# include <stdint.h>
typedef uint8_t BYTE;
@@ -101,9 +101,9 @@
#endif
-//****************************
-// Constants
-//****************************
+/**************************************
+* Constants
+**************************************/
#define PROGRAM_DESCRIPTION "LZ4 speed analyzer"
#ifndef LZ4_VERSION
# define LZ4_VERSION ""
@@ -122,9 +122,9 @@
#define ALL_DECOMPRESSORS 0
-//**************************************
-// Local structures
-//**************************************
+/**************************************
+* Local structures
+**************************************/
struct chunkParameters
{
U32 id;
@@ -135,9 +135,9 @@ struct chunkParameters
};
-//**************************************
-// MACRO
-//**************************************
+/**************************************
+* MACRO
+**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define PROGRESS(...) no_prompt ? 0 : DISPLAY(__VA_ARGS__)
@@ -251,8 +251,127 @@ static U64 BMK_GetFileSize(char* infilename)
/*********************************************************
- Benchmark function
+* Benchmark function
*********************************************************/
+#ifdef __SSSE3__
+
+#include <tmmintrin.h>
+
+/* Idea proposed by Terje Mathisen */
+static BYTE stepSize16[17] = {16,16,16,15,16,15,12,14,16,9,10,11,12,13,14,15,16}; /* entry k (k >= 1) is the largest multiple of k that is <= 16; not referenced by lz4_decode_sse */
+static __m128i replicateTable[17] = { /* row k, byte i holds i % k (rows 0 and 1 are all zero); used as the _mm_shuffle_epi8 control for the first 16 output bytes. NOTE(review): a 16-value brace initialiser relies on how the compiler defines __m128i -- confirm on every target compiler */
+ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
+ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
+ {0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1},
+ {0,1,2,0,1,2,0,1,2,0,1,2,0,1,2,0},
+ {0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3},
+ {0,1,2,3,4,0,1,2,3,4,0,1,2,3,4,0},
+ {0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3},
+ {0,1,2,3,4,5,6,0,1,2,3,4,5,6,0,1},
+ {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7},
+ {0,1,2,3,4,5,6,7,8,0,1,2,3,4,5,6},
+ {0,1,2,3,4,5,6,7,8,9,0,1,2,3,4,5},
+ {0,1,2,3,4,5,6,7,8,9,10,0,1,2,3,4},
+ {0,1,2,3,4,5,6,7,8,9,10,11,0,1,2,3},
+ {0,1,2,3,4,5,6,7,8,9,10,11,12,0,1,2},
+ {0,1,2,3,4,5,6,7,8,9,10,11,12,13,0,1},
+ {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0},
+ {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}};
+static BYTE stepSize32[17] = {32,32,32,30,32,30,30,28,32,27,30,22,24,26,28,30,16}; /* entry k (1 <= k <= 15) is the largest multiple of k that is <= 32; entry 16 is 16; lz4_decode_sse advances its store pointer by this amount */
+static __m128i replicateTable2[17] = { /* row k, byte i holds (16 + i) % k, i.e. the pattern of replicateTable[k] continued into output bytes 16..31 */
+ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
+ {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0},
+ {0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1},
+ {1,2,0,1,2,0,1,2,0,1,2,0,1,2,0,1},
+ {0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3},
+ {1,2,3,4,0,1,2,3,4,0,1,2,3,4,0,1},
+ {4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1},
+ {2,3,4,5,6,0,1,2,3,4,5,6,0,1,2,3},
+ {0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7},
+ {7,8,0,1,2,3,4,5,6,7,8,0,1,2,3,4},
+ {6,7,8,9,0,1,2,3,4,5,6,7,8,9,0,1},
+ {5,6,7,8,9,10,0,1,2,3,4,5,6,7,8,9},
+ {4,5,6,7,8,9,10,11,0,1,2,3,4,5,6,7},
+ {3,4,5,6,7,8,9,10,11,12,0,1,2,3,4,5},
+ {2,3,4,5,6,7,8,9,10,11,12,13,0,1,2,3},
+ {1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,1},
+ {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}};
+
+U32 lz4_decode_sse(BYTE* dest, BYTE* src, U32 srcLength) /* decodes srcLength bytes at src into dest with 16-byte vector copies; returns the output length (d - dest). NOTE(review): no bounds check on src or dest appears anywhere in this function */
+{
+ BYTE* d = dest, *e = src+srcLength; /* d: logical end of the output so far; e: one past the last input byte */
+ unsigned token, lit_len, mat_len;
+ __m128i a;
+ BYTE* dstore, *msrc; /* dstore: current store address; msrc: current copy source */
+
+ if (!srcLength) return 0; /* empty input produces no output */
+ goto start; /* jump into the loop body: the first pass handles only a token and its literals */
+
+ do {
+ U32 step;
+ unsigned mat_offset = src[0] + (src[1] << 8); /* 16-bit little-endian match offset */
+ src += 2;
+ msrc = d - mat_offset; /* the match is read from output produced earlier */
+ if (mat_len == 15) { /* 15 in the token's low nibble: extra length bytes follow */
+ do {
+ token = *src++;
+ mat_len += token;
+ } while (token == 255); /* a byte of 255 means another length byte follows */
+ }
+ mat_len += 4; /* the coded match length is biased by 4 */
+
+ dstore = d;
+ d += mat_len; /* d moves by the exact length; the stores below may run past it */
+
+ if (mat_offset <= 16)
+ { /* source and destination are closer than one vector: build a repeating pattern and bulk-store it */
+ __m128i a2;
+ a = _mm_loadu_si128((const __m128i *)msrc); /* loads 16 bytes; the shuffle rows only select indices below mat_offset */
+ a2 = _mm_shuffle_epi8(a, replicateTable2[mat_offset]); /* pattern for output bytes 16..31 */
+ a = _mm_shuffle_epi8(a, replicateTable[mat_offset]); /* pattern for output bytes 0..15 */
+ step = stepSize32[mat_offset]; /* per-iteration advance taken from the table */
+ do {
+ _mm_storeu_si128((__m128i *)dstore, a);
+ _mm_storeu_si128((__m128i *)(dstore+16), a2);
+ dstore += step;
+ } while (dstore < d); /* always writes 32 bytes per pass, so output beyond d can be overwritten */
+ }
+ else
+ {
+ do
+ {
+ a = _mm_loadu_si128((const __m128i *)msrc);
+ _mm_storeu_si128((__m128i *)dstore, a);
+ msrc += sizeof(a);
+ dstore += sizeof(a);
+ } while (dstore < d); /* whole 16-byte copies: the last store can extend past d */
+ }
+start:
+ token = *src++;
+ lit_len = token >> 4; /* high nibble: literal run length */
+ mat_len = token & 15; /* low nibble: match length, consumed on the next pass of the outer loop */
+ if (token >= 0xf0) { // lit_len == 15
+ do {
+ token = *src++;
+ lit_len += token;
+ } while (token == 255); /* a byte of 255 means another length byte follows */
+ }
+ dstore = d;
+ msrc = src; /* literals are copied straight from the input */
+ d += lit_len;
+ src += lit_len;
+ do {
+ a = _mm_loadu_si128((const __m128i *)msrc);
+ _mm_storeu_si128((__m128i *)dstore, a);
+ msrc += sizeof(a);
+ dstore += sizeof(a);
+ } while (dstore < d); /* runs at least once even when lit_len is 0, so 16 bytes are always read and written */
+ } while (src < e); /* stop once the input is consumed; otherwise a match description follows */
+
+ return (U32)(d-dest); /* number of decoded bytes */
+}
+#endif // __SSSE3__
+
static int local_LZ4_compress_limitedOutput(const char* in, char* out, int inSize)
{
@@ -345,6 +464,7 @@ static int local_LZ4_saveDictHC(const char* in, char* out, int inSize)
static int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int outSize) /* adapter giving LZ4_decompress_fast this four-argument signature */
{
(void)inSize; /* inSize is not used by this wrapper */
+ //lz4_decode_sse((BYTE*)out, (BYTE*)in, inSize); -- call to the SSSE3 decoder, left commented out
LZ4_decompress_fast(in, out, outSize); /* the return value is discarded */
return outSize; /* always reports outSize, whatever the decoder returned */
}
@@ -677,7 +797,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles)
PROGRESS("%1i- %-29.29s :%10i -> %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000.);
- // CRC Checking
+ /* CRC Checking */
crcDecoded = XXH32(orig_buff, (int)benchedSize, 0);
if (crcOriginal!=crcDecoded) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); exit(1); }
}