summary | refs | log | tree | commit | diff | stats
path: root/programs
diff options
context:
space:
mode:
author: Yann Collet <yann.collet.73@gmail.com> 2015-03-09 23:12:51 (GMT)
committer: Yann Collet <yann.collet.73@gmail.com> 2015-03-09 23:12:51 (GMT)
commit: 6b0c39b839b8343da195252a8c46e6d93138f3b8 (patch)
tree: 6176b68da7f6d7b6f07d613943b96cbae3d85008 /programs
parent: e277511ab65ad2482fecf70b8ad3665a8caa200a (diff)
download: lz4-6b0c39b839b8343da195252a8c46e6d93138f3b8.zip
lz4-6b0c39b839b8343da195252a8c46e6d93138f3b8.tar.gz
lz4-6b0c39b839b8343da195252a8c46e6d93138f3b8.tar.bz2
Updated datagen (can create sparse files)
Diffstat (limited to 'programs')
-rw-r--r-- programs/Makefile 5
-rw-r--r-- programs/datagen.c 327
-rw-r--r-- programs/datagen.h 40
-rw-r--r-- programs/datagencli.c 190
4 files changed, 365 insertions, 197 deletions
diff --git a/programs/Makefile b/programs/Makefile
index 1070f40..2c883d0 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -92,7 +92,7 @@ frametest: $(LZ4DIR)/lz4frame.c $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/xxha
frametest32: $(LZ4DIR)/lz4frame.c $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/xxhash.c frametest.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
-datagen : datagen.c
+datagen : datagen.c datagencli.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
@@ -142,7 +142,7 @@ test-travis: $(TRAVIS_TARGET)
test-lz4: lz4 datagen
./datagen -g16KB | ./lz4 -9 | ./lz4 -vdq > $(VOID)
./datagen | ./lz4 | ./lz4 -vdq > $(VOID)
- ./datagen -g6M -p100 | ./lz4 -9BD | ./lz4 -vdq > $(VOID)
+ ./datagen -g6M -P100 | ./lz4 -9BD | ./lz4 -vdq > $(VOID)
./datagen -g17M | ./lz4 -9v | ./lz4 -vdq > $(VOID)
./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -vdq > $(VOID)
./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID)
@@ -197,6 +197,7 @@ test-frametest32: frametest32
./frametest32
test-mem: lz4 datagen fuzzer frametest
+ valgrind --leak-check=yes ./datagen -g50M > /dev/null
./datagen -g16KB > tmp
valgrind --leak-check=yes ./lz4 -9 -BD -f tmp /dev/null
./datagen -g16MB > tmp
diff --git a/programs/datagen.c b/programs/datagen.c
index 0f07477..2a10b81 100644
--- a/programs/datagen.c
+++ b/programs/datagen.c
@@ -19,26 +19,20 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- - LZ4 source repository : http://code.google.com/p/lz4
- - LZ4 source mirror : https://github.com/Cyan4973/lz4
- - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+ - ZSTD source repository : https://github.com/Cyan4973/zstd
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/**************************************
- Remove Visual warning messages
+* Includes
**************************************/
-#define _CRT_SECURE_NO_WARNINGS // fgets
+#include <stdlib.h> /* malloc */
+#include <stdio.h> /* FILE, fwrite */
+#include <string.h> /* memcpy, memset */
/**************************************
- Includes
-**************************************/
-#include <stdio.h> // fgets, sscanf
-#include <string.h> // strcmp
-
-
-/**************************************
- Basic Types
+* Basic Types
**************************************/
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
# include <stdint.h>
@@ -57,230 +51,173 @@
/**************************************
- Constants
+* OS-specific Includes
**************************************/
-#ifndef LZ4_VERSION
-# define LZ4_VERSION "r125"
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+# include <fcntl.h> /* _O_BINARY */
+# include <io.h> /* _setmode, _isatty */
+# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
+#else
+# define SET_BINARY_MODE(file)
#endif
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
-
-#define CDG_SIZE_DEFAULT (64 KB)
-#define CDG_SEED_DEFAULT 0
-#define CDG_COMPRESSIBILITY_DEFAULT 50
-#define PRIME1 2654435761U
-#define PRIME2 2246822519U
-
/**************************************
- Macros
+* Constants
**************************************/
-#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
-#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-
+#define KB *(1 <<10)
-/**************************************
- Local Parameters
-**************************************/
-static unsigned no_prompt = 0;
-static char* programName;
-static unsigned displayLevel = 2;
+#define PRIME1 2654435761U
+#define PRIME2 2246822519U
/*********************************************************
- functions
+* Local Functions
*********************************************************/
-
-#define CDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
-static unsigned int CDG_rand(U32* src)
+#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+static unsigned int RDG_rand(U32* src)
{
U32 rand32 = *src;
rand32 *= PRIME1;
- rand32 += PRIME2;
- rand32 = CDG_rotl32(rand32, 13);
+ rand32 ^= PRIME2;
+ rand32 = RDG_rotl32(rand32, 13);
*src = rand32;
return rand32;
}
-#define CDG_RAND15BITS ((CDG_rand(seed) >> 3) & 32767)
-#define CDG_RANDLENGTH ( ((CDG_rand(seed) >> 7) & 3) ? (CDG_rand(seed) % 14) : (CDG_rand(seed) & 511) + 15)
-#define CDG_RANDCHAR (((CDG_rand(seed) >> 9) & 63) + '0')
-static void CDG_generate(U64 size, U32* seed, double proba)
+#define LTSIZE 8192
+#define LTMASK (LTSIZE-1)
+static void* RDG_createLiteralDistrib(double ld)
{
- BYTE fullbuff[32 KB + 128 KB + 1];
- BYTE* buff = fullbuff + 32 KB;
- U64 total=0;
- U32 P32 = (U32)(32768 * proba);
- U32 pos=1;
- U32 genBlockSize = 128 KB;
+ BYTE* lt = malloc(LTSIZE);
+ U32 i = 0;
+ BYTE character = '0';
+ BYTE firstChar = '(';
+ BYTE lastChar = '}';
- // Build initial prefix
- fullbuff[0] = CDG_RANDCHAR;
- while (pos<32 KB)
+ if (ld==0.0)
{
- // Select : Literal (char) or Match (within 32K)
- if (CDG_RAND15BITS < P32)
+ character = 0;
+ firstChar = 0;
+ lastChar =255;
+ }
+ while (i<LTSIZE)
+ {
+ U32 weight = (U32)((double)(LTSIZE - i) * ld) + 1;
+ U32 end;
+ if (weight + i > LTSIZE) weight = LTSIZE-i;
+ end = i + weight;
+ while (i < end) lt[i++] = character;
+ character++;
+ if (character > lastChar) character = firstChar;
+ }
+ return lt;
+}
+
+static char RDG_genChar(U32* seed, const void* ltctx)
+{
+ const BYTE* lt = ltctx;
+ U32 id = RDG_rand(seed) & LTMASK;
+ return lt[id];
+}
+
+#define RDG_DICTSIZE (32 KB)
+#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767)
+#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
+void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr)
+{
+ BYTE* buffPtr = (BYTE*)buffer;
+ const U32 matchProba32 = (U32)(32768 * matchProba);
+ size_t pos = prefixSize;
+ void* ldctx = litTable;
+ U32* seed = seedPtr;
+
+ /* special case */
+ while (matchProba >= 1.0)
+ {
+ size_t size0 = RDG_rand(seed) & 3;
+ size0 = 1U << (16 + size0 * 2);
+ size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/
+ if (buffSize < pos + size0)
+ {
+ memset(buffPtr+pos, 0, buffSize-pos);
+ return;
+ }
+ memset(buffPtr+pos, 0, size0);
+ pos += size0;
+ buffPtr[pos-1] = RDG_genChar(seed, ldctx);
+ }
+
+ /* init */
+ if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1;
+
+ /* Generate compressible data */
+ while (pos < buffSize)
+ {
+ /* Select : Literal (char) or Match (within 32K) */
+ if (RDG_RAND15BITS < matchProba32)
{
- // Copy (within 64K)
+ /* Copy (within 32K) */
+ int match;
U32 d;
- int ref;
- int length = CDG_RANDLENGTH + 4;
- U32 offset = CDG_RAND15BITS + 1;
+ int length = RDG_RANDLENGTH + 4;
+ U32 offset = RDG_RAND15BITS + 1;
if (offset > pos) offset = pos;
- ref = pos - offset;
+ match = pos - offset;
d = pos + length;
- while (pos < d) fullbuff[pos++] = fullbuff[ref++];
+ if (d > buffSize) d = buffSize;
+ while (pos < d) buffPtr[pos++] = buffPtr[match++];
}
else
{
- // Literal (noise)
- U32 d = pos + CDG_RANDLENGTH;
- while (pos < d) fullbuff[pos++] = CDG_RANDCHAR;
- }
- }
-
- // Generate compressible data
- pos = 0;
- while (total < size)
- {
- if (size-total < 128 KB) genBlockSize = (U32)(size-total);
- total += genBlockSize;
- buff[genBlockSize] = 0;
- pos = 0;
- while (pos<genBlockSize)
- {
- // Select : Literal (char) or Match (within 32K)
- if (CDG_RAND15BITS < P32)
- {
- // Copy (within 64K)
- int ref;
- U32 d;
- int length = CDG_RANDLENGTH + 4;
- U32 offset = CDG_RAND15BITS + 1;
- if (pos + length > genBlockSize ) length = genBlockSize - pos;
- ref = pos - offset;
- d = pos + length;
- while (pos < d) buff[pos++] = buff[ref++];
- }
- else
- {
- // Literal (noise)
- U32 d;
- int length = CDG_RANDLENGTH;
- if (pos + length > genBlockSize) length = genBlockSize - pos;
- d = pos + length;
- while (pos < d) buff[pos++] = CDG_RANDCHAR;
- }
+ /* Literal (noise) */
+ size_t d;
+ size_t length = RDG_RANDLENGTH;
+ d = pos + length;
+ if (d > buffSize) d = buffSize;
+ while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx);
}
- // output datagen
- pos=0;
- for (;pos+512<=genBlockSize;pos+=512)
- printf("%512.512s", buff+pos);
- for (;pos<genBlockSize;pos++) printf("%c", buff[pos]);
- // Regenerate prefix
- memcpy(fullbuff, buff + 96 KB, 32 KB);
}
}
-int CDG_usage(void)
+void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
{
- DISPLAY( "Compressible data generator\n");
- DISPLAY( "Usage :\n");
- DISPLAY( " %s [size] [args]\n", programName);
- DISPLAY( "\n");
- DISPLAY( "Arguments :\n");
- DISPLAY( " -g# : generate # data (default:%i)\n", CDG_SIZE_DEFAULT);
- DISPLAY( " -s# : Select seed (default:%i)\n", CDG_SEED_DEFAULT);
- DISPLAY( " -p# : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT);
- DISPLAY( " -h : display help and exit\n");
- return 0;
+ void* ldctx;
+ if (litProba==0.0) litProba = matchProba / 4.5;
+ ldctx = RDG_createLiteralDistrib(litProba);
+ RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed);
+ free(ldctx);
}
-int main(int argc, char** argv)
+#define RDG_BLOCKSIZE (128 KB)
+void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
{
- int argNb;
- int proba = CDG_COMPRESSIBILITY_DEFAULT;
- U64 size = CDG_SIZE_DEFAULT;
- U32 seed = CDG_SEED_DEFAULT;
-
- // Check command line
- programName = argv[0];
- for(argNb=1; argNb<argc; argNb++)
- {
- char* argument = argv[argNb];
-
- if(!argument) continue; // Protection if argument empty
+ BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE];
+ U64 total = 0;
+ size_t genBlockSize = RDG_BLOCKSIZE;
+ void* ldctx;
- // Decode command (note : aggregated commands are allowed)
- if (*argument=='-')
- {
- if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; }
+ /* init */
+ if (litProba==0.0) litProba = matchProba / 4.5;
+ ldctx = RDG_createLiteralDistrib(litProba);
+ SET_BINARY_MODE(stdout);
- argument++;
- while (*argument!=0)
- {
- switch(*argument)
- {
- case 'h':
- return CDG_usage();
- case 'g':
- argument++;
- size=0;
- while ((*argument>='0') && (*argument<='9'))
- {
- size *= 10;
- size += *argument - '0';
- argument++;
- }
- if (*argument=='K') { size <<= 10; argument++; }
- if (*argument=='M') { size <<= 20; argument++; }
- if (*argument=='G') { size <<= 30; argument++; }
- if (*argument=='B') { argument++; }
- break;
- case 's':
- argument++;
- seed=0;
- while ((*argument>='0') && (*argument<='9'))
- {
- seed *= 10;
- seed += *argument - '0';
- argument++;
- }
- break;
- case 'p':
- argument++;
- proba=0;
- while ((*argument>='0') && (*argument<='9'))
- {
- proba *= 10;
- proba += *argument - '0';
- argument++;
- }
- if (proba<0) proba=0;
- if (proba>100) proba=100;
- break;
- case 'v':
- displayLevel = 4;
- argument++;
- break;
- default: ;
- }
- }
+ /* Generate dict */
+ RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed);
- }
+ /* Generate compressible data */
+ while (total < size)
+ {
+ RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed);
+ if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total);
+ total += genBlockSize;
+ fwrite(buff, 1, genBlockSize, stdout);
+ /* update dict */
+ memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE);
}
- // Get Seed
- DISPLAYLEVEL(4, "Data Generator %s \n", LZ4_VERSION);
- DISPLAYLEVEL(3, "Seed = %u \n", seed);
- if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", proba);
-
- CDG_generate(size, &seed, ((double)proba) / 100);
-
- return 0;
+ free(ldctx);
}
diff --git a/programs/datagen.h b/programs/datagen.h
new file mode 100644
index 0000000..631d146
--- /dev/null
+++ b/programs/datagen.h
@@ -0,0 +1,40 @@
+/*
+ datagen.h - compressible data generator header
+ Copyright (C) Yann Collet 2012-2015
+
+ GPL v2 License
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+ You can contact the author at :
+ - ZSTD source repository : https://github.com/Cyan4973/zstd
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+
+#include <stddef.h> /* size_t */
+
+void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed);
+void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed);
+/* RDG_genOut
+ Generates 'size' bytes of compressible data and writes them to stdout.
+ Compressibility is controlled by 'matchProba'.
+ 'litProba' is optional and affects the variability of bytes. If litProba==0.0, the default value (matchProba / 4.5) is used.
+ The generated data can be selected using 'seed'.
+ If matchProba, litProba and seed are equal, the function always generates the same content.
+
+ RDG_genBuffer
+ Same as RDG_genOut, but generates the data into the provided buffer.
+*/
diff --git a/programs/datagencli.c b/programs/datagencli.c
new file mode 100644
index 0000000..801e198
--- /dev/null
+++ b/programs/datagencli.c
@@ -0,0 +1,190 @@
+/*
+ datagencli.c
+ compressible data command line generator
+ Copyright (C) Yann Collet 2012-2015
+
+ GPL v2 License
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+ You can contact the author at :
+ - ZSTD source repository : https://github.com/Cyan4973/zstd
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/**************************************
+* Includes
+**************************************/
+#include <stdio.h> /* fprintf, stderr */
+#include "datagen.h" /* RDG_genOut */
+
+
+/**************************************
+* Basic Types
+**************************************/
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+#else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+#endif
+
+
+/**************************************
+* Constants
+**************************************/
+#ifndef ZSTD_VERSION
+# define ZSTD_VERSION "r1"
+#endif
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define SIZE_DEFAULT (64 KB)
+#define SEED_DEFAULT 0
+#define COMPRESSIBILITY_DEFAULT 50
+
+
+/**************************************
+* Macros
+**************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
+static unsigned displayLevel = 2;
+
+
+/*********************************************************
+* Command line
+*********************************************************/
+static int usage(char* programName)
+{
+ DISPLAY( "Compressible data generator\n");
+ DISPLAY( "Usage :\n");
+ DISPLAY( " %s [size] [args]\n", programName);
+ DISPLAY( "\n");
+ DISPLAY( "Arguments :\n");
+ DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT);
+ DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT);
+ DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", COMPRESSIBILITY_DEFAULT);
+ DISPLAY( " -h : display help and exit\n");
+ return 0;
+}
+
+
+int main(int argc, char** argv)
+{
+ int argNb;
+ double proba = (double)COMPRESSIBILITY_DEFAULT / 100;
+ double litProba = 0.0;
+ U64 size = SIZE_DEFAULT;
+ U32 seed = SEED_DEFAULT;
+ char* programName;
+
+ /* Check command line */
+ programName = argv[0];
+ for(argNb=1; argNb<argc; argNb++)
+ {
+ char* argument = argv[argNb];
+
+ if(!argument) continue; /* Protection if argument empty */
+
+ /* Handle commands. Aggregated commands are allowed */
+ if (*argument=='-')
+ {
+ argument++;
+ while (*argument!=0)
+ {
+ switch(*argument)
+ {
+ case 'h':
+ return usage(programName);
+ case 'g':
+ argument++;
+ size=0;
+ while ((*argument>='0') && (*argument<='9'))
+ {
+ size *= 10;
+ size += *argument - '0';
+ argument++;
+ }
+ if (*argument=='K') { size <<= 10; argument++; }
+ if (*argument=='M') { size <<= 20; argument++; }
+ if (*argument=='G') { size <<= 30; argument++; }
+ if (*argument=='B') { argument++; }
+ break;
+ case 's':
+ argument++;
+ seed=0;
+ while ((*argument>='0') && (*argument<='9'))
+ {
+ seed *= 10;
+ seed += *argument - '0';
+ argument++;
+ }
+ break;
+ case 'P':
+ argument++;
+ proba=0.0;
+ while ((*argument>='0') && (*argument<='9'))
+ {
+ proba *= 10;
+ proba += *argument - '0';
+ argument++;
+ }
+ if (proba>100.) proba=100.;
+ proba /= 100.;
+ break;
+ case 'L': /* hidden argument : Literal distribution probability */
+ argument++;
+ litProba=0.;
+ while ((*argument>='0') && (*argument<='9'))
+ {
+ litProba *= 10;
+ litProba += *argument - '0';
+ argument++;
+ }
+ if (litProba>100.) litProba=100.;
+ litProba /= 100.;
+ break;
+ case 'v':
+ displayLevel = 4;
+ argument++;
+ break;
+ default:
+ return usage(programName);
+ }
+ }
+
+ }
+ }
+
+ DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION);
+ DISPLAYLEVEL(3, "Seed = %u \n", seed);
+ if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100));
+
+ RDG_genOut(size, proba, litProba, seed);
+ DISPLAYLEVEL(1, "\n");
+
+ return 0;
+}