author     Nick Terrell <terrelln@fb.com>    2016-11-10 00:20:47 (GMT)
committer  Nick Terrell <terrelln@fb.com>    2016-11-10 01:39:56 (GMT)
commit     94917c9a04ce08fcdb6b465b4aff38d2d82053aa (patch)
tree       4d762b18e40590e001f0579726160c6bb9367e96 /examples
parent     bd88e4007b7e3eddd58e2c76c39b5bb650b5cb20 (diff)
Add dictionary random access example
Diffstat (limited to 'examples')
-rw-r--r--  examples/.gitignore                   1
-rw-r--r--  examples/Makefile                    16
-rw-r--r--  examples/README.md                    1
-rw-r--r--  examples/dictionaryRandomAccess.c   280
-rw-r--r--  examples/dictionaryRandomAccess.md   67
5 files changed, 359 insertions, 6 deletions
diff --git a/examples/.gitignore b/examples/.gitignore
index 4893866..3ceb90d 100644
--- a/examples/.gitignore
+++ b/examples/.gitignore
@@ -1,6 +1,7 @@
/Makefile.lz4*
/printVersion
/doubleBuffer
+/dictionaryRandomAccess
/ringBuffer
/ringBufferHC
/lineCompress
diff --git a/examples/Makefile b/examples/Makefile
index c8caf24..aad713b 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -48,7 +48,7 @@ endif
default: all
-all: printVersion doubleBuffer ringBuffer ringBufferHC lineCompress frameCompress
+all: printVersion doubleBuffer dictionaryRandomAccess ringBuffer ringBufferHC lineCompress frameCompress
printVersion: $(LZ4DIR)/lz4.c printVersion.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
@@ -56,6 +56,9 @@ printVersion: $(LZ4DIR)/lz4.c printVersion.c
doubleBuffer: $(LZ4DIR)/lz4.c blockStreaming_doubleBuffer.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
+dictionaryRandomAccess: $(LZ4DIR)/lz4.c dictionaryRandomAccess.c
+ $(CC) $(FLAGS) $^ -o $@$(EXT)
+
ringBuffer : $(LZ4DIR)/lz4.c blockStreaming_ringBuffer.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
@@ -66,7 +69,7 @@ lineCompress: $(LZ4DIR)/lz4.c blockStreaming_lineByLine.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
frameCompress: frameCompress.c
- $(CC) $(FLAGS) $^ -o $@$(EXT) -L$(LZ4DIR) -llz4
+ $(CC) $(FLAGS) $^ -o $@$(EXT) $(LZ4DIR)/liblz4.a
compressFunctions: $(LZ4DIR)/lz4.c compress_functions.c
$(CC) $(FLAGS) $^ -o $@$(EXT) -lrt
@@ -77,15 +80,16 @@ simpleBuffer: $(LZ4DIR)/lz4.c simple_buffer.c
test : all
./printVersion$(EXT)
./doubleBuffer$(EXT) $(TESTFILE)
+ ./dictionaryRandomAccess$(EXT) $(TESTFILE) $(TESTFILE) 1100 1400
./ringBuffer$(EXT) $(TESTFILE)
./ringBufferHC$(EXT) $(TESTFILE)
./lineCompress$(EXT) $(TESTFILE)
- LD_LIBRARY_PATH=$(LZ4DIR) ./frameCompress$(EXT) $(TESTFILE)
+ ./frameCompress$(EXT) $(TESTFILE)
$(LZ4) -vt $(TESTFILE).lz4
clean:
@rm -f core *.o *.dec *-0 *-9 *-8192 *.lz4s *.lz4 \
- printVersion$(EXT) doubleBuffer$(EXT) ringBuffer$(EXT) ringBufferHC$(EXT) \
- lineCompress$(EXT) frameCompress$(EXT) compressFunctions$(EXT) simpleBuffer$(EXT)
+ printVersion$(EXT) doubleBuffer$(EXT) dictionaryRandomAccess$(EXT) \
+ ringBuffer$(EXT) ringBufferHC$(EXT) lineCompress$(EXT) frameCompress$(EXT) \
+ compressFunctions$(EXT) simpleBuffer$(EXT)
@echo Cleaning completed
-
diff --git a/examples/README.md b/examples/README.md
index 74527d4..e6839e9 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -8,3 +8,4 @@ All examples are GPL-v2 licensed.
- Examples
- [Double Buffer](blockStreaming_doubleBuffer.md)
- [Line by Line Text Compression](blockStreaming_lineByLine.md)
+ - [Dictionary Random Access](dictionaryRandomAccess.md)
diff --git a/examples/dictionaryRandomAccess.c b/examples/dictionaryRandomAccess.c
new file mode 100644
index 0000000..6acf99b
--- /dev/null
+++ b/examples/dictionaryRandomAccess.c
@@ -0,0 +1,280 @@
+// LZ4 API example : Dictionary Random Access
+
+#ifdef _MSC_VER /* Visual Studio */
+# define _CRT_SECURE_NO_WARNINGS
+# define snprintf sprintf_s
+#endif
+#include "lz4.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+enum {
+ BLOCK_BYTES = 1024, /* 1 KiB of uncompressed data in a block */
+ DICTIONARY_BYTES = 1024, /* Load a 1 KiB dictionary */
+ MAX_BLOCKS = 1024 /* For simplicity of implementation */
+};
+
+/**
+ * Magic bytes for this test case.
+ * This is not a great magic number because it is a common word in ASCII.
+ * However, it is important to have some versioning system in your format.
+ */
+const char kTestMagic[] = { 'T', 'E', 'S', 'T' };
+
+
+void write_int(FILE* fp, int i) {
+ size_t written = fwrite(&i, sizeof(i), 1, fp);
+ if (written != 1) { exit(10); }
+}
+
+void write_bin(FILE* fp, const void* array, size_t arrayBytes) {
+ size_t written = fwrite(array, 1, arrayBytes, fp);
+ if (written != arrayBytes) { exit(11); }
+}
+
+void read_int(FILE* fp, int* i) {
+ size_t read = fread(i, sizeof(*i), 1, fp);
+ if (read != 1) { exit(12); }
+}
+
+size_t read_bin(FILE* fp, void* array, size_t arrayBytes) {
+ size_t read = fread(array, 1, arrayBytes, fp);
+ if (ferror(fp)) { exit(13); }
+ return read;
+}
+
+void seek_bin(FILE* fp, long offset, int origin) {
+ if (fseek(fp, offset, origin)) { exit(14); }
+}
+
+
+void test_compress(FILE* outFp, FILE* inpFp, void *dict, int dictSize)
+{
+ LZ4_stream_t lz4Stream_body;
+ LZ4_stream_t* lz4Stream = &lz4Stream_body;
+
+ char inpBuf[BLOCK_BYTES];
+ int offsets[MAX_BLOCKS + 1]; /* one start offset per block, plus the end of the last block */
+ int *offsetsEnd = offsets;
+
+
+ LZ4_resetStream(lz4Stream);
+
+ /* Write header magic */
+ write_bin(outFp, kTestMagic, sizeof(kTestMagic));
+
+ *offsetsEnd++ = sizeof(kTestMagic);
+ /* Write compressed data blocks. Each block contains BLOCK_BYTES of plain
+ data except possibly the last. */
+ for(;;) {
+ const int inpBytes = (int) read_bin(inpFp, inpBuf, BLOCK_BYTES);
+ if(0 == inpBytes) {
+ break;
+ }
+
+ /* Forget previously compressed data and load the dictionary */
+ LZ4_loadDict(lz4Stream, dict, dictSize);
+ {
+ char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
+ const int cmpBytes = LZ4_compress_fast_continue(
+ lz4Stream, inpBuf, cmpBuf, inpBytes, sizeof(cmpBuf), 1);
+ if(cmpBytes <= 0) { exit(1); }
+ write_bin(outFp, cmpBuf, (size_t)cmpBytes);
+ /* Keep track of the offsets */
+ *offsetsEnd = *(offsetsEnd - 1) + cmpBytes;
+ ++offsetsEnd;
+ }
+ if (offsetsEnd - offsets > MAX_BLOCKS) { exit(2); }
+ }
+ /* Write the tailing jump table */
+ {
+ int *ptr = offsets;
+ while (ptr != offsetsEnd) {
+ write_int(outFp, *ptr++);
+ }
+ write_int(outFp, offsetsEnd - offsets);
+ }
+}
+
+
+void test_decompress(FILE* outFp, FILE* inpFp, void *dict, int dictSize, int offset, int length)
+{
+ LZ4_streamDecode_t lz4StreamDecode_body;
+ LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;
+
+ /* The blocks [currentBlock, endBlock) contain the data we want */
+ int currentBlock = offset / BLOCK_BYTES;
+ int endBlock = ((offset + length - 1) / BLOCK_BYTES) + 1;
+
+ char decBuf[BLOCK_BYTES];
+ int offsets[MAX_BLOCKS];
+
+ /* Special cases */
+ if (length == 0) { return; }
+
+ /* Read the magic bytes */
+ {
+ char magic[sizeof(kTestMagic)];
+ size_t read = read_bin(inpFp, magic, sizeof(magic));
+ if (read != sizeof(magic)) { exit(1); }
+ if (memcmp(kTestMagic, magic, sizeof(magic))) { exit(2); }
+ }
+
+ /* Read the offsets tail */
+ {
+ int numOffsets;
+ int block;
+ int *offsetsPtr = offsets;
+ seek_bin(inpFp, -4, SEEK_END);
+ read_int(inpFp, &numOffsets);
+ if (numOffsets <= endBlock) { exit(3); }
+ seek_bin(inpFp, -4 * (numOffsets + 1), SEEK_END);
+ for (block = 0; block <= endBlock; ++block) {
+ read_int(inpFp, offsetsPtr++);
+ }
+ }
+ /* Seek to the first block to read */
+ seek_bin(inpFp, offsets[currentBlock], SEEK_SET);
+ offset = offset % BLOCK_BYTES;
+
+ /* Start decoding */
+ for(; currentBlock < endBlock; ++currentBlock) {
+ char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
+ /* The difference in offsets is the size of the block */
+ int cmpBytes = offsets[currentBlock + 1] - offsets[currentBlock];
+ {
+ const size_t read = read_bin(inpFp, cmpBuf, (size_t)cmpBytes);
+ if(read != (size_t)cmpBytes) { exit(4); }
+ }
+
+ /* Load the dictionary */
+ LZ4_setStreamDecode(lz4StreamDecode, dict, dictSize);
+ {
+ const int decBytes = LZ4_decompress_safe_continue(
+ lz4StreamDecode, cmpBuf, decBuf, cmpBytes, BLOCK_BYTES);
+ if(decBytes <= 0) { exit(5); }
+ {
+ /* Write out the part of the data we care about */
+ int blockLength = MIN(length, (decBytes - offset));
+ write_bin(outFp, decBuf + offset, (size_t)blockLength);
+ offset = 0;
+ length -= blockLength;
+ }
+ }
+ }
+}
+
+
+int compare(FILE* fp0, FILE* fp1, int length)
+{
+ int result = 0;
+
+ while(0 == result) {
+ char b0[4096];
+ char b1[4096];
+ const size_t r0 = read_bin(fp0, b0, MIN(length, (int)sizeof(b0)));
+ const size_t r1 = read_bin(fp1, b1, MIN(length, (int)sizeof(b1)));
+
+ result = (int) r0 - (int) r1;
+
+ if(0 == r0 || 0 == r1) {
+ break;
+ }
+ if(0 == result) {
+ result = memcmp(b0, b1, r0);
+ }
+ length -= r0;
+ }
+
+ return result;
+}
+
+
+int main(int argc, char* argv[])
+{
+ char inpFilename[256] = { 0 };
+ char lz4Filename[256] = { 0 };
+ char decFilename[256] = { 0 };
+ char dictFilename[256] = { 0 };
+ int offset;
+ int length;
+ char dict[DICTIONARY_BYTES];
+ int dictSize;
+
+ if(argc < 5) {
+ printf("Usage: %s input dictionary offset length", argv[0]);
+ return 0;
+ }
+
+ snprintf(inpFilename, 256, "%s", argv[1]);
+ snprintf(lz4Filename, 256, "%s.lz4s-%d", argv[1], BLOCK_BYTES);
+ snprintf(decFilename, 256, "%s.lz4s-%d.dec", argv[1], BLOCK_BYTES);
+ snprintf(dictFilename, 256, "%s", argv[2]);
+ offset = atoi(argv[3]);
+ length = atoi(argv[4]);
+
+ printf("inp = [%s]\n", inpFilename);
+ printf("lz4 = [%s]\n", lz4Filename);
+ printf("dec = [%s]\n", decFilename);
+ printf("dict = [%s]\n", dictFilename);
+ printf("offset = [%d]\n", offset);
+ printf("length = [%d]\n", length);
+
+ /* Load dictionary */
+ {
+ FILE* dictFp = fopen(dictFilename, "rb");
+ dictSize = (int)read_bin(dictFp, dict, DICTIONARY_BYTES);
+ fclose(dictFp);
+ }
+
+ /* compress */
+ {
+ FILE* inpFp = fopen(inpFilename, "rb");
+ FILE* outFp = fopen(lz4Filename, "wb");
+
+ printf("compress : %s -> %s\n", inpFilename, lz4Filename);
+ test_compress(outFp, inpFp, dict, dictSize);
+ printf("compress : done\n");
+
+ fclose(outFp);
+ fclose(inpFp);
+ }
+
+ /* decompress */
+ {
+ FILE* inpFp = fopen(lz4Filename, "rb");
+ FILE* outFp = fopen(decFilename, "wb");
+
+ printf("decompress : %s -> %s\n", lz4Filename, decFilename);
+ test_decompress(outFp, inpFp, dict, DICTIONARY_BYTES, offset, length);
+ printf("decompress : done\n");
+
+ fclose(outFp);
+ fclose(inpFp);
+ }
+
+ /* verify */
+ {
+ FILE* inpFp = fopen(inpFilename, "rb");
+ FILE* decFp = fopen(decFilename, "rb");
+ seek_bin(inpFp, offset, SEEK_SET);
+
+ printf("verify : %s <-> %s\n", inpFilename, decFilename);
+ const int cmp = compare(inpFp, decFp, length);
+ if(0 == cmp) {
+ printf("verify : OK\n");
+ } else {
+ printf("verify : NG\n");
+ }
+
+ fclose(decFp);
+ fclose(inpFp);
+ }
+
+ return 0;
+}
diff --git a/examples/dictionaryRandomAccess.md b/examples/dictionaryRandomAccess.md
new file mode 100644
index 0000000..53d825d
--- /dev/null
+++ b/examples/dictionaryRandomAccess.md
@@ -0,0 +1,67 @@
+# LZ4 API Example : Dictionary Random Access
+
+`dictionaryRandomAccess.c` is an LZ4 API example which implements dictionary compression and random access decompression.
+
+Please note that the output file is not compatible with lz4frame and is platform dependent.
+
+
+## What's the point of this example ?
+
+ - Dictionary-based compression for homogeneous files.
+ - Random access to compressed blocks.
+
+
+## How the compression works
+
+The compressor reads the dictionary from a file and uses it as the history for each block.
+This keeps each block independent of the others, while the shared dictionary maintains the compression ratio.
+
+```
+ Dictionary
+ +
+ |
+ v
+ +---------+
+ | Block#1 |
+ +----+----+
+ |
+ v
+ {Out#1}
+
+
+ Dictionary
+ +
+ |
+ v
+ +---------+
+ | Block#2 |
+ +----+----+
+ |
+ v
+ {Out#2}
+```
+
+After writing the magic bytes `TEST` and then the compressed blocks, write out the jump table.
+The last 4 bytes are an integer holding the number of entries in the jump table, which is one more than the number of blocks.
+If there are `N` blocks, then just before that final integer come `N + 1` 4-byte integers containing the offsets at the beginning and end of each block.
+For simplicity, let `Offset#K` be the total number of bytes written after `Block#K`, *including* the magic bytes.
+
+```
++------+---------+ +---------+---+----------+ +----------+-----+
+| TEST | Block#1 | ... | Block#N | 4 | Offset#1 | ... | Offset#N | N+1 |
++------+---------+ +---------+---+----------+ +----------+-----+
+```
+
+## How the decompression works
+
+Decompression works in the reverse order, starting from the jump table at the end of the file.
+
+ - Seek to the last 4 bytes of the file and read the number of offsets.
+ - Read each offset into an array.
+ - Seek to the first block containing data we want to read.
+ We know where to look because we know each block contains a fixed amount of uncompressed data, except possibly the last.
+ - Decompress it and write what data we need from it to the file.
+ - Read the next block.
+ - Decompress it and write that block (or the part of it still needed) to the file.
+
+Continue this procedure until all of the required data has been read.
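
The compression scheme described in `dictionaryRandomAccess.md` boils down to the loop below: a condensed sketch of `test_compress()` with the jump-table bookkeeping and error reporting stripped out. `compressBlocks` is only an illustrative name, not part of the committed example.

```c
#include <stdio.h>
#include "lz4.h"

#define BLOCK_BYTES 1024

/* Compress inpFp into outFp as independent BLOCK_BYTES-sized blocks.
 * Reloading the dictionary before every block discards the previous
 * block's history, so each block can later be decompressed on its own. */
static int compressBlocks(FILE* outFp, FILE* inpFp, const char* dict, int dictSize)
{
    LZ4_stream_t stream;
    LZ4_resetStream(&stream);
    for (;;) {
        char inp[BLOCK_BYTES];
        char cmp[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
        const int inpBytes = (int) fread(inp, 1, sizeof(inp), inpFp);
        if (inpBytes == 0) break;                        /* end of input */

        /* Forget previously compressed data; history = dictionary only */
        LZ4_loadDict(&stream, dict, dictSize);
        {
            const int cmpBytes = LZ4_compress_fast_continue(
                &stream, inp, cmp, inpBytes, (int) sizeof(cmp), 1);
            if (cmpBytes <= 0) return 1;                 /* compression error */
            if (fwrite(cmp, 1, (size_t) cmpBytes, outFp) != (size_t) cmpBytes)
                return 1;                                /* write error */
        }
    }
    return ferror(inpFp) ? 1 : 0;
}
```

Because no block references its neighbours, the compression ratio depends almost entirely on how well the dictionary matches the data, which is why this approach suits collections of small, homogeneous records.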
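The jump table trailer is plain native-endian `int`s appended after the last block (one reason the format is platform dependent). Below is a minimal sketch of writing it, assuming the offsets `Offset#0 .. Offset#N` have already been collected in an array the way `test_compress()` collects them, with the first entry equal to 4 (the size of the magic bytes); `writeJumpTable` is a hypothetical helper.

```c
#include <stdio.h>

/* Append the jump table: Offset#0 through Offset#N, followed by the
 * number of table entries (N + 1) as the final 4 bytes of the file. */
static int writeJumpTable(FILE* outFp, const int* offsets, int numBlocks)
{
    const int numOffsets = numBlocks + 1;
    if (fwrite(offsets, sizeof(int), (size_t) numOffsets, outFp) != (size_t) numOffsets)
        return 1;                                        /* write error */
    if (fwrite(&numOffsets, sizeof(int), 1, outFp) != 1)
        return 1;
    return 0;
}
```

Storing the count last lets a reader find the whole table with a single seek from the end of the file, without knowing the number of blocks in advance.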
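The random access read path, condensed from `test_decompress()`: locate the jump table via the trailing count, read only the offsets that cover the requested range, seek to the first relevant block, and decompress each block with the dictionary as its only history. Error handling is collapsed into `exit()` calls and the magic-byte check is left out; `randomAccessDecompress` is an illustrative name.

```c
#include <stdio.h>
#include <stdlib.h>
#include "lz4.h"

#define BLOCK_BYTES 1024
#define MAX_BLOCKS  1024

/* Write `length` uncompressed bytes starting at position `offset` to outFp,
 * touching only the compressed blocks that contain that range. */
static void randomAccessDecompress(FILE* outFp, FILE* inpFp,
                                   const char* dict, int dictSize,
                                   int offset, int length)
{
    LZ4_streamDecode_t decode;
    int offsets[MAX_BLOCKS + 1];
    int currentBlock = offset / BLOCK_BYTES;
    const int endBlock = (offset + length - 1) / BLOCK_BYTES + 1;
    int numOffsets, block;

    if (length <= 0) return;

    /* 1. The last 4 bytes hold the number of entries in the jump table */
    if (fseek(inpFp, -4, SEEK_END) != 0) exit(1);
    if (fread(&numOffsets, sizeof(numOffsets), 1, inpFp) != 1) exit(1);
    if (numOffsets <= endBlock || numOffsets > MAX_BLOCKS + 1) exit(1);

    /* 2. Read Offset#0 .. Offset#endBlock from the table */
    if (fseek(inpFp, -4L * (numOffsets + 1), SEEK_END) != 0) exit(1);
    for (block = 0; block <= endBlock; ++block)
        if (fread(&offsets[block], sizeof(int), 1, inpFp) != 1) exit(1);

    /* 3. Jump straight to the first block holding wanted data */
    if (fseek(inpFp, offsets[currentBlock], SEEK_SET) != 0) exit(1);
    offset %= BLOCK_BYTES;                     /* position inside that block */

    /* 4. Decompress block by block, copying out only the requested range */
    for (; currentBlock < endBlock; ++currentBlock) {
        char cmp[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
        char dec[BLOCK_BYTES];
        const int cmpBytes = offsets[currentBlock + 1] - offsets[currentBlock];
        if (cmpBytes <= 0 || cmpBytes > (int) sizeof(cmp)) exit(1);
        if (fread(cmp, 1, (size_t) cmpBytes, inpFp) != (size_t) cmpBytes) exit(1);

        /* Each block was compressed against the dictionary alone */
        LZ4_setStreamDecode(&decode, dict, dictSize);
        {
            const int decBytes = LZ4_decompress_safe_continue(
                &decode, cmp, dec, cmpBytes, BLOCK_BYTES);
            int want;
            if (decBytes <= 0) exit(1);
            want = length < decBytes - offset ? length : decBytes - offset;
            if (want > 0) fwrite(dec + offset, 1, (size_t) want, outFp);
            offset = 0;                        /* only the first block is partial */
            length -= want;
        }
    }
}
```

Only `endBlock - currentBlock` blocks are ever read or decompressed, so the cost of a read is proportional to the requested range rather than to the size of the whole file.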