From 45a357fd1704e9c6d2d8037277bda62e8c86308e Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Fri, 13 Mar 2015 02:24:08 +0100 Subject: Improved sparse file support --- lib/lz4.c | 3 +-- lib/lz4.h | 5 +++-- lib/lz4frame.c | 4 ++-- lib/lz4frame.h | 3 +-- lib/lz4frame_static.h | 3 +-- lib/lz4hc.c | 7 ++++--- lib/lz4hc.h | 6 +++--- lib/xxhash.c | 3 +-- programs/Makefile | 12 ++++++------ programs/bench.c | 4 ++-- programs/bench.h | 4 ++-- programs/datagen.c | 8 ++++---- programs/lz4cli.c | 2 +- programs/lz4io.c | 42 +++++++++++++++++++++++++++++++++++------- programs/lz4io.h | 2 +- 15 files changed, 67 insertions(+), 41 deletions(-) mode change 100644 => 100755 programs/datagen.c diff --git a/lib/lz4.c b/lib/lz4.c index f21270a..a651843 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -27,8 +27,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4 - - LZ4 source mirror : https://github.com/Cyan4973/lz4 + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ diff --git a/lib/lz4.h b/lib/lz4.h index 7778caa..df1d839 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -1,7 +1,8 @@ /* LZ4 - Fast LZ compression algorithm Header File - Copyright (C) 2011-2014, Yann Collet. + Copyright (C) 2011-2015, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without @@ -28,7 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ #pragma once diff --git a/lib/lz4frame.c b/lib/lz4frame.c index 6feb5dc..7153871 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -1,6 +1,6 @@ /* LZ4 auto-framing library -Copyright (C) 2011-2014, Yann Collet. +Copyright (C) 2011-2015, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -28,7 +28,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : -- LZ4 source repository : http://code.google.com/p/lz4/ +- LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ diff --git a/lib/lz4frame.h b/lib/lz4frame.h index f52ed2f..61e461b 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -28,8 +28,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ - - LZ4 source mirror : https://github.com/Cyan4973/lz4 + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ diff --git a/lib/lz4frame_static.h b/lib/lz4frame_static.h index cde8186..4c34c6c 100644 --- a/lib/lz4frame_static.h +++ b/lib/lz4frame_static.h @@ -29,8 +29,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ - - LZ4 source mirror : https://github.com/Cyan4973/lz4 + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 5549969..357fa96 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -1,6 +1,7 @@ /* LZ4 HC - High Compression Mode of LZ4 -Copyright (C) 2011-2014, Yann Collet. +Copyright (C) 2011-2015, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without @@ -27,8 +28,8 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : -- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html -- LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ diff --git a/lib/lz4hc.h b/lib/lz4hc.h index ce813ab..eb72051 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -1,7 +1,7 @@ /* LZ4 HC - High Compression Mode of LZ4 Header File - Copyright (C) 2011-2014, Yann Collet. + Copyright (C) 2011-2015, Yann Collet. BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) Redistribution and use in source and binary forms, with or without @@ -28,8 +28,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ #pragma once diff --git a/lib/xxhash.c b/lib/xxhash.c index 093564c..aca1e0a 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -28,8 +28,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. You can contact the author at : -- xxHash source repository : http://code.google.com/p/xxhash/ -- xxHash source mirror : https://github.com/Cyan4973/xxHash +- xxHash source repository : https://github.com/Cyan4973/xxHash - public discussion board : https://groups.google.com/forum/#!forum/lz4c */ diff --git a/programs/Makefile b/programs/Makefile index b9bb5b3..f6fbd68 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -146,8 +146,7 @@ test-lz4: lz4 datagen ./datagen -g17M | ./lz4 -9v | ./lz4 -dq > $(VOID) ./datagen -g256MB | ./lz4 -vqB4D | ./lz4 -vdq > $(VOID) ./datagen -g6GB | ./lz4 -vqB5D | ./lz4 -vdq > $(VOID) -# test frame concatenation with null-length frame - @echo *** test frame concatenation *** + @echo ---- test frame concatenation ---- @echo -n > empty.test @echo hi > nonempty.test cat nonempty.test empty.test nonempty.test > orig.test @@ -158,15 +157,16 @@ test-lz4: lz4 datagen sdiff orig.test result.test @rm *.test @echo frame concatenation test completed -# test frame concatenation with null-length frame - @echo *** test multiple input files *** + @echo ---- test multiple input files ---- @./datagen -s1 > file1 @./datagen -s2 > file2 @./datagen -s3 > file3 ./lz4 -f -m file1 file2 file3 + ls -l file* @rm file1 file2 file3 file1.lz4 file2.lz4 file3.lz4 - @echo *** test sparse file support *** - ./datagen -g50M -P100 | ./lz4 -B4 | ./lz4 -dvX > tmp + @echo ---- test sparse file support ---- + ./datagen -g50M -P100 | ./lz4 -B4D | ./lz4 -dvX > tmp + ./datagen -g50M -P100 | diff -s - tmp ls -ls tmp @rm tmp diff --git a/programs/bench.c b/programs/bench.c index 0ed7fcb..b632314 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -19,8 +19,8 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /************************************** diff --git a/programs/bench.h b/programs/bench.h index 3231727..c04fb17 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -17,8 +17,8 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ - - LZ4 public forum : https://group.google.com/forum/#!forum/lz4c + - LZ4 source repository : https://github.com/Cyan4973/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ #pragma once diff --git a/programs/datagen.c b/programs/datagen.c old mode 100644 new mode 100755 index 743691e..bccb21e --- a/programs/datagen.c +++ b/programs/datagen.c @@ -137,7 +137,7 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match while (matchProba >= 1.0) { size_t size0 = RDG_rand(seed) & 3; - size0 = 1U << (16 + size0 * 2); + size0 = (size_t)1 << (16 + size0 * 2); size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/ if (buffSize < pos + size0) { @@ -159,11 +159,11 @@ void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double match if (RDG_RAND15BITS < matchProba32) { /* Copy (within 32K) */ - int match; - U32 d; + size_t match; + size_t d; int length = RDG_RANDLENGTH + 4; U32 offset = RDG_RAND15BITS + 1; - if (offset > pos) offset = pos; + if (offset > pos) offset = (U32)pos; match = pos - offset; d = pos + length; if (d > buffSize) d = buffSize; diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 7ecfa93..28b7fd6 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -19,7 +19,7 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* diff --git a/programs/lz4io.c b/programs/lz4io.c index 6e977f1..ebd270e 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -1,6 +1,7 @@ /* LZ4io.c - LZ4 File/Stream Interface Copyright (C) Yann Collet 2011-2015 + GPL v2 License This program is free software; you can redistribute it and/or modify @@ -18,7 +19,7 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* @@ -558,6 +559,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) LZ4F_decompressionContext_t ctx; LZ4F_errorCode_t errorCode; LZ4F_frameInfo_t frameInfo; + unsigned storedSkips = 0; /* init */ errorCode = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION); @@ -608,13 +610,39 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) int seekResult; for (checked=0; (checked < toCheckLength) && (sPtr[checked] == 0); checked++) ; skippedLength = checked * sizeof(size_t); - if (skippedLength == decodedBytes) skippedLength--; /* ensure 1 byte at least is written */ - seekResult = fseek(foutput, skippedLength, SEEK_CUR); - if (seekResult != 0) EXM_THROW(68, "Skip error (sparse file)\n"); - decodedBytes -= skippedLength; + storedSkips += (unsigned)skippedLength; + if (storedSkips > 2 GB) + { + seekResult = fseek(foutput, 2 GB, SEEK_CUR); + if (seekResult != 0) EXM_THROW(68, "2 GB skip error (sparse file)\n"); + storedSkips -= 2 GB; + } + if (skippedLength != decodedBytes) + { + seekResult = fseek(foutput, storedSkips, SEEK_CUR); + if (seekResult != 0) EXM_THROW(68, "Skip error (sparse file)\n"); + storedSkips = 0; + decodedBytes -= skippedLength; + sizeCheck = fwrite(((char*)outBuff) + skippedLength, 1, decodedBytes, foutput); + if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block\n"); + } } - sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); - if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block\n"); + else + { + sizeCheck = fwrite(outBuff, 1, decodedBytes, foutput); + if (sizeCheck != decodedBytes) EXM_THROW(68, "Write error : cannot write decoded block\n"); + } + } + + if ((g_sparseFileSupport) && (storedSkips>0)) + { + int seekResult; + storedSkips --; + seekResult = fseek(foutput, storedSkips, SEEK_CUR); + if (seekResult != 0) EXM_THROW(69, "Skip error (sparse file)\n"); + memset(outBuff, 0, 1); + sizeCheck = fwrite(outBuff, 1, 1, foutput); + if (sizeCheck != 1) EXM_THROW(69, "Write error : cannot write decoded block\n"); } /* Free */ diff --git a/programs/lz4io.h b/programs/lz4io.h index 12c9a6a..2441174 100644 --- a/programs/lz4io.h +++ b/programs/lz4io.h @@ -18,7 +18,7 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 source repository : https://github.com/Cyan4973/lz4 - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c */ /* -- cgit v0.12