diff options
author | Alexey Tourbin <alexey.tourbin@gmail.com> | 2018-04-23 05:37:44 (GMT) |
---|---|---|
committer | Alexey Tourbin <alexey.tourbin@gmail.com> | 2018-04-25 10:18:06 (GMT) |
commit | b4eda8d08f307dfc3cbd0d06baea4e6c581b70de (patch) | |
tree | 4f983e141b3c3651580272408d394161ad19f244 /lib | |
parent | 62d7cdcc741480842a0c217df7cb26ad3946ab32 (diff) | |
download | lz4-b4eda8d08f307dfc3cbd0d06baea4e6c581b70de.zip lz4-b4eda8d08f307dfc3cbd0d06baea4e6c581b70de.tar.gz lz4-b4eda8d08f307dfc3cbd0d06baea4e6c581b70de.tar.bz2 |
lz4.c: refactor the decoding routines
I noticed that LZ4_decompress_generic is sometimes instantiated with
identical set of parameters, or (what's worse) with a subtly different
sets of parameters. For example, LZ4_decompress_fast_withPrefix64k is
instantiated as follows:
return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize,
full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
while the equivalent withPrefix64k call in LZ4_decompress_usingDict_generic
passes 0 for the last argument instead of 64 KB. It turns out that there
is no difference in this case: if you change 64 KB to 0 KB in
LZ4_decompress_fast_withPrefix64k, you get the same binary code.
Moreover, because it's been clarified that LZ4_decompress_fast doesn't
check match offsets, it is now obvious that both of these fast/withPrefix64k
instantiations are simply redundant. Exactly because LZ4_decompress_fast
doesn't check offsets, it serves well with any prefixed dictionary.
There's a difference, though, with LZ4_decompress_safe_withPrefix64k.
It also passes 64 KB as the last argument, and if you change that to 0,
as in LZ4_decompress_usingDict_generic, you get a completely different
binary code. It seems that passing 0 enables offset checking:
const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB)));
However, the resulting code seems to run a bit faster. How come
enabling extra checks can make the code run faster? Curiouser and
curiouser! This needs extra study. Currently I take the view that
the dictSize should be set to non-zero when nothing else will do,
i.e. when passing the external dictionary via dictStart. Otherwise,
lowPrefix betrays just enough information about the dictionary.
* * *
Anyway, with this change, I instantiate all the necessary cases as
functions with distinctive names, which also take fewer arguments and
are therefore less error-prone. I also make the functions non-inline.
(The compiler won't inline the functions because they are used more than
once. Hence I attach LZ4_FORCE_O2_GCC_PPC64LE to the instances while
removing from the callers.) The number of instances is now is reduced
from 18 (safe+fast+partial+4*continue+4*prefix+4*dict+2*prefix64+forceExtDict)
down to 7 (safe+fast+partial+2*prefix+2*dict). The size of the code is
not the only issue here. Separate helper function are much more
amenable to profile-guided optimization: it is enough to profile only
a few basic functions, while the other less-often used functions, such
as LZ4_decompress_*_continue, will benefit automatically.
This is the list of LZ4_decompress* functions in liblz4.so, sorted by size.
Exported functions are marked with a capital T.
$ nm -S lib/liblz4.so |grep -wi T |grep LZ4_decompress |sort -k2
0000000000016260 0000000000000005 T LZ4_decompress_fast_withPrefix64k
0000000000016dc0 0000000000000025 T LZ4_decompress_fast_usingDict
0000000000016d80 0000000000000040 T LZ4_decompress_safe_usingDict
0000000000016d10 000000000000006b T LZ4_decompress_fast_continue
0000000000016c70 000000000000009f T LZ4_decompress_safe_continue
00000000000156c0 000000000000059c T LZ4_decompress_fast
0000000000014a90 00000000000005fa T LZ4_decompress_safe
0000000000015c60 00000000000005fa T LZ4_decompress_safe_withPrefix64k
0000000000002280 00000000000005fa t LZ4_decompress_safe_withSmallPrefix
0000000000015090 000000000000062f T LZ4_decompress_safe_partial
0000000000002880 00000000000008ea t LZ4_decompress_fast_extDict
0000000000016270 0000000000000993 t LZ4_decompress_safe_forceExtDict
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lz4.c | 132 |
1 files changed, 79 insertions, 53 deletions
@@ -92,6 +92,7 @@ * Dependency **************************************/ #define LZ4_STATIC_LINKING_ONLY +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ #include "lz4.h" /* see also "memory routines" below */ @@ -1666,6 +1667,8 @@ _output_error: } +/*===== Instantiate the API decoding functions. =====*/ + LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { @@ -1687,9 +1690,63 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, - (BYTE*)(dest - 64 KB), NULL, 64 KB); + (BYTE*)dest - 64 KB, NULL, 0); +} + +/*===== Instantiate a few more decoding cases, used more than once. =====*/ + +LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */ +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, full, 0, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/* Another obsolete API function, paired with the previous one. */ +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +{ + /* LZ4_decompress_fast doesn't validate match offsets, + * and thus serves well with any prefixed dictionary. */ + return LZ4_decompress_fast(source, dest, originalSize); +} + +LZ4_FORCE_O2_GCC_PPC64LE +static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, full, 0, noDict, + (BYTE*)dest-dictSize, NULL, 0); } +LZ4_FORCE_INLINE +int LZ4_decompress_safe_withPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t dictSize) +{ + if (dictSize >= 64 KB - 1) + return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize); +} + +LZ4_FORCE_O2_GCC_PPC64LE /* Exported under another name, for tests/fullbench.c */ +#define LZ4_decompress_safe_extDict LZ4_decompress_safe_forceExtDict +int LZ4_decompress_safe_extDict(const char* source, char* dest, int compressedSize, int maxOutputSize, + const char* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + endOnInputSize, full, 0, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2_GCC_PPC64LE +static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, + const char* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, 0, originalSize, + endOnOutputSize, full, 0, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} /*===== streaming decompression functions =====*/ @@ -1730,25 +1787,26 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti If it's not possible, save the relevant part of decoded data into a safe buffer, and indicate where it stands using LZ4_setStreamDecode() */ -LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; int result; - if (lz4sd->prefixEnd == (BYTE*)dest) { - result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (lz4sd->prefixSize == 0) { + result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (result <= 0) return result; + lz4sd->prefixSize = result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + result = LZ4_decompress_safe_withPrefix(source, dest, compressedSize, maxOutputSize, lz4sd->prefixSize); if (result <= 0) return result; lz4sd->prefixSize += result; lz4sd->prefixEnd += result; } else { lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); + result = LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, + (const char*)lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = result; lz4sd->prefixEnd = (BYTE*)dest + result; @@ -1757,25 +1815,21 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch return result; } -LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; int result; - if (lz4sd->prefixEnd == (BYTE*)dest) { - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (lz4sd->prefixSize == 0 || lz4sd->prefixEnd == (BYTE*)dest) { + result = LZ4_decompress_fast(source, dest, originalSize); if (result <= 0) return result; lz4sd->prefixSize += originalSize; lz4sd->prefixEnd += originalSize; } else { lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize); + result = LZ4_decompress_fast_extDict(source, dest, originalSize, + (const char*)lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = originalSize; lz4sd->prefixEnd = (BYTE*)dest + originalSize; @@ -1792,36 +1846,20 @@ Advanced decoding functions : the dictionary must be explicitly provided within parameters */ -LZ4_FORCE_O2_GCC_PPC64LE -LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize) -{ - if (dictSize==0) - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0); - if (dictStart+dictSize == dest) { - if (dictSize >= (int)(64 KB - 1)) - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0); - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0); - } - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); -} - -LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) { - return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize); + if (dictSize==0) + return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) + return LZ4_decompress_safe_withPrefix(source, dest, compressedSize, maxOutputSize, dictSize); + return LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize); } -LZ4_FORCE_O2_GCC_PPC64LE int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) { - return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize); -} - -/* debug function */ -LZ4_FORCE_O2_GCC_PPC64LE -int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize); + if (dictSize==0 || dictStart+dictSize == dest) + return LZ4_decompress_fast(source, dest, originalSize); + return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, dictSize); } @@ -1892,16 +1930,4 @@ char* LZ4_slideInputBuffer (void* state) return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; } -/* Obsolete streaming decompression functions */ - -int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) -{ - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); -} - -int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) -{ - return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); -} - #endif /* LZ4_COMMONDEFS_ONLY */ |