lz4.c: refactor the decoding routines
I noticed that LZ4_decompress_generic is sometimes instantiated with an identical set of parameters, or (what's worse) with subtly different sets of parameters. For example, LZ4_decompress_fast_withPrefix64k is instantiated as follows: return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB); while the equivalent withPrefix64k call in LZ4_decompress_usingDict_generic passes 0 for the last argument instead of 64 KB. It turns out that there is no difference in this case: if you change 64 KB to 0 KB in LZ4_decompress_fast_withPrefix64k, you get the same binary code. Moreover, because it's been clarified that LZ4_decompress_fast doesn't check match offsets, it is now obvious that both of these fast/withPrefix64k instantiations are simply redundant. Exactly because LZ4_decompress_fast doesn't check offsets, it serves well with any prefixed dictionary. There's a difference, though, with LZ4_decompress_safe_withPrefix64k. It also passes 64 KB as the last argument, and if you change that to 0, as in LZ4_decompress_usingDict_generic, you get completely different binary code. It seems that passing 0 enables offset checking: const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); However, the resulting code seems to run a bit faster. How come enabling extra checks can make the code run faster? Curiouser and curiouser! This needs extra study. Currently I take the view that the dictSize should be set to non-zero when nothing else will do, i.e. when passing the external dictionary via dictStart. Otherwise, lowPrefix betrays just enough information about the dictionary. * * * Anyway, with this change, I instantiate all the necessary cases as functions with distinctive names, which also take fewer arguments and are therefore less error-prone. I also make the functions non-inline. (The compiler won't inline the functions because they are used more than once. 
Hence I attach LZ4_FORCE_O2_GCC_PPC64LE to the instances while removing it from the callers.) The number of instances is now reduced from 18 (safe+fast+partial+4*continue+4*prefix+4*dict+2*prefix64+forceExtDict) down to 7 (safe+fast+partial+2*prefix+2*dict). The size of the code is not the only issue here. Separate helper functions are much more amenable to profile-guided optimization: it is enough to profile only a few basic functions, while the other less-often used functions, such as LZ4_decompress_*_continue, will benefit automatically. This is the list of LZ4_decompress* functions in liblz4.so, sorted by size. Exported functions are marked with a capital T. $ nm -S lib/liblz4.so |grep -wi T |grep LZ4_decompress |sort -k2 0000000000016260 0000000000000005 T LZ4_decompress_fast_withPrefix64k 0000000000016dc0 0000000000000025 T LZ4_decompress_fast_usingDict 0000000000016d80 0000000000000040 T LZ4_decompress_safe_usingDict 0000000000016d10 000000000000006b T LZ4_decompress_fast_continue 0000000000016c70 000000000000009f T LZ4_decompress_safe_continue 00000000000156c0 000000000000059c T LZ4_decompress_fast 0000000000014a90 00000000000005fa T LZ4_decompress_safe 0000000000015c60 00000000000005fa T LZ4_decompress_safe_withPrefix64k 0000000000002280 00000000000005fa t LZ4_decompress_safe_withSmallPrefix 0000000000015090 000000000000062f T LZ4_decompress_safe_partial 0000000000002880 00000000000008ea t LZ4_decompress_fast_extDict 0000000000016270 0000000000000993 t LZ4_decompress_safe_forceExtDict
This commit is contained in:
parent
62d7cdcc74
commit
b4eda8d08f
132
lib/lz4.c
132
lib/lz4.c
@ -92,6 +92,7 @@
|
||||
* Dependency
|
||||
**************************************/
|
||||
#define LZ4_STATIC_LINKING_ONLY
|
||||
#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
|
||||
#include "lz4.h"
|
||||
/* see also "memory routines" below */
|
||||
|
||||
@ -1666,6 +1667,8 @@ _output_error:
|
||||
}
|
||||
|
||||
|
||||
/*===== Instantiate the API decoding functions. =====*/
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
|
||||
{
|
||||
@ -1687,9 +1690,63 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, 0, originalSize,
|
||||
endOnOutputSize, full, 0, withPrefix64k,
|
||||
(BYTE*)(dest - 64 KB), NULL, 64 KB);
|
||||
(BYTE*)dest - 64 KB, NULL, 0);
|
||||
}
|
||||
|
||||
/*===== Instantiate a few more decoding cases, used more than once. =====*/
|
||||
|
||||
/* LZ4_decompress_safe_withPrefix64k() :
 * Thin wrapper around LZ4_decompress_generic(), instantiated with the
 * endOnInputSize / full / withPrefix64k settings.
 * The prefix is conveyed solely through the pointer argument (dest - 64 KB);
 * no external dictionary is supplied (NULL) and the final size argument is 0.
 * Returns the value produced by LZ4_decompress_generic() unchanged. */
LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
|
||||
int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
|
||||
endOnInputSize, full, 0, withPrefix64k,
|
||||
(BYTE*)dest - 64 KB, NULL, 0);
|
||||
}
|
||||
|
||||
/* LZ4_decompress_fast_withPrefix64k() :
 * Forwards its three arguments to LZ4_decompress_fast() and returns that
 * function's result as-is; no dedicated decoder instance is created here. */
/* Another obsolete API function, paired with the previous one. */
|
||||
int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
|
||||
{
|
||||
/* LZ4_decompress_fast doesn't validate match offsets,
|
||||
* and thus serves well with any prefixed dictionary. */
|
||||
return LZ4_decompress_fast(source, dest, originalSize);
|
||||
}
|
||||
|
||||
/* LZ4_decompress_safe_withSmallPrefix() :
 * File-local wrapper around LZ4_decompress_generic(), instantiated with the
 * endOnInputSize / full / noDict settings.
 * @dictSize is used only to compute the pointer argument (dest - dictSize);
 * it is NOT forwarded as the final size argument, which is passed as 0,
 * and no external dictionary is supplied (NULL).
 * Returns the value produced by LZ4_decompress_generic() unchanged. */
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
|
||||
size_t dictSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
|
||||
endOnInputSize, full, 0, noDict,
|
||||
(BYTE*)dest-dictSize, NULL, 0);
|
||||
}
|
||||
|
||||
/* LZ4_decompress_safe_withPrefix() :
 * Dispatcher that picks a decoder based on @dictSize :
 * - dictSize >= 64 KB - 1 : LZ4_decompress_safe_withPrefix64k()
 *   (dictSize itself is not passed on in that case);
 * - otherwise             : LZ4_decompress_safe_withSmallPrefix(),
 *   which receives the exact dictSize.
 * Returns the selected function's result unchanged. */
LZ4_FORCE_INLINE
|
||||
int LZ4_decompress_safe_withPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
|
||||
size_t dictSize)
|
||||
{
|
||||
if (dictSize >= 64 KB - 1)
|
||||
return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
|
||||
return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize);
|
||||
}
|
||||
|
||||
/* LZ4_decompress_safe_extDict() :
 * Wrapper around LZ4_decompress_generic(), instantiated with the
 * endOnInputSize / full / usingExtDict settings.
 * The pointer argument is dest itself; @dictStart and @dictSize are
 * forwarded as the external dictionary and its size.
 * The #define below makes the preprocessor rename this definition (and every
 * later use of the name) to LZ4_decompress_safe_forceExtDict, so that is the
 * symbol name the function is actually defined under.
 * Returns the value produced by LZ4_decompress_generic() unchanged. */
LZ4_FORCE_O2_GCC_PPC64LE /* Exported under another name, for tests/fullbench.c */
|
||||
#define LZ4_decompress_safe_extDict LZ4_decompress_safe_forceExtDict
|
||||
int LZ4_decompress_safe_extDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
|
||||
const char* dictStart, size_t dictSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
|
||||
endOnInputSize, full, 0, usingExtDict,
|
||||
(BYTE*)dest, (const BYTE*)dictStart, dictSize);
|
||||
}
|
||||
|
||||
/* LZ4_decompress_fast_extDict() :
 * File-local wrapper around LZ4_decompress_generic(), instantiated with the
 * endOnOutputSize / full / usingExtDict settings.
 * The third argument is passed as 0 and @originalSize as the fourth;
 * the pointer argument is dest itself, and @dictStart / @dictSize are
 * forwarded as the external dictionary and its size.
 * Returns the value produced by LZ4_decompress_generic() unchanged. */
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
|
||||
const char* dictStart, size_t dictSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, 0, originalSize,
|
||||
endOnOutputSize, full, 0, usingExtDict,
|
||||
(BYTE*)dest, (const BYTE*)dictStart, dictSize);
|
||||
}
|
||||
|
||||
/*===== streaming decompression functions =====*/
|
||||
|
||||
@ -1730,25 +1787,26 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
|
||||
If it's not possible, save the relevant part of decoded data into a safe buffer,
|
||||
and indicate where it stands using LZ4_setStreamDecode()
|
||||
*/
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
|
||||
{
|
||||
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
|
||||
int result;
|
||||
|
||||
if (lz4sd->prefixEnd == (BYTE*)dest) {
|
||||
result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
|
||||
endOnInputSize, full, 0,
|
||||
usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
|
||||
if (lz4sd->prefixSize == 0) {
|
||||
result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
|
||||
if (result <= 0) return result;
|
||||
lz4sd->prefixSize = result;
|
||||
lz4sd->prefixEnd = (BYTE*)dest + result;
|
||||
} else if (lz4sd->prefixEnd == (BYTE*)dest) {
|
||||
result = LZ4_decompress_safe_withPrefix(source, dest, compressedSize, maxOutputSize, lz4sd->prefixSize);
|
||||
if (result <= 0) return result;
|
||||
lz4sd->prefixSize += result;
|
||||
lz4sd->prefixEnd += result;
|
||||
} else {
|
||||
lz4sd->extDictSize = lz4sd->prefixSize;
|
||||
lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
|
||||
result = LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
|
||||
endOnInputSize, full, 0,
|
||||
usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
|
||||
result = LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize,
|
||||
(const char*)lz4sd->externalDict, lz4sd->extDictSize);
|
||||
if (result <= 0) return result;
|
||||
lz4sd->prefixSize = result;
|
||||
lz4sd->prefixEnd = (BYTE*)dest + result;
|
||||
@ -1757,25 +1815,21 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
|
||||
return result;
|
||||
}
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
|
||||
{
|
||||
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
|
||||
int result;
|
||||
|
||||
if (lz4sd->prefixEnd == (BYTE*)dest) {
|
||||
result = LZ4_decompress_generic(source, dest, 0, originalSize,
|
||||
endOnOutputSize, full, 0,
|
||||
usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
|
||||
if (lz4sd->prefixSize == 0 || lz4sd->prefixEnd == (BYTE*)dest) {
|
||||
result = LZ4_decompress_fast(source, dest, originalSize);
|
||||
if (result <= 0) return result;
|
||||
lz4sd->prefixSize += originalSize;
|
||||
lz4sd->prefixEnd += originalSize;
|
||||
} else {
|
||||
lz4sd->extDictSize = lz4sd->prefixSize;
|
||||
lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
|
||||
result = LZ4_decompress_generic(source, dest, 0, originalSize,
|
||||
endOnOutputSize, full, 0,
|
||||
usingExtDict, (BYTE*)dest, lz4sd->externalDict, lz4sd->extDictSize);
|
||||
result = LZ4_decompress_fast_extDict(source, dest, originalSize,
|
||||
(const char*)lz4sd->externalDict, lz4sd->extDictSize);
|
||||
if (result <= 0) return result;
|
||||
lz4sd->prefixSize = originalSize;
|
||||
lz4sd->prefixEnd = (BYTE*)dest + originalSize;
|
||||
@ -1792,36 +1846,20 @@ Advanced decoding functions :
|
||||
the dictionary must be explicitly provided within parameters
|
||||
*/
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
|
||||
{
|
||||
if (dictSize==0)
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest, NULL, 0);
|
||||
if (dictStart+dictSize == dest) {
|
||||
if (dictSize >= (int)(64 KB - 1))
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, withPrefix64k, (BYTE*)dest-64 KB, NULL, 0);
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, noDict, (BYTE*)dest-dictSize, NULL, 0);
|
||||
}
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
|
||||
}
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
|
||||
{
|
||||
return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
|
||||
if (dictSize==0)
|
||||
return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
|
||||
if (dictStart+dictSize == dest)
|
||||
return LZ4_decompress_safe_withPrefix(source, dest, compressedSize, maxOutputSize, dictSize);
|
||||
return LZ4_decompress_safe_extDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize);
|
||||
}
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
|
||||
{
|
||||
return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
|
||||
}
|
||||
|
||||
/* debug function */
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
|
||||
if (dictSize==0 || dictStart+dictSize == dest)
|
||||
return LZ4_decompress_fast(source, dest, originalSize);
|
||||
return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, dictSize);
|
||||
}
|
||||
|
||||
|
||||
@ -1892,16 +1930,4 @@ char* LZ4_slideInputBuffer (void* state)
|
||||
return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
|
||||
}
|
||||
|
||||
/* Obsolete streaming decompression functions */
|
||||
|
||||
int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
|
||||
}
|
||||
|
||||
int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)dest - 64 KB, NULL, 64 KB);
|
||||
}
|
||||
|
||||
#endif /* LZ4_COMMONDEFS_ONLY */
|
||||
|
Loading…
Reference in New Issue
Block a user