Merge branch 'dev' of github.com:Cyan4973/lz4 into dev
This commit is contained in:
commit
1aa997b594
35
lib/lz4.c
35
lib/lz4.c
@ -117,6 +117,28 @@
|
||||
# endif /* _MSC_VER */
|
||||
#endif /* LZ4_FORCE_INLINE */
|
||||
|
||||
/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
|
||||
* GCC on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
|
||||
* together with a simple 8-byte copy loop as a fall-back path.
|
||||
* However, this optimization hurts the decompression speed by >30%,
|
||||
* because the execution does not go to the optimized loop
|
||||
* for typical compressible data, and all of the preamble checks
|
||||
* before going to the fall-back path become useless overhead.
|
||||
* This optimization happens only with the -O3 flag, and -O2 generates
|
||||
* a simple 8-byte copy loop.
|
||||
* With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
|
||||
* functions are annotated with __attribute__((optimize("O2"))),
|
||||
* and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
|
||||
* of LZ4_wildCopy does not affect the compression speed.
|
||||
*/
|
||||
/* Per-function optimization overrides for GCC on little-endian PPC64.
 *
 * LZ4_FORCE_O2_GCC_PPC64LE        : prefix for functions that must be compiled at -O2.
 * LZ4_FORCE_O2_INLINE_GCC_PPC64LE : same, and additionally force-inlined
 *                                   (expands to plain `static` elsewhere).
 *
 * clang also defines __GNUC__ but does not implement the GCC-specific
 * `optimize` function attribute (it only warns and ignores it), so clang is
 * explicitly routed to the fall-back definitions.
 */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
#else
#  define LZ4_FORCE_O2_GCC_PPC64LE
#  define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
#endif
|
||||
|
||||
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
|
||||
# define expect(expr,value) (__builtin_expect ((expr),(value)) )
|
||||
#else
|
||||
@ -253,7 +275,8 @@ static void LZ4_copy8(void* dst, const void* src)
|
||||
}
|
||||
|
||||
/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
|
||||
static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
|
||||
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
|
||||
void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
|
||||
{
|
||||
BYTE* d = (BYTE*)dstPtr;
|
||||
const BYTE* s = (const BYTE*)srcPtr;
|
||||
@ -1112,6 +1135,7 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
|
||||
* Note that it is important for performance that this function really get inlined,
|
||||
* in order to remove useless branches during compilation optimization.
|
||||
*/
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
LZ4_FORCE_INLINE int LZ4_decompress_generic(
|
||||
const char* const src,
|
||||
char* const dst,
|
||||
@ -1272,16 +1296,19 @@ _output_error:
|
||||
}
|
||||
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
|
||||
}
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
|
||||
}
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
|
||||
@ -1327,6 +1354,7 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
|
||||
If it's not possible, save the relevant part of decoded data into a safe buffer,
|
||||
and indicate where it stands using LZ4_setStreamDecode()
|
||||
*/
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
|
||||
{
|
||||
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
|
||||
@ -1353,6 +1381,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
|
||||
return result;
|
||||
}
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
|
||||
{
|
||||
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
|
||||
@ -1387,6 +1416,7 @@ Advanced decoding functions :
|
||||
the dictionary must be explicitly provided within parameters
|
||||
*/
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
|
||||
{
|
||||
if (dictSize==0)
|
||||
@ -1399,17 +1429,20 @@ LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char*
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
|
||||
}
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
|
||||
{
|
||||
return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
|
||||
}
|
||||
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
|
||||
{
|
||||
return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
|
||||
}
|
||||
|
||||
/* debug function */
|
||||
LZ4_FORCE_O2_GCC_PPC64LE
|
||||
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
|
||||
{
|
||||
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
|
||||
|
Loading…
Reference in New Issue
Block a user