Merge branch 'dev' of github.com:Cyan4973/lz4 into dev

This commit is contained in:
Yann Collet 2017-10-14 18:49:06 -07:00
commit 1aa997b594

View File

@ -117,6 +117,28 @@
# endif /* _MSC_VER */
#endif /* LZ4_FORCE_INLINE */
/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
* Gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy,
* together with a simple 8-byte copy loop as a fall-back path.
* However, this optimization hurts the decompression speed by >30%,
* because the execution does not go to the optimized loop
* for typical compressible data, and all of the preamble checks
* before going to the fall-back path become useless overhead.
* This optimization happens only with the -O3 flag, and -O2 generates
* a simple 8-byte copy loop.
* With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy
* functions are annotated with __attribute__((optimize("O2"))),
* and also LZ4_wildCopy is forcibly inlined, so that the O2 attribute
* of LZ4_wildCopy does not affect the compression speed.
*/
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__)
# define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
#else
# define LZ4_FORCE_O2_GCC_PPC64LE
# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
#endif
#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
# define expect(expr,value) (__builtin_expect ((expr),(value)) )
#else
@ -253,7 +275,8 @@ static void LZ4_copy8(void* dst, const void* src)
}
/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
{
BYTE* d = (BYTE*)dstPtr;
const BYTE* s = (const BYTE*)srcPtr;
@ -1112,6 +1135,7 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
* Note that it is important for performance that this function really get inlined,
* in order to remove useless branches during compilation optimization.
*/
LZ4_FORCE_O2_GCC_PPC64LE
LZ4_FORCE_INLINE int LZ4_decompress_generic(
const char* const src,
char* const dst,
@ -1272,16 +1296,19 @@ _output_error:
}
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
{
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, (BYTE*)dest, NULL, 0);
}
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize)
{
return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, (BYTE*)dest, NULL, 0);
}
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
{
return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, (BYTE*)(dest - 64 KB), NULL, 64 KB);
@ -1327,6 +1354,7 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti
If it's not possible, save the relevant part of decoded data into a safe buffer,
and indicate where it stands using LZ4_setStreamDecode()
*/
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
{
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@ -1353,6 +1381,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
return result;
}
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
{
LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@ -1387,6 +1416,7 @@ Advanced decoding functions :
the dictionary must be explicitly provided within parameters
*/
LZ4_FORCE_O2_GCC_PPC64LE
LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char* dest, int compressedSize, int maxOutputSize, int safe, const char* dictStart, int dictSize)
{
if (dictSize==0)
@ -1399,17 +1429,20 @@ LZ4_FORCE_INLINE int LZ4_decompress_usingDict_generic(const char* source, char*
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, safe, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);
}
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
{
return LZ4_decompress_usingDict_generic(source, dest, compressedSize, maxOutputSize, 1, dictStart, dictSize);
}
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
{
return LZ4_decompress_usingDict_generic(source, dest, 0, originalSize, 0, dictStart, dictSize);
}
/* debug function */
LZ4_FORCE_O2_GCC_PPC64LE
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
{
return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, usingExtDict, (BYTE*)dest, (const BYTE*)dictStart, dictSize);