created LZ4_FAST_DEC_LOOP build macro

This commit is contained in:
Yann Collet 2019-04-02 16:22:11 -07:00
parent 7d9d00f4df
commit 2589c4424f
2 changed files with 31 additions and 9 deletions

View File

@ -42,17 +42,28 @@ Should they be nonetheless needed, it's possible to force their publication
by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`.
#### Build macros
The following build macro can be set at compilation time:
- `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop.
This loop works great on x86/x64 CPUs, and is automatically enabled on these platforms.
It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
Typically with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
#### Amalgamation
lz4 code is able to be amalgamated into a single file.
We can combine all source code in `lz4_all.c` by using following command,
lz4 source code can be amalgamated into a single file.
One can combine all source code into `lz4_all.c` by using the following commands:
```
cat lz4.c > lz4_all.c
cat lz4hc.c >> lz4_all.c
cat lz4frame.c >> lz4_all.c
```
and compile `lz4_all.c`.
It's necessary to include all `*.h` files present in `/lib` together with `lz4_all.c`.
(`cat` file order is important) then compile `lz4_all.c`.
All `*.h` files present in `/lib` remain necessary to compile `lz4_all.c`.
#### Windows : using MinGW+MSYS to create DLL

View File

@ -202,6 +202,7 @@
typedef size_t reg_t; /* 32-bits in x32 mode */
#endif
/*-************************************
* Reading and writing into memory
**************************************/
@ -235,7 +236,7 @@ static reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArc
static void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
#else /* safe and portable access through memcpy() */
#else /* safe and portable access using memcpy() */
static U16 LZ4_read16(const void* memPtr)
{
@ -301,7 +302,15 @@ static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
#if defined(__i386__) || defined(__x86_64__)
/* LZ4_FAST_DEC_LOOP :
 * Build macro selecting the optimized decompression loop (1 = enabled, 0 = disabled).
 * A value supplied by the build (e.g. -DLZ4_FAST_DEC_LOOP=0) is kept as is;
 * otherwise it defaults to 1 on x86 / x64 targets and to 0 on every other target. */
#ifndef LZ4_FAST_DEC_LOOP
# if defined(__i386__) || defined(__x86_64__)
# define LZ4_FAST_DEC_LOOP 1
# else
# define LZ4_FAST_DEC_LOOP 0
# endif
#endif
#if LZ4_FAST_DEC_LOOP
LZ4_FORCE_O2_INLINE_GCC_PPC64LE
void LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) {
if (offset < 8) {
@ -367,6 +376,8 @@ void LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con
}
}
#endif
/*-************************************
* Common Constants
**************************************/
@ -1590,7 +1601,7 @@ LZ4_decompress_generic(
if ((endOnInput) && unlikely(srcSize==0)) return -1;
/* Currently the fast loop shows a regression on qualcomm arm chips. */
#if defined(__i386__) || defined(__x86_64__)
#if LZ4_FAST_DEC_LOOP
if ((oend - op) < FASTLOOP_SAFE_DISTANCE)
goto safe_decode;
@ -1773,7 +1784,7 @@ LZ4_decompress_generic(
/* copy literals */
cpy = op+length;
#if defined(__i386__) || defined(__x86_64__)
#if LZ4_FAST_DEC_LOOP
safe_literal_copy:
#endif
LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
@ -1823,7 +1834,7 @@ LZ4_decompress_generic(
}
length += MINMATCH;
#if defined(__i386__) || defined(__x86_64__)
#if LZ4_FAST_DEC_LOOP
safe_match_copy:
#endif
/* match starting within external dictionary */