Improved performance on ARMv6

This commit is contained in:
Yann Collet 2015-08-16 01:54:55 +01:00
parent fb4d3ef2c4
commit 0f2bf0c54e
2 changed files with 36 additions and 16 deletions


@@ -53,6 +53,17 @@
/**************************************
* CPU Feature Detection
**************************************/
/* LZ4_FORCE_DIRECT_MEMORY_ACCESS
* Unaligned memory access is automatically enabled for "common" CPUs, such as x86/x64.
* For other CPUs, the compiler is more cautious and inserts extra code to make unaligned memory accesses work properly.
* If you know your target CPU supports unaligned memory access efficiently, you can force this option manually.
* If your CPU supports unaligned memory access efficiently and the compiler did not detect it automatically, you will see a large performance improvement.
* This switch can also be enabled from the compilation command line / Makefile.
*/
#if !defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS) && ( defined(__ARM_FEATURE_UNALIGNED) )
# define LZ4_FORCE_DIRECT_MEMORY_ACCESS 1
#endif
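
As a hedged illustration of the last point in the comment above (the exact compile line is an assumption about your toolchain, not something this diff prescribes), the switch can be forced either in source or at build time:

/* In source, before the detection block is reached: */
#define LZ4_FORCE_DIRECT_MEMORY_ACCESS 1

/* Or equivalently on the compile line (shown for a generic cc): */
/*   cc -DLZ4_FORCE_DIRECT_MEMORY_ACCESS=1 -c lz4.c              */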
/*
* LZ4_FORCE_SW_BITCOUNT
* Define this parameter if your target system or compiler does not support hardware bit count
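
The rest of this comment falls outside the hunk. For flavor, a minimal sketch of what a software bit-count fallback might look like (the function name and loop form are illustrative assumptions, not the diff's code):

#include <assert.h>

/* Counts trailing zero bits in software; hardware builds would use   */
/* a compiler intrinsic instead (e.g. __builtin_ctzll where present). */
static unsigned count_trailing_zeros_sw(unsigned long long v)
{
    unsigned n = 0;
    assert(v != 0);               /* behaviour for 0 is undefined here */
    while ((v & 1) == 0) { v >>= 1; n++; }
    return n;
}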
@@ -141,6 +152,13 @@ static unsigned LZ4_isLittleEndian(void)
return one.c[0];
}
#if defined(LZ4_FORCE_DIRECT_MEMORY_ACCESS)
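/* Fast path: the target handles unaligned loads natively, so the compiler can emit a single load for each plain dereference below. */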
static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
static size_t LZ4_read_ARCH(const void* memPtr) { return *(const size_t*) memPtr; }
#else
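/* Portable path: memcpy() makes no alignment assumptions, and modern compilers inline these fixed-size copies. */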
static U16 LZ4_read16(const void* memPtr)
{
@@ -149,6 +167,23 @@ static U16 LZ4_read16(const void* memPtr)
return val16;
}
static U32 LZ4_read32(const void* memPtr)
{
U32 val32;
memcpy(&val32, memPtr, 4);
return val32;
}
static size_t LZ4_read_ARCH(const void* memPtr)
{
size_t val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
#endif // LZ4_FORCE_DIRECT_MEMORY_ACCESS
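
A quick standalone check that the two configurations agree on targets that tolerate unaligned loads (everything below, including the helper names, is an illustrative sketch rather than lz4 code):

#include <stdio.h>
#include <string.h>

typedef unsigned int U32;

static U32 read32_direct(const void* p) { return *(const U32*)p; }             /* fast path */
static U32 read32_memcpy(const void* p) { U32 v; memcpy(&v, p, 4); return v; } /* portable path */

int main(void)
{
    unsigned char buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    /* buf+1 is deliberately misaligned; on x86/x64, or on ARM with   */
    /* __ARM_FEATURE_UNALIGNED, both reads return the same value.     */
    printf("memcpy: %u  direct: %u\n", read32_memcpy(buf + 1), read32_direct(buf + 1));
    return 0;
}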
static U16 LZ4_readLE16(const void* memPtr)
{
if (LZ4_isLittleEndian())
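/* (body truncated by the hunk boundary; presumably it returns LZ4_read16()
   directly here and assembles the value byte-by-byte on big-endian hosts) */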
@@ -176,21 +211,6 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
}
}
static U32 LZ4_read32(const void* memPtr)
{
U32 val32;
memcpy(&val32, memPtr, 4);
return val32;
}
static size_t LZ4_read_ARCH(const void* memPtr)
{
size_t val;
memcpy(&val, memPtr, sizeof(val));
return val;
}
/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */
static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
{
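
The body of LZ4_wildCopy falls outside the hunk. A minimal sketch of how such an overwriting copy is commonly written (the 8-byte stride is an assumption consistent with the "up to 7 bytes beyond dstEnd" contract, not necessarily the diff's exact code):

#include <string.h>

static void wildCopy_sketch(void* dstPtr, const void* srcPtr, void* dstEnd)
{
    unsigned char*       d = (unsigned char*)dstPtr;
    const unsigned char* s = (const unsigned char*)srcPtr;
    unsigned char* const e = (unsigned char*)dstEnd;
    do { memcpy(d, s, 8); d += 8; s += 8; } while (d < e);   /* final iteration may write up to 7 bytes past e */
}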


@@ -35,7 +35,7 @@ You can contact the author at :
/**************************************
* Tuning parameters
**************************************/
/* XXH_FORCE_DIRECT_UNALIGNED_MEMORY_ACCESS
/* XXH_FORCE_DIRECT_MEMORY_ACCESS
* Unaligned memory access is automatically enabled for "common" CPUs, such as x86/x64.
* For other CPUs, the compiler is more cautious and inserts extra code to make unaligned memory accesses work properly.
* If you know your target CPU supports unaligned memory access efficiently, you can force this option manually.