Fixed a few warnings from -fsanitize=undefined
This commit is contained in:
parent
2f8a4c32f9
commit
f344fbd3ca
132
lib/lz4.c
132
lib/lz4.c
@ -34,7 +34,7 @@
|
||||
|
||||
|
||||
/**************************************
|
||||
Tuning parameters
|
||||
* Tuning parameters
|
||||
**************************************/
|
||||
/*
|
||||
* HEAPMODE :
|
||||
@ -49,51 +49,10 @@
|
||||
*/
|
||||
#define ACCELERATION_DEFAULT 17
|
||||
|
||||
/*
|
||||
* CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS :
|
||||
* By default, the source code expects the compiler to correctly optimize
|
||||
* 4-byte and 8-byte reads on architectures able to handle them efficiently.
|
||||
* This is not always the case. In some circumstances (ARM notably),
|
||||
* the compiler will issue cautious code even when target is able to correctly handle unaligned memory accesses.
|
||||
*
|
||||
* You can force the compiler to use unaligned memory access by uncommenting the line below.
|
||||
* One of the below scenarios will happen :
|
||||
* 1 - Your target CPU correctly handles unaligned access, and was not well optimized by the compiler (good case).
|
||||
* You will witness large performance improvements (+50% and up).
|
||||
* Keep the line uncommented and send a word to upstream (https://groups.google.com/forum/#!forum/lz4c)
|
||||
* The goal is to automatically detect such situations by adding your target CPU within an exception list.
|
||||
* 2 - Your target CPU correctly handles unaligned access, and was already optimized by the compiler
|
||||
* No change will be experienced.
|
||||
* 3 - Your target CPU inefficiently handles unaligned access.
|
||||
* You will experience a performance loss. Comment back the line.
|
||||
* 4 - Your target CPU does not handle unaligned access.
|
||||
* Program will crash.
|
||||
* If uncommenting results in better performance (case 1)
|
||||
* please report your configuration to upstream (https://groups.google.com/forum/#!forum/lz4c)
|
||||
* This way, an automatic detection macro can be added to match your case within later versions of the library.
|
||||
*/
|
||||
/* #define CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS 1 */
|
||||
|
||||
|
||||
/**************************************
|
||||
CPU Feature Detection
|
||||
* CPU Feature Detection
|
||||
**************************************/
|
||||
/*
|
||||
* Automated efficient unaligned memory access detection
|
||||
* Based on known hardware architectures
|
||||
* This list will be updated thanks to feedback
|
||||
*/
|
||||
#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
|
||||
|| defined(__ARM_FEATURE_UNALIGNED) \
|
||||
|| defined(__i386__) || defined(__x86_64__) \
|
||||
|| defined(_M_IX86) || defined(_M_X64) \
|
||||
|| defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
|
||||
|| (defined(_M_ARM) && (_M_ARM >= 7))
|
||||
# define LZ4_UNALIGNED_ACCESS 1
|
||||
#else
|
||||
# define LZ4_UNALIGNED_ACCESS 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* LZ4_FORCE_SW_BITCOUNT
|
||||
* Define this parameter if your target system or compiler does not support hardware bit count
|
||||
@ -142,7 +101,7 @@
|
||||
|
||||
|
||||
/**************************************
|
||||
Memory routines
|
||||
* Memory routines
|
||||
**************************************/
|
||||
#include <stdlib.h> /* malloc, calloc, free */
|
||||
#define ALLOCATOR(n,s) calloc(n,s)
|
||||
@ -152,13 +111,13 @@
|
||||
|
||||
|
||||
/**************************************
|
||||
Includes
|
||||
* Includes
|
||||
**************************************/
|
||||
#include "lz4.h"
|
||||
|
||||
|
||||
/**************************************
|
||||
Basic Types
|
||||
* Basic Types
|
||||
**************************************/
|
||||
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
|
||||
# include <stdint.h>
|
||||
@ -177,7 +136,7 @@
|
||||
|
||||
|
||||
/**************************************
|
||||
Reading and writing into memory
|
||||
* Reading and writing into memory
|
||||
**************************************/
|
||||
#define STEPSIZE sizeof(size_t)
|
||||
|
||||
@ -190,10 +149,19 @@ static unsigned LZ4_isLittleEndian(void)
|
||||
}
|
||||
|
||||
|
||||
static U16 LZ4_read16(const void* memPtr)
|
||||
{
|
||||
U16 val16;
|
||||
memcpy(&val16, memPtr, 2);
|
||||
return val16;
|
||||
}
|
||||
|
||||
static U16 LZ4_readLE16(const void* memPtr)
|
||||
{
|
||||
if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian()))
|
||||
return *(U16*)memPtr;
|
||||
if (LZ4_isLittleEndian())
|
||||
{
|
||||
return LZ4_read16(memPtr);
|
||||
}
|
||||
else
|
||||
{
|
||||
const BYTE* p = (const BYTE*)memPtr;
|
||||
@ -203,10 +171,9 @@ static U16 LZ4_readLE16(const void* memPtr)
|
||||
|
||||
static void LZ4_writeLE16(void* memPtr, U16 value)
|
||||
{
|
||||
if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian()))
|
||||
if (LZ4_isLittleEndian())
|
||||
{
|
||||
*(U16*)memPtr = value;
|
||||
return;
|
||||
memcpy(memPtr, &value, 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -216,41 +183,18 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static U16 LZ4_read16(const void* memPtr)
|
||||
{
|
||||
if (LZ4_UNALIGNED_ACCESS)
|
||||
return *(U16*)memPtr;
|
||||
else
|
||||
{
|
||||
U16 val16;
|
||||
memcpy(&val16, memPtr, 2);
|
||||
return val16;
|
||||
}
|
||||
}
|
||||
|
||||
static U32 LZ4_read32(const void* memPtr)
|
||||
{
|
||||
if (LZ4_UNALIGNED_ACCESS)
|
||||
return *(U32*)memPtr;
|
||||
else
|
||||
{
|
||||
U32 val32;
|
||||
memcpy(&val32, memPtr, 4);
|
||||
return val32;
|
||||
}
|
||||
U32 val32;
|
||||
memcpy(&val32, memPtr, 4);
|
||||
return val32;
|
||||
}
|
||||
|
||||
static U64 LZ4_read64(const void* memPtr)
|
||||
{
|
||||
if (LZ4_UNALIGNED_ACCESS)
|
||||
return *(U64*)memPtr;
|
||||
else
|
||||
{
|
||||
U64 val64;
|
||||
memcpy(&val64, memPtr, 8);
|
||||
return val64;
|
||||
}
|
||||
U64 val64;
|
||||
memcpy(&val64, memPtr, 8);
|
||||
return val64;
|
||||
}
|
||||
|
||||
static size_t LZ4_read_ARCH(const void* p)
|
||||
@ -262,31 +206,9 @@ static size_t LZ4_read_ARCH(const void* p)
|
||||
}
|
||||
|
||||
|
||||
static void LZ4_copy4(void* dstPtr, const void* srcPtr)
|
||||
{
|
||||
if (LZ4_UNALIGNED_ACCESS)
|
||||
{
|
||||
*(U32*)dstPtr = *(U32*)srcPtr;
|
||||
return;
|
||||
}
|
||||
memcpy(dstPtr, srcPtr, 4);
|
||||
}
|
||||
/* Copies exactly 4 bytes from srcPtr to dstPtr.
 * Goes through memcpy rather than a typed pointer dereference, so neither
 * pointer has to be 4-byte aligned. The two regions must not overlap. */
static void LZ4_copy4(void* dstPtr, const void* srcPtr)
{
    memcpy(dstPtr, srcPtr, 4);
}
|
||||
|
||||
static void LZ4_copy8(void* dstPtr, const void* srcPtr)
|
||||
{
|
||||
#if GCC_VERSION!=409 /* disabled on GCC 4.9, as it generates invalid opcode (crash) */
|
||||
if (LZ4_UNALIGNED_ACCESS)
|
||||
{
|
||||
if (LZ4_64bits())
|
||||
*(U64*)dstPtr = *(U64*)srcPtr;
|
||||
else
|
||||
((U32*)dstPtr)[0] = ((U32*)srcPtr)[0],
|
||||
((U32*)dstPtr)[1] = ((U32*)srcPtr)[1];
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
memcpy(dstPtr, srcPtr, 8);
|
||||
}
|
||||
/* Copies exactly 8 bytes from srcPtr to dstPtr.
 * Goes through memcpy rather than a typed pointer dereference, so neither
 * pointer has to be 8-byte aligned. The two regions must not overlap. */
static void LZ4_copy8(void* dstPtr, const void* srcPtr)
{
    memcpy(dstPtr, srcPtr, 8);
}
|
||||
|
||||
/* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */
|
||||
static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd)
|
||||
|
@ -130,6 +130,7 @@ typedef struct
|
||||
LZ4F_frameInfo_t frameInfo;
|
||||
U32 version;
|
||||
U32 dStage;
|
||||
U64 frameRemainingSize;
|
||||
size_t maxBlockSize;
|
||||
size_t maxBufferSize;
|
||||
const BYTE* srcExpect;
|
||||
@ -187,7 +188,7 @@ static U32 LZ4F_readLE32 (const BYTE* srcPtr)
|
||||
U32 value32 = srcPtr[0];
|
||||
value32 += (srcPtr[1]<<8);
|
||||
value32 += (srcPtr[2]<<16);
|
||||
value32 += (srcPtr[3]<<24);
|
||||
value32 += ((U32)srcPtr[3])<<24;
|
||||
return value32;
|
||||
}
|
||||
|
||||
@ -302,7 +303,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf
|
||||
prefs.frameInfo.contentSize = (U64)srcSize;
|
||||
}
|
||||
if (prefs.frameInfo.contentSize != 0)
|
||||
prefs.frameInfo.contentSize = (U64)srcSize; /* correct content size if selected (!=0) */
|
||||
prefs.frameInfo.contentSize = (U64)srcSize; /* auto-correct content size if selected (!=0) */
|
||||
|
||||
if (prefs.compressionLevel < minHClevel)
|
||||
{
|
||||
@ -871,7 +872,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const void* srcVo
|
||||
dctxPtr->frameInfo.blockSizeID = (blockSizeID_t)blockSizeID;
|
||||
dctxPtr->maxBlockSize = LZ4F_getBlockSize(blockSizeID);
|
||||
if (contentSizeFlag)
|
||||
dctxPtr->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6);
|
||||
dctxPtr->frameRemainingSize = dctxPtr->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6);
|
||||
|
||||
/* init */
|
||||
if (contentChecksumFlag) XXH32_reset(&(dctxPtr->xxh), 0);
|
||||
@ -1158,7 +1159,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
|
||||
if ((size_t)(dstEnd-dstPtr) < sizeToCopy) sizeToCopy = dstEnd - dstPtr;
|
||||
memcpy(dstPtr, srcPtr, sizeToCopy);
|
||||
if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), srcPtr, sizeToCopy);
|
||||
if (dctxPtr->frameInfo.contentSize) dctxPtr->frameInfo.contentSize -= sizeToCopy;
|
||||
if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= sizeToCopy;
|
||||
|
||||
/* dictionary management */
|
||||
if (dctxPtr->frameInfo.blockMode==blockLinked)
|
||||
@ -1231,7 +1232,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
|
||||
decodedSize = decoder((const char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
|
||||
if (decodedSize < 0) return (size_t)-ERROR_GENERIC; /* decompression failed */
|
||||
if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize);
|
||||
if (dctxPtr->frameInfo.contentSize) dctxPtr->frameInfo.contentSize -= decodedSize;
|
||||
if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize;
|
||||
|
||||
/* dictionary management */
|
||||
if (dctxPtr->frameInfo.blockMode==blockLinked)
|
||||
@ -1277,7 +1278,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
|
||||
decodedSize = decoder((const char*)selectedIn, (char*)dctxPtr->tmpOut, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize);
|
||||
if (decodedSize < 0) return (size_t)-ERROR_decompressionFailed; /* decompression failed */
|
||||
if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dctxPtr->tmpOut, decodedSize);
|
||||
if (dctxPtr->frameInfo.contentSize) dctxPtr->frameInfo.contentSize -= decodedSize;
|
||||
if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize;
|
||||
dctxPtr->tmpOutSize = decodedSize;
|
||||
dctxPtr->tmpOutStart = 0;
|
||||
dctxPtr->dStage = dstage_flushOut;
|
||||
@ -1311,7 +1312,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
|
||||
case dstage_getSuffix:
|
||||
{
|
||||
size_t suffixSize = dctxPtr->frameInfo.contentChecksumFlag * 4;
|
||||
if (dctxPtr->frameInfo.contentSize) return (size_t)-ERROR_frameSize_wrong; /* incorrect frame size decoded */
|
||||
if (dctxPtr->frameRemainingSize) return (size_t)-ERROR_frameSize_wrong; /* incorrect frame size decoded */
|
||||
if (suffixSize == 0) /* frame completed */
|
||||
{
|
||||
nextSrcSizeHint = 0;
|
||||
@ -1392,7 +1393,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext,
|
||||
selectedIn = dctxPtr->header + 4;
|
||||
}
|
||||
|
||||
/* case dstage_decodeSBlockSize: */ /* no direct access */
|
||||
/* case dstage_decodeSFrameSize: */ /* no direct access */
|
||||
{
|
||||
size_t SFrameSize = LZ4F_readLE32(selectedIn);
|
||||
dctxPtr->frameInfo.contentSize = SFrameSize;
|
||||
|
41
lib/xxhash.c
41
lib/xxhash.c
@ -117,35 +117,20 @@ typedef signed int S32;
|
||||
typedef unsigned long long U64;
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
|
||||
# define _PACKED __attribute__ ((packed))
|
||||
#else
|
||||
# define _PACKED
|
||||
#endif
|
||||
|
||||
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
|
||||
# ifdef __IBMC__
|
||||
# pragma pack(1)
|
||||
# else
|
||||
# pragma pack(push, 1)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
typedef struct _U32_S
|
||||
static U32 XXH_read32(const void* memPtr)
|
||||
{
|
||||
U32 v;
|
||||
} _PACKED U32_S;
|
||||
typedef struct _U64_S
|
||||
U32 val32;
|
||||
memcpy(&val32, memPtr, 4);
|
||||
return val32;
|
||||
}
|
||||
|
||||
static U64 XXH_read64(const void* memPtr)
|
||||
{
|
||||
U64 v;
|
||||
} _PACKED U64_S;
|
||||
U64 val64;
|
||||
memcpy(&val64, memPtr, 8);
|
||||
return val64;
|
||||
}
|
||||
|
||||
#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
|
||||
# pragma pack(pop)
|
||||
#endif
|
||||
|
||||
#define A32(x) (((U32_S *)(x))->v)
|
||||
#define A64(x) (((U64_S *)(x))->v)
|
||||
|
||||
|
||||
/*****************************************
|
||||
@ -230,7 +215,7 @@ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
|
||||
FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
if (align==XXH_unaligned)
|
||||
return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
|
||||
return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
|
||||
else
|
||||
return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr);
|
||||
}
|
||||
@ -243,7 +228,7 @@ FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
|
||||
FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
|
||||
{
|
||||
if (align==XXH_unaligned)
|
||||
return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr));
|
||||
return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
|
||||
else
|
||||
return endian==XXH_littleEndian ? *(U64*)ptr : XXH_swap64(*(U64*)ptr);
|
||||
}
|
||||
|
@ -403,7 +403,7 @@ int BMK_benchFiles(const char** fileNamesTable, int nbFiles, int cLevel)
|
||||
milliTime = BMK_GetMilliSpan(milliTime);
|
||||
|
||||
if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime/nbLoops;
|
||||
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
|
||||
DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s \r", loopNb, inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
|
||||
|
||||
/* CRC Checking */
|
||||
crcCheck = XXH32(orig_buff, (unsigned int)benchedSize,0);
|
||||
@ -413,9 +413,9 @@ int BMK_benchFiles(const char** fileNamesTable, int nbFiles, int cLevel)
|
||||
if (crcOrig==crcCheck)
|
||||
{
|
||||
if (ratio<100.)
|
||||
DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
|
||||
DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s \n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
|
||||
else
|
||||
DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
|
||||
DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.);
|
||||
}
|
||||
totals += benchedSize;
|
||||
totalz += cSize;
|
||||
|
@ -420,11 +420,15 @@ int main(int argc, char** argv)
|
||||
|
||||
/* Modify Nb Iterations (benchmark only) */
|
||||
case 'i':
|
||||
if ((argument[1] >='1') && (argument[1] <='9'))
|
||||
{
|
||||
int iters = argument[1] - '0';
|
||||
unsigned iters = 0;
|
||||
while ((argument[1] >='0') && (argument[1] <='9'))
|
||||
{
|
||||
iters *= 10;
|
||||
iters += argument[1] - '0';
|
||||
argument++;
|
||||
}
|
||||
BMK_setNbIterations(iters);
|
||||
argument++;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -487,8 +487,15 @@ int LZ4IO_compressFilename(const char* input_filename, const char* output_filena
|
||||
/* Final Status */
|
||||
end = clock();
|
||||
DISPLAYLEVEL(2, "\r%79s\r", "");
|
||||
DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
|
||||
if (filesize == 0)
|
||||
{
|
||||
DISPLAYLEVEL(2, "Null size input; converted into %u lz4 stream\n", (unsigned)compressedfilesize);
|
||||
}
|
||||
else
|
||||
{
|
||||
DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
|
||||
(unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
|
||||
}
|
||||
{
|
||||
double seconds = (double)(end - start)/CLOCKS_PER_SEC;
|
||||
DISPLAYLEVEL(4, "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024);
|
||||
|
Loading…
Reference in New Issue
Block a user