diff --git a/lib/lz4.c b/lib/lz4.c index 9d7e5b6..6ed6ab3 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -34,7 +34,7 @@ /************************************** - Tuning parameters +* Tuning parameters **************************************/ /* * HEAPMODE : @@ -49,51 +49,10 @@ */ #define ACCELERATION_DEFAULT 17 -/* - * CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS : - * By default, the source code expects the compiler to correctly optimize - * 4-bytes and 8-bytes read on architectures able to handle it efficiently. - * This is not always the case. In some circumstances (ARM notably), - * the compiler will issue cautious code even when target is able to correctly handle unaligned memory accesses. - * - * You can force the compiler to use unaligned memory access by uncommenting the line below. - * One of the below scenarios will happen : - * 1 - Your target CPU correctly handle unaligned access, and was not well optimized by compiler (good case). - * You will witness large performance improvements (+50% and up). - * Keep the line uncommented and send a word to upstream (https://groups.google.com/forum/#!forum/lz4c) - * The goal is to automatically detect such situations by adding your target CPU within an exception list. - * 2 - Your target CPU correctly handle unaligned access, and was already already optimized by compiler - * No change will be experienced. - * 3 - Your target CPU inefficiently handle unaligned access. - * You will experience a performance loss. Comment back the line. - * 4 - Your target CPU does not handle unaligned access. - * Program will crash. - * If uncommenting results in better performance (case 1) - * please report your configuration to upstream (https://groups.google.com/forum/#!forum/lz4c) - * This way, an automatic detection macro can be added to match your case within later versions of the library. - */ -/* #define CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS 1 */ - /************************************** - CPU Feature Detection +* CPU Feature Detection **************************************/ -/* - * Automated efficient unaligned memory access detection - * Based on known hardware architectures - * This list will be updated thanks to feedbacks - */ -#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \ - || defined(__ARM_FEATURE_UNALIGNED) \ - || defined(__i386__) || defined(__x86_64__) \ - || defined(_M_IX86) || defined(_M_X64) \ - || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \ - || (defined(_M_ARM) && (_M_ARM >= 7)) -# define LZ4_UNALIGNED_ACCESS 1 -#else -# define LZ4_UNALIGNED_ACCESS 0 -#endif - /* * LZ4_FORCE_SW_BITCOUNT * Define this parameter if your target system or compiler does not support hardware bit count @@ -142,7 +101,7 @@ /************************************** - Memory routines +* Memory routines **************************************/ #include /* malloc, calloc, free */ #define ALLOCATOR(n,s) calloc(n,s) @@ -152,13 +111,13 @@ /************************************** - Includes +* Includes **************************************/ #include "lz4.h" /************************************** - Basic Types +* Basic Types **************************************/ #if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ # include @@ -177,7 +136,7 @@ /************************************** - Reading and writing into memory +* Reading and writing into memory **************************************/ #define STEPSIZE sizeof(size_t) @@ -190,10 +149,19 @@ static unsigned LZ4_isLittleEndian(void) } +static U16 LZ4_read16(const void* memPtr) +{ + U16 val16; + memcpy(&val16, memPtr, 2); + return val16; +} + static U16 LZ4_readLE16(const void* memPtr) { - if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian())) - return *(U16*)memPtr; + if (LZ4_isLittleEndian()) + { + return LZ4_read16(memPtr); + } else { const BYTE* p = (const BYTE*)memPtr; @@ -203,10 +171,9 @@ static U16 LZ4_readLE16(const void* memPtr) static void LZ4_writeLE16(void* memPtr, U16 value) { - if ((LZ4_UNALIGNED_ACCESS) && (LZ4_isLittleEndian())) + if (LZ4_isLittleEndian()) { - *(U16*)memPtr = value; - return; + memcpy(memPtr, &value, 2); } else { @@ -216,41 +183,18 @@ static void LZ4_writeLE16(void* memPtr, U16 value) } } - -static U16 LZ4_read16(const void* memPtr) -{ - if (LZ4_UNALIGNED_ACCESS) - return *(U16*)memPtr; - else - { - U16 val16; - memcpy(&val16, memPtr, 2); - return val16; - } -} - static U32 LZ4_read32(const void* memPtr) { - if (LZ4_UNALIGNED_ACCESS) - return *(U32*)memPtr; - else - { - U32 val32; - memcpy(&val32, memPtr, 4); - return val32; - } + U32 val32; + memcpy(&val32, memPtr, 4); + return val32; } static U64 LZ4_read64(const void* memPtr) { - if (LZ4_UNALIGNED_ACCESS) - return *(U64*)memPtr; - else - { - U64 val64; - memcpy(&val64, memPtr, 8); - return val64; - } + U64 val64; + memcpy(&val64, memPtr, 8); + return val64; } static size_t LZ4_read_ARCH(const void* p) @@ -262,31 +206,9 @@ static size_t LZ4_read_ARCH(const void* p) } -static void LZ4_copy4(void* dstPtr, const void* srcPtr) -{ - if (LZ4_UNALIGNED_ACCESS) - { - *(U32*)dstPtr = *(U32*)srcPtr; - return; - } - memcpy(dstPtr, srcPtr, 4); -} +static void LZ4_copy4(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 4); } -static void LZ4_copy8(void* dstPtr, const void* srcPtr) -{ -#if GCC_VERSION!=409 /* disabled on GCC 4.9, as it generates invalid opcode (crash) */ - if (LZ4_UNALIGNED_ACCESS) - { - if (LZ4_64bits()) - *(U64*)dstPtr = *(U64*)srcPtr; - else - ((U32*)dstPtr)[0] = ((U32*)srcPtr)[0], - ((U32*)dstPtr)[1] = ((U32*)srcPtr)[1]; - return; - } -#endif - memcpy(dstPtr, srcPtr, 8); -} +static void LZ4_copy8(void* dstPtr, const void* srcPtr) { memcpy(dstPtr, srcPtr, 8); } /* customized version of memcpy, which may overwrite up to 7 bytes beyond dstEnd */ static void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) diff --git a/lib/lz4frame.c b/lib/lz4frame.c index 6c65d0b..b6dbd20 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -130,6 +130,7 @@ typedef struct LZ4F_frameInfo_t frameInfo; U32 version; U32 dStage; + U64 frameRemainingSize; size_t maxBlockSize; size_t maxBufferSize; const BYTE* srcExpect; @@ -187,7 +188,7 @@ static U32 LZ4F_readLE32 (const BYTE* srcPtr) U32 value32 = srcPtr[0]; value32 += (srcPtr[1]<<8); value32 += (srcPtr[2]<<16); - value32 += (srcPtr[3]<<24); + value32 += ((U32)srcPtr[3])<<24; return value32; } @@ -302,7 +303,7 @@ size_t LZ4F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf prefs.frameInfo.contentSize = (U64)srcSize; } if (prefs.frameInfo.contentSize != 0) - prefs.frameInfo.contentSize = (U64)srcSize; /* correct content size if selected (!=0) */ + prefs.frameInfo.contentSize = (U64)srcSize; /* auto-correct content size if selected (!=0) */ if (prefs.compressionLevel < minHClevel) { @@ -871,7 +872,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx_internal_t* dctxPtr, const void* srcVo dctxPtr->frameInfo.blockSizeID = (blockSizeID_t)blockSizeID; dctxPtr->maxBlockSize = LZ4F_getBlockSize(blockSizeID); if (contentSizeFlag) - dctxPtr->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6); + dctxPtr->frameRemainingSize = dctxPtr->frameInfo.contentSize = LZ4F_readLE64(srcPtr+6); /* init */ if (contentChecksumFlag) XXH32_reset(&(dctxPtr->xxh), 0); @@ -1158,7 +1159,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, if ((size_t)(dstEnd-dstPtr) < sizeToCopy) sizeToCopy = dstEnd - dstPtr; memcpy(dstPtr, srcPtr, sizeToCopy); if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), srcPtr, sizeToCopy); - if (dctxPtr->frameInfo.contentSize) dctxPtr->frameInfo.contentSize -= sizeToCopy; + if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= sizeToCopy; /* dictionary management */ if (dctxPtr->frameInfo.blockMode==blockLinked) @@ -1231,7 +1232,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, decodedSize = decoder((const char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize); if (decodedSize < 0) return (size_t)-ERROR_GENERIC; /* decompression failed */ if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize); - if (dctxPtr->frameInfo.contentSize) dctxPtr->frameInfo.contentSize -= decodedSize; + if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize; /* dictionary management */ if (dctxPtr->frameInfo.blockMode==blockLinked) @@ -1277,7 +1278,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, decodedSize = decoder((const char*)selectedIn, (char*)dctxPtr->tmpOut, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize); if (decodedSize < 0) return (size_t)-ERROR_decompressionFailed; /* decompression failed */ if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dctxPtr->tmpOut, decodedSize); - if (dctxPtr->frameInfo.contentSize) dctxPtr->frameInfo.contentSize -= decodedSize; + if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize; dctxPtr->tmpOutSize = decodedSize; dctxPtr->tmpOutStart = 0; dctxPtr->dStage = dstage_flushOut; @@ -1311,7 +1312,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, case dstage_getSuffix: { size_t suffixSize = dctxPtr->frameInfo.contentChecksumFlag * 4; - if (dctxPtr->frameInfo.contentSize) return (size_t)-ERROR_frameSize_wrong; /* incorrect frame size decoded */ + if (dctxPtr->frameRemainingSize) return (size_t)-ERROR_frameSize_wrong; /* incorrect frame size decoded */ if (suffixSize == 0) /* frame completed */ { nextSrcSizeHint = 0; @@ -1392,7 +1393,7 @@ size_t LZ4F_decompress(LZ4F_decompressionContext_t decompressionContext, selectedIn = dctxPtr->header + 4; } - /* case dstage_decodeSBlockSize: */ /* no direct access */ + /* case dstage_decodeSFrameSize: */ /* no direct access */ { size_t SFrameSize = LZ4F_readLE32(selectedIn); dctxPtr->frameInfo.contentSize = SFrameSize; diff --git a/lib/xxhash.c b/lib/xxhash.c index aca1e0a..a4a3fbe 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -117,35 +117,20 @@ typedef signed int S32; typedef unsigned long long U64; #endif -#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# ifdef __IBMC__ -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif - -typedef struct _U32_S +static U32 XXH_read32(const void* memPtr) { - U32 v; -} _PACKED U32_S; -typedef struct _U64_S + U32 val32; + memcpy(&val32, memPtr, 4); + return val32; +} + +static U64 XXH_read64(const void* memPtr) { - U64 v; -} _PACKED U64_S; + U64 val64; + memcpy(&val64, memPtr, 8); + return val64; +} -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(pop) -#endif - -#define A32(x) (((U32_S *)(x))->v) -#define A64(x) (((U64_S *)(x))->v) /***************************************** @@ -230,7 +215,7 @@ typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); else return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr); } @@ -243,7 +228,7 @@ FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) { if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr)); + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); else return endian==XXH_littleEndian ? *(U64*)ptr : XXH_swap64(*(U64*)ptr); } diff --git a/programs/bench.c b/programs/bench.c index e1b5357..a5c72d6 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -403,7 +403,7 @@ int BMK_benchFiles(const char** fileNamesTable, int nbFiles, int cLevel) milliTime = BMK_GetMilliSpan(milliTime); if ((double)milliTime < fastestD*nbLoops) fastestD = (double)milliTime/nbLoops; - DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s \r", loopNb, inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); /* CRC Checking */ crcCheck = XXH32(orig_buff, (unsigned int)benchedSize,0); @@ -413,9 +413,9 @@ int BMK_benchFiles(const char** fileNamesTable, int nbFiles, int cLevel) if (crcOrig==crcCheck) { if (ratio<100.) - DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s \n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); else - DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", inFileName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); } totals += benchedSize; totalz += cSize; diff --git a/programs/lz4cli.c b/programs/lz4cli.c index 47cf7e0..f33ea82 100644 --- a/programs/lz4cli.c +++ b/programs/lz4cli.c @@ -420,11 +420,15 @@ int main(int argc, char** argv) /* Modify Nb Iterations (benchmark only) */ case 'i': - if ((argument[1] >='1') && (argument[1] <='9')) { - int iters = argument[1] - '0'; + unsigned iters = 0; + while ((argument[1] >='0') && (argument[1] <='9')) + { + iters *= 10; + iters += argument[1] - '0'; + argument++; + } BMK_setNbIterations(iters); - argument++; } break; diff --git a/programs/lz4io.c b/programs/lz4io.c index 991d9d7..278b766 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -487,8 +487,15 @@ int LZ4IO_compressFilename(const char* input_filename, const char* output_filena /* Final Status */ end = clock(); DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + if (filesize == 0) + { + DISPLAYLEVEL(2, "Null size input; converted into %u lz4 stream\n", (unsigned)compressedfilesize); + } + else + { + DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); + } { double seconds = (double)(end - start)/CLOCKS_PER_SEC; DISPLAYLEVEL(4, "Done in %.2f s ==> %.2f MB/s\n", seconds, (double)filesize / seconds / 1024 / 1024);