diff --git a/bench.c b/bench.c index ca26369..669c584 100644 --- a/bench.c +++ b/bench.c @@ -1,7 +1,7 @@ /* bench.c - Demo program to benchmark open-source compression algorithm Copyright (C) Yann Collet 2012-2013 - GPL v2 License + GPL v2 License This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,9 +17,9 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - You can contact the author at : - - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html - - LZ4 source repository : http://code.google.com/p/lz4/ + You can contact the author at : + - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html + - LZ4 source repository : http://code.google.com/p/lz4/ */ //************************************** @@ -77,19 +77,19 @@ //************************************** // Basic Types //************************************** -#if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively -#define BYTE unsigned __int8 -#define U16 unsigned __int16 -#define U32 unsigned __int32 -#define S32 __int32 -#define U64 unsigned __int64 +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; #else -#include -#define BYTE uint8_t -#define U16 uint16_t -#define U32 uint32_t -#define S32 int32_t -#define U64 uint64_t + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; #endif @@ -109,17 +109,17 @@ //************************************** struct chunkParameters { - U32 id; - char* origBuffer; - char* compressedBuffer; - int origSize; - int compressedSize; + U32 id; + char* origBuffer; + char* compressedBuffer; + int origSize; + int compressedSize; }; struct compressionParameters { - int (*compressionFunction)(const char*, char*, int); - int (*decompressionFunction)(const char*, char*, int); + int (*compressionFunction)(const char*, char*, int); + int (*decompressionFunction)(const char*, char*, int); }; @@ -139,14 +139,14 @@ static int BMK_pause = 0; void BMK_SetBlocksize(int bsize) { - chunkSize = bsize; - DISPLAY("-Using Block Size of %i KB-\n", chunkSize>>10); + chunkSize = bsize; + DISPLAY("-Using Block Size of %i KB-\n", chunkSize>>10); } void BMK_SetNbIterations(int nbLoops) { - nbIterations = nbLoops; - DISPLAY("- %i iterations -\n", nbIterations); + nbIterations = nbLoops; + DISPLAY("- %i iterations -\n", nbIterations); } void BMK_SetPause() @@ -199,36 +199,36 @@ static int BMK_GetMilliSpan( int nTimeStart ) static size_t BMK_findMaxMem(U64 requiredMem) { - size_t step = (64U<<20); // 64 MB - BYTE* testmem=NULL; + size_t step = (64U<<20); // 64 MB + BYTE* testmem=NULL; - requiredMem = (((requiredMem >> 25) + 1) << 26); - if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + requiredMem = (((requiredMem >> 25) + 1) << 26); + if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; - requiredMem += 2*step; - while (!testmem) - { - requiredMem -= step; - testmem = malloc ((size_t)requiredMem); - } + requiredMem += 2*step; + while (!testmem) + { + requiredMem -= step; + testmem = malloc ((size_t)requiredMem); + } - free (testmem); - return (size_t) (requiredMem - step); + free (testmem); + return (size_t) (requiredMem - step); } static U64 BMK_GetFileSize(char* infilename) { - int r; + int r; #if defined(_MSC_VER) - struct _stat64 statbuf; - r = _stat64(infilename, &statbuf); + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); #else - struct stat statbuf; - r = stat(infilename, &statbuf); + struct stat statbuf; + r = stat(infilename, &statbuf); #endif - if (r || !S_ISREG(statbuf.st_mode)) return 0; // No good... - return (U64)statbuf.st_size; + if (r || !S_ISREG(statbuf.st_mode)) return 0; // No good... + return (U64)statbuf.st_size; } @@ -274,156 +274,156 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel) // Loop for each file while (fileIdx largefilesize) benchedSize = (size_t)largefilesize; - if (benchedSize < largefilesize) - { - DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedSize>>20)); - } + // Memory allocation & restrictions + largefilesize = BMK_GetFileSize(infilename); + benchedSize = (size_t) BMK_findMaxMem(largefilesize) / 2; + if ((U64)benchedSize > largefilesize) benchedSize = (size_t)largefilesize; + if (benchedSize < largefilesize) + { + DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", infilename, (int)(benchedSize>>20)); + } - // Alloc - chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters)); - orig_buff = malloc((size_t )benchedSize); - nbChunks = (int) (benchedSize / chunkSize) + 1; - maxCChunkSize = LZ4_compressBound(chunkSize); - compressed_buff_size = nbChunks * maxCChunkSize; - compressed_buff = malloc((size_t )compressed_buff_size); + // Alloc + chunkP = (struct chunkParameters*) malloc(((benchedSize / chunkSize)+1) * sizeof(struct chunkParameters)); + orig_buff = malloc((size_t )benchedSize); + nbChunks = (int) (benchedSize / chunkSize) + 1; + maxCChunkSize = LZ4_compressBound(chunkSize); + compressed_buff_size = nbChunks * maxCChunkSize; + compressed_buff = malloc((size_t )compressed_buff_size); - if(!orig_buff || !compressed_buff) - { - DISPLAY("\nError: not enough memory!\n"); - free(orig_buff); - free(compressed_buff); - fclose(fileIn); - return 12; - } + if(!orig_buff || !compressed_buff) + { + DISPLAY("\nError: not enough memory!\n"); + free(orig_buff); + free(compressed_buff); + fclose(fileIn); + return 12; + } - // Init chunks data - { - int i; - size_t remaining = benchedSize; - char* in = orig_buff; - char* out = compressed_buff; - for (i=0; i chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; } - chunkP[i].compressedBuffer = out; out += maxCChunkSize; - chunkP[i].compressedSize = 0; - } - } + // Init chunks data + { + int i; + size_t remaining = benchedSize; + char* in = orig_buff; + char* out = compressed_buff; + for (i=0; i chunkSize) { chunkP[i].origSize = chunkSize; remaining -= chunkSize; } else { chunkP[i].origSize = (int)remaining; remaining = 0; } + chunkP[i].compressedBuffer = out; out += maxCChunkSize; + chunkP[i].compressedSize = 0; + } + } - // Fill input buffer - DISPLAY("Loading %s... \r", infilename); - readSize = fread(orig_buff, 1, benchedSize, fileIn); - fclose(fileIn); + // Fill input buffer + DISPLAY("Loading %s... \r", infilename); + readSize = fread(orig_buff, 1, benchedSize, fileIn); + fclose(fileIn); - if(readSize != benchedSize) - { - DISPLAY("\nError: problem reading file '%s' !! \n", infilename); - free(orig_buff); - free(compressed_buff); - return 13; - } + if(readSize != benchedSize) + { + DISPLAY("\nError: problem reading file '%s' !! \n", infilename); + free(orig_buff); + free(compressed_buff); + return 13; + } - // Calculating input Checksum - crcc = XXH32(orig_buff, (unsigned int)benchedSize,0); + // Calculating input Checksum + crcc = XXH32(orig_buff, (unsigned int)benchedSize,0); - // Bench - { - int loopNb, nb_loops, chunkNb; - size_t cSize=0; - int milliTime; - double fastestC = 100000000., fastestD = 100000000.; - double ratio=0.; + // Bench + { + int loopNb, nb_loops, chunkNb; + size_t cSize=0; + int milliTime; + double fastestC = 100000000., fastestD = 100000000.; + double ratio=0.; - DISPLAY("\r%79s\r", ""); - for (loopNb = 1; loopNb <= nbIterations; loopNb++) - { - // Compression - DISPLAY("%1i-%-14.14s : %9i ->\r", loopNb, infilename, (int)benchedSize); - { size_t i; for (i=0; i\r", loopNb, infilename, (int)benchedSize); + { size_t i; for (i=0; i %9i (%5.2f%%),%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000.); + DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000.); - // Decompression - { size_t i; for (i=0; i %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + if ((double)milliTime < fastestD*nb_loops) fastestD = (double)milliTime/nb_loops; + DISPLAY("%1i-%-14.14s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\r", loopNb, infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); - // CRC Checking - crcd = XXH32(orig_buff, (unsigned int)benchedSize,0); - if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; } - } + // CRC Checking + crcd = XXH32(orig_buff, (unsigned int)benchedSize,0); + if (crcc!=crcd) { DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", infilename, (unsigned)crcc, (unsigned)crcd); break; } + } - if (crcc==crcd) - { - if (ratio<100.) - DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); - else - DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); - } - totals += benchedSize; - totalz += cSize; - totalc += fastestC; - totald += fastestD; - } + if (crcc==crcd) + { + if (ratio<100.) + DISPLAY("%-16.16s : %9i -> %9i (%5.2f%%),%7.1f MB/s ,%7.1f MB/s\n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + else + DISPLAY("%-16.16s : %9i -> %9i (%5.1f%%),%7.1f MB/s ,%7.1f MB/s \n", infilename, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / fastestC / 1000., (double)benchedSize / fastestD / 1000.); + } + totals += benchedSize; + totalz += cSize; + totalc += fastestC; + totald += fastestD; + } - free(orig_buff); - free(compressed_buff); - free(chunkP); + free(orig_buff); + free(compressed_buff); + free(chunkP); } if (nbFiles > 1) - printf("%-16.16s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (long long unsigned int)totalz, (double)totalz/(double)totals*100., (double)totals/totalc/1000., (double)totals/totald/1000.); + printf("%-16.16s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (long long unsigned int)totalz, (double)totalz/(double)totals*100., (double)totals/totalc/1000., (double)totals/totald/1000.); if (BMK_pause) { printf("press enter...\n"); getchar(); } diff --git a/lz4.c b/lz4.c index 0cd962e..afbc6ef 100644 --- a/lz4.c +++ b/lz4.c @@ -46,9 +46,10 @@ Note : this source file requires "lz4_encoder.h" and "lz4_decoder.h" #define MEMORY_USAGE 14 // HEAPMODE : -// Select if compression algorithm will allocate space for its tables +// Select how default compression function will allocate memory for its hash table, // in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). // Default allocation strategy is to use stack (HEAPMODE 0) +// Note : explicit functions *_stack* and *_heap* are unaffected by this setting #define HEAPMODE 0 // BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : @@ -63,7 +64,9 @@ Note : this source file requires "lz4_encoder.h" and "lz4_decoder.h" // CPU Feature Detection //************************************** // 32 or 64 bits ? -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) ) // Detects 64 bits mode +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) \ + || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) \ + || defined(__ia64__) ) // Detects 64 bits mode # define LZ4_ARCH64 1 #else # define LZ4_ARCH64 0 @@ -103,7 +106,7 @@ Note : this source file requires "lz4_encoder.h" and "lz4_decoder.h" //************************************** // Compiler Options //************************************** -#if __STDC_VERSION__ >= 199901L // C99 +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 /* "restrict" is a known keyword */ #else # define restrict // Disable restrict @@ -150,30 +153,36 @@ Note : this source file requires "lz4_encoder.h" and "lz4_decoder.h" //************************************** // Basic Types //************************************** -#if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively -# define BYTE unsigned __int8 -# define U16 unsigned __int16 -# define U32 unsigned __int32 -# define S32 __int32 -# define U64 unsigned __int64 +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; #else -# include -# define BYTE uint8_t -# define U16 uint16_t -# define U32 uint32_t -# define S32 int32_t -# define U64 uint64_t + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; #endif -#ifndef LZ4_FORCE_UNALIGNED_ACCESS +#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) # pragma pack(push, 1) #endif -typedef struct _U16_S { U16 v; } U16_S; -typedef struct _U32_S { U32 v; } U32_S; -typedef struct _U64_S { U64 v; } U64_S; +typedef struct _U16_S { U16 v; } _PACKED U16_S; +typedef struct _U32_S { U32 v; } _PACKED U32_S; +typedef struct _U64_S { U64 v; } _PACKED U64_S; -#ifndef LZ4_FORCE_UNALIGNED_ACCESS +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) # pragma pack(pop) #endif @@ -241,8 +250,8 @@ typedef struct _U64_S { U64 v; } U64_S; //************************************** // Macros //************************************** -#define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d>8) > oend) return 0; // Check output limit + if unlikely(op + length + (2 + 1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit #endif -#ifdef _MSC_VER if (length>=(int)RUN_MASK) { int len = length-RUN_MASK; *token=(RUN_MASK<254) - { - do { *op++ = 255; len -= 255; } while (len>254); - *op++ = (BYTE)len; - memcpy(op, anchor, length); - op += length; - goto _next_match; - } - else + for(; len >= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } else *token = (BYTE)(length<=(int)RUN_MASK) - { - int len; - *token=(RUN_MASK< 254 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (length<>8) > oend) return 0; // Check output limit + if unlikely(op + (1 + LASTLITERALS) + (length>>8) > oend) return 0; // Check output limit #endif if (length>=(int)ML_MASK) { *token += ML_MASK; length -= ML_MASK; for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } - if (length > 254) { length-=255; *op++ = 255; } + if (length >= 255) { length-=255; *op++ = 255; } *op++ = (BYTE)length; } else *token += (BYTE)length; @@ -241,7 +221,7 @@ _last_literals: #ifdef LIMITED_OUTPUT if (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize) return 0; // Check output limit #endif - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } else *op++ = (BYTE)(lastRun< build one automatically (for compression only) if (!output_filename) { - if (!decode) + if (!decode) // compression { int i=0, l=0; while (input_filename[l]!=0) l++; @@ -829,9 +829,15 @@ int main(int argc, char** argv) } else { - badusage(exename); - return 1; + int inl=0,outl; + while (input_filename[inl]!=0) inl++; + output_filename = (char*)calloc(1,inl+1); + for (outl=0;outl4) + while ((outl >= inl-4) && (input_filename[outl] == extension[outl-inl+4])) output_filename[outl--]=0; + if (outl != inl-5) output_filename = NULL; } + if (!output_filename) { badusage(exename); return 1; } } if (decode) return decodeFile(input_filename, output_filename); diff --git a/lz4hc.c b/lz4hc.c index f9f827a..385e1b2 100644 --- a/lz4hc.c +++ b/lz4hc.c @@ -36,11 +36,23 @@ Note : this source file requires "lz4hc_encoder.h" */ +//************************************** +// Memory routines +//************************************** +#include // calloc, free +#define ALLOCATOR(s) calloc(1,s) +#define FREEMEM free +#include // memset, memcpy +#define MEM_INIT memset + + //************************************** // CPU Feature Detection //************************************** // 32 or 64 bits ? -#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) ) // Detects 64 bits mode +#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || defined(__amd64) \ + || defined(__ppc64__) || defined(_WIN64) || defined(__LP64__) || defined(_LP64) \ + || defined(__ia64__) ) // Detects 64 bits mode # define LZ4_ARCH64 1 #else # define LZ4_ARCH64 0 @@ -80,7 +92,7 @@ Note : this source file requires "lz4hc_encoder.h" //************************************** // Compiler Options //************************************** -#if __STDC_VERSION__ >= 199901L // C99 +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 /* "restrict" is a known keyword */ #else # define restrict // Disable restrict @@ -117,44 +129,44 @@ Note : this source file requires "lz4hc_encoder.h" //************************************** // Includes //************************************** -#include // calloc, free -#include // memset, memcpy #include "lz4hc.h" #include "lz4.h" -#define ALLOCATOR(s) calloc(1,s) -#define FREEMEM free -#define MEM_INIT memset - //************************************** // Basic Types //************************************** -#if defined(_MSC_VER) // Visual Studio does not support 'stdint' natively -#define BYTE unsigned __int8 -#define U16 unsigned __int16 -#define U32 unsigned __int32 -#define S32 __int32 -#define U64 unsigned __int64 +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; #else -#include -#define BYTE uint8_t -#define U16 uint16_t -#define U32 uint32_t -#define S32 int32_t -#define U64 uint64_t + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; #endif -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack(push, 1) +#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED #endif -typedef struct _U16_S { U16 v; } U16_S; -typedef struct _U32_S { U32 v; } U32_S; -typedef struct _U64_S { U64 v; } U64_S; +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(push, 1) +#endif -#ifndef LZ4_FORCE_UNALIGNED_ACCESS -#pragma pack(pop) +typedef struct _U16_S { U16 v; } _PACKED U16_S; +typedef struct _U32_S { U32 v; } _PACKED U32_S; +typedef struct _U64_S { U64 v; } _PACKED U64_S; + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(pop) #endif #define A64(x) (((U64_S *)(x))->v) @@ -193,30 +205,30 @@ typedef struct _U64_S { U64 v; } U64_S; //************************************** // Architecture-specific macros //************************************** -#if LZ4_ARCH64 // 64-bit -#define STEPSIZE 8 -#define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; -#define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) -#define UARCH U64 -#define AARCH A64 -#define HTYPE U32 -#define INITBASE(b,s) const BYTE* const b = s -#else // 32-bit -#define STEPSIZE 4 -#define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; -#define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); -#define UARCH U32 -#define AARCH A32 -#define HTYPE const BYTE* -#define INITBASE(b,s) const int b = 0 +#if LZ4_ARCH64 // 64-bit +# define STEPSIZE 8 +# define LZ4_COPYSTEP(s,d) A64(d) = A64(s); d+=8; s+=8; +# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d) +# define UARCH U64 +# define AARCH A64 +# define HTYPE U32 +# define INITBASE(b,s) const BYTE* const b = s +#else // 32-bit +# define STEPSIZE 4 +# define LZ4_COPYSTEP(s,d) A32(d) = A32(s); d+=4; s+=4; +# define LZ4_COPYPACKET(s,d) LZ4_COPYSTEP(s,d); LZ4_COPYSTEP(s,d); +# define UARCH U32 +# define AARCH A32 +# define HTYPE const BYTE* +# define INITBASE(b,s) const int b = 0 #endif #if defined(LZ4_BIG_ENDIAN) -#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } -#define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; } -#else // Little Endian -#define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } -#define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } +# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } +# define LZ4_WRITE_LITTLEENDIAN_16(p,i) { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p+=2; } +#else // Little Endian +# define LZ4_READ_LITTLEENDIAN_16(d,s,p) { d = (s) - A16(p); } +# define LZ4_WRITE_LITTLEENDIAN_16(p,v) { A16(p) = v; p+=2; } #endif @@ -237,11 +249,11 @@ typedef struct //************************************** #define LZ4_WILDCOPY(s,d,e) do { LZ4_COPYPACKET(s,d) } while (d> ((MINMATCH*8)-HASH_LOG)) -#define HASH_VALUE(p) HASH_FUNCTION(A32(p)) -#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) -#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] -#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) +#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) +#define HASH_VALUE(p) HASH_FUNCTION(A32(p)) +#define HASH_POINTER(p) (HashTable[HASH_VALUE(p)] + base) +#define DELTANEXT(p) chainTable[(size_t)(p) & MAXD_MASK] +#define GETNEXT(p) ((p) - (size_t)DELTANEXT(p)) //************************************** @@ -252,30 +264,30 @@ typedef struct inline static int LZ4_NbCommonBytes (register U64 val) { #if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r = 0; _BitScanReverse64( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) +# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clzll(val) >> 3); - #else +# else int r; if (!(val>>32)) { r=4; } else { r=0; val>>=32; } if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } r += (!val); return r; - #endif +# endif #else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r = 0; _BitScanForward64( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) +# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctzll(val) >> 3); - #else +# else static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >> 58]; - #endif +# endif #endif } @@ -284,29 +296,29 @@ inline static int LZ4_NbCommonBytes (register U64 val) inline static int LZ4_NbCommonBytes (register U32 val) { #if defined(LZ4_BIG_ENDIAN) - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r; _BitScanReverse( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) +# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_clz(val) >> 3); - #else +# else int r; if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } r += (!val); return r; - #endif +# endif #else - #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r; _BitScanForward( &r, val ); return (int)(r>>3); - #elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) +# elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) return (__builtin_ctz(val) >> 3); - #else +# else static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; - #endif +# endif #endif } diff --git a/lz4hc_encoder.h b/lz4hc_encoder.h index 7efba9c..0d10e7c 100644 --- a/lz4hc_encoder.h +++ b/lz4hc_encoder.h @@ -332,6 +332,7 @@ int FUNCTION_NAME (const char* source, // Locally Generated #undef ENCODE_SEQUENCE +#undef ENCODE_SEQUENCE_NAME // Optional defines #ifdef LIMITED_OUTPUT diff --git a/xxhash.c b/xxhash.c index 32b6994..3c5f560 100644 --- a/xxhash.c +++ b/xxhash.c @@ -1,33 +1,33 @@ /* - xxHash - Fast Hash algorithm - Copyright (C) 2012-2013, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) +xxHash - Fast Hash algorithm +Copyright (C) 2012-2013, Yann Collet. +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - You can contact the author at : - - xxHash source repository : http://code.google.com/p/xxhash/ +You can contact the author at : +- xxHash source repository : http://code.google.com/p/xxhash/ */ @@ -35,14 +35,21 @@ //************************************** // Tuning parameters //************************************** -// FORCE_NATIVE_FORMAT : -// By default, xxHash library provides endian-independant Hash values. -// Results are therefore identical for big-endian and little-endian CPU. +// XXH_ACCEPT_NULL_INPUT_POINTER : +// If the input pointer is a null pointer, xxHash default behavior is to crash, since it is a bad input. +// If this option is enabled, xxHash output for null input pointers will be the same as a null-length input. +// This option has a very small performance cost (only measurable on small inputs). +// By default, this option is disabled. To enable it, uncomment below define : +//#define XXH_ACCEPT_NULL_INPUT_POINTER 1 + +// XXH_FORCE_NATIVE_FORMAT : +// By default, xxHash library provides endian-independant Hash values, based on little-endian convention. +// Results are therefore identical for little-endian and big-endian CPU. // This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. // Should endian-independance be of no importance to your application, you may uncomment the #define below // It will improve speed for Big-endian CPU. // This option has no impact on Little_Endian CPU. -//#define FORCE_NATIVE_FORMAT 1 +//#define XXH_FORCE_NATIVE_FORMAT 1 @@ -60,7 +67,7 @@ //************************************** // Little Endian or Big Endian ? // You can overwrite the #define below if you know your architecture endianess -#if defined(FORCE_NATIVE_FORMAT) && (FORCE_NATIVE_FORMAT==1) +#if defined(XXH_FORCE_NATIVE_FORMAT) && (XXH_FORCE_NATIVE_FORMAT==1) // Force native format. The result will be endian dependant. # define XXH_BIG_ENDIAN 0 #elif defined (__GLIBC__) @@ -71,9 +78,9 @@ #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) # define XXH_BIG_ENDIAN 1 #elif defined(__sparc) || defined(__sparc__) \ - || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) + || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) # define XXH_BIG_ENDIAN 1 #endif @@ -83,6 +90,24 @@ #endif +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + //************************************** // Compiler-specific Options & Functions @@ -101,16 +126,14 @@ #elif GCC_VERSION >= 403 # define XXH_swap32 __builtin_bswap32 #else -static inline unsigned int XXH_swap32 (unsigned int x) { - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); - } +static inline U32 XXH_swap32 (U32 x) { + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff );} #endif - //************************************** // Constants //************************************** @@ -121,11 +144,10 @@ static inline unsigned int XXH_swap32 (unsigned int x) { #define PRIME32_5 374761393U - //************************************** // Macros //************************************** -#define XXH_LE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(*(unsigned int*)(p)) : *(unsigned int*)(p)) +#define XXH_LE32(p) (XXH_BIG_ENDIAN ? XXH_swap32(*(U32*)(p)) : *(U32*)(p)) @@ -133,65 +155,69 @@ static inline unsigned int XXH_swap32 (unsigned int x) { // Simple Hash Functions //**************************** -unsigned int XXH32(const void* input, int len, unsigned int seed) +U32 XXH32(const void* input, int len, U32 seed) { #if 0 - // Simple version, good for code maintenance, but unfortunately slow for small inputs - void* state = XXH32_init(seed); - XXH32_update(state, input, len); - return XXH32_digest(state); + // Simple version, good for code maintenance, but unfortunately slow for small inputs + void* state = XXH32_init(seed); + XXH32_update(state, input, len); + return XXH32_digest(state); #else - const unsigned char* p = (const unsigned char*)input; - const unsigned char* const bEnd = p + len; - unsigned int h32; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U32 h32; - if (len>=16) - { - const unsigned char* const limit = bEnd - 16; - unsigned int v1 = seed + PRIME32_1 + PRIME32_2; - unsigned int v2 = seed + PRIME32_2; - unsigned int v3 = seed + 0; - unsigned int v4 = seed - PRIME32_1; +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { len=0; p=(const BYTE*)16; } +#endif - do - { - v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit) ; + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } - else - { - h32 = seed + PRIME32_5; - } + do + { + v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); - h32 += (unsigned int) len; - - while (p<=bEnd-4) - { - h32 += XXH_LE32(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else + { + h32 = seed + PRIME32_5; + } - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; + while (p<=bEnd-4) + { + h32 += XXH_LE32(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } - return h32; + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; #endif } @@ -203,142 +229,155 @@ unsigned int XXH32(const void* input, int len, unsigned int seed) struct XXH_state32_t { - unsigned int seed; - unsigned int v1; - unsigned int v2; - unsigned int v3; - unsigned int v4; - unsigned long long total_len; - char memory[16]; - int memsize; + U32 seed; + U32 v1; + U32 v2; + U32 v3; + U32 v4; + U64 total_len; + char memory[16]; + int memsize; }; -void* XXH32_init (unsigned int seed) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) malloc ( sizeof(struct XXH_state32_t)); - state->seed = seed; - state->v1 = seed + PRIME32_1 + PRIME32_2; - state->v2 = seed + PRIME32_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME32_1; - state->total_len = 0; - state->memsize = 0; +int XXH32_sizeofState() { return sizeof(struct XXH_state32_t); } - return (void*)state; + +XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + state->seed = seed; + state->v1 = seed + PRIME32_1 + PRIME32_2; + state->v2 = seed + PRIME32_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME32_1; + state->total_len = 0; + state->memsize = 0; + return OK; +} + + +void* XXH32_init (U32 seed) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) malloc (sizeof(struct XXH_state32_t)); + XXH32_resetState(state, seed); + return (void*)state; } XXH_errorcode XXH32_update (void* state_in, const void* input, int len) { - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - const unsigned char* p = (const unsigned char*)input; - const unsigned char* const bEnd = p + len; + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif - if (input==NULL) return XXH_ERROR; - state->total_len += len; - - if (state->memsize + len < 16) // fill in tmp buffer - { - memcpy(state->memory + state->memsize, input, len); - state->memsize += len; - return OK; - } - if (state->memsize) // some data left from previous update - { - memcpy(state->memory + state->memsize, input, 16-state->memsize); - { - const unsigned int* p32 = (const unsigned int*)state->memory; - state->v1 += XXH_LE32(p32) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; - state->v2 += XXH_LE32(p32) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; - state->v3 += XXH_LE32(p32) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; - state->v4 += XXH_LE32(p32) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; - } - p += 16-state->memsize; - state->memsize = 0; - } + if (state->memsize + len < 16) // fill in tmp buffer + { + memcpy(state->memory + state->memsize, input, len); + state->memsize += len; + return OK; + } - { - const unsigned char* const limit = bEnd - 16; - unsigned int v1 = state->v1; - unsigned int v2 = state->v2; - unsigned int v3 = state->v3; - unsigned int v4 = state->v4; + if (state->memsize) // some data left from previous update + { + memcpy(state->memory + state->memsize, input, 16-state->memsize); + { + const U32* p32 = (const U32*)state->memory; + state->v1 += XXH_LE32(p32) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; + state->v2 += XXH_LE32(p32) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; + state->v3 += XXH_LE32(p32) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; + state->v4 += XXH_LE32(p32) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } - while (p<=limit) - { - v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } + if (p <= bEnd-16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } + do + { + v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); - if (p < bEnd) - { - memcpy(state->memory, p, bEnd-p); - state->memsize = (int)(bEnd-p); - } + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } - return OK; + if (p < bEnd) + { + memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return OK; } -unsigned int XXH32_intermediateDigest (void* state_in) +U32 XXH32_intermediateDigest (void* state_in) { - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - unsigned char * p = (unsigned char*)state->memory; - unsigned char* bEnd = (unsigned char*)state->memory + state->memsize; - unsigned int h32; + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + BYTE * p = (BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U32 h32; - if (state->total_len >= 16) - { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } - else - { - h32 = state->seed + PRIME32_5; - } + if (state->total_len >= 16) + { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } + else + { + h32 = state->seed + PRIME32_5; + } - h32 += (unsigned int) state->total_len; - - while (p<=bEnd-4) - { - h32 += XXH_LE32(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } + h32 += (U32) state->total_len; - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; } -unsigned int XXH32_digest (void* state_in) +U32 XXH32_digest (void* state_in) { - unsigned int h32 = XXH32_intermediateDigest(state_in); + U32 h32 = XXH32_intermediateDigest(state_in); - free(state_in); + free(state_in); - return h32; + return h32; } diff --git a/xxhash.h b/xxhash.h index c525e0b..afdf243 100644 --- a/xxhash.h +++ b/xxhash.h @@ -121,6 +121,18 @@ Memory will be freed by XXH32_digest(). */ +int XXH32_sizeofState(); +XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed); +/* +These functions are the basic elements of XXH32_init(); +The objective is to allow user application to make its own allocation. + +XXH32_sizeofState() is used to know how much space must be allocated by the application. +This space must be referenced by a void* pointer. +This pointer must be provided as 'state_in' into XXH32_resetState(), which initializes the state. +*/ + + unsigned int XXH32_intermediateDigest (void* state); /* This function does the same as XXH32_digest(), generating a 32-bit hash,