diff --git a/Makefile b/Makefile index c931d1e..f9775db 100644 --- a/Makefile +++ b/Makefile @@ -31,16 +31,17 @@ # ################################################################ # Version numbers -export RELEASE=r119 +VERSION=120 +export RELEASE=r$(VERSION) LIBVER_MAJOR=1 -LIBVER_MINOR=2 +LIBVER_MINOR=3 LIBVER_PATCH=0 LIBVER=$(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH) DESTDIR= PREFIX = /usr CC := $(CC) -CFLAGS+= -I. -std=c99 -O3 -Wall -W -Wundef -DLZ4_VERSION=\"$(RELEASE)\" +CFLAGS+= -I. -std=c99 -O3 -Wall -Wextra -Wundef -Wshadow -Wstrict-prototypes -DLZ4_VERSION=\"$(RELEASE)\" LIBDIR?= $(PREFIX)/lib INCLUDEDIR=$(PREFIX)/include @@ -58,10 +59,10 @@ endif # OS X linker doesn't support -soname, and use different extension # see : https://developer.apple.com/library/mac/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html ifeq ($(shell uname), Darwin) - SONAME_FLAGS = SHARED_EXT = dylib SHARED_EXT_MAJOR = $(LIBVER_MAJOR).$(SHARED_EXT) SHARED_EXT_VER = $(LIBVER).$(SHARED_EXT) + SONAME_FLAGS = -install_name $(PREFIX)/lib/liblz4.$(SHARED_EXT_MAJOR) -compatibility_version $(LIBVER_MAJOR) -current_version $(LIBVER) else SONAME_FLAGS = -Wl,-soname=liblz4.$(SHARED_EXT).$(LIBVER_MAJOR) SHARED_EXT = so @@ -101,19 +102,29 @@ liblz4: lz4.c lz4hc.c @ln -sf $@.$(SHARED_EXT_VER) $@.$(SHARED_EXT) clean: - @rm -f core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* $(DISTRIBNAME) *.sha1 + @rm -f core *.o *.a *.$(SHARED_EXT) *.$(SHARED_EXT).* $(DISTRIBNAME) *.sha1 liblz4.pc @cd $(PRGDIR); $(MAKE) clean @echo Cleaning completed +#------------------------------------------------------------------------ #make install option is designed for Linux & OSX targets only + ifneq (,$(filter $(shell uname),Linux Darwin)) -install: liblz4 - @install -d -m 755 $(DESTDIR)$(LIBDIR)/ $(DESTDIR)$(INCLUDEDIR)/ +liblz4.pc: liblz4.pc.in Makefile + sed -e 's|@PREFIX@|$(PREFIX)|' \ + -e 's|@LIBDIR@|$(LIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ + -e 's|@VERSION@|$(VERSION)|' \ + $< >$@ + +install: liblz4 liblz4.pc + @install -d -m 755 $(DESTDIR)$(LIBDIR)/pkgconfig/ $(DESTDIR)$(INCLUDEDIR)/ @install -m 755 liblz4.$(SHARED_EXT_VER) $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER) @cp -a liblz4.$(SHARED_EXT_MAJOR) $(DESTDIR)$(LIBDIR) @cp -a liblz4.$(SHARED_EXT) $(DESTDIR)$(LIBDIR) + @cp -a liblz4.pc $(DESTDIR)$(LIBDIR)/pkgconfig/ @install -m 644 liblz4.a $(DESTDIR)$(LIBDIR)/liblz4.a @install -m 644 lz4.h $(DESTDIR)$(INCLUDEDIR)/lz4.h @install -m 644 lz4hc.h $(DESTDIR)$(INCLUDEDIR)/lz4hc.h @@ -123,6 +134,7 @@ install: liblz4 uninstall: rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT) rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_MAJOR) + rm -f $(DESTDIR)$(LIBDIR)/pkgconfig/liblz4.pc [ -x $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER) ] && rm -f $(DESTDIR)$(LIBDIR)/liblz4.$(SHARED_EXT_VER) [ -f $(DESTDIR)$(LIBDIR)/liblz4.a ] && rm -f $(DESTDIR)$(LIBDIR)/liblz4.a [ -f $(DESTDIR)$(INCLUDEDIR)/lz4.h ] && rm -f $(DESTDIR)$(INCLUDEDIR)/lz4.h diff --git a/NEWS b/NEWS index ff2cd53..758a411 100644 --- a/NEWS +++ b/NEWS @@ -1,12 +1,17 @@ +r120: +Fix : OS-X : library install name, thanks to Clemens Lang +Updated : Makefile : stricter compilation flags +Updated : xxHash to r35 + r119: -Fix : overflow address, 32-bits mode (issue 134) +Fix : Issue 134 : extended malicious address space overflow in 32-bits mode for some specific configurations r118: New : LZ4 Streaming API (Fast version), special thanks to Takayuki Matsuoka New : datagen : parametrable synthetic data generator for tests Improved : fuzzer, support more test cases, more parameters, ability to jump to specific test fix : support ppc64le platform (issue 131) -fix : Issue 52 (malicious address space overflow in 32-bits mode when using custom format) +fix : Issue 52 (malicious address space overflow in 32-bits mode when using large custom format) fix : Makefile : minor issue 130 : header files permissions r117: diff --git a/liblz4.pc.in b/liblz4.pc.in new file mode 100644 index 0000000..0d05152 --- /dev/null +++ b/liblz4.pc.in @@ -0,0 +1,14 @@ +# LZ4 - Fast LZ compression algorithm +# Copyright (C) 2011-2014, Yann Collet. +# BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=@INCLUDEDIR@ + +Name: lz4 +Description: fast lossless compression algorithm library +URL: http://code.google.com/p/lz4/ +Version: @VERSION@ +Libs: -L@LIBDIR@ -llz4 +Cflags: -I@INCLUDEDIR@ diff --git a/lz4.c b/lz4.c index 482a8ed..2484fc6 100644 --- a/lz4.c +++ b/lz4.c @@ -55,6 +55,8 @@ #else # define LZ4_ARCH64 0 #endif +#define LZ4_32BITS (sizeof(void*)==4) +#define LZ4_64BITS (sizeof(void*)==8) /* * Little Endian or Big Endian ? @@ -298,7 +300,7 @@ typedef enum { full = 0, partial = 1 } earlyEnd_directive; ****************************/ #if LZ4_ARCH64 -int LZ4_NbCommonBytes (register U64 val) +static int LZ4_NbCommonBytes (register U64 val) { # if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -330,7 +332,7 @@ int LZ4_NbCommonBytes (register U64 val) #else -int LZ4_NbCommonBytes (register U32 val) +static int LZ4_NbCommonBytes (register U32 val) { # if defined(LZ4_BIG_ENDIAN) # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) @@ -417,7 +419,7 @@ static unsigned LZ4_count(const BYTE* pIn, const BYTE* pRef, const BYTE* pInLimi pIn += LZ4_NbCommonBytes(diff); return (unsigned)(pIn - pStart); } - if (sizeof(void*)==8) if ((pIn<(pInLimit-3)) && (A32(pRef) == A32(pIn))) { pIn+=4; pRef+=4; } + if (LZ4_64BITS) if ((pIn<(pInLimit-3)) && (A32(pRef) == A32(pIn))) { pIn+=4; pRef+=4; } if ((pIn<(pInLimit-1)) && (A16(pRef) == A16(pIn))) { pIn+=2; pRef+=2; } if ((pIn> skipStrength; - //if (step>8) step=8; // required for valid forwardIp ; slows down uncompressible data a bit if (unlikely(forwardIp > mflimit)) goto _last_literals; @@ -651,7 +652,7 @@ int LZ4_compress(const char* source, char* dest, int inputSize) if (inputSize < (int)LZ4_64KLIMIT) result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue); else - result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue); + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, 0, notLimited, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue); #if (HEAPMODE) FREEMEM(ctx); @@ -671,7 +672,7 @@ int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, in if (inputSize < (int)LZ4_64KLIMIT) result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue); else - result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue); + result = LZ4_compress_generic((void*)ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue); #if (HEAPMODE) FREEMEM(ctx); @@ -684,29 +685,39 @@ int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, in Experimental : Streaming functions *****************************************/ -void* LZ4_createStream() +/* + * LZ4_initStream + * Use this function once, to init a newly allocated LZ4_stream_t structure + * Return : 1 if OK, 0 if error + */ +void LZ4_resetStream (LZ4_stream_t* LZ4_stream) { - void* lz4s = ALLOCATOR(4, LZ4_STREAMSIZE_U32); - MEM_INIT(lz4s, 0, LZ4_STREAMSIZE); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t)); +} + +LZ4_stream_t* LZ4_createStream(void) +{ + LZ4_stream_t* lz4s = (LZ4_stream_t*)ALLOCATOR(4, LZ4_STREAMSIZE_U32); + LZ4_resetStream(lz4s); return lz4s; } -int LZ4_free (void* LZ4_stream) +int LZ4_freeStream (LZ4_stream_t* LZ4_stream) { FREEMEM(LZ4_stream); return (0); } -int LZ4_loadDict (void* LZ4_dict, const char* dictionary, int dictSize) +int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) { LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; const BYTE* p = (const BYTE*)dictionary; const BYTE* const dictEnd = p + dictSize; const BYTE* base; - LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ - if (dict->initCheck) MEM_INIT(dict, 0, sizeof(LZ4_stream_t_internal)); /* Uninitialized structure detected */ + LZ4_STATIC_ASSERT(LZ4_STREAMSIZE >= sizeof(LZ4_stream_t_internal)); /* A compilation error here means LZ4_STREAMSIZE is not large enough */ + if (dict->initCheck) LZ4_resetStream(LZ4_dict); /* Uninitialized structure detected */ if (dictSize < MINMATCH) { @@ -731,7 +742,7 @@ int LZ4_loadDict (void* LZ4_dict, const char* dictionary, int dictSize) } -void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src) +static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, const BYTE* src) { if ((LZ4_dict->currentOffset > 0x80000000) || ((size_t)LZ4_dict->currentOffset > (size_t)src)) /* address space overflow */ @@ -802,19 +813,18 @@ FORCE_INLINE int LZ4_compress_continue_generic (void* LZ4_stream, const char* so } } - -int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int inputSize) +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) { return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, 0, notLimited); } -int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize) +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize) { return LZ4_compress_continue_generic(LZ4_stream, source, dest, inputSize, maxOutputSize, limitedOutput); } -// Hidden debug function, to force separate dictionary mode +/* Hidden debug function, to force separate dictionary mode */ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int inputSize) { LZ4_stream_t_internal* streamPtr = (LZ4_stream_t_internal*)LZ4_dict; @@ -835,7 +845,7 @@ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* } -int LZ4_saveDict (void* LZ4_dict, char* safeBuffer, int dictSize) +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) { LZ4_stream_t_internal* dict = (LZ4_stream_t_internal*) LZ4_dict; const BYTE* previousDictEnd = dict->dictionary + dict->dictSize; @@ -888,20 +898,16 @@ FORCE_INLINE int LZ4_decompress_generic( const BYTE* const lowLimit = (const BYTE*)dest - dictSize; const BYTE* const dictEnd = (const BYTE*)dictStart + dictSize; -//#define OLD -#ifdef OLD - const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ -#else - const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0}; /* static reduces speed for LZ4_decompress_safe() on GCC64 */ -#endif + const size_t dec32table[] = {4-0, 4-3, 4-2, 4-3, 4-0, 4-0, 4-0, 4-0}; /* note : static reduces speed for LZ4_decompress_safe() on GCC64 */ static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; - const int checkOffset = (endOnInput) && (dictSize < (int)(64 KB)); + const int safeDecode = (endOnInput==endOnInputSize); + const int checkOffset = ((safeDecode) && (dictSize < (int)(64 KB))); /* Special cases */ - if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ - if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ + if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ + if ((endOnInput) && (unlikely(outputSize==0))) return ((inputSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */ if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0?1:-1); @@ -922,9 +928,8 @@ FORCE_INLINE int LZ4_decompress_generic( length += s; } while (likely((endOnInput)?ipLZ4_MAX_INPUT_SIZE)) goto _output_error; /* overflow detection */ - if ((sizeof(void*)==4) && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error; /* quickfix issue 134 */ - if ((endOnInput) && (sizeof(void*)==4) && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error; /* quickfix issue 134 */ + if ((safeDecode) && LZ4_32BITS && unlikely((size_t)(op+length)<(size_t)(op))) goto _output_error; /* overflow detection */ + if ((safeDecode) && LZ4_32BITS && unlikely((size_t)(ip+length)<(size_t)(ip))) goto _output_error; /* overflow detection */ } /* copy literals */ @@ -945,7 +950,7 @@ FORCE_INLINE int LZ4_decompress_generic( memcpy(op, ip, length); ip += length; op += length; - break; /* Necessarily EOF, due to parsing restrictions */ + break; /* Necessarily EOF, due to parsing restrictions */ } LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; @@ -963,8 +968,7 @@ FORCE_INLINE int LZ4_decompress_generic( s = *ip++; length += s; } while (s==255); - //if ((sizeof(void*)==4) && unlikely(length>LZ4_MAX_INPUT_SIZE)) goto _output_error; /* overflow detection */ - if ((sizeof(void*)==4) && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* quickfix issue 134 */ + if ((safeDecode) && LZ4_32BITS && unlikely((size_t)(op+length)<(size_t)op)) goto _output_error; /* overflow detection */ } /* check external dictionary */ @@ -986,9 +990,9 @@ FORCE_INLINE int LZ4_decompress_generic( copySize = length+MINMATCH - copySize; if (copySize > (size_t)((char*)op-dest)) /* overlap */ { - BYTE* const cpy = op + copySize; - const BYTE* ref = (BYTE*)dest; - while (op < cpy) *op++ = *ref++; + BYTE* const endOfMatch = op + copySize; + const BYTE* copyFrom = (BYTE*)dest; + while (op < endOfMatch) *op++ = *copyFrom++; } else { @@ -1002,20 +1006,14 @@ FORCE_INLINE int LZ4_decompress_generic( /* copy repeated sequence */ if (unlikely((op-ref)<(int)STEPSIZE)) { - const size_t dec64 = dec64table[(sizeof(void*)==4) ? 0 : op-ref]; + const size_t dec64 = dec64table[LZ4_32BITS ? 0 : op-ref]; op[0] = ref[0]; op[1] = ref[1]; op[2] = ref[2]; op[3] = ref[3]; -#ifdef OLD - op += 4, ref += 4; ref -= dec32table[op-ref]; - A32(op) = A32(ref); - op += STEPSIZE-4; ref -= dec64; -#else ref += dec32table[op-ref]; A32(op+4) = A32(ref); op += STEPSIZE; ref -= dec64; -#endif } else { LZ4_COPYSTEP(op,ref); } cpy = op + length - (STEPSIZE-4); @@ -1043,26 +1041,23 @@ _output_error: } -int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxOutputSize) +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, full, 0, noDict, NULL, 0); + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, full, 0, noDict, NULL, 0); } -int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxOutputSize) +int LZ4_decompress_safe_partial(const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize) { - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, endOnInputSize, partial, targetOutputSize, noDict, NULL, 0); + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, endOnInputSize, partial, targetOutputSize, noDict, NULL, 0); } int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { - return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, NULL, 0); + return LZ4_decompress_generic(source, dest, 0, originalSize, endOnOutputSize, full, 0, withPrefix64k, NULL, 64 KB); } /* streaming decompression functions */ -//#define LZ4_STREAMDECODESIZE_U32 4 -//#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U32 * sizeof(unsigned int)) -//typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t; typedef struct { const char* dictionary; @@ -1074,21 +1069,27 @@ typedef struct * LZ4_createStreamDecode() * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure. */ -void* LZ4_createStreamDecode() +LZ4_streamDecode_t* LZ4_createStreamDecode(void) { - void* lz4s = ALLOCATOR(sizeof(U32), LZ4_STREAMDECODESIZE_U32); + LZ4_streamDecode_t* lz4s = (LZ4_streamDecode_t*) ALLOCATOR(sizeof(U32), LZ4_STREAMDECODESIZE_U32); MEM_INIT(lz4s, 0, LZ4_STREAMDECODESIZE); return lz4s; } +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + FREEMEM(LZ4_stream); + return 0; +} + /* - * LZ4_setDictDecode + * LZ4_setStreamDecode * Use this function to instruct where to find the dictionary * This function is not necessary if previous data is still available where it was decoded. * Loading a size of 0 is allowed (same effect as no dictionary). * Return : 1 if OK, 0 if error */ -int LZ4_setDictDecode (void* LZ4_streamDecode, const char* dictionary, int dictSize) +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) { LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; lz4sd->dictionary = dictionary; @@ -1103,7 +1104,7 @@ int LZ4_setDictDecode (void* LZ4_streamDecode, const char* dictionary, int dictS If it's not possible, save the relevant part of decoded data into a safe buffer, and indicate where it stands using LZ4_setDictDecode() */ -int LZ4_decompress_safe_continue (void* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) { LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; int result; @@ -1123,7 +1124,7 @@ int LZ4_decompress_safe_continue (void* LZ4_streamDecode, const char* source, ch return result; } -int LZ4_decompress_fast_continue (void* LZ4_streamDecode, const char* source, char* dest, int originalSize) +int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) { LZ4_streamDecode_t_internal* lz4sd = (LZ4_streamDecode_t_internal*) LZ4_streamDecode; int result; @@ -1179,7 +1180,7 @@ int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } -void LZ4_init(LZ4_stream_t_internal* lz4ds, const BYTE* base) +static void LZ4_init(LZ4_stream_t_internal* lz4ds, const BYTE* base) { MEM_INIT(lz4ds, 0, LZ4_STREAMSIZE); lz4ds->bufferStart = base; @@ -1220,7 +1221,7 @@ int LZ4_compress_withState (void* state, const char* source, char* dest, int inp if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue); else - return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue); + return LZ4_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue); } int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize) @@ -1231,7 +1232,7 @@ int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* if (inputSize < (int)LZ4_64KLIMIT) return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue); else - return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, (sizeof(void*)==8) ? byU32 : byPtr, noDict, noDictIssue); + return LZ4_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ4_64BITS ? byU32 : byPtr, noDict, noDictIssue); } /* Obsolete streaming decompression functions */ diff --git a/lz4.h b/lz4.h index 1064fa1..2e90f04 100644 --- a/lz4.h +++ b/lz4.h @@ -64,7 +64,7 @@ extern "C" { **************************************/ int LZ4_compress (const char* source, char* dest, int inputSize); -int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxOutputSize); +int LZ4_decompress_safe (const char* source, char* dest, int compressedSize, int maxDecompressedSize); /* LZ4_compress() : @@ -72,14 +72,14 @@ LZ4_compress() : Destination buffer must be already allocated, and must be sized to handle worst cases situations (input data not compressible) Worst case size evaluation is provided by function LZ4_compressBound() - inputSize : Max supported value is LZ4_MAX_INPUT_VALUE + inputSize : Max supported value is LZ4_MAX_INPUT_SIZE return : the number of bytes written in buffer dest or 0 if the compression fails LZ4_decompress_safe() : compressedSize : is obviously the source size - maxOutputSize : is the size of the destination buffer, which must be already allocated. - return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) + maxDecompressedSize : is the size of the destination buffer, which must be already allocated. + return : the number of bytes decompressed into the destination buffer (necessarily <= maxDecompressedSize) If the destination buffer is not large enough, decoding will stop and output an error code (<0). If the source stream is detected malformed, the function will stop decoding and return a negative result. This function is protected against buffer overflow exploits : @@ -128,16 +128,27 @@ LZ4_compress_limitedOutput() : int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); +/* +LZ4_compress_withState() : + Same compression functions, but using an externally allocated memory space to store compression state. + Use LZ4_sizeofState() to know how much memory must be allocated, + and then, provide it as 'void* state' to compression functions. +*/ +int LZ4_sizeofState(void); +int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); + + /* LZ4_decompress_fast() : originalSize : is the original and therefore uncompressed size return : the number of bytes read from the source buffer (in other words, the compressed size) - If the source stream is malformed, the function will stop decoding and return a negative result. + If the source stream is detected malformed, the function will stop decoding and return a negative result. Destination buffer must be already allocated. Its size must be a minimum of 'originalSize' bytes. - note : This function is a bit faster than LZ4_decompress_safe() - It provides fast decompression and fully respect memory boundaries for properly formed compressed data. - It does not provide full protection against intentionnally modified data stream. - Use this function in a trusted environment (data to decode comes from a trusted source). + note : This function fully respect memory boundaries for properly formed compressed data. + It is a bit faster than LZ4_decompress_safe(). + However, it does not provide any protection against intentionnally modified data stream (malicious input). + Use this function in trusted environment only (data to decode comes from a trusted source). */ int LZ4_decompress_fast (const char* source, char* dest, int originalSize); @@ -145,16 +156,16 @@ int LZ4_decompress_fast (const char* source, char* dest, int originalSize); /* LZ4_decompress_safe_partial() : This function decompress a compressed block of size 'compressedSize' at position 'source' - into output buffer 'dest' of size 'maxOutputSize'. + into destination buffer 'dest' of size 'maxDecompressedSize'. The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, reducing decompression time. - return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) + return : the number of bytes decoded in the destination buffer (necessarily <= maxDecompressedSize) Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. Always control how many bytes were decoded. If the source stream is detected malformed, the function will stop decoding and return a negative result. This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets */ -int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxOutputSize); +int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedSize, int targetOutputSize, int maxDecompressedSize); /*********************************************** @@ -166,53 +177,58 @@ int LZ4_decompress_safe_partial (const char* source, char* dest, int compressedS /* * LZ4_stream_t * information structure to track an LZ4 stream. - * important : set this structure content to zero before first use ! + * important : init this structure content before first use ! */ typedef struct { unsigned int table[LZ4_STREAMSIZE_U32]; } LZ4_stream_t; /* - * If you prefer dynamic allocation methods, - * LZ4_createStream - * provides a pointer (void*) towards an initialized LZ4_stream_t structure. - * LZ4_free just frees it. + * LZ4_resetStream + * Use this function to init a newly allocated LZ4_stream_t structure + * You can also reset an existing LZ4_stream_t structure */ -void* LZ4_createStream(); -int LZ4_free (void* LZ4_stream); +void LZ4_resetStream (LZ4_stream_t* LZ4_stream); +/* + * If you prefer dynamic allocation methods, + * LZ4_createStream will allocate and initialize an LZ4_stream_t structure + * LZ4_freeStream releases its memory. + */ +LZ4_stream_t* LZ4_createStream(void); +int LZ4_freeStream (LZ4_stream_t* LZ4_stream); /* * LZ4_loadDict * Use this function to load a static dictionary into LZ4_stream. * Any previous data will be forgotten, only 'dictionary' will remain in memory. - * Loading a size of 0 is allowed (same effect as init). + * Loading a size of 0 is allowed. * Return : 1 if OK, 0 if error */ -int LZ4_loadDict (void* LZ4_stream, const char* dictionary, int dictSize); +int LZ4_loadDict (LZ4_stream_t* LZ4_stream, const char* dictionary, int dictSize); /* * LZ4_compress_continue * Compress data block 'source', using blocks compressed before as dictionary to improve compression ratio * Previous data blocks are assumed to still be present at their previous location. */ -int LZ4_compress_continue (void* LZ4_stream, const char* source, char* dest, int inputSize); +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); /* * LZ4_compress_limitedOutput_continue * Same as before, but also specify a maximum target compressed size (maxOutputSize) * If objective cannot be met, compression exits, and returns a zero. */ -int LZ4_compress_limitedOutput_continue (void* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize); +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize, int maxOutputSize); /* * LZ4_saveDict - * If previously compressed data block is not guaranteed to remain at its previous memory location + * If previously compressed data block is not guaranteed to remain available at its memory location * save it into a safe place (char* safeBuffer) * Note : you don't need to call LZ4_loadDict() afterwards, * dictionary is immediately usable, you can therefore call again LZ4_compress_continue() * Return : 1 if OK, 0 if error * Note : any dictSize > 64 KB will be interpreted as 64KB. */ -int LZ4_saveDict (void* LZ4_stream, char* safeBuffer, int dictSize); +int LZ4_saveDict (LZ4_stream_t* LZ4_stream, char* safeBuffer, int dictSize); /************************************************ @@ -224,38 +240,37 @@ int LZ4_saveDict (void* LZ4_stream, char* safeBuffer, int dictSize); /* * LZ4_streamDecode_t * information structure to track an LZ4 stream. - * important : set this structure content to zero before first use ! + * important : init this structure content using LZ4_setStreamDecode or memset() before first use ! */ typedef struct { unsigned int table[LZ4_STREAMDECODESIZE_U32]; } LZ4_streamDecode_t; /* - * If you prefer dynamic allocation methods, - * LZ4_createStreamDecode() - * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure. - * LZ4_free just frees it. - */ -void* LZ4_createStreamDecode(); -int LZ4_free (void* LZ4_stream); /* yes, it's the same one as for compression */ - -/* -*_continue() : - These decoding functions allow decompression of multiple blocks in "streaming" mode. - Previously decoded blocks must still be available at the memory position where they were decoded. - If it's not possible, save the relevant part of decoded data into a safe buffer, - and indicate where it stands using LZ4_setDictDecode() -*/ -int LZ4_decompress_safe_continue (void* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize); -int LZ4_decompress_fast_continue (void* LZ4_streamDecode, const char* source, char* dest, int originalSize); - -/* - * LZ4_setDictDecode + * LZ4_setStreamDecode * Use this function to instruct where to find the dictionary. * This function can be used to specify a static dictionary, * or to instruct where to find some previously decoded data saved into a different memory space. * Setting a size of 0 is allowed (same effect as no dictionary). * Return : 1 if OK, 0 if error */ -int LZ4_setDictDecode (void* LZ4_streamDecode, const char* dictionary, int dictSize); +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); + +/* + * If you prefer dynamic allocation methods, + * LZ4_createStreamDecode will allocate and initialize an LZ4_streamDecode_t structure + * LZ4_freeStreamDecode releases its memory. + */ +LZ4_streamDecode_t* LZ4_createStreamDecode(void); +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); + +/* +*_continue() : + These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks must still be available at the memory position where they were decoded. + If it's not possible, save the relevant part of decoded data into a safe buffer, + and indicate where its new address using LZ4_setDictDecode() +*/ +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize); +int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize); /* @@ -271,7 +286,6 @@ int LZ4_decompress_fast_usingDict (const char* source, char* dest, int originalS - /************************************** Obsolete Functions **************************************/ @@ -281,14 +295,19 @@ These function names are deprecated and should no longer be used. They are only provided here for compatibility with older user programs. - LZ4_uncompress is the same as LZ4_decompress_fast - LZ4_uncompress_unknownOutputSize is the same as LZ4_decompress_safe -*/ -int LZ4_uncompress (const char* source, char* dest, int outputSize); -int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); +These function prototypes are now disabled; uncomment them if you really need them. +It is highly recommended to stop using these functions and migrated to newer ones */ +/* int LZ4_uncompress (const char* source, char* dest, int outputSize); */ +/* int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); */ -/* Obsolete functions for externally allocated state; use streaming interface instead */ -int LZ4_sizeofState(void); -int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +/* + * If you prefer dynamic allocation methods, + * LZ4_createStreamDecode() + * provides a pointer (void*) towards an initialized LZ4_streamDecode_t structure. + * LZ4_free just frees it. + */ +/* void* LZ4_createStreamDecode(void); */ +/*int LZ4_free (void* LZ4_stream); yes, it's the same one as for compression */ /* Obsolete streaming functions; use new streaming interface whenever possible */ void* LZ4_create (const char* inputBuffer); diff --git a/programs/Makefile b/programs/Makefile index a3e01a4..5127c5d 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -1,4 +1,4 @@ -# ################################################################ +# ########################################################################## # LZ4 programs - Makefile # Copyright (C) Yann Collet 2011-2014 # GPL v2 License @@ -20,7 +20,7 @@ # You can contact the author at : # - LZ4 source repository : http://code.google.com/p/lz4/ # - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c -# ################################################################ +# ########################################################################## # lz4 : Command Line Utility, supporting gzip-like arguments # lz4c : CLU, supporting also legacy lz4demo arguments # lz4c32: Same as lz4c, but forced to compile in 32-bits mode @@ -28,13 +28,13 @@ # fuzzer32: Same as fuzzer, but forced to compile in 32-bits mode # fullbench : Precisely measure speed for each LZ4 function variant # fullbench32: Same as fullbench, but forced to compile in 32-bits mode -# ################################################################ +# ########################################################################## -RELEASE=r119 +RELEASE=rc120 DESTDIR= PREFIX=/usr CC:=$(CC) -CFLAGS+= -std=c99 -O3 -Wall -W -Wundef -DLZ4_VERSION=\"$(RELEASE)\" +CFLAGS+= -std=c99 -O3 -Wall -Wextra -Wundef -Wshadow -Wstrict-prototypes -DLZ4_VERSION=\"$(RELEASE)\" FLAGS= -I.. $(CFLAGS) BINDIR=$(PREFIX)/bin diff --git a/programs/bench.c b/programs/bench.c index 8d68098..6af86ac 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -32,7 +32,7 @@ // Unix Large Files support (>4GB) #define _FILE_OFFSET_BITS 64 #if (defined(__sun__) && (!defined(__LP64__))) // Sun Solaris 32-bits requires specific definitions -# define _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE #elif ! defined(__LP64__) // No point defining Large file for 64 bit # define _LARGEFILE64_SOURCE #endif @@ -155,7 +155,7 @@ void BMK_SetNbIterations(int nbLoops) DISPLAY("- %i iterations -\n", nbIterations); } -void BMK_SetPause() { BMK_pause = 1; } +void BMK_SetPause(void) { BMK_pause = 1; } //********************************************************* @@ -178,7 +178,7 @@ static int BMK_GetMilliStart() #else -static int BMK_GetMilliStart() +static int BMK_GetMilliStart(void) { // Based on newer gettimeofday() // Use GetMilliSpan to correct for rollover diff --git a/programs/bench.h b/programs/bench.h index a0b7ed2..7e59e52 100644 --- a/programs/bench.h +++ b/programs/bench.h @@ -32,7 +32,7 @@ int BMK_benchFile(char** fileNamesTable, int nbFiles, int cLevel); // Parameters void BMK_SetBlocksize(int bsize); void BMK_SetNbIterations(int nbLoops); -void BMK_SetPause(); +void BMK_SetPause(void); diff --git a/programs/datagen.c b/programs/datagen.c index 0109d54..706c30f 100644 --- a/programs/datagen.c +++ b/programs/datagen.c @@ -184,7 +184,7 @@ static void CDG_generate(U64 size, U32* seed, double proba) } -int CDG_usage() +int CDG_usage(void) { DISPLAY( "Compressible data generator\n"); DISPLAY( "Usage :\n"); diff --git a/programs/fullbench.c b/programs/fullbench.c index 0ed8488..dd02357 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -167,7 +167,7 @@ void BMK_SetNbIterations(int nbLoops) DISPLAY("- %i iterations -\n", nbIterations); } -void BMK_SetPause() +void BMK_SetPause(void) { BMK_pause = 1; } @@ -192,7 +192,7 @@ static int BMK_GetMilliStart() #else -static int BMK_GetMilliStart() +static int BMK_GetMilliStart(void) { // Based on newer gettimeofday() // Use GetMilliSpan to correct for rollover @@ -631,7 +631,7 @@ int usage(char* exename) return 0; } -int usage_advanced() +int usage_advanced(void) { DISPLAY( "\nAdvanced options :\n"); DISPLAY( " -c# : test only compression function # [1-%i]\n", NB_COMPRESSION_ALGORITHMS); diff --git a/programs/fuzzer.c b/programs/fuzzer.c index b745be9..b302078 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -28,6 +28,7 @@ #define _CRT_SECURE_NO_WARNINGS // fgets #ifdef _MSC_VER /* Visual Studio */ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ #endif @@ -66,7 +67,7 @@ Constants **************************************/ #ifndef LZ4_VERSION -# define LZ4_VERSION "rc118" +# define LZ4_VERSION "" #endif #define NB_ATTEMPTS (1<<16) @@ -102,7 +103,7 @@ static int displayLevel = 2; /********************************************************* Fuzzer functions *********************************************************/ -static int FUZ_GetMilliStart() +static int FUZ_GetMilliStart(void) { struct timeb tb; int nCount; @@ -172,7 +173,8 @@ void FUZ_fillCompressibleNoiseBuffer(void* buffer, int bufferSize, double proba, } -int FUZ_Issue52() +// No longer useful; included into issue 134 +int FUZ_Issue52(void) { char* output; char* input; @@ -185,8 +187,7 @@ int FUZ_Issue52() input[0] = 0x0F; input[1] = 0x00; input[2] = 0x00; - for(i = 3; i < 16840000; i++) - input[i] = 0xff; + for(i = 3; i < 16840000; i++) input[i] = 0xff; r = LZ4_decompress_safe(input, output, 20<<20, 20<<20); free(input); @@ -197,46 +198,48 @@ int FUZ_Issue52() #define MAX_NB_BUFF_I134 150 -#define BLOCKSIZE_I134 64 MB -int FUZ_Issue134() +#define BLOCKSIZE_I134 (32 MB) +int FUZ_Issue134(void) { char* buffers[MAX_NB_BUFF_I134+1] = {0}; - int i, nbBuff; + int i, nbBuff=0; + int highAddress = 0; - printf("Overflow test issue 134 : "); + printf("Overflow tests : "); // Only possible in 32-bits if (sizeof(void*)==8) { - printf("64 bits mode : not applicable \n"); + printf("64 bits mode : no overflow \n"); + fflush(stdout); return 0; } - printf(" "); - for (nbBuff=0; nbBuff < MAX_NB_BUFF_I134; nbBuff++) + buffers[0] = (char*)malloc(BLOCKSIZE_I134); + buffers[1] = (char*)malloc(BLOCKSIZE_I134); + if ((!buffers[0]) || (!buffers[1])) { - printf("\b\b\b\b%3i ", nbBuff); + printf("not enough memory for tests \n"); + return 0; + } + for (nbBuff=2; nbBuff < MAX_NB_BUFF_I134; nbBuff++) + { + printf("%3i \b\b\b\b", nbBuff); buffers[nbBuff] = (char*)malloc(BLOCKSIZE_I134); - if (buffers[nbBuff]==NULL) + //printf("%08X ", (U32)(size_t)(buffers[nbBuff])); + fflush(stdout); + + if (((size_t)buffers[nbBuff] > (size_t)0x80000000) && (!highAddress)) { - printf(" : unable to allocate more memory\n"); - for (i=0 ; i 0) // (size_t) 0x80000000) + if (buffers[nbBuff]==NULL) goto _endOfTests; + { - printf("Testing memory buffer address %X , ", (U32)(size_t)(buffers[nbBuff])); - printf("Creating a payload designed to fail\n"); - buffers[++nbBuff] = (char*)malloc(BLOCKSIZE_I134); - if (buffers[nbBuff]==NULL) - { - printf("failed to test (no more memory)\n"); - for (i=0 ; i0) goto _overflowError; input[0] = 0x1F; // Match length overflow input[1] = 0x01; input[2] = 0x01; input[3] = 0x00; r = LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); - printf(" Match overflow detected (return = %i < 0)\n",r); - if (nbBuff>=2) - { - output = buffers[nbBuff-2]; - memset(input, 0, BLOCKSIZE_I134); - input[0] = 0xF0; // Literal length overflow - input[1] = 0xFF; - input[2] = 0xFF; - input[3] = 0xFF; - r = LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); - printf(" Literal overflow detected (return = %i < 0)\n",r); - input[0] = 0x1F; // Match length overflow - input[1] = 0x01; - input[2] = 0x01; - input[3] = 0x00; - r = LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); - printf(" Match overflow detected (return = %i < 0)\n",r); - } - } - free (buffers[nbBuff]); nbBuff--; + if (r>0) goto _overflowError; + + output = buffers[nbBuff-2]; // Reverse in/out pointer order + input[0] = 0xF0; // Literal length overflow + input[1] = 0xFF; + input[2] = 0xFF; + input[3] = 0xFF; + r = LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); + if (r>0) goto _overflowError; + input[0] = 0x1F; // Match length overflow + input[1] = 0x01; + input[2] = 0x01; + input[3] = 0x00; + r = LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); + if (r>0) goto _overflowError; } } + nbBuff++; +_endOfTests: for (i=0 ; i= 1) usage(); @@ -281,7 +281,7 @@ int badusage() } -void waitEnter() +void waitEnter(void) { DISPLAY("Press enter to continue...\n"); getchar(); diff --git a/programs/lz4io.c b/programs/lz4io.c index 49caed3..8bd10fc 100644 --- a/programs/lz4io.c +++ b/programs/lz4io.c @@ -149,7 +149,7 @@ static const int one = 1; //************************************** static int displayLevel = 0; // 0 : no display // 1: errors // 2 : + result + interaction + warnings ; // 3 : + progression; // 4 : + information static int overwrite = 1; -static int blockSizeId = LZ4S_BLOCKSIZEID_DEFAULT; +static int globalBlockSizeId = LZ4S_BLOCKSIZEID_DEFAULT; static int blockChecksum = 0; static int streamChecksum = 1; static int blockIndependence = 1; @@ -198,18 +198,16 @@ int LZ4IO_setBlockSizeID(int bsid) { static const int blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB }; if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return -1; - blockSizeId = bsid; - return blockSizeTable[blockSizeId-minBlockSizeID]; + globalBlockSizeId = bsid; + return blockSizeTable[globalBlockSizeId-minBlockSizeID]; } - int LZ4IO_setBlockMode(blockMode_t blockMode) { blockIndependence = (blockMode == independentBlocks); return blockIndependence; } - /* Default setting : no checksum */ int LZ4IO_setBlockChecksumMode(int xxhash) { @@ -217,7 +215,6 @@ int LZ4IO_setBlockChecksumMode(int xxhash) return blockChecksum; } - /* Default setting : checksum enabled */ int LZ4IO_setStreamChecksumMode(int xxhash) { @@ -225,7 +222,6 @@ int LZ4IO_setStreamChecksumMode(int xxhash) return streamChecksum; } - /* Default setting : 0 (no notification) */ int LZ4IO_setNotificationLevel(int level) { @@ -303,7 +299,6 @@ int LZ4IO_compressFilename_Legacy(char* input_filename, char* output_filename, i char* out_buff; FILE* finput; FILE* foutput; - int displayLevel = (compressionlevel>0); clock_t start, end; size_t sizeCheck; @@ -368,7 +363,7 @@ int LZ4IO_compressFilename_Legacy(char* input_filename, char* output_filename, i static void* LZ4IO_LZ4_createStream (const char* inputBuffer) { (void)inputBuffer; - return LZ4_createStream(); + return calloc(4, LZ4_STREAMSIZE_U32); } static int LZ4IO_LZ4_compress_limitedOutput_continue (void* ctx, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel) @@ -377,6 +372,11 @@ static int LZ4IO_LZ4_compress_limitedOutput_continue (void* ctx, const char* sou return LZ4_compress_limitedOutput_continue(ctx, source, dest, inputSize, maxOutputSize); } +static int LZ4IO_LZ4_saveDict (void* LZ4_stream, char* safeBuffer, int dictSize) +{ + return LZ4_saveDict ((LZ4_stream_t*) LZ4_stream, safeBuffer, dictSize); +} + static int LZ4IO_LZ4_slideInputBufferHC (void* ctx, char* buffer, int size) { (void)size; (void)buffer; @@ -385,6 +385,12 @@ static int LZ4IO_LZ4_slideInputBufferHC (void* ctx, char* buffer, int size) } +static int LZ4IO_free (void* ptr) +{ + free(ptr); + return 0; +} + static int compress_file_blockDependency(char* input_filename, char* output_filename, int compressionlevel) { void* (*initFunction) (const char*); @@ -412,19 +418,19 @@ static int compress_file_blockDependency(char* input_filename, char* output_file { initFunction = LZ4IO_LZ4_createStream; compressionFunction = LZ4IO_LZ4_compress_limitedOutput_continue; - nextBlockFunction = LZ4_saveDict; - freeFunction = LZ4_free; + nextBlockFunction = LZ4IO_LZ4_saveDict; + freeFunction = LZ4IO_free; } else { initFunction = LZ4_createHC; compressionFunction = LZ4_compressHC2_limitedOutput_continue; nextBlockFunction = LZ4IO_LZ4_slideInputBufferHC; - freeFunction = LZ4_free; + freeFunction = LZ4IO_free; } get_fileHandle(input_filename, output_filename, &finput, &foutput); - blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId); + blockSize = LZ4S_GetBlockSize_FromBlockId (globalBlockSizeId); // Allocate Memory inputBufferSize = 64 KB + blockSize; @@ -442,7 +448,7 @@ static int compress_file_blockDependency(char* input_filename, char* output_file *(out_buff+4) |= (blockIndependence & _1BIT) << 5; *(out_buff+4) |= (blockChecksum & _1BIT) << 4; *(out_buff+4) |= (streamChecksum & _1BIT) << 2; - *(out_buff+5) = (char)((blockSizeId & _3BITS) << 4); + *(out_buff+5) = (char)((globalBlockSizeId & _3BITS) << 4); checkbits = XXH32((out_buff+4), 2, LZ4S_CHECKSUM_SEED); checkbits = LZ4S_GetCheckBits_FromXXH(checkbits); *(out_buff+6) = (unsigned char) checkbits; @@ -570,7 +576,7 @@ int LZ4IO_compressFilename(char* input_filename, char* output_filename, int comp if (compressionLevel <= 3) compressionFunction = LZ4_compress_limitedOutput_local; else { compressionFunction = LZ4_compressHC2_limitedOutput; } get_fileHandle(input_filename, output_filename, &finput, &foutput); - blockSize = LZ4S_GetBlockSize_FromBlockId (blockSizeId); + blockSize = LZ4S_GetBlockSize_FromBlockId (globalBlockSizeId); // Allocate Memory in_buff = (char*)malloc(blockSize); @@ -585,7 +591,7 @@ int LZ4IO_compressFilename(char* input_filename, char* output_filename, int comp *(headerBuffer+4) |= (blockIndependence & _1BIT) << 5; *(headerBuffer+4) |= (blockChecksum & _1BIT) << 4; *(headerBuffer+4) |= (streamChecksum & _1BIT) << 2; - *(headerBuffer+5) = (char)((blockSizeId & _3BITS) << 4); + *(headerBuffer+5) = (char)((globalBlockSizeId & _3BITS) << 4); checkbits = XXH32((headerBuffer+4), 2, LZ4S_CHECKSUM_SEED); checkbits = LZ4S_GetCheckBits_FromXXH(checkbits); *(headerBuffer+6) = (unsigned char) checkbits; @@ -749,7 +755,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) size_t sizeCheck; int blockChecksumFlag, streamChecksumFlag, blockIndependenceFlag; void* streamChecksumState=NULL; - int (*decompressionFunction)(void* ctx, const char* src, char* dst, int cSize, int maxOSize) = LZ4_decompress_safe_continue; + int (*decompressionFunction)(LZ4_streamDecode_t* ctx, const char* src, char* dst, int cSize, int maxOSize) = LZ4_decompress_safe_continue; LZ4_streamDecode_t ctx; // init @@ -841,7 +847,7 @@ static unsigned long long decodeLZ4S(FILE* finput, FILE* foutput) { // handle dictionary for streaming memcpy(in_buff + blockSize - 64 KB, out_buff, 64 KB); - LZ4_setDictDecode(&ctx, out_buff, 64 KB); + LZ4_setStreamDecode(&ctx, out_buff, 64 KB); out_start = out_buff + 64 KB; } } diff --git a/programs/xxhash.c b/programs/xxhash.c index 8304ec2..99bbe7c 100644 --- a/programs/xxhash.c +++ b/programs/xxhash.c @@ -1,475 +1,832 @@ -/* -xxHash - Fast Hash algorithm -Copyright (C) 2012-2014, Yann Collet. -BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -You can contact the author at : -- xxHash source repository : http://code.google.com/p/xxhash/ -*/ - - -//************************************** -// Tuning parameters -//************************************** -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. -// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. -// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). -#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_USE_UNALIGNED_ACCESS 1 -#endif - -// XXH_ACCEPT_NULL_INPUT_POINTER : -// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. -// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. -// This option has a very small performance cost (only measurable on small inputs). -// By default, this option is disabled. To enable it, uncomment below define : -//#define XXH_ACCEPT_NULL_INPUT_POINTER 1 - -// XXH_FORCE_NATIVE_FORMAT : -// By default, xxHash library provides endian-independant Hash values, based on little-endian convention. -// Results are therefore identical for little-endian and big-endian CPU. -// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. -// Should endian-independance be of no importance for your application, you may set the #define below to 1. -// It will improve speed for Big-endian CPU. -// This option has no impact on Little_Endian CPU. -#define XXH_FORCE_NATIVE_FORMAT 0 - - -//************************************** -// Compiler Specific Options -//************************************** -// Disable some Visual warning messages -#ifdef _MSC_VER // Visual Studio -# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant -#endif - -#ifdef _MSC_VER // Visual Studio -# define FORCE_INLINE static __forceinline -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - - -//************************************** -// Includes & Memory related functions -//************************************** -#include "xxhash.h" -// Modify the local functions below should you wish to use some other memory related routines -// for malloc(), free() -#include -FORCE_INLINE void* XXH_malloc(size_t s) { return malloc(s); } -FORCE_INLINE void XXH_free (void* p) { free(p); } -// for memcpy() -#include -FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - - -//************************************** -// Basic Types -//************************************** -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - -#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# ifdef __IBMC__ -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif - -typedef struct _U32_S { U32 v; } _PACKED U32_S; - -#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# pragma pack(pop) -#endif - -#define A32(x) (((U32_S *)(x))->v) - - -//*************************************** -// Compiler-specific Functions and Macros -//*************************************** -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -// Note : although _rotl exists for minGW (GCC under windows), performance seems poor -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif - -#if defined(_MSC_VER) // Visual Studio -# define XXH_swap32 _byteswap_ulong -#elif GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -#else -static inline U32 XXH_swap32 (U32 x) { - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff );} -#endif - - -//************************************** -// Constants -//************************************** -#define PRIME32_1 2654435761U -#define PRIME32_2 2246822519U -#define PRIME32_3 3266489917U -#define PRIME32_4 668265263U -#define PRIME32_5 374761393U - - -//************************************** -// Architecture Macros -//************************************** -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; -#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch - static const int one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) -#endif - - -//************************************** -// Macros -//************************************** -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations - - -//**************************** -// Memory reads -//**************************** -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; - -FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); - else - return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); -} - -FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } - - -//**************************** -// Simple Hash Functions -//**************************** -FORCE_INLINE U32 XXH32_endian_align(const void* input, int len, U32 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U32 h32; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { len=0; p=(const BYTE*)(size_t)16; } -#endif - - if (len>=16) - { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; - - do +/* +xxHash - Fast Hash algorithm +Copyright (C) 2012-2014, Yann Collet. +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash source repository : http://code.google.com/p/xxhash/ +*/ + + +//************************************** +// Tuning parameters +//************************************** +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. +// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected. +// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance. +// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32). +#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_USE_UNALIGNED_ACCESS 1 +#endif + +// XXH_ACCEPT_NULL_INPUT_POINTER : +// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. +// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. +// This option has a very small performance cost (only measurable on small inputs). +// By default, this option is disabled. To enable it, uncomment below define : +// #define XXH_ACCEPT_NULL_INPUT_POINTER 1 + +// XXH_FORCE_NATIVE_FORMAT : +// By default, xxHash library provides endian-independant Hash values, based on little-endian convention. +// Results are therefore identical for little-endian and big-endian CPU. +// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. +// Should endian-independance be of no importance for your application, you may set the #define below to 1. +// It will improve speed for Big-endian CPU. +// This option has no impact on Little_Endian CPU. +#define XXH_FORCE_NATIVE_FORMAT 0 + +//************************************** +// Compiler Specific Options +//************************************** +// Disable some Visual warning messages +#ifdef _MSC_VER // Visual Studio +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#endif + +#ifdef _MSC_VER // Visual Studio +# define FORCE_INLINE static __forceinline +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +//************************************** +// Includes & Memory related functions +//************************************** +#include "xxhash.h" +// Modify the local functions below should you wish to use some other memory related routines +// for malloc(), free() +#include +FORCE_INLINE void* XXH_malloc(size_t s) { return malloc(s); } +FORCE_INLINE void XXH_free (void* p) { free(p); } +// for memcpy() +#include +FORCE_INLINE void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } + + +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# ifdef __IBMC__ +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct _U32_S { U32 v; } _PACKED U32_S; +typedef struct _U64_S { U64 v; } _PACKED U64_S; + +#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# pragma pack(pop) +#endif + +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) + + +//*************************************** +// Compiler-specific Functions and Macros +//*************************************** +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// Note : although _rotl exists for minGW (GCC under windows), performance seems poor +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) // Visual Studio +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static inline U32 XXH_swap32 (U32 x) { + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff );} +static inline U64 XXH_swap64 (U64 x) { + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL);} +#endif + + +//************************************** +// Constants +//************************************** +#define PRIME32_1 2654435761U +#define PRIME32_2 2246822519U +#define PRIME32_3 3266489917U +#define PRIME32_4 668265263U +#define PRIME32_5 374761393U + +#define PRIME64_1 11400714785074694791ULL +#define PRIME64_2 14029467366897019727ULL +#define PRIME64_3 1609587929392839161ULL +#define PRIME64_4 9650029242287828579ULL +#define PRIME64_5 2870177450012600261ULL + +//************************************** +// Architecture Macros +//************************************** +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; +#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch + static const int one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one)) +#endif + + +//************************************** +// Macros +//************************************** +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations + + +//**************************** +// Memory reads +//**************************** +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE U32 XXH_readLE32_align(const U32* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr)); + else + return endian==XXH_littleEndian ? *ptr : XXH_swap32(*ptr); +} + +FORCE_INLINE U32 XXH_readLE32(const U32* ptr, XXH_endianess endian) { return XXH_readLE32_align(ptr, endian, XXH_unaligned); } + +FORCE_INLINE U64 XXH_readLE64_align(const U64* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr)); + else + return endian==XXH_littleEndian ? *ptr : XXH_swap64(*ptr); +} + +FORCE_INLINE U64 XXH_readLE64(const U64* ptr, XXH_endianess endian) { return XXH_readLE64_align(ptr, endian, XXH_unaligned); } + + +//**************************** +// Simple Hash Functions +//**************************** +FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align((const U32*)p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; } +#endif + + if (len>=16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do + { + v1 += XXH_get32bits(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_get32bits(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_get32bits(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_get32bits(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } + else + { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p<=bEnd-4) + { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +U32 XXH32(const void* input, unsigned int len, U32 seed) +{ +#if 0 + // Simple version, good for code maintenance, but unfortunately slow for small inputs + void* state = XXH32_init(seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + +FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align((const U64*)p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)32; } +#endif + + if (len>=32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { - v1 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); + v1 += XXH_get64bits(p) * PRIME64_2; p+=8; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; + v2 += XXH_get64bits(p) * PRIME64_2; p+=8; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; + v3 += XXH_get64bits(p) * PRIME64_2; p+=8; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; + v4 += XXH_get64bits(p) * PRIME64_2; p+=8; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; h64 ^= v1; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; h64 ^= v2; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; h64 ^= v3; + h64 = h64 * PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; h64 ^= v4; + h64 = h64 * PRIME64_1 + PRIME64_4; + } + else + { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p<=bEnd-8) + { + U64 k1 = XXH_get64bits(p); + k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } - else - { - h32 = seed + PRIME32_5; - } +#if 1 - h32 += (U32) len; + if (p<=bEnd-4) + { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } - while (p<=bEnd-4) - { - h32 += XXH_readLE32_align((const U32*)p, endian, align) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -U32 XXH32(const void* input, int len, U32 seed) -{ -#if 0 - // Simple version, good for code maintenance, but unfortunately slow for small inputs - void* state = XXH32_init(seed); - XXH32_update(state, input, len); - return XXH32_digest(state); + #else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; -# if !defined(XXH_USE_UNALIGNED_ACCESS) - if ((((size_t)input) & 3)) // Input is aligned, let's leverage the speed advantage + if( p= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough - return sizeof(struct XXH_state32_t); -} - - -XXH_errorcode XXH32_resetState(void* state_in, U32 seed) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - state->seed = seed; - state->v1 = seed + PRIME32_1 + PRIME32_2; - state->v2 = seed + PRIME32_2; - state->v3 = seed + 0; - state->v4 = seed - PRIME32_1; - state->total_len = 0; - state->memsize = 0; - return XXH_OK; -} - - -void* XXH32_init (U32 seed) -{ - void* state = XXH_malloc (sizeof(struct XXH_state32_t)); - XXH32_resetState(state, seed); - return state; -} - - -FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif - - state->total_len += len; - - if (state->memsize + len < 16) // fill in tmp buffer - { - XXH_memcpy(state->memory + state->memsize, input, len); - state->memsize += len; - return XXH_OK; - } - - if (state->memsize) // some data left from previous update - { - XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + // Copy the remaining bytes + U64 k1 = PRIME64_5; + BYTE* _k1 = (BYTE*)&k1; + switch( (size_t)(bEnd-p)) // Yes, I tried multiple ways of doing this memcopy { - const U32* p32 = (const U32*)state->memory; - state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; - state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; - state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; - state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + case 7: *_k1++ = *p++; + case 6: *_k1++ = *p++; + case 5: *_k1++ = *p++; + case 4: *_k1++ = *p++; + case 3: *_k1++ = *p++; + case 2: *_k1++ = *p++; + case 1: *_k1++ = *p++; } - p += 16-state->memsize; - state->memsize = 0; + + k1 *= PRIME64_5; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_3; h64 ^= k1; + h64 = XXH_rotl64(h64, 11) * PRIME64_1 + PRIME64_4; } - if (p <= bEnd-16) - { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; +#endif + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} - do - { - v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; - v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; - v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; - v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; - } while (p<=limit); + +unsigned long long XXH64(const void* input, unsigned int len, unsigned long long seed) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + +# if !defined(XXH_USE_UNALIGNED_ACCESS) + if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage + { + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } +# endif + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +} + +//**************************** +// Advanced Hash Functions +//**************************** + +struct XXH_state32_t +{ + U64 total_len; + U32 seed; + U32 v1; + U32 v2; + U32 v3; + U32 v4; + int memsize; + char memory[16]; +}; + +struct XXH_state64_t +{ + U64 total_len; + U64 seed; + U64 v1; + U64 v2; + U64 v3; + U64 v4; + int memsize; + char memory[32]; +}; + + +int XXH32_sizeofState(void) +{ + XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough + return sizeof(struct XXH_state32_t); +} + +int XXH64_sizeofState(void) +{ + XXH_STATIC_ASSERT(XXH64_SIZEOFSTATE >= sizeof(struct XXH_state64_t)); // A compilation error here means XXH64_SIZEOFSTATE is not large enough + return sizeof(struct XXH_state64_t); +} + + +XXH_errorcode XXH32_resetState(void* state_in, U32 seed) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + state->seed = seed; + state->v1 = seed + PRIME32_1 + PRIME32_2; + state->v2 = seed + PRIME32_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME32_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + +XXH_errorcode XXH64_resetState(void* state_in, unsigned long long seed) +{ + struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + state->seed = seed; + state->v1 = seed + PRIME64_1 + PRIME64_2; + state->v2 = seed + PRIME64_2; + state->v3 = seed + 0; + state->v4 = seed - PRIME64_1; + state->total_len = 0; + state->memsize = 0; + return XXH_OK; +} + + +void* XXH32_init (U32 seed) +{ + void* state = XXH_malloc (sizeof(struct XXH_state32_t)); + XXH32_resetState(state, seed); + return state; +} + +void* XXH64_init (unsigned long long seed) +{ + void* state = XXH_malloc (sizeof(struct XXH_state64_t)); + XXH64_resetState(state, seed); + return state; +} + + +FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 16) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 16-state->memsize); + { + const U32* p32 = (const U32*)state->memory; + state->v1 += XXH_readLE32(p32, endian) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; + state->v2 += XXH_readLE32(p32, endian) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; + state->v3 += XXH_readLE32(p32, endian) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++; + state->v4 += XXH_readLE32(p32, endian) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) + { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do + { + v1 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; + v2 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; + v3 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; + v4 += XXH_readLE32((const U32*)p, endian) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) +{ + struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; + const BYTE * p = (const BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U32 h32; + + if (state->total_len >= 16) + { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } + else + { + h32 = state->seed + PRIME32_5; + } + + h32 += (U32) state->total_len; + + while (p<=bEnd-4) + { + h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +U32 XXH32_intermediateDigest (void* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); + else + return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); +} + + +U32 XXH32_digest (void* state_in) +{ + U32 h32 = XXH32_intermediateDigest(state_in); + + XXH_free(state_in); + + return h32; +} + + +FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* input, int len, XXH_endianess endian) +{ + struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) // fill in tmp buffer + { + XXH_memcpy(state->memory + state->memsize, input, len); + state->memsize += len; + return XXH_OK; + } + + if (state->memsize) // some data left from previous update + { + XXH_memcpy(state->memory + state->memsize, input, 32-state->memsize); + { + const U64* p64 = (const U64*)state->memory; + state->v1 += XXH_readLE64(p64, endian) * PRIME64_2; state->v1 = XXH_rotl64(state->v1, 31); state->v1 *= PRIME64_1; p64++; + state->v2 += XXH_readLE64(p64, endian) * PRIME64_2; state->v2 = XXH_rotl64(state->v2, 31); state->v2 *= PRIME64_1; p64++; + state->v3 += XXH_readLE64(p64, endian) * PRIME64_2; state->v3 = XXH_rotl64(state->v3, 31); state->v3 *= PRIME64_1; p64++; + state->v4 += XXH_readLE64(p64, endian) * PRIME64_2; state->v4 = XXH_rotl64(state->v4, 31); state->v4 *= PRIME64_1; p64++; + } + p += 32-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-32) + { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do + { + v1 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; p+=8; + v2 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; p+=8; + v3 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; p+=8; + v4 += XXH_readLE64((const U64*)p, endian) * PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) + { + XXH_memcpy(state->memory, p, bEnd-p); + state->memsize = (int)(bEnd-p); + } + + return XXH_OK; +} + +XXH_errorcode XXH64_update (void* state_in, const void* input, unsigned int len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE U64 XXH64_intermediateDigest_endian (void* state_in, XXH_endianess endian) +{ + struct XXH_state64_t * state = (struct XXH_state64_t *) state_in; + const BYTE * p = (const BYTE*)state->memory; + BYTE* bEnd = (BYTE*)state->memory + state->memsize; + U64 h64; + + if (state->total_len >= 32) + { + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + + v1 *= PRIME64_2; v1 = XXH_rotl64(v1, 31); v1 *= PRIME64_1; h64 ^= v1; + h64 = h64*PRIME64_1 + PRIME64_4; + + v2 *= PRIME64_2; v2 = XXH_rotl64(v2, 31); v2 *= PRIME64_1; h64 ^= v2; + h64 = h64*PRIME64_1 + PRIME64_4; + + v3 *= PRIME64_2; v3 = XXH_rotl64(v3, 31); v3 *= PRIME64_1; h64 ^= v3; + h64 = h64*PRIME64_1 + PRIME64_4; + + v4 *= PRIME64_2; v4 = XXH_rotl64(v4, 31); v4 *= PRIME64_1; h64 ^= v4; + h64 = h64*PRIME64_1 + PRIME64_4; + } + else + { + h64 = state->seed + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p<=bEnd-8) + { + U64 k1 = XXH_readLE64((const U64*)p, endian); + k1 *= PRIME64_2; k1 = XXH_rotl64(k1,31); k1 *= PRIME64_1; h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p<=bEnd-4) + { + h64 ^= (U64)(XXH_readLE32((const U32*)p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) - { - XXH_memcpy(state->memory, p, bEnd-p); - state->memsize = (int)(bEnd-p); - } - - return XXH_OK; -} - -XXH_errorcode XXH32_update (void* state_in, const void* input, int len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH32_update_endian(state_in, input, len, XXH_bigEndian); -} - - - -FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian) -{ - struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; - const BYTE * p = (const BYTE*)state->memory; - BYTE* bEnd = (BYTE*)state->memory + state->memsize; - U32 h32; - - if (state->total_len >= 16) - { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } - else - { - h32 = state->seed + PRIME32_5; - } - - h32 += (U32) state->total_len; - - while (p<=bEnd-4) - { - h32 += XXH_readLE32((const U32*)p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -U32 XXH32_intermediateDigest (void* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian); - else - return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian); -} - - -U32 XXH32_digest (void* state_in) -{ - U32 h32 = XXH32_intermediateDigest(state_in); - - XXH_free(state_in); - - return h32; -} + } + + h64 ^= h64 >> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +unsigned long long XXH64_intermediateDigest (void* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_intermediateDigest_endian(state_in, XXH_littleEndian); + else + return XXH64_intermediateDigest_endian(state_in, XXH_bigEndian); +} + + +unsigned long long XXH64_digest (void* state_in) +{ + U64 h64 = XXH64_intermediateDigest(state_in); + + XXH_free(state_in); + + return h64; +} + diff --git a/programs/xxhash.h b/programs/xxhash.h index 8491099..8271807 100644 --- a/programs/xxhash.h +++ b/programs/xxhash.h @@ -1,164 +1,167 @@ -/* - xxHash - Fast Hash algorithm - Header File - Copyright (C) 2012-2014, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - xxHash source repository : http://code.google.com/p/xxhash/ -*/ - -/* Notice extracted from xxHash homepage : - -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -It also successfully passes all tests from the SMHasher suite. - -Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) - -Name Speed Q.Score Author -xxHash 5.4 GB/s 10 -CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby -SpookyHash 2.0 GB/s 10 Bob Jenkins -SBox 1.4 GB/s 9 Bret Mulvey -Lookup3 1.2 GB/s 9 Bob Jenkins -SuperFastHash 1.2 GB/s 1 Paul Hsieh -CityHash64 1.05 GB/s 10 Pike & Alakuijala -FNV 0.55 GB/s 5 Fowler, Noll, Vo -CRC32 0.43 GB/s 9 -MD5-32 0.33 GB/s 10 Ronald L. Rivest -SHA1-32 0.28 GB/s 10 - -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. -*/ - -#pragma once - -#if defined (__cplusplus) -extern "C" { -#endif - - -//**************************** -// Type -//**************************** -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; - - - -//**************************** -// Simple Hash Functions -//**************************** - -unsigned int XXH32 (const void* input, int len, unsigned int seed); - -/* -XXH32() : - Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". - The memory between input & input+len must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - This function successfully passes all SMHasher tests. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s - Note that "len" is type "int", which means it is limited to 2^31-1. - If your data is larger, use the advanced functions below. -*/ - - - -//**************************** -// Advanced Hash Functions -//**************************** - -void* XXH32_init (unsigned int seed); -XXH_errorcode XXH32_update (void* state, const void* input, int len); -unsigned int XXH32_digest (void* state); - -/* -These functions calculate the xxhash of an input provided in several small packets, -as opposed to an input provided as a single block. - -It must be started with : -void* XXH32_init() -The function returns a pointer which holds the state of calculation. - -This pointer must be provided as "void* state" parameter for XXH32_update(). -XXH32_update() can be called as many times as necessary. -The user must provide a valid (allocated) input. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. -Note that "len" is type "int", which means it is limited to 2^31-1. -If your data is larger, it is recommended to chunk your data into blocks -of size for example 2^30 (1GB) to avoid any "int" overflow issue. - -Finally, you can end the calculation anytime, by using XXH32_digest(). -This function returns the final 32-bits hash. -You must provide the same "void* state" parameter created by XXH32_init(). -Memory will be freed by XXH32_digest(). -*/ - - -int XXH32_sizeofState(void); -XXH_errorcode XXH32_resetState(void* state, unsigned int seed); - -#define XXH32_SIZEOFSTATE 48 -typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; -/* -These functions allow user application to make its own allocation for state. - -XXH32_sizeofState() is used to know how much space must be allocated for the xxHash 32-bits state. -Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. -This pointer must then be provided as 'state' into XXH32_resetState(), which initializes the state. - -For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), -use the structure XXH32_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. -*/ - - -unsigned int XXH32_intermediateDigest (void* state); -/* -This function does the same as XXH32_digest(), generating a 32-bit hash, -but preserve memory context. -This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXH32_update(). -To free memory context, use XXH32_digest(), or free(). -*/ - - - -//**************************** -// Deprecated function names -//**************************** -// The following translations are provided to ease code transition -// You are encouraged to no longer this function names -#define XXH32_feed XXH32_update -#define XXH32_result XXH32_digest -#define XXH32_getIntermediateResult XXH32_intermediateDigest - - - -#if defined (__cplusplus) -} -#endif +/* + xxHash - Extremely Fast Hash algorithm + Header File + Copyright (C) 2012-2014, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - xxHash source repository : http://code.google.com/p/xxhash/ +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. +*/ + +#pragma once + +#if defined (__cplusplus) +extern "C" { +#endif + + +/***************************** + Type +*****************************/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + + +/***************************** + Simple Hash Functions +*****************************/ + +unsigned int XXH32 (const void* input, unsigned int len, unsigned int seed); +unsigned long long XXH64 (const void* input, unsigned int len, unsigned long long seed); + +/* +XXH32() : + Calculate the 32-bits hash of sequence of length "len" stored at memory address "input". + The memory between input & input+len must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + This function successfully passes all SMHasher tests. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s + Note that "len" is type "int", which means it is limited to 2^31-1. + If your data is larger, use the advanced functions below. +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". +*/ + + + +/***************************** + Advanced Hash Functions +*****************************/ + +void* XXH32_init (unsigned int seed); +XXH_errorcode XXH32_update (void* state, const void* input, unsigned int len); +unsigned int XXH32_digest (void* state); + +void* XXH64_init (unsigned long long seed); +XXH_errorcode XXH64_update (void* state, const void* input, unsigned int len); +unsigned long long XXH64_digest (void* state); + +/* +These functions calculate the xxhash of an input provided in several small packets, +as opposed to an input provided as a single block. + +It must be started with : +void* XXHnn_init() +The function returns a pointer which holds the state of calculation. + +This pointer must be provided as "void* state" parameter for XXHnn_update(). +XXHnn_update() can be called as many times as necessary. +The user must provide a valid (allocated) input. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. +Note that "len" is type "int", which means it is limited to 2^31-1. +If your data is larger, it is recommended to chunk your data into blocks +of size for example 2^30 (1GB) to avoid any "int" overflow issue. + +Finally, you can end the calculation anytime, by using XXHnn_digest(). +This function returns the final nn-bits hash. +You must provide the same "void* state" parameter created by XXHnn_init(). +Memory will be freed by XXHnn_digest(). +*/ + + +int XXH32_sizeofState(void); +XXH_errorcode XXH32_resetState(void* state, unsigned int seed); + +#define XXH32_SIZEOFSTATE 48 +typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t; + +int XXH64_sizeofState(void); +XXH_errorcode XXH64_resetState(void* state, unsigned long long seed); + +#define XXH64_SIZEOFSTATE 88 +typedef struct { long long ll[(XXH64_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH64_stateSpace_t; + +/* +These functions allow user application to make its own allocation for state. + +XXHnn_sizeofState() is used to know how much space must be allocated for the xxHash nn-bits state. +Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer. +This pointer must then be provided as 'state' into XXHnn_resetState(), which initializes the state. + +For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()), +use the structure XXHnn_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields. +*/ + + +unsigned int XXH32_intermediateDigest (void* state); +unsigned long long XXH64_intermediateDigest (void* state); +/* +This function does the same as XXHnn_digest(), generating a nn-bit hash, +but preserve memory context. +This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXHnn_update(). +To free memory context, use XXHnn_digest(), or free(). +*/ + + +#if defined (__cplusplus) +} +#endif