diff --git a/NEWS b/NEWS index 50aadcbd..64c1d4ff 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,9 @@ -v0.7.5 +v0.8.0 Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist +Fixed : checksum correctly checked in single-pass mode Modified : minor compression level adaptations -Update : specification, to v0.1.2 : max huffman depth at 11 bits +Updated : compression format specification to v0.2.0 changed : zstd.h moved to /lib directory v0.7.4 diff --git a/lib/common/mem.h b/lib/common/mem.h index f76c52d9..4d35f5ef 100644 --- a/lib/common/mem.h +++ b/lib/common/mem.h @@ -258,6 +258,17 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) } } +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + MEM_STATIC U32 MEM_readLE32(const void* memPtr) { if (MEM_isLittleEndian()) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 92e5b341..0c7046ff 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -555,17 +555,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - BYTE* const ostart = (BYTE* const)dst; - if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); - memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize); - - /* Build header */ - ostart[0] = (BYTE)(srcSize>>16); - ostart[1] = (BYTE)(srcSize>>8); - ostart[2] = (BYTE) srcSize; - ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */ - + memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); return ZSTD_blockHeaderSize+srcSize; } @@ -2433,10 +2425,8 @@ static size_t 
ZSTD_compress_generic (ZSTD_CCtx* cctx, cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize); if (ZSTD_isError(cSize)) return cSize; } else { - op[0] = (BYTE)(cSize>>16); - op[1] = (BYTE)(cSize>>8); - op[2] = (BYTE)cSize; - op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */ + U32 const cBlockHeader24 = (U32)bt_compressed + (U32)(cSize << 2); + MEM_writeLE24(op, cBlockHeader24); cSize += 3; } @@ -2446,7 +2436,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx, op += cSize; } - ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); + ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); /* debug only */ return op-ostart; } @@ -2760,11 +2750,9 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) /* frame epilogue */ if (dstCapacity < 3) return ERROR(dstSize_tooSmall); { U32 const checksum = cctx->params.fParams.checksumFlag ? - (U32)((XXH64_digest(&cctx->xxhState) >> 11) & ((1<<22)-1)) : + (U32)(XXH64_digest(&cctx->xxhState) >> 11) : 0; - op[0] = (BYTE)((bt_end<<6) + (checksum>>16)); - op[1] = (BYTE)(checksum>>8); - op[2] = (BYTE)checksum; + MEM_writeLE24(op, (U32)bt_end + (checksum << 2)); } cctx->stage = 0; /* return to "created but not init" status */ diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index f1612711..1f57f480 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -435,18 +435,15 @@ typedef struct * Provides the size of compressed block from block header `src` */ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - const BYTE* const in = (const BYTE* const)src; - U32 cSize; - if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); - - bpPtr->blockType = (blockType_t)((*in) >> 6); - cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); - bpPtr->origSize = (bpPtr->blockType == bt_rle) ? 
cSize : 0; - - if (bpPtr->blockType == bt_end) return 0; - if (bpPtr->blockType == bt_rle) return 1; - return cSize; + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 2; + bpPtr->blockType = (blockType_t)(cBlockHeader & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_end) return 0; + if (bpPtr->blockType == bt_rle) return 1; + return cSize; + } } @@ -890,7 +887,6 @@ static size_t ZSTD_decompressSequences( /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - //if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */ if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); memcpy(op, litPtr, lastLLSize); op += lastLLSize; @@ -1008,6 +1004,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, case bt_end : /* end of frame */ if (remainingSize) return ERROR(srcSize_wrong); + if (dctx->fParams.checksumFlag) { + U64 const h64 = XXH64_digest(&dctx->xxhState); + U32 const h32 = (U32)(h64>>11) & ((1<<22)-1); + U32 const check32 = MEM_readLE24(src) >> 2; + if (check32 != h32) return ERROR(checksum_wrong); + } decodedSize = 0; break; default: @@ -1136,8 +1138,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c if (dctx->fParams.checksumFlag) { U64 const h64 = XXH64_digest(&dctx->xxhState); U32 const h32 = (U32)(h64>>11) & ((1<<22)-1); - const BYTE* const ip = (const BYTE*)src; - U32 const check32 = ip[2] + (ip[1] << 8) + ((ip[0] & 0x3F) << 16); + U32 const check32 = MEM_readLE24(src) >> 2; if (check32 != h32) return ERROR(checksum_wrong); } dctx->expected = 0; diff --git a/programs/fuzzer.c b/programs/fuzzer.c index 77a71186..3778f12b 100644 --- a/programs/fuzzer.c +++ b/programs/fuzzer.c @@ -35,19 +35,19 @@ /*-************************************ * Includes **************************************/ -#include <stdlib.h> /* free */ -#include <stdio.h> /* fgets, sscanf */ -#include <sys/timeb.h> /* timeb */ -#include <string.h> /* strcmp */ -#include <time.h>
/* clock_t */ +#include <stdlib.h> /* free */ +#include <stdio.h> /* fgets, sscanf */ +#include <sys/timeb.h> /* timeb */ +#include <string.h> /* strcmp */ +#include <time.h> /* clock_t */ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressContinue, ZSTD_compressBlock */ -#include "zstd.h" /* ZSTD_VERSION_STRING */ +#include "zstd.h" /* ZSTD_VERSION_STRING */ #include "error_public.h" /* ZSTD_getErrorCode */ -#include "zdict.h" /* ZDICT_trainFromBuffer */ -#include "datagen.h" /* RDG_genBuffer */ +#include "zdict.h" /* ZDICT_trainFromBuffer */ +#include "datagen.h" /* RDG_genBuffer */ #include "mem.h" #define XXH_STATIC_LINKING_ONLY -#include "xxhash.h" /* XXH64 */ +#include "xxhash.h" /* XXH64 */ /*-************************************ diff --git a/zstd_compression_format.md b/zstd_compression_format.md index 3a4ba4c0..199a172b 100644 --- a/zstd_compression_format.md +++ b/zstd_compression_format.md @@ -16,7 +16,7 @@ Distribution of this document is unlimited. ### Version -0.1.2 (15/07/16) +0.2.0 (22/07/16) Introduction @@ -76,7 +76,7 @@ allowing streaming operations. General Structure of Zstandard Frame format ------------------------------------------- -| MagicNb | Frame Header | Block | (More blocks) | EndMark | +| MagicNb | Frame Header | Block | [More blocks] | EndMark | |:-------:|:-------------:| ----- | ------------- | ------- | | 4 bytes | 2-14 bytes | | | 3 bytes | @@ -135,7 +135,7 @@ delivering the final decompressed result as if it was a single content. Frame Header ------------- -| FHD | (WD) | (dictID) | (Content Size) | +| FHD | [WD] | [dictID] | [Content Size] | | ------- | --------- | --------- |:--------------:| | 1 byte | 0-1 byte | 0-4 bytes | 0 - 8 bytes | @@ -317,9 +317,9 @@ Data Blocks __Block Header__ -This field uses 3-bytes, format is __big-endian__. +This field uses 3-bytes, format is __little-endian__. -The 2 highest bits represent the `block type`, +The 2 lowest bits represent the `block type`, while the remaining 22 bits represent the (compressed) block size.
There are 4 block types : @@ -424,7 +424,7 @@ All literals are regrouped in the first part of the block. They can be decoded first, and then copied during sequence operations, or they can be decoded on the flow, as needed by sequence commands. -| Header | (Tree Description) | Stream1 | (Stream2) | (Stream3) | (Stream4) | +| Header | [Tree Description] | Stream1 | [Stream2] | [Stream3] | [Stream4] | | ------ | ------------------ | ------- | --------- | --------- | --------- | Literals can be compressed, or uncompressed. @@ -437,7 +437,7 @@ Header is in charge of describing how literals are packed. It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, using big-endian convention. -| BlockType | sizes format | (compressed size) | regenerated size | +| BlockType | sizes format | [compressed size] | regenerated size | | --------- | ------------ | ----------------- | ---------------- | | 2 bits | 1 - 2 bits | 0 - 18 bits | 5 - 20 bits | @@ -723,7 +723,7 @@ The Sequences section starts by a header, followed by optional Probability tables for each symbol type, followed by the bitstream. -| Header | (LitLengthTable) | (OffsetTable) | (MatchLengthTable) | bitStream | +| Header | [LitLengthTable] | [OffsetTable] | [MatchLengthTable] | bitStream | | ------ | ---------------- | ------------- | ------------------ | --------- | To decode the Sequence section, it's required to know its size. @@ -1165,6 +1165,7 @@ __Content__ : Where the actual dictionary content is. Version changes --------------- +- 0.2.0 : numerous format adjustments for zstd v0.8 - 0.1.2 : limit huffman tree depth to 11 bits - 0.1.1 : reserved dictID ranges - 0.1.0 : initial release