cBlockSize uses little-endian convention

This commit is contained in:
Yann Collet 2016-07-20 14:58:49 +02:00
parent 7bf72bbf5e
commit 6fa05a2371
6 changed files with 54 additions and 52 deletions

5
NEWS
View File

@ -1,8 +1,9 @@
v0.7.5 v0.8.0
Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
Fixed : checksum correctly checked in single-pass mode
Modified : minor compression level adaptations Modified : minor compression level adaptations
Update : specification, to v0.1.2 : max huffman depth at 11 bits Updated : compression format specification to v0.2.0
changed : zstd.h moved to /lib directory changed : zstd.h moved to /lib directory
v0.7.4 v0.7.4

View File

@ -258,6 +258,17 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
} }
} }
/*! MEM_readLE24() :
 *  Reads a 24-bit value from the 3 bytes starting at `memPtr`.
 *  Bits 0-15 are whatever MEM_readLE16() returns for `memPtr`;
 *  bits 16-23 come from the third byte (memPtr[2]).
 *  @return : the assembled value, always < (1<<24). */
MEM_STATIC U32 MEM_readLE24(const void* memPtr)
{
    const BYTE* const bytes = (const BYTE*)memPtr;
    U32 const low16 = MEM_readLE16(memPtr);      /* first two bytes */
    U32 const high8 = (U32)bytes[2] << 16;       /* third byte becomes the most significant one */
    return low16 + high8;
}
/*! MEM_writeLE24() :
 *  Stores the 24 lowest bits of `val` into the 3 bytes starting at `memPtr`.
 *  Bits 0-15 are delegated to MEM_writeLE16();
 *  bits 16-23 are written into the third byte (memPtr[2]).
 *  Bits 24-31 of `val` are discarded. */
MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
{
    BYTE* const out = (BYTE*)memPtr;
    U16 const low16 = (U16)val;
    BYTE const high8 = (BYTE)(val >> 16);
    MEM_writeLE16(memPtr, low16);
    out[2] = high8;
}
MEM_STATIC U32 MEM_readLE32(const void* memPtr) MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{ {
if (MEM_isLittleEndian()) if (MEM_isLittleEndian())

View File

@ -555,17 +555,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{ {
BYTE* const ostart = (BYTE* const)dst;
if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall); if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize); memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
/* Build header */
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE) srcSize;
ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
return ZSTD_blockHeaderSize+srcSize; return ZSTD_blockHeaderSize+srcSize;
} }
@ -2433,10 +2425,8 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize); cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize);
if (ZSTD_isError(cSize)) return cSize; if (ZSTD_isError(cSize)) return cSize;
} else { } else {
op[0] = (BYTE)(cSize>>16); U32 const cBlockHeader24 = (U32)bt_compressed + (U32)(cSize << 2);
op[1] = (BYTE)(cSize>>8); MEM_writeLE24(op, cBlockHeader24);
op[2] = (BYTE)cSize;
op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
cSize += 3; cSize += 3;
} }
@ -2446,7 +2436,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
op += cSize; op += cSize;
} }
ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); /* debug only */
return op-ostart; return op-ostart;
} }
@ -2760,11 +2750,9 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
/* frame epilogue */ /* frame epilogue */
if (dstCapacity < 3) return ERROR(dstSize_tooSmall); if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
{ U32 const checksum = cctx->params.fParams.checksumFlag ? { U32 const checksum = cctx->params.fParams.checksumFlag ?
(U32)((XXH64_digest(&cctx->xxhState) >> 11) & ((1<<22)-1)) : (U32)(XXH64_digest(&cctx->xxhState) >> 11) :
0; 0;
op[0] = (BYTE)((bt_end<<6) + (checksum>>16)); MEM_writeLE24(op, (U32)bt_end + (checksum << 2));
op[1] = (BYTE)(checksum>>8);
op[2] = (BYTE)checksum;
} }
cctx->stage = 0; /* return to "created but not init" status */ cctx->stage = 0; /* return to "created but not init" status */

View File

@ -435,19 +435,16 @@ typedef struct
* Provides the size of compressed block from block header `src` */ * Provides the size of compressed block from block header `src` */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
{ {
const BYTE* const in = (const BYTE* const)src;
U32 cSize;
if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
{ U32 const cBlockHeader = MEM_readLE24(src);
bpPtr->blockType = (blockType_t)((*in) >> 6); U32 const cSize = cBlockHeader >> 2;
cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); bpPtr->blockType = (blockType_t)(cBlockHeader & 3);
bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; bpPtr->origSize = cSize; /* only useful for RLE */
if (bpPtr->blockType == bt_end) return 0; if (bpPtr->blockType == bt_end) return 0;
if (bpPtr->blockType == bt_rle) return 1; if (bpPtr->blockType == bt_rle) return 1;
return cSize; return cSize;
} }
}
static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
@ -890,7 +887,6 @@ static size_t ZSTD_decompressSequences(
/* last literal segment */ /* last literal segment */
{ size_t const lastLLSize = litEnd - litPtr; { size_t const lastLLSize = litEnd - litPtr;
//if (litPtr > litEnd) return ERROR(corruption_detected); /* too many literals already used */
if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
memcpy(op, litPtr, lastLLSize); memcpy(op, litPtr, lastLLSize);
op += lastLLSize; op += lastLLSize;
@ -1008,6 +1004,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
case bt_end : case bt_end :
/* end of frame */ /* end of frame */
if (remainingSize) return ERROR(srcSize_wrong); if (remainingSize) return ERROR(srcSize_wrong);
if (dctx->fParams.checksumFlag) {
U64 const h64 = XXH64_digest(&dctx->xxhState);
U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
U32 const check32 = MEM_readLE24(src) >> 2;
if (check32 != h32) return ERROR(checksum_wrong);
}
decodedSize = 0; decodedSize = 0;
break; break;
default: default:
@ -1136,8 +1138,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
if (dctx->fParams.checksumFlag) { if (dctx->fParams.checksumFlag) {
U64 const h64 = XXH64_digest(&dctx->xxhState); U64 const h64 = XXH64_digest(&dctx->xxhState);
U32 const h32 = (U32)(h64>>11) & ((1<<22)-1); U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
const BYTE* const ip = (const BYTE*)src; U32 const check32 = MEM_readLE24(src) >> 2;
U32 const check32 = ip[2] + (ip[1] << 8) + ((ip[0] & 0x3F) << 16);
if (check32 != h32) return ERROR(checksum_wrong); if (check32 != h32) return ERROR(checksum_wrong);
} }
dctx->expected = 0; dctx->expected = 0;

View File

@ -16,7 +16,7 @@ Distribution of this document is unlimited.
### Version ### Version
0.1.2 (15/07/16) 0.2.0 (22/07/16)
Introduction Introduction
@ -76,7 +76,7 @@ allowing streaming operations.
General Structure of Zstandard Frame format General Structure of Zstandard Frame format
------------------------------------------- -------------------------------------------
| MagicNb | Frame Header | Block | (More blocks) | EndMark | | MagicNb | Frame Header | Block | [More blocks] | EndMark |
|:-------:|:-------------:| ----- | ------------- | ------- | |:-------:|:-------------:| ----- | ------------- | ------- |
| 4 bytes | 2-14 bytes | | | 3 bytes | | 4 bytes | 2-14 bytes | | | 3 bytes |
@ -135,7 +135,7 @@ delivering the final decompressed result as if it was a single content.
Frame Header Frame Header
------------- -------------
| FHD | (WD) | (dictID) | (Content Size) | | FHD | [WD] | [dictID] | [Content Size] |
| ------- | --------- | --------- |:--------------:| | ------- | --------- | --------- |:--------------:|
| 1 byte | 0-1 byte | 0-4 bytes | 0 - 8 bytes | | 1 byte | 0-1 byte | 0-4 bytes | 0 - 8 bytes |
@ -317,9 +317,9 @@ Data Blocks
__Block Header__ __Block Header__
This field uses 3-bytes, format is __big-endian__. This field uses 3-bytes, format is __little-endian__.
The 2 highest bits represent the `block type`, The 2 lowest bits represent the `block type`,
while the remaining 22 bits represent the (compressed) block size. while the remaining 22 bits represent the (compressed) block size.
There are 4 block types : There are 4 block types :
@ -424,7 +424,7 @@ All literals are regrouped in the first part of the block.
They can be decoded first, and then copied during sequence operations, They can be decoded first, and then copied during sequence operations,
or they can be decoded on the flow, as needed by sequence commands. or they can be decoded on the flow, as needed by sequence commands.
| Header | (Tree Description) | Stream1 | (Stream2) | (Stream3) | (Stream4) | | Header | [Tree Description] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
| ------ | ------------------ | ------- | --------- | --------- | --------- | | ------ | ------------------ | ------- | --------- | --------- | --------- |
Literals can be compressed, or uncompressed. Literals can be compressed, or uncompressed.
@ -437,7 +437,7 @@ Header is in charge of describing how literals are packed.
It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes, It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
using big-endian convention. using big-endian convention.
| BlockType | sizes format | (compressed size) | regenerated size | | BlockType | sizes format | [compressed size] | regenerated size |
| --------- | ------------ | ----------------- | ---------------- | | --------- | ------------ | ----------------- | ---------------- |
| 2 bits | 1 - 2 bits | 0 - 18 bits | 5 - 20 bits | | 2 bits | 1 - 2 bits | 0 - 18 bits | 5 - 20 bits |
@ -723,7 +723,7 @@ The Sequences section starts by a header,
followed by optional Probability tables for each symbol type, followed by optional Probability tables for each symbol type,
followed by the bitstream. followed by the bitstream.
| Header | (LitLengthTable) | (OffsetTable) | (MatchLengthTable) | bitStream | | Header | [LitLengthTable] | [OffsetTable] | [MatchLengthTable] | bitStream |
| ------ | ---------------- | ------------- | ------------------ | --------- | | ------ | ---------------- | ------------- | ------------------ | --------- |
To decode the Sequence section, it's required to know its size. To decode the Sequence section, it's required to know its size.
@ -1165,6 +1165,7 @@ __Content__ : Where the actual dictionary content is.
Version changes Version changes
--------------- ---------------
- 0.2.0 : numerous format adjustments for zstd v0.8
- 0.1.2 : limit huffman tree depth to 11 bits - 0.1.2 : limit huffman tree depth to 11 bits
- 0.1.1 : reserved dictID ranges - 0.1.1 : reserved dictID ranges
- 0.1.0 : initial release - 0.1.0 : initial release