cBlockSize uses little-endian convention

2016-07-20 14:58:49 +02:00 · 2016-07-20 14:58:49 +02:00 · 6fa05a2371
commit 6fa05a2371
parent 7bf72bbf5e
6 changed files with 54 additions and 52 deletions
--- a/5
+++ b/5
@ -1,8 +1,9 @@
-v0.7.5
+v0.8.0
 Fixed : premature end of frame when zero-sized raw block, reported by Eric Biggers
 Fixed : legacy mode with ZSTD_HEAPMODE=0, by Christopher Bergqvist
+Fixed : checksum correctly checked in single-pass mode
 Modified : minor compression level adaptations
-Update : specification, to v0.1.2 : max huffman depth at 11 bits
+Updated : compression format specification to v0.2.0
 changed : zstd.h moved to /lib directory

 v0.7.4
--- a/lib/common/mem.h
+++ b/lib/common/mem.h
@ -258,6 +258,17 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
    }
 }

+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
+{
+    MEM_writeLE16(memPtr, (U16)val);
+    ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
+}
+
 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
 {
    if (MEM_isLittleEndian())
--- a/lib/compress/zstd_compress.c
+++ b/lib/compress/zstd_compress.c
@ -555,17 +555,9 @@ static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)

 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
-    BYTE* const ostart = (BYTE* const)dst;
-
    if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
-    memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
-
-    /* Build header */
-    ostart[0]  = (BYTE)(srcSize>>16);
-    ostart[1]  = (BYTE)(srcSize>>8);
-    ostart[2]  = (BYTE) srcSize;
-    ostart[0] += (BYTE)(bt_raw<<6);   /* is a raw (uncompressed) block */
-
+    memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
+    MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
    return ZSTD_blockHeaderSize+srcSize;
 }

@ -2433,10 +2425,8 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize);
            if (ZSTD_isError(cSize)) return cSize;
        } else {
-            op[0] = (BYTE)(cSize>>16);
-            op[1] = (BYTE)(cSize>>8);
-            op[2] = (BYTE)cSize;
-            op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
+            U32 const cBlockHeader24 = (U32)bt_compressed + (U32)(cSize << 2);
+            MEM_writeLE24(op, cBlockHeader24);
            cSize += 3;
        }

@ -2446,7 +2436,7 @@ static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
        op += cSize;
    }

-    ZSTD_statsPrint(stats, cctx->params.cParams.searchLength);
+    ZSTD_statsPrint(stats, cctx->params.cParams.searchLength);   /* debug only */
    return op-ostart;
 }

@ -2760,11 +2750,9 @@ size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
    /* frame epilogue */
    if (dstCapacity < 3) return ERROR(dstSize_tooSmall);
    {   U32 const checksum = cctx->params.fParams.checksumFlag ?
-                             (U32)((XXH64_digest(&cctx->xxhState) >> 11) & ((1<<22)-1)) :
+                             (U32)(XXH64_digest(&cctx->xxhState) >> 11) :
                             0;
-        op[0] = (BYTE)((bt_end<<6) + (checksum>>16));
-        op[1] = (BYTE)(checksum>>8);
-        op[2] = (BYTE)checksum;
+        MEM_writeLE24(op, (U32)bt_end + (checksum << 2));
    }

    cctx->stage = 0;  /* return to "created but not init" status */
--- a/lib/decompress/zstd_decompress.c
+++ b/lib/decompress/zstd_decompress.c
@ -435,18 +435,15 @@ typedef struct
 *   Provides the size of compressed block from block header `src` */
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
 {
-    const BYTE* const in = (const BYTE* const)src;
-    U32 cSize;
-
    if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
-
-    bpPtr->blockType = (blockType_t)((*in) >> 6);
-    cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
-    bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
-
+    {   U32 const cBlockHeader = MEM_readLE24(src);
+        U32 const cSize = cBlockHeader >> 2;
+        bpPtr->blockType = (blockType_t)(cBlockHeader & 3);
+        bpPtr->origSize = cSize;   /* only useful for RLE */
        if (bpPtr->blockType == bt_end) return 0;
        if (bpPtr->blockType == bt_rle) return 1;
        return cSize;
+    }
 }


@ -890,7 +887,6 @@ static size_t ZSTD_decompressSequences(

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
-        //if (litPtr > litEnd) return ERROR(corruption_detected);   /* too many literals already used */
        if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall);
        memcpy(op, litPtr, lastLLSize);
        op += lastLLSize;
@ -1008,6 +1004,12 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
        case bt_end :
            /* end of frame */
            if (remainingSize) return ERROR(srcSize_wrong);
+            if (dctx->fParams.checksumFlag) {
+                U64 const h64 = XXH64_digest(&dctx->xxhState);
+                U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
+                U32 const check32 = MEM_readLE24(src) >> 2;
+                if (check32 != h32) return ERROR(checksum_wrong);
+            }
            decodedSize = 0;
            break;
        default:
@ -1136,8 +1138,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
                if (dctx->fParams.checksumFlag) {
                    U64 const h64 = XXH64_digest(&dctx->xxhState);
                    U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
-                    const BYTE* const ip = (const BYTE*)src;
-                    U32 const check32 = ip[2] + (ip[1] << 8) + ((ip[0] & 0x3F) << 16);
+                    U32 const check32 = MEM_readLE24(src) >> 2;
                    if (check32 != h32) return ERROR(checksum_wrong);
                }
                dctx->expected = 0;
--- a/zstd_compression_format.md
+++ b/zstd_compression_format.md
@ -16,7 +16,7 @@ Distribution of this document is unlimited.

 ### Version

-0.1.2 (15/07/16)
+0.2.0 (22/07/16)


 Introduction
@ -76,7 +76,7 @@ allowing streaming operations.
 General Structure of Zstandard Frame format
 -------------------------------------------

-| MagicNb |  Frame Header | Block | (More blocks) | EndMark |
+| MagicNb |  Frame Header | Block | [More blocks] | EndMark |
 |:-------:|:-------------:| ----- | ------------- | ------- |
 | 4 bytes |  2-14 bytes   |       |               | 3 bytes |

@ -135,7 +135,7 @@ delivering the final decompressed result as if it was a single content.
 Frame Header
 -------------

-| FHD     | (WD)      | (dictID)  | (Content Size) |
+| FHD     | [WD]      | [dictID]  | [Content Size] |
 | ------- | --------- | --------- |:--------------:|
 | 1 byte  | 0-1 byte  | 0-4 bytes |  0 - 8 bytes   |

@ -317,9 +317,9 @@ Data Blocks

 __Block Header__

-This field uses 3-bytes, format is __big-endian__.
+This field uses 3 bytes, in __little-endian__ format.

-The 2 highest bits represent the `block type`,
+The 2 lowest bits represent the `block type`,
 while the remaining 22 bits represent the (compressed) block size.

 There are 4 block types :
@ -424,7 +424,7 @@ All literals are regrouped in the first part of the block.
 They can be decoded first, and then copied during sequence operations,
 or they can be decoded on the flow, as needed by sequence commands.

-| Header | (Tree Description) | Stream1 | (Stream2) | (Stream3) | (Stream4) |
+| Header | [Tree Description] | Stream1 | [Stream2] | [Stream3] | [Stream4] |
 | ------ | ------------------ | ------- | --------- | --------- | --------- |

 Literals can be compressed, or uncompressed.
@ -437,7 +437,7 @@ Header is in charge of describing how literals are packed.
 It's a byte-aligned variable-size bitfield, ranging from 1 to 5 bytes,
 using big-endian convention.

-| BlockType | sizes format | (compressed size) | regenerated size |
+| BlockType | sizes format | [compressed size] | regenerated size |
 | --------- | ------------ | ----------------- | ---------------- |
 |   2 bits  |  1 - 2 bits  |    0 - 18 bits    |    5 - 20 bits   |

@ -723,7 +723,7 @@ The Sequences section starts by a header,
 followed by optional Probability tables for each symbol type,
 followed by the bitstream.

-| Header | (LitLengthTable) | (OffsetTable) | (MatchLengthTable) | bitStream |
+| Header | [LitLengthTable] | [OffsetTable] | [MatchLengthTable] | bitStream |
 | ------ | ---------------- | ------------- | ------------------ | --------- |

 To decode the Sequence section, it's required to know its size.
@ -1165,6 +1165,7 @@ __Content__ : Where the actual dictionary content is.

 Version changes
 ---------------
+- 0.2.0 : numerous format adjustments for zstd v0.8
 - 0.1.2 : limit huffman tree depth to 11 bits
 - 0.1.1 : reserved dictID ranges
 - 0.1.0 : initial release