lz4/examples/dictionaryRandomAccess.c

// LZ4 API example : Dictionary Random Access

/* Compatibility shim for old Microsoft compilers only: defines
 * _CRT_SECURE_NO_WARNINGS and routes every snprintf() call in this file to
 * sprintf_s(). Kept ahead of the #include lines below. */
#if defined(_MSC_VER) && (_MSC_VER <= 1800)  /* Visual Studio <= 2013 */
#  define _CRT_SECURE_NO_WARNINGS
#  define snprintf sprintf_s
#endif
#include "lz4.h"

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Smaller of x and y. This is a function-like macro: the selected argument is
 * evaluated twice, so do not pass expressions with side effects. */
#define MIN(x, y)  ((x) < (y) ? (x) : (y))

/* Compile-time sizes shared by the compressor, the decompressor and main(). */
enum {
    BLOCK_BYTES = 1024,  /* 1 KiB of uncompressed data in a block */
    DICTIONARY_BYTES = 1024, /* Load a 1 KiB dictionary */
    MAX_BLOCKS = 1024 /* For simplicity of implementation: size of the on-stack offset tables */
};

/**
 * Magic bytes written at the start of every compressed file and checked
 * before decompression.
 * This is not a great magic number because it is a common word in ASCII.
 * However, it is important to have some versioning system in your format.
 * Note: this is a 4-byte array, not a C string (there is no NUL terminator),
 * so always use sizeof(kTestMagic) rather than strlen().
 */
const char kTestMagic[] = { 'T', 'E', 'S', 'T' };


/* Write the in-memory bytes of one int to fp.
 * Terminates the process with exit code 10 if the item cannot be written. */
void write_int(FILE* fp, int i) {
    const size_t items = fwrite(&i, sizeof(int), 1, fp);
    if (items == 1) {
        return;
    }
    exit(10);
}

/* Write arrayBytes bytes starting at array to fp.
 * Terminates the process with exit code 11 on a short write. */
void write_bin(FILE* fp, const void* array, size_t arrayBytes) {
    const size_t done = fwrite(array, 1, arrayBytes, fp);
    if (done == arrayBytes) {
        return;
    }
    exit(11);
}

/* Read the in-memory bytes of one int from fp into *i.
 * Terminates the process with exit code 12 if a full int cannot be read. */
void read_int(FILE* fp, int* i) {
    const size_t items = fread(i, sizeof(int), 1, fp);
    if (items == 1) {
        return;
    }
    exit(12);
}

/* Read up to arrayBytes bytes from fp into array.
 * Returns the number of bytes actually read, which is smaller than arrayBytes
 * when the end of the file is reached. Terminates the process with exit code
 * 12 if the stream's error indicator is set after the read. */
size_t read_bin(FILE* fp, void* array, size_t arrayBytes) {
    const size_t got = fread(array, 1, arrayBytes, fp);
    if (ferror(fp) != 0) {
        exit(12);
    }
    return got;
}

/* Reposition fp via fseek(fp, offset, origin).
 * Terminates the process with exit code 14 if the seek fails. */
void seek_bin(FILE* fp, long offset, int origin) {
    const int rc = fseek(fp, offset, origin);
    if (rc != 0) {
        exit(14);
    }
}


/**
 * Compress everything readable from inpFp into outFp as a sequence of blocks
 * that can each be decoded on their own.
 *
 * File layout produced here:
 *   1. kTestMagic.
 *   2. One compressed block per BLOCK_BYTES of input (the last block may hold
 *      less). The dictionary is reloaded before every block, so a block only
 *      depends on `dict`, never on earlier blocks.
 *   3. The jump table: the file offset at which each block starts, followed
 *      by the offset just past the last block, each written with write_int().
 *   4. The number of jump-table entries, written with write_int().
 *
 * @param outFp    destination stream, positioned at the start of the output
 * @param inpFp    source stream, read until it returns no more data
 * @param dict     dictionary bytes handed to LZ4_loadDict()
 * @param dictSize number of valid bytes in dict
 *
 * Exits with code 1 if LZ4 reports a compression failure and with code 2 if
 * the input needs more jump-table entries than offsets[] (MAX_BLOCKS) holds.
 * The I/O helpers exit with their own codes on read/write errors.
 */
void test_compress(FILE* outFp, FILE* inpFp, void *dict, int dictSize)
{
    LZ4_stream_t lz4Stream_body;
    LZ4_stream_t* lz4Stream = &lz4Stream_body;

    char inpBuf[BLOCK_BYTES];
    int offsets[MAX_BLOCKS];
    int *offsetsEnd = offsets;


    LZ4_initStream(lz4Stream, sizeof(*lz4Stream));

    /* Write header magic */
    write_bin(outFp, kTestMagic, sizeof(kTestMagic));

    /* The first block starts right after the magic bytes */
    *offsetsEnd++ = (int) sizeof(kTestMagic);
    /* Write compressed data blocks.  Each block contains BLOCK_BYTES of plain
       data except possibly the last. */
    for(;;) {
        const int inpBytes = (int) read_bin(inpFp, inpBuf, BLOCK_BYTES);
        if(0 == inpBytes) {
            break;
        }

        /* Make sure the jump table has room for this block's end offset
           BEFORE storing it; checking afterwards would already have written
           one element past the end of offsets[]. */
        if (offsetsEnd - offsets >= MAX_BLOCKS) { exit(2); }

        /* Forget previously compressed data and load the dictionary */
        LZ4_loadDict(lz4Stream, (const char*) dict, dictSize);
        {
            char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
            const int cmpBytes = LZ4_compress_fast_continue(
                lz4Stream, inpBuf, cmpBuf, inpBytes, (int) sizeof(cmpBuf), 1);
            if(cmpBytes <= 0) { exit(1); }
            write_bin(outFp, cmpBuf, (size_t)cmpBytes);
            /* Keep track of the offsets: the end of this block is the start
               of the next one */
            *offsetsEnd = *(offsetsEnd - 1) + cmpBytes;
            ++offsetsEnd;
        }
    }
    /* Write the trailing jump table, then its entry count */
    {
        int *ptr = offsets;
        while (ptr != offsetsEnd) {
            write_int(outFp, *ptr++);
        }
        write_int(outFp, (int) (offsetsEnd - offsets));
    }
}


/**
 * Decode `length` plain bytes starting at plain offset `offset` from a file
 * produced by test_compress(), writing them to outFp.
 *
 * Only the blocks overlapping the requested range are read: the jump table at
 * the end of the file gives the position of every block, and each block is
 * decoded against `dict` alone.
 *
 * @param outFp    destination stream for the decoded bytes
 * @param inpFp    compressed file, positioned at its start
 * @param dict     dictionary bytes; must match what the compressor used
 * @param dictSize number of valid bytes in dict
 * @param offset   first plain byte wanted (0-based)
 * @param length   number of plain bytes wanted; <= 0 writes nothing
 *
 * If the range runs past the end of the last (short) block, fewer than
 * `length` bytes are written.
 *
 * Exit codes: 1 = file shorter than the magic, 2 = wrong magic,
 * 3 = requested range is not covered by the jump table (or the table size is
 * implausible), 4 = bad block size or short block read, 5 = LZ4 decode
 * failure. read_int()/seek_bin() exit with their own codes on I/O errors.
 */
void test_decompress(FILE* outFp, FILE* inpFp, void *dict, int dictSize, int offset, int length)
{
    LZ4_streamDecode_t lz4StreamDecode_body;
    LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;

    /* The blocks [currentBlock, endBlock) contain the data we want */
    int currentBlock;
    int endBlock;

    char decBuf[BLOCK_BYTES];
    int offsets[MAX_BLOCKS];

    /* Special cases: nothing to decode */
    if (length <= 0) { return; }

    /* Read the magic bytes */
    {
        char magic[sizeof(kTestMagic)];
        size_t read = read_bin(inpFp, magic, sizeof(magic));
        if (read != sizeof(magic)) { exit(1); }
        if (memcmp(kTestMagic, magic, sizeof(magic))) { exit(2); }
    }

    /* Validate the requested range before doing arithmetic on it.
       offsets[] has MAX_BLOCKS entries, which describe at most
       MAX_BLOCKS - 1 blocks, so anything beyond that can never be served
       and would index past the end of offsets[]. The subtraction form also
       avoids overflowing offset + length. */
    {
        const int maxPlainBytes = (MAX_BLOCKS - 1) * BLOCK_BYTES;
        if (offset < 0 || offset >= maxPlainBytes) { exit(3); }
        if (length > maxPlainBytes - offset) { exit(3); }
    }
    currentBlock = offset / BLOCK_BYTES;
    endBlock = ((offset + length - 1) / BLOCK_BYTES) + 1;

    /* Read the offsets tail */
    {
        int numOffsets;
        int block;
        /* The entry count and the entries were written with write_int(),
           i.e. sizeof(int) bytes each */
        seek_bin(inpFp, -(long)sizeof(int), SEEK_END);
        read_int(inpFp, &numOffsets);
        /* We need entries [0, endBlock]; a count above MAX_BLOCKS cannot come
           from test_compress() and is rejected as corrupt */
        if (numOffsets <= endBlock || numOffsets > MAX_BLOCKS) { exit(3); }
        seek_bin(inpFp, -(long)sizeof(int) * (numOffsets + 1), SEEK_END);
        for (block = 0; block <= endBlock; ++block) {
            read_int(inpFp, &offsets[block]);
        }
    }
    /* Seek to the first block to read */
    seek_bin(inpFp, offsets[currentBlock], SEEK_SET);
    /* From here on, offset is the position inside the first decoded block */
    offset = offset % BLOCK_BYTES;

    /* Start decoding; blocks are stored back to back, so no further seeking
       is needed */
    for(; currentBlock < endBlock; ++currentBlock) {
        char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
        const int blockStart = offsets[currentBlock];
        const int blockEnd = offsets[currentBlock + 1];
        int cmpBytes;

        /* The offsets come from the file: reject anything that would make
           the block size negative or larger than cmpBuf */
        if (blockStart < 0 || blockEnd < blockStart) { exit(4); }
        if (blockEnd - blockStart > (int)sizeof(cmpBuf)) { exit(4); }

        /* The difference in offsets is the size of the block */
        cmpBytes = blockEnd - blockStart;
        {
            const size_t read = read_bin(inpFp, cmpBuf, (size_t)cmpBytes);
            if(read != (size_t)cmpBytes) { exit(4); }
        }

        /* Load the dictionary */
        LZ4_setStreamDecode(lz4StreamDecode, (const char*) dict, dictSize);
        {
            const int decBytes = LZ4_decompress_safe_continue(
                lz4StreamDecode, cmpBuf, decBuf, cmpBytes, BLOCK_BYTES);
            if(decBytes <= 0) { exit(5); }
            {
                /* Write out the part of the data we care about */
                int blockLength = MIN(length, (decBytes - offset));
                /* A short final block may end before `offset`; write nothing
                   rather than passing a negative size to write_bin() */
                if (blockLength < 0) { blockLength = 0; }
                write_bin(outFp, decBuf + offset, (size_t)blockLength);
                offset = 0;
                length -= blockLength;
            }
        }
    }
}


/**
 * Compare up to `length` bytes read from the current positions of fp0 and fp1.
 *
 * @param fp0    first stream
 * @param fp1    second stream
 * @param length maximum number of bytes to compare; <= 0 compares nothing
 * @return 0 if both streams delivered the same bytes (or both ended at the
 *         same point); otherwise a non-zero value, either the difference in
 *         bytes read for a chunk or the memcmp() result of the first
 *         differing chunk.
 */
int compare(FILE* fp0, FILE* fp1, int length)
{
    int result = 0;

    /* Stopping at length <= 0 keeps a negative length from being converted
       to a huge size_t read size that would overflow b0/b1 */
    while(0 == result && length > 0) {
        char b0[4096];
        char b1[4096];
        const size_t want = (size_t) MIN(length, (int)sizeof(b0));
        const size_t r0 = read_bin(fp0, b0, want);
        const size_t r1 = read_bin(fp1, b1, want);

        /* Different chunk sizes mean one stream ended early */
        result = (int) r0 - (int) r1;

        if(0 == r0 || 0 == r1) {
            break;
        }
        if(0 == result) {
            result = memcmp(b0, b1, r0);
        }
        length -= (int) r0;
    }

    return result;
}


/* Open `path` with `mode`; on failure report the path via perror() and exit
 * with code 13 instead of handing a NULL stream to the I/O helpers. */
static FILE* open_file(const char* path, const char* mode)
{
    FILE* const fp = fopen(path, mode);
    if (fp == NULL) {
        perror(path);
        exit(13);
    }
    return fp;
}

/**
 * Usage: prog input dictionary offset length
 *
 * Compresses `input` block by block against `dictionary` into
 * "<input>.lz4s-<BLOCK_BYTES>", decodes `length` bytes starting at `offset`
 * from that file into "<input>.lz4s-<BLOCK_BYTES>.dec", and finally compares
 * the decoded bytes with the same range of the original input.
 *
 * Returns 0 after printing the usage text or the verification verdict, and 1
 * if offset or length is negative. Helpers exit with their own codes on I/O
 * or codec errors.
 */
int main(int argc, char* argv[])
{
    char inpFilename[256] = { 0 };
    char lz4Filename[256] = { 0 };
    char decFilename[256] = { 0 };
    char dictFilename[256] = { 0 };
    int offset;
    int length;
    char dict[DICTIONARY_BYTES];
    int dictSize;

    if(argc < 5) {
        printf("Usage: %s input dictionary offset length", argv[0]);
        return 0;
    }

    snprintf(inpFilename, 256, "%s", argv[1]);
    snprintf(lz4Filename, 256, "%s.lz4s-%d", argv[1], BLOCK_BYTES);
    snprintf(decFilename, 256, "%s.lz4s-%d.dec", argv[1], BLOCK_BYTES);
    snprintf(dictFilename, 256, "%s", argv[2]);
    offset = atoi(argv[3]);
    length = atoi(argv[4]);

    printf("inp    = [%s]\n", inpFilename);
    printf("lz4    = [%s]\n", lz4Filename);
    printf("dec    = [%s]\n", decFilename);
    printf("dict   = [%s]\n", dictFilename);
    printf("offset = [%d]\n", offset);
    printf("length = [%d]\n", length);

    /* A negative range would turn into negative block indices and huge
       unsigned read sizes further down */
    if (offset < 0 || length < 0) {
        fprintf(stderr, "offset and length must be non-negative\n");
        return 1;
    }

    /* Load dictionary; dictSize is the number of bytes actually present,
       which may be less than DICTIONARY_BYTES */
    {
        FILE* dictFp = open_file(dictFilename, "rb");
        dictSize = (int)read_bin(dictFp, dict, DICTIONARY_BYTES);
        fclose(dictFp);
    }

    /* compress */
    {
        FILE* inpFp = open_file(inpFilename, "rb");
        FILE* outFp = open_file(lz4Filename, "wb");

        printf("compress : %s -> %s\n", inpFilename, lz4Filename);
        test_compress(outFp, inpFp, dict, dictSize);
        printf("compress : done\n");

        /* fclose flushes buffered output; treat a failure like a failed write */
        if (fclose(outFp) != 0) { exit(11); }
        fclose(inpFp);
    }

    /* decompress */
    {
        FILE* inpFp = open_file(lz4Filename, "rb");
        FILE* outFp = open_file(decFilename, "wb");

        printf("decompress : %s -> %s\n", lz4Filename, decFilename);
        /* Use the same dictionary size as the compressor did: passing
           DICTIONARY_BYTES for a shorter dictionary would describe a
           different dictionary and include uninitialised bytes of dict[] */
        test_decompress(outFp, inpFp, dict, dictSize, offset, length);
        printf("decompress : done\n");

        if (fclose(outFp) != 0) { exit(11); }
        fclose(inpFp);
    }

    /* verify */
    {
        FILE* inpFp = open_file(inpFilename, "rb");
        FILE* decFp = open_file(decFilename, "rb");
        int cmp;

        /* The decoded file holds only the requested range, so line the
           original up with its first byte */
        seek_bin(inpFp, offset, SEEK_SET);

        printf("verify : %s <-> %s\n", inpFilename, decFilename);
        cmp = compare(inpFp, decFp, length);
        if(0 == cmp) {
            printf("verify : OK\n");
        } else {
            printf("verify : NG\n");
        }

        fclose(decFp);
        fclose(inpFp);
    }

    return 0;
}
Add dictionary random access example 2016-11-10 00:20:47 +00:00			`// LZ4 API example : Dictionary Random Access`

minor updates to examples see https://github.com/lz4/lz4/commit/810e2ca27b3561e0f6bfa7a88e0fde6faf807064#commitcomment-25810887 2017-11-25 01:18:46 +00:00			`#if defined(_MSC_VER) && (_MSC_VER <= 1800) /* Visual Studio <= 2013 */`
Add dictionary random access example 2016-11-10 00:20:47 +00:00			`# define _CRT_SECURE_NO_WARNINGS`
			`# define snprintf sprintf_s`
			`#endif`
			`#include "lz4.h"`

			`#include <stdio.h>`
			`#include <stdint.h>`
			`#include <stdlib.h>`
			`#include <string.h>`

created LZ4_initStream() - promoted LZ4_resetStream_fast() to stable - moved LZ4_resetStream() into deprecate, but without triggering a compiler warning - update all sources to no longer rely on LZ4_resetStream() note : LZ4_initStream() proposal is slightly different : it's able to initialize any buffer, provided that it's large enough. To this end, it accepts a void, and returns an LZ4_stream_t. 2019-04-05 19:54:13 +00:00			`#define MIN(x, y) ((x) < (y) ? (x) : (y))`
Add dictionary random access example 2016-11-10 00:20:47 +00:00
			`enum {`
			`BLOCK_BYTES = 1024, /* 1 KiB of uncompressed data in a block */`
			`DICTIONARY_BYTES = 1024, /* Load a 1 KiB dictionary */`
			`MAX_BLOCKS = 1024 /* For simplicity of implementation */`
			`};`

			`/**`
			`* Magic bytes for this test case.`
			`* This is not a great magic number because it is a common word in ASCII.`
			`* However, it is important to have some versioning system in your format.`
			`*/`
			`const char kTestMagic[] = { 'T', 'E', 'S', 'T' };`


			`void write_int(FILE* fp, int i) {`
			`size_t written = fwrite(&i, sizeof(i), 1, fp);`
			`if (written != 1) { exit(10); }`
			`}`

			`void write_bin(FILE* fp, const void* array, size_t arrayBytes) {`
			`size_t written = fwrite(array, 1, arrayBytes, fp);`
			`if (written != arrayBytes) { exit(11); }`
			`}`

			`void read_int(FILE* fp, int* i) {`
			`size_t read = fread(i, sizeof(*i), 1, fp);`
			`if (read != 1) { exit(12); }`
			`}`

			`size_t read_bin(FILE* fp, void* array, size_t arrayBytes) {`
			`size_t read = fread(array, 1, arrayBytes, fp);`
			`if (ferror(fp)) { exit(12); }`
			`return read;`
			`}`

			`void seek_bin(FILE* fp, long offset, int origin) {`
			`if (fseek(fp, offset, origin)) { exit(14); }`
			`}`


			`void test_compress(FILE* outFp, FILE* inpFp, void *dict, int dictSize)`
			`{`
			`LZ4_stream_t lz4Stream_body;`
			`LZ4_stream_t* lz4Stream = &lz4Stream_body;`

			`char inpBuf[BLOCK_BYTES];`
			`int offsets[MAX_BLOCKS];`
			`int *offsetsEnd = offsets;`


created LZ4_initStream() - promoted LZ4_resetStream_fast() to stable - moved LZ4_resetStream() into deprecate, but without triggering a compiler warning - update all sources to no longer rely on LZ4_resetStream() note : LZ4_initStream() proposal is slightly different : it's able to initialize any buffer, provided that it's large enough. To this end, it accepts a void, and returns an LZ4_stream_t. 2019-04-05 19:54:13 +00:00			`LZ4_initStream(lz4Stream, sizeof(*lz4Stream));`
Add dictionary random access example 2016-11-10 00:20:47 +00:00
			`/* Write header magic */`
			`write_bin(outFp, kTestMagic, sizeof(kTestMagic));`

			`*offsetsEnd++ = sizeof(kTestMagic);`
			`/* Write compressed data blocks. Each block contains BLOCK_BYTES of plain`
			`data except possibly the last. */`
			`for(;;) {`
			`const int inpBytes = (int) read_bin(inpFp, inpBuf, BLOCK_BYTES);`
			`if(0 == inpBytes) {`
			`break;`
			`}`

			`/* Forget previously compressed data and load the dictionary */`
Add cast operator for compatibility with C++ 2021-09-06 16:02:23 +00:00			`LZ4_loadDict(lz4Stream, (const char*) dict, dictSize);`
Add dictionary random access example 2016-11-10 00:20:47 +00:00			`{`
			`char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];`
			`const int cmpBytes = LZ4_compress_fast_continue(`
			`lz4Stream, inpBuf, cmpBuf, inpBytes, sizeof(cmpBuf), 1);`
			`if(cmpBytes <= 0) { exit(1); }`
			`write_bin(outFp, cmpBuf, (size_t)cmpBytes);`
			`/* Keep track of the offsets */`
			`offsetsEnd = (offsetsEnd - 1) + cmpBytes;`
			`++offsetsEnd;`
			`}`
			`if (offsetsEnd - offsets > MAX_BLOCKS) { exit(2); }`
			`}`
			`/* Write the tailing jump table */`
			`{`
			`int *ptr = offsets;`
			`while (ptr != offsetsEnd) {`
			`write_int(outFp, *ptr++);`
			`}`
Fix -Wshorten-64-to-32 warning 2021-05-27 08:24:00 +00:00			`write_int(outFp, (int) (offsetsEnd - offsets));`
Add dictionary random access example 2016-11-10 00:20:47 +00:00			`}`
			`}`


			`void test_decompress(FILE* outFp, FILE* inpFp, void *dict, int dictSize, int offset, int length)`
			`{`
			`LZ4_streamDecode_t lz4StreamDecode_body;`
			`LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;`

			`/* The blocks [currentBlock, endBlock) contain the data we want */`
			`int currentBlock = offset / BLOCK_BYTES;`
			`int endBlock = ((offset + length - 1) / BLOCK_BYTES) + 1;`

			`char decBuf[BLOCK_BYTES];`
			`int offsets[MAX_BLOCKS];`

			`/* Special cases */`
			`if (length == 0) { return; }`

			`/* Read the magic bytes */`
			`{`
			`char magic[sizeof(kTestMagic)];`
			`size_t read = read_bin(inpFp, magic, sizeof(magic));`
			`if (read != sizeof(magic)) { exit(1); }`
			`if (memcmp(kTestMagic, magic, sizeof(magic))) { exit(2); }`
			`}`

			`/* Read the offsets tail */`
			`{`
			`int numOffsets;`
			`int block;`
			`int *offsetsPtr = offsets;`
			`seek_bin(inpFp, -4, SEEK_END);`
			`read_int(inpFp, &numOffsets);`
			`if (numOffsets <= endBlock) { exit(3); }`
			`seek_bin(inpFp, -4 * (numOffsets + 1), SEEK_END);`
			`for (block = 0; block <= endBlock; ++block) {`
			`read_int(inpFp, offsetsPtr++);`
			`}`
			`}`
			`/* Seek to the first block to read */`
			`seek_bin(inpFp, offsets[currentBlock], SEEK_SET);`
			`offset = offset % BLOCK_BYTES;`

			`/* Start decoding */`
			`for(; currentBlock < endBlock; ++currentBlock) {`
			`char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];`
			`/* The difference in offsets is the size of the block */`
			`int cmpBytes = offsets[currentBlock + 1] - offsets[currentBlock];`
			`{`
			`const size_t read = read_bin(inpFp, cmpBuf, (size_t)cmpBytes);`
			`if(read != (size_t)cmpBytes) { exit(4); }`
			`}`

			`/* Load the dictionary */`
Add cast operator for compatibility with C++ 2021-09-06 16:02:23 +00:00			`LZ4_setStreamDecode(lz4StreamDecode, (const char*) dict, dictSize);`
Add dictionary random access example 2016-11-10 00:20:47 +00:00			`{`
			`const int decBytes = LZ4_decompress_safe_continue(`
			`lz4StreamDecode, cmpBuf, decBuf, cmpBytes, BLOCK_BYTES);`
			`if(decBytes <= 0) { exit(5); }`
			`{`
			`/* Write out the part of the data we care about */`
			`int blockLength = MIN(length, (decBytes - offset));`
			`write_bin(outFp, decBuf + offset, (size_t)blockLength);`
			`offset = 0;`
			`length -= blockLength;`
			`}`
			`}`
			`}`
			`}`


			`int compare(FILE* fp0, FILE* fp1, int length)`
			`{`
			`int result = 0;`

			`while(0 == result) {`
			`char b0[4096];`
			`char b1[4096];`
			`const size_t r0 = read_bin(fp0, b0, MIN(length, (int)sizeof(b0)));`
			`const size_t r1 = read_bin(fp1, b1, MIN(length, (int)sizeof(b1)));`

			`result = (int) r0 - (int) r1;`

			`if(0 == r0 \|\| 0 == r1) {`
			`break;`
			`}`
			`if(0 == result) {`
			`result = memcmp(b0, b1, r0);`
			`}`
			`length -= r0;`
			`}`

			`return result;`
			`}`


			`int main(int argc, char* argv[])`
			`{`
			`char inpFilename[256] = { 0 };`
			`char lz4Filename[256] = { 0 };`
			`char decFilename[256] = { 0 };`
			`char dictFilename[256] = { 0 };`
			`int offset;`
			`int length;`
			`char dict[DICTIONARY_BYTES];`
			`int dictSize;`

			`if(argc < 5) {`
			`printf("Usage: %s input dictionary offset length", argv[0]);`
			`return 0;`
			`}`

			`snprintf(inpFilename, 256, "%s", argv[1]);`
			`snprintf(lz4Filename, 256, "%s.lz4s-%d", argv[1], BLOCK_BYTES);`
			`snprintf(decFilename, 256, "%s.lz4s-%d.dec", argv[1], BLOCK_BYTES);`
			`snprintf(dictFilename, 256, "%s", argv[2]);`
			`offset = atoi(argv[3]);`
			`length = atoi(argv[4]);`

			`printf("inp = [%s]\n", inpFilename);`
			`printf("lz4 = [%s]\n", lz4Filename);`
			`printf("dec = [%s]\n", decFilename);`
			`printf("dict = [%s]\n", dictFilename);`
			`printf("offset = [%d]\n", offset);`
			`printf("length = [%d]\n", length);`

			`/* Load dictionary */`
			`{`
			`FILE* dictFp = fopen(dictFilename, "rb");`
			`dictSize = (int)read_bin(dictFp, dict, DICTIONARY_BYTES);`
			`fclose(dictFp);`
			`}`

			`/* compress */`
			`{`
			`FILE* inpFp = fopen(inpFilename, "rb");`
			`FILE* outFp = fopen(lz4Filename, "wb");`

			`printf("compress : %s -> %s\n", inpFilename, lz4Filename);`
			`test_compress(outFp, inpFp, dict, dictSize);`
			`printf("compress : done\n");`

			`fclose(outFp);`
			`fclose(inpFp);`
			`}`

			`/* decompress */`
			`{`
			`FILE* inpFp = fopen(lz4Filename, "rb");`
			`FILE* outFp = fopen(decFilename, "wb");`

			`printf("decompress : %s -> %s\n", lz4Filename, decFilename);`
			`test_decompress(outFp, inpFp, dict, DICTIONARY_BYTES, offset, length);`
			`printf("decompress : done\n");`

			`fclose(outFp);`
			`fclose(inpFp);`
			`}`

			`/* verify */`
			`{`
			`FILE* inpFp = fopen(inpFilename, "rb");`
			`FILE* decFp = fopen(decFilename, "rb");`
			`seek_bin(inpFp, offset, SEEK_SET);`

			`printf("verify : %s <-> %s\n", inpFilename, decFilename);`
			`const int cmp = compare(inpFp, decFp, length);`
			`if(0 == cmp) {`
			`printf("verify : OK\n");`
			`} else {`
			`printf("verify : NG\n");`
			`}`

			`fclose(decFp);`
			`fclose(inpFp);`
			`}`

			`return 0;`
			`}`