ac58c8d720
* All copyright lines now have -2020 instead of -present * All copyright lines include "Facebook, Inc" * All licenses are now standardized The copyright in `threading.{h,c}` is not changed because it comes from zstdmt. The copyright and license of `divsufsort.{h,c}` is not changed.
261 lines
7.8 KiB
C
261 lines
7.8 KiB
C
/*
|
|
* Copyright (c) 2017-2020, Facebook, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This source code is licensed under both the BSD-style license (found in the
|
|
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
* in the COPYING file in the root directory of this source tree).
|
|
* You may select, at your option, one of the above-listed licenses.
|
|
*/
|
|
|
|
#include "seqgen.h"
|
|
#include "mem.h"
|
|
#include <string.h>
|
|
|
|
/* Smaller of two values. Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/* Length, in bytes, of the random run that the literal-length and offset
 * generators emit and later replay to form a match. */
static const size_t kMatchBytes = 128;
|
|
|
|
/* Rotates the 32-bit unsigned value `x` left by `r` bits.
 * Every use of a parameter is parenthesised so that expression arguments
 * such as `a | b` or `n + 1` expand with the intended precedence.
 * `x` and `r` are each evaluated twice, and `r` must be in 1..31: a shift
 * count of 32 on a 32-bit operand is undefined behaviour in C.
 */
#define SEQ_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
|
|
static BYTE SEQ_randByte(unsigned* src)
|
|
{
|
|
static const U32 prime1 = 2654435761U;
|
|
static const U32 prime2 = 2246822519U;
|
|
U32 rand32 = *src;
|
|
rand32 *= prime1;
|
|
rand32 ^= prime2;
|
|
rand32 = SEQ_rotl32(rand32, 13);
|
|
*src = rand32;
|
|
return (BYTE)(rand32 >> 5);
|
|
}
|
|
|
|
SEQ_stream SEQ_initStream(unsigned seed)
|
|
{
|
|
SEQ_stream stream;
|
|
stream.state = 0;
|
|
XXH64_reset(&stream.xxh, 0);
|
|
stream.seed = seed;
|
|
return stream;
|
|
}
|
|
|
|
/* Generates a single guard byte, then match length + 1 of a different byte,
|
|
* then another guard byte.
|
|
*/
|
|
static size_t SEQ_gen_matchLength(SEQ_stream* stream, unsigned value,
|
|
SEQ_outBuffer* out)
|
|
{
|
|
typedef enum {
|
|
ml_first_byte = 0,
|
|
ml_match_bytes,
|
|
ml_last_byte,
|
|
} ml_state;
|
|
BYTE* const ostart = (BYTE*)out->dst;
|
|
BYTE* const oend = ostart + out->size;
|
|
BYTE* op = ostart + out->pos;
|
|
|
|
switch ((ml_state)stream->state) {
|
|
case ml_first_byte:
|
|
/* Generate a single byte and pick a different byte for the match */
|
|
if (op >= oend) {
|
|
stream->bytesLeft = 1;
|
|
break;
|
|
}
|
|
*op = SEQ_randByte(&stream->seed) & 0xFF;
|
|
do {
|
|
stream->saved = SEQ_randByte(&stream->seed) & 0xFF;
|
|
} while (*op == stream->saved);
|
|
++op;
|
|
/* State transition */
|
|
stream->state = ml_match_bytes;
|
|
stream->bytesLeft = value + 1;
|
|
/* fall-through */
|
|
case ml_match_bytes: {
|
|
/* Copy matchLength + 1 bytes to the output buffer */
|
|
size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
|
|
if (setLength > 0) {
|
|
memset(op, stream->saved, setLength);
|
|
op += setLength;
|
|
stream->bytesLeft -= setLength;
|
|
}
|
|
if (stream->bytesLeft > 0)
|
|
break;
|
|
/* State transition */
|
|
stream->state = ml_last_byte;
|
|
}
|
|
/* fall-through */
|
|
case ml_last_byte:
|
|
/* Generate a single byte and pick a different byte for the match */
|
|
if (op >= oend) {
|
|
stream->bytesLeft = 1;
|
|
break;
|
|
}
|
|
do {
|
|
*op = SEQ_randByte(&stream->seed) & 0xFF;
|
|
} while (*op == stream->saved);
|
|
++op;
|
|
/* State transition */
|
|
/* fall-through */
|
|
default:
|
|
stream->state = 0;
|
|
stream->bytesLeft = 0;
|
|
break;
|
|
}
|
|
XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
|
|
out->pos = op - ostart;
|
|
return stream->bytesLeft;
|
|
}
|
|
|
|
/* Saves the current seed then generates kMatchBytes random bytes >= 128.
|
|
* Generates literal length - kMatchBytes random bytes < 128.
|
|
* Generates another kMatchBytes using the saved seed to generate a match.
|
|
* This way the match is easy to find for the compressors.
|
|
*/
|
|
static size_t SEQ_gen_litLength(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
|
|
{
|
|
typedef enum {
|
|
ll_start = 0,
|
|
ll_run_bytes,
|
|
ll_literals,
|
|
ll_run_match,
|
|
} ll_state;
|
|
BYTE* const ostart = (BYTE*)out->dst;
|
|
BYTE* const oend = ostart + out->size;
|
|
BYTE* op = ostart + out->pos;
|
|
|
|
switch ((ll_state)stream->state) {
|
|
case ll_start:
|
|
stream->state = ll_run_bytes;
|
|
stream->saved = stream->seed;
|
|
stream->bytesLeft = MIN(kMatchBytes, value);
|
|
/* fall-through */
|
|
case ll_run_bytes:
|
|
while (stream->bytesLeft > 0 && op < oend) {
|
|
*op++ = SEQ_randByte(&stream->seed) | 0x80;
|
|
--stream->bytesLeft;
|
|
}
|
|
if (stream->bytesLeft > 0)
|
|
break;
|
|
/* State transition */
|
|
stream->state = ll_literals;
|
|
stream->bytesLeft = value - MIN(kMatchBytes, value);
|
|
/* fall-through */
|
|
case ll_literals:
|
|
while (stream->bytesLeft > 0 && op < oend) {
|
|
*op++ = SEQ_randByte(&stream->seed) & 0x7F;
|
|
--stream->bytesLeft;
|
|
}
|
|
if (stream->bytesLeft > 0)
|
|
break;
|
|
/* State transition */
|
|
stream->state = ll_run_match;
|
|
stream->bytesLeft = MIN(kMatchBytes, value);
|
|
/* fall-through */
|
|
case ll_run_match: {
|
|
while (stream->bytesLeft > 0 && op < oend) {
|
|
*op++ = SEQ_randByte(&stream->saved) | 0x80;
|
|
--stream->bytesLeft;
|
|
}
|
|
if (stream->bytesLeft > 0)
|
|
break;
|
|
}
|
|
/* fall-through */
|
|
default:
|
|
stream->state = 0;
|
|
stream->bytesLeft = 0;
|
|
break;
|
|
}
|
|
XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
|
|
out->pos = op - ostart;
|
|
return stream->bytesLeft;
|
|
}
|
|
|
|
/* Saves the current seed then generates kMatchBytes random bytes >= 128.
|
|
* Generates offset - kMatchBytes of zeros to get a large offset without
|
|
* polluting the hash tables.
|
|
* Generates another kMatchBytes using the saved seed to generate a with the
|
|
* required offset.
|
|
*/
|
|
static size_t SEQ_gen_offset(SEQ_stream* stream, unsigned value, SEQ_outBuffer* out)
|
|
{
|
|
typedef enum {
|
|
of_start = 0,
|
|
of_run_bytes,
|
|
of_offset,
|
|
of_run_match,
|
|
} of_state;
|
|
BYTE* const ostart = (BYTE*)out->dst;
|
|
BYTE* const oend = ostart + out->size;
|
|
BYTE* op = ostart + out->pos;
|
|
|
|
switch ((of_state)stream->state) {
|
|
case of_start:
|
|
stream->state = of_run_bytes;
|
|
stream->saved = stream->seed;
|
|
stream->bytesLeft = MIN(value, kMatchBytes);
|
|
/* fall-through */
|
|
case of_run_bytes: {
|
|
while (stream->bytesLeft > 0 && op < oend) {
|
|
*op++ = SEQ_randByte(&stream->seed) | 0x80;
|
|
--stream->bytesLeft;
|
|
}
|
|
if (stream->bytesLeft > 0)
|
|
break;
|
|
/* State transition */
|
|
stream->state = of_offset;
|
|
stream->bytesLeft = value - MIN(value, kMatchBytes);
|
|
}
|
|
/* fall-through */
|
|
case of_offset: {
|
|
/* Copy matchLength + 1 bytes to the output buffer */
|
|
size_t const setLength = MIN(stream->bytesLeft, (size_t)(oend - op));
|
|
if (setLength > 0) {
|
|
memset(op, 0, setLength);
|
|
op += setLength;
|
|
stream->bytesLeft -= setLength;
|
|
}
|
|
if (stream->bytesLeft > 0)
|
|
break;
|
|
/* State transition */
|
|
stream->state = of_run_match;
|
|
stream->bytesLeft = MIN(value, kMatchBytes);
|
|
}
|
|
/* fall-through */
|
|
case of_run_match: {
|
|
while (stream->bytesLeft > 0 && op < oend) {
|
|
*op++ = SEQ_randByte(&stream->saved) | 0x80;
|
|
--stream->bytesLeft;
|
|
}
|
|
if (stream->bytesLeft > 0)
|
|
break;
|
|
}
|
|
/* fall-through */
|
|
default:
|
|
stream->state = 0;
|
|
stream->bytesLeft = 0;
|
|
break;
|
|
}
|
|
XXH64_update(&stream->xxh, ostart + out->pos, (op - ostart) - out->pos);
|
|
out->pos = op - ostart;
|
|
return stream->bytesLeft;
|
|
}
|
|
|
|
/* Returns the number of bytes left to generate.
|
|
* Must pass the same type/value until it returns 0.
|
|
*/
|
|
size_t SEQ_gen(SEQ_stream* stream, SEQ_gen_type type, unsigned value, SEQ_outBuffer* out)
|
|
{
|
|
switch (type) {
|
|
case SEQ_gen_ml: return SEQ_gen_matchLength(stream, value, out);
|
|
case SEQ_gen_ll: return SEQ_gen_litLength(stream, value, out);
|
|
case SEQ_gen_of: return SEQ_gen_offset(stream, value, out);
|
|
case SEQ_gen_max: /* fall-through */
|
|
default: return 0;
|
|
}
|
|
}
|
|
|
|
/* Returns the xxhash of the data produced so far */
|
|
XXH64_hash_t SEQ_digest(SEQ_stream const* stream)
|
|
{
|
|
return XXH64_digest(&stream->xxh);
|
|
}
|