2017-08-18 23:52:05 +00:00
/*
2017-09-08 07:09:23 +00:00
* Copyright ( c ) 2017 - present , Yann Collet , Facebook , Inc .
2017-02-22 23:59:15 +00:00
* All rights reserved .
*
2017-08-18 23:52:05 +00:00
* This source code is licensed under both the BSD - style license ( found in the
* LICENSE file in the root directory of this source tree ) and the GPLv2 ( found
* in the COPYING file in the root directory of this source tree ) .
2017-09-08 07:09:23 +00:00
* You may select , at your option , one of the above - listed licenses .
2017-02-22 23:59:15 +00:00
*/
# include <limits.h>
# include <math.h>
# include <stddef.h>
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
2017-11-30 03:11:12 +00:00
# include "util.h"
2017-02-22 23:59:15 +00:00
# include "zstd.h"
# include "zstd_internal.h"
# include "mem.h"
2017-06-13 21:19:35 +00:00
# define ZDICT_STATIC_LINKING_ONLY
2017-06-13 18:54:43 +00:00
# include "zdict.h"
2017-02-22 23:59:15 +00:00
// Direct access to internal compression functions is required
# include "zstd_compress.c"
# define XXH_STATIC_LINKING_ONLY
# include "xxhash.h" /* XXH64 */
# ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
# endif
# ifndef MAX_PATH
# ifdef PATH_MAX
# define MAX_PATH PATH_MAX
# else
# define MAX_PATH 256
# endif
# endif
/*-************************************
* DISPLAY Macros
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
# define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
2017-10-07 22:19:52 +00:00
static U32 g_displayLevel = 2 ;
2017-02-22 23:59:15 +00:00
# define DISPLAYUPDATE(...) \
do { \
2017-11-30 03:11:12 +00:00
if ( ( UTIL_clockSpanMicro ( g_displayClock ) > g_refreshRate ) | | \
2017-02-22 23:59:15 +00:00
( g_displayLevel > = 4 ) ) { \
2017-11-30 03:11:12 +00:00
g_displayClock = UTIL_getTime ( ) ; \
2017-02-22 23:59:15 +00:00
DISPLAY ( __VA_ARGS__ ) ; \
if ( g_displayLevel > = 4 ) fflush ( stderr ) ; \
} \
} while ( 0 )
2017-11-30 03:11:12 +00:00
static const U64 g_refreshRate = SEC_TO_MICRO / 6 ;
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER ;
2017-02-22 23:59:15 +00:00
# define CHECKERR(code) \
do { \
if ( ZSTD_isError ( code ) ) { \
DISPLAY ( " Error occurred while generating data: %s \n " , \
ZSTD_getErrorName ( code ) ) ; \
exit ( 1 ) ; \
} \
} while ( 0 )
/*-*******************************************************
* Random function
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static unsigned RAND ( unsigned * src )
{
# define RAND_rotl32(x,r) ((x << r) | (x >> (32 - r)))
static const U32 prime1 = 2654435761U ;
static const U32 prime2 = 2246822519U ;
U32 rand32 = * src ;
rand32 * = prime1 ;
rand32 + = prime2 ;
rand32 = RAND_rotl32 ( rand32 , 13 ) ;
* src = rand32 ;
return RAND_rotl32 ( rand32 , 27 ) ;
# undef RAND_rotl32
}
# define DISTSIZE (8192)
/* Write `size` bytes into `ptr`, all of which are less than or equal to `maxSymb` */
static void RAND_bufferMaxSymb ( U32 * seed , void * ptr , size_t size , int maxSymb )
{
size_t i ;
BYTE * op = ptr ;
for ( i = 0 ; i < size ; i + + ) {
2017-03-20 23:02:24 +00:00
op [ i ] = ( BYTE ) ( RAND ( seed ) % ( maxSymb + 1 ) ) ;
2017-02-22 23:59:15 +00:00
}
}
/* Write `size` random bytes into `ptr` */
static void RAND_buffer ( U32 * seed , void * ptr , size_t size )
{
size_t i ;
BYTE * op = ptr ;
for ( i = 0 ; i + 4 < = size ; i + = 4 ) {
MEM_writeLE32 ( op + i , RAND ( seed ) ) ;
}
for ( ; i < size ; i + + ) {
op [ i ] = RAND ( seed ) & 0xff ;
}
}
/* Write `size` bytes into `ptr` following the distribution `dist` */
static void RAND_bufferDist ( U32 * seed , BYTE * dist , void * ptr , size_t size )
{
size_t i ;
BYTE * op = ptr ;
for ( i = 0 ; i < size ; i + + ) {
op [ i ] = dist [ RAND ( seed ) % DISTSIZE ] ;
}
}
/* Generate a random distribution where the frequency of each symbol follows a
* geometric distribution defined by ` weight `
* ` dist ` should have size at least ` DISTSIZE ` */
static void RAND_genDist ( U32 * seed , BYTE * dist , double weight )
{
size_t i = 0 ;
size_t statesLeft = DISTSIZE ;
2017-03-20 23:02:24 +00:00
BYTE symb = ( BYTE ) ( RAND ( seed ) % 256 ) ;
BYTE step = ( BYTE ) ( ( RAND ( seed ) % 256 ) | 1 ) ; /* force it to be odd so it's relatively prime to 256 */
2017-02-22 23:59:15 +00:00
while ( i < DISTSIZE ) {
size_t states = ( ( size_t ) ( weight * statesLeft ) ) + 1 ;
size_t j ;
for ( j = 0 ; j < states & & i < DISTSIZE ; j + + , i + + ) {
dist [ i ] = symb ;
}
symb + = step ;
statesLeft - = states ;
}
}
/* Generates a random number in the range [min, max) */
static inline U32 RAND_range ( U32 * seed , U32 min , U32 max )
{
return ( RAND ( seed ) % ( max - min ) ) + min ;
}
# define ROUND(x) ((U32)(x + 0.5))
/* Generates a random number in an exponential distribution with mean `mean` */
static double RAND_exp ( U32 * seed , double mean )
{
double const u = RAND ( seed ) / ( double ) UINT_MAX ;
return log ( 1 - u ) * ( - mean ) ;
}
/*-*******************************************************
* Constants and Structs
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
const char * BLOCK_TYPES [ ] = { " raw " , " rle " , " compressed " } ;
# define MAX_DECOMPRESSED_SIZE_LOG 20
# define MAX_DECOMPRESSED_SIZE (1ULL << MAX_DECOMPRESSED_SIZE_LOG)
# define MAX_WINDOW_LOG 22 /* Recommended support is 8MB, so limit to 4MB + mantissa */
# define MIN_SEQ_LEN (3)
2017-10-07 22:19:52 +00:00
# define MAX_NB_SEQ ((ZSTD_BLOCKSIZE_MAX + MIN_SEQ_LEN - 1) / MIN_SEQ_LEN)
2017-02-22 23:59:15 +00:00
BYTE CONTENT_BUFFER [ MAX_DECOMPRESSED_SIZE ] ;
BYTE FRAME_BUFFER [ MAX_DECOMPRESSED_SIZE * 2 ] ;
2017-10-07 22:19:52 +00:00
BYTE LITERAL_BUFFER [ ZSTD_BLOCKSIZE_MAX ] ;
2017-02-22 23:59:15 +00:00
seqDef SEQUENCE_BUFFER [ MAX_NB_SEQ ] ;
2017-10-07 22:19:52 +00:00
BYTE SEQUENCE_LITERAL_BUFFER [ ZSTD_BLOCKSIZE_MAX ] ; /* storeSeq expects a place to copy literals to */
BYTE SEQUENCE_LLCODE [ ZSTD_BLOCKSIZE_MAX ] ;
BYTE SEQUENCE_MLCODE [ ZSTD_BLOCKSIZE_MAX ] ;
BYTE SEQUENCE_OFCODE [ ZSTD_BLOCKSIZE_MAX ] ;
2017-02-22 23:59:15 +00:00
unsigned WKSP [ 1024 ] ;
typedef struct {
size_t contentSize ; /* 0 means unknown (unless contentSize == windowSize == 0) */
unsigned windowSize ; /* contentSize >= windowSize means single segment */
} frameHeader_t ;
/* For repeat modes */
typedef struct {
U32 rep [ ZSTD_REP_NUM ] ;
int hufInit ;
/* the distribution used in the previous block for repeat mode */
BYTE hufDist [ DISTSIZE ] ;
U32 hufTable [ 256 ] ; /* HUF_CElt is an incomplete type */
int fseInit ;
FSE_CTable offcodeCTable [ FSE_CTABLE_SIZE_U32 ( OffFSELog , MaxOff ) ] ;
FSE_CTable matchlengthCTable [ FSE_CTABLE_SIZE_U32 ( MLFSELog , MaxML ) ] ;
FSE_CTable litlengthCTable [ FSE_CTABLE_SIZE_U32 ( LLFSELog , MaxLL ) ] ;
/* Symbols that were present in the previous distribution, for use with
* set_repeat */
BYTE litlengthSymbolSet [ 36 ] ;
BYTE offsetSymbolSet [ 29 ] ;
BYTE matchlengthSymbolSet [ 53 ] ;
} cblockStats_t ;
typedef struct {
void * data ;
void * dataStart ;
void * dataEnd ;
void * src ;
void * srcStart ;
void * srcEnd ;
frameHeader_t header ;
cblockStats_t stats ;
cblockStats_t oldStats ; /* so they can be rolled back if uncompressible */
} frame_t ;
2017-06-19 21:02:27 +00:00
typedef struct {
int useDict ;
U32 dictID ;
2017-06-19 23:23:00 +00:00
size_t dictContentSize ;
2017-06-19 21:02:27 +00:00
BYTE * dictContent ;
2017-06-19 23:05:05 +00:00
} dictInfo ;
2017-09-12 21:29:39 +00:00
typedef enum {
gt_frame = 0 , /* generate frames */
gt_block , /* generate compressed blocks without block/frame headers */
} genType_e ;
2017-09-13 22:07:03 +00:00
/*-*******************************************************
* Global variables ( set from command line )
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
U32 g_maxDecompressedSizeLog = MAX_DECOMPRESSED_SIZE_LOG ; /* <= 20 */
2017-10-07 22:19:52 +00:00
U32 g_maxBlockSize = ZSTD_BLOCKSIZE_MAX ; /* <= 128 KB */
2017-09-13 22:07:03 +00:00
2017-02-22 23:59:15 +00:00
/*-*******************************************************
* Generator Functions
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2017-02-23 21:06:50 +00:00
struct {
int contentSize ; /* force the content size to be present */
} opts ; /* advanced options on generation */
2017-02-22 23:59:15 +00:00
/* Generate and write a random frame header */
2017-06-19 23:23:00 +00:00
static void writeFrameHeader ( U32 * seed , frame_t * frame , dictInfo info )
2017-02-22 23:59:15 +00:00
{
BYTE * const op = frame - > data ;
size_t pos = 0 ;
2017-02-23 21:06:50 +00:00
frameHeader_t fh ;
2017-02-22 23:59:15 +00:00
BYTE windowByte = 0 ;
int singleSegment = 0 ;
int contentSizeFlag = 0 ;
int fcsCode = 0 ;
2017-02-23 21:06:50 +00:00
memset ( & fh , 0 , sizeof ( fh ) ) ;
2017-02-22 23:59:15 +00:00
/* generate window size */
{
/* Follow window algorithm from specification */
int const exponent = RAND ( seed ) % ( MAX_WINDOW_LOG - 10 ) ;
int const mantissa = RAND ( seed ) % 8 ;
2017-03-20 23:02:24 +00:00
windowByte = ( BYTE ) ( ( exponent < < 3 ) | mantissa ) ;
2017-02-23 21:06:50 +00:00
fh . windowSize = ( 1U < < ( exponent + 10 ) ) ;
fh . windowSize + = fh . windowSize / 8 * mantissa ;
2017-02-22 23:59:15 +00:00
}
{
/* Generate random content size */
size_t highBit ;
2017-09-13 22:07:03 +00:00
if ( RAND ( seed ) & 7 & & g_maxDecompressedSizeLog > 7 ) {
2017-02-22 23:59:15 +00:00
/* do content of at least 128 bytes */
2017-09-13 22:07:03 +00:00
highBit = 1ULL < < RAND_range ( seed , 7 , g_maxDecompressedSizeLog ) ;
2017-02-22 23:59:15 +00:00
} else if ( RAND ( seed ) & 3 ) {
/* do small content */
2017-09-13 23:01:16 +00:00
highBit = 1ULL < < RAND_range ( seed , 0 , MIN ( 7 , 1U < < g_maxDecompressedSizeLog ) ) ;
2017-02-22 23:59:15 +00:00
} else {
/* 0 size frame */
highBit = 0 ;
}
2017-02-23 21:06:50 +00:00
fh . contentSize = highBit ? highBit + ( RAND ( seed ) % highBit ) : 0 ;
2017-02-22 23:59:15 +00:00
/* provide size sometimes */
2017-02-23 21:06:50 +00:00
contentSizeFlag = opts . contentSize | ( RAND ( seed ) & 1 ) ;
2017-02-22 23:59:15 +00:00
2017-02-23 21:06:50 +00:00
if ( contentSizeFlag & & ( fh . contentSize = = 0 | | ! ( RAND ( seed ) & 7 ) ) ) {
2017-02-22 23:59:15 +00:00
/* do single segment sometimes */
2017-03-20 23:02:24 +00:00
fh . windowSize = ( U32 ) fh . contentSize ;
2017-02-22 23:59:15 +00:00
singleSegment = 1 ;
}
}
if ( contentSizeFlag ) {
/* Determine how large fcs field has to be */
2017-02-23 21:06:50 +00:00
int minFcsCode = ( fh . contentSize > = 256 ) +
( fh . contentSize > = 65536 + 256 ) +
( fh . contentSize > 0xFFFFFFFFU ) ;
2017-02-22 23:59:15 +00:00
if ( ! singleSegment & & ! minFcsCode ) {
minFcsCode = 1 ;
}
fcsCode = minFcsCode + ( RAND ( seed ) % ( 4 - minFcsCode ) ) ;
2017-02-23 21:06:50 +00:00
if ( fcsCode = = 1 & & fh . contentSize < 256 ) fcsCode + + ;
2017-02-22 23:59:15 +00:00
}
/* write out the header */
MEM_writeLE32 ( op + pos , ZSTD_MAGICNUMBER ) ;
pos + = 4 ;
{
2017-06-22 00:43:21 +00:00
/*
* fcsCode : 2 - bit flag specifying how many bytes used to represent Frame_Content_Size ( bits 7 - 6 )
* singleSegment : 1 - bit flag describing if data must be regenerated within a single continuous memory segment . ( bit 5 )
* contentChecksumFlag : 1 - bit flag that is set if frame includes checksum at the end - - set to 1 below ( bit 2 )
* dictBits : 2 - bit flag describing how many bytes Dictionary_ID uses - - set to 3 ( bits 1 - 0 )
* For more information : https : //github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_header
*/
2017-06-20 18:15:26 +00:00
int const dictBits = info . useDict ? 3 : 0 ;
2017-02-22 23:59:15 +00:00
BYTE const frameHeaderDescriptor =
2017-06-09 00:27:15 +00:00
( BYTE ) ( ( fcsCode < < 6 ) | ( singleSegment < < 5 ) | ( 1 < < 2 ) | dictBits ) ;
2017-02-22 23:59:15 +00:00
op [ pos + + ] = frameHeaderDescriptor ;
}
if ( ! singleSegment ) {
op [ pos + + ] = windowByte ;
}
2017-06-20 16:50:28 +00:00
if ( info . useDict ) {
2017-06-19 23:23:00 +00:00
MEM_writeLE32 ( op + pos , ( U32 ) info . dictID ) ;
2017-06-13 18:54:43 +00:00
pos + = 4 ;
2017-06-09 00:27:15 +00:00
}
2017-02-22 23:59:15 +00:00
if ( contentSizeFlag ) {
switch ( fcsCode ) {
default : /* Impossible */
2017-03-20 23:02:24 +00:00
case 0 : op [ pos + + ] = ( BYTE ) fh . contentSize ; break ;
case 1 : MEM_writeLE16 ( op + pos , ( U16 ) ( fh . contentSize - 256 ) ) ; pos + = 2 ; break ;
case 2 : MEM_writeLE32 ( op + pos , ( U32 ) fh . contentSize ) ; pos + = 4 ; break ;
case 3 : MEM_writeLE64 ( op + pos , ( U64 ) fh . contentSize ) ; pos + = 8 ; break ;
2017-02-22 23:59:15 +00:00
}
}
2017-10-07 22:19:52 +00:00
DISPLAYLEVEL ( 3 , " frame content size: \t %u \n " , ( U32 ) fh . contentSize ) ;
DISPLAYLEVEL ( 3 , " frame window size: \t %u \n " , fh . windowSize ) ;
DISPLAYLEVEL ( 3 , " content size flag: \t %d \n " , contentSizeFlag ) ;
DISPLAYLEVEL ( 3 , " single segment flag: \t %d \n " , singleSegment ) ;
2017-02-22 23:59:15 +00:00
frame - > data = op + pos ;
2017-02-23 21:06:50 +00:00
frame - > header = fh ;
2017-02-22 23:59:15 +00:00
}
/* Write a literal block in either raw or RLE form, return the literals size */
static size_t writeLiteralsBlockSimple ( U32 * seed , frame_t * frame , size_t contentSize )
{
BYTE * op = ( BYTE * ) frame - > data ;
int const type = RAND ( seed ) % 2 ;
int const sizeFormatDesc = RAND ( seed ) % 8 ;
size_t litSize ;
2017-09-13 22:07:03 +00:00
size_t maxLitSize = MIN ( contentSize , g_maxBlockSize ) ;
2017-02-22 23:59:15 +00:00
if ( sizeFormatDesc = = 0 ) {
/* Size_FormatDesc = ?0 */
maxLitSize = MIN ( maxLitSize , 31 ) ;
} else if ( sizeFormatDesc < = 4 ) {
/* Size_FormatDesc = 01 */
maxLitSize = MIN ( maxLitSize , 4095 ) ;
} else {
/* Size_Format = 11 */
maxLitSize = MIN ( maxLitSize , 1048575 ) ;
}
litSize = RAND ( seed ) % ( maxLitSize + 1 ) ;
if ( frame - > src = = frame - > srcStart & & litSize = = 0 ) {
litSize = 1 ; /* no empty literals if there's nothing preceding this block */
}
if ( litSize + 3 > contentSize ) {
litSize = contentSize ; /* no matches shorter than 3 are allowed */
}
/* use smallest size format that fits */
if ( litSize < 32 ) {
op [ 0 ] = ( type | ( 0 < < 2 ) | ( litSize < < 3 ) ) & 0xff ;
op + = 1 ;
} else if ( litSize < 4096 ) {
op [ 0 ] = ( type | ( 1 < < 2 ) | ( litSize < < 4 ) ) & 0xff ;
op [ 1 ] = ( litSize > > 4 ) & 0xff ;
op + = 2 ;
} else {
op [ 0 ] = ( type | ( 3 < < 2 ) | ( litSize < < 4 ) ) & 0xff ;
op [ 1 ] = ( litSize > > 4 ) & 0xff ;
op [ 2 ] = ( litSize > > 12 ) & 0xff ;
op + = 3 ;
}
if ( type = = 0 ) {
/* Raw literals */
DISPLAYLEVEL ( 4 , " raw literals \n " ) ;
RAND_buffer ( seed , LITERAL_BUFFER , litSize ) ;
memcpy ( op , LITERAL_BUFFER , litSize ) ;
op + = litSize ;
} else {
/* RLE literals */
2017-03-20 23:02:24 +00:00
BYTE const symb = ( BYTE ) ( RAND ( seed ) % 256 ) ;
2017-02-22 23:59:15 +00:00
DISPLAYLEVEL ( 4 , " rle literals: 0x%02x \n " , ( U32 ) symb ) ;
memset ( LITERAL_BUFFER , symb , litSize ) ;
op [ 0 ] = symb ;
op + + ;
}
frame - > data = op ;
return litSize ;
}
/* Generate a Huffman header for the given source */
static size_t writeHufHeader ( U32 * seed , HUF_CElt * hufTable , void * dst , size_t dstSize ,
const void * src , size_t srcSize )
{
BYTE * const ostart = ( BYTE * ) dst ;
BYTE * op = ostart ;
unsigned huffLog = 11 ;
U32 maxSymbolValue = 255 ;
U32 count [ HUF_SYMBOLVALUE_MAX + 1 ] ;
/* Scan input and build symbol stats */
2018-10-26 20:21:37 +00:00
{ size_t const largest = HIST_count_wksp ( count , & maxSymbolValue , ( const BYTE * ) src , srcSize , WKSP , sizeof ( WKSP ) ) ;
2018-06-13 23:49:31 +00:00
assert ( ! HIST_isError ( largest ) ) ;
2017-02-22 23:59:15 +00:00
if ( largest = = srcSize ) { * ostart = ( ( const BYTE * ) src ) [ 0 ] ; return 0 ; } /* single symbol, rle */
if ( largest < = ( srcSize > > 7 ) + 1 ) return 0 ; /* Fast heuristic : not compressible enough */
}
/* Build Huffman Tree */
/* Max Huffman log is 11, min is highbit(maxSymbolValue)+1 */
huffLog = RAND_range ( seed , ZSTD_highbit32 ( maxSymbolValue ) + 1 , huffLog + 1 ) ;
DISPLAYLEVEL ( 6 , " huffman log: %u \n " , huffLog ) ;
{ size_t const maxBits = HUF_buildCTable_wksp ( hufTable , count , maxSymbolValue , huffLog , WKSP , sizeof ( WKSP ) ) ;
CHECKERR ( maxBits ) ;
huffLog = ( U32 ) maxBits ;
}
/* Write table description header */
{ size_t const hSize = HUF_writeCTable ( op , dstSize , hufTable , maxSymbolValue , huffLog ) ;
if ( hSize + 12 > = srcSize ) return 0 ; /* not useful to try compression */
op + = hSize ;
}
return op - ostart ;
}
2017-09-12 21:29:39 +00:00
/* Write a Huffman coded literals block and return the literals size */
2017-02-22 23:59:15 +00:00
static size_t writeLiteralsBlockCompressed ( U32 * seed , frame_t * frame , size_t contentSize )
{
BYTE * origop = ( BYTE * ) frame - > data ;
BYTE * opend = ( BYTE * ) frame - > dataEnd ;
BYTE * op ;
BYTE * const ostart = origop ;
int const sizeFormat = RAND ( seed ) % 4 ;
size_t litSize ;
size_t hufHeaderSize = 0 ;
size_t compressedSize = 0 ;
2017-09-13 22:07:03 +00:00
size_t maxLitSize = MIN ( contentSize - 3 , g_maxBlockSize ) ;
2017-02-22 23:59:15 +00:00
symbolEncodingType_e hType ;
if ( contentSize < 64 ) {
/* make sure we get reasonably-sized literals for compression */
return ERROR ( GENERIC ) ;
}
DISPLAYLEVEL ( 4 , " compressed literals \n " ) ;
switch ( sizeFormat ) {
case 0 : /* fall through, size is the same as case 1 */
case 1 :
maxLitSize = MIN ( maxLitSize , 1023 ) ;
origop + = 3 ;
break ;
case 2 :
maxLitSize = MIN ( maxLitSize , 16383 ) ;
origop + = 4 ;
break ;
case 3 :
maxLitSize = MIN ( maxLitSize , 262143 ) ;
origop + = 5 ;
break ;
default : ; /* impossible */
}
do {
op = origop ;
do {
litSize = RAND ( seed ) % ( maxLitSize + 1 ) ;
} while ( litSize < 32 ) ; /* avoid small literal sizes */
if ( litSize + 3 > contentSize ) {
litSize = contentSize ; /* no matches shorter than 3 are allowed */
}
/* most of the time generate a new distribution */
if ( ( RAND ( seed ) & 3 ) | | ! frame - > stats . hufInit ) {
do {
if ( RAND ( seed ) & 3 ) {
/* add 10 to ensure some compressability */
double const weight = ( ( RAND ( seed ) % 90 ) + 10 ) / 100.0 ;
DISPLAYLEVEL ( 5 , " distribution weight: %d%% \n " ,
( int ) ( weight * 100 ) ) ;
RAND_genDist ( seed , frame - > stats . hufDist , weight ) ;
} else {
/* sometimes do restricted range literals to force
* non - huffman headers */
DISPLAYLEVEL ( 5 , " small range literals \n " ) ;
RAND_bufferMaxSymb ( seed , frame - > stats . hufDist , DISTSIZE ,
15 ) ;
}
RAND_bufferDist ( seed , frame - > stats . hufDist , LITERAL_BUFFER ,
litSize ) ;
/* generate the header from the distribution instead of the
* actual data to avoid bugs with symbols that were in the
* distribution but never showed up in the output */
hufHeaderSize = writeHufHeader (
seed , ( HUF_CElt * ) frame - > stats . hufTable , op , opend - op ,
frame - > stats . hufDist , DISTSIZE ) ;
CHECKERR ( hufHeaderSize ) ;
/* repeat until a valid header is written */
} while ( hufHeaderSize = = 0 ) ;
op + = hufHeaderSize ;
hType = set_compressed ;
frame - > stats . hufInit = 1 ;
} else {
/* repeat the distribution/table from last time */
DISPLAYLEVEL ( 5 , " huffman repeat stats \n " ) ;
RAND_bufferDist ( seed , frame - > stats . hufDist , LITERAL_BUFFER ,
litSize ) ;
hufHeaderSize = 0 ;
hType = set_repeat ;
}
do {
compressedSize =
sizeFormat = = 0
? HUF_compress1X_usingCTable (
op , opend - op , LITERAL_BUFFER , litSize ,
( HUF_CElt * ) frame - > stats . hufTable )
: HUF_compress4X_usingCTable (
op , opend - op , LITERAL_BUFFER , litSize ,
( HUF_CElt * ) frame - > stats . hufTable ) ;
CHECKERR ( compressedSize ) ;
/* this only occurs when it could not compress or similar */
} while ( compressedSize < = 0 ) ;
op + = compressedSize ;
compressedSize + = hufHeaderSize ;
2017-03-20 19:38:19 +00:00
DISPLAYLEVEL ( 5 , " regenerated size: %u \n " , ( U32 ) litSize ) ;
DISPLAYLEVEL ( 5 , " compressed size: %u \n " , ( U32 ) compressedSize ) ;
2017-02-22 23:59:15 +00:00
if ( compressedSize > = litSize ) {
DISPLAYLEVEL ( 5 , " trying again \n " ) ;
/* if we have to try again, reset the stats so we don't accidentally
* try to repeat a distribution we just made */
frame - > stats = frame - > oldStats ;
} else {
break ;
}
} while ( 1 ) ;
/* write header */
switch ( sizeFormat ) {
case 0 : /* fall through, size is the same as case 1 */
case 1 : {
U32 const header = hType | ( sizeFormat < < 2 ) | ( ( U32 ) litSize < < 4 ) |
( ( U32 ) compressedSize < < 14 ) ;
MEM_writeLE24 ( ostart , header ) ;
break ;
}
case 2 : {
U32 const header = hType | ( sizeFormat < < 2 ) | ( ( U32 ) litSize < < 4 ) |
( ( U32 ) compressedSize < < 18 ) ;
MEM_writeLE32 ( ostart , header ) ;
break ;
}
case 3 : {
U32 const header = hType | ( sizeFormat < < 2 ) | ( ( U32 ) litSize < < 4 ) |
( ( U32 ) compressedSize < < 22 ) ;
MEM_writeLE32 ( ostart , header ) ;
ostart [ 4 ] = ( BYTE ) ( compressedSize > > 10 ) ;
break ;
}
default : ; /* impossible */
}
frame - > data = op ;
return litSize ;
}
static size_t writeLiteralsBlock ( U32 * seed , frame_t * frame , size_t contentSize )
{
/* only do compressed for larger segments to avoid compressibility issues */
if ( RAND ( seed ) & 7 & & contentSize > = 64 ) {
return writeLiteralsBlockCompressed ( seed , frame , contentSize ) ;
} else {
return writeLiteralsBlockSimple ( seed , frame , contentSize ) ;
}
}
static inline void initSeqStore ( seqStore_t * seqStore ) {
2018-08-23 22:28:12 +00:00
seqStore - > maxNbSeq = MAX_NB_SEQ ;
2018-08-28 20:56:47 +00:00
seqStore - > maxNbLit = ZSTD_BLOCKSIZE_MAX ;
2017-02-22 23:59:15 +00:00
seqStore - > sequencesStart = SEQUENCE_BUFFER ;
seqStore - > litStart = SEQUENCE_LITERAL_BUFFER ;
seqStore - > llCode = SEQUENCE_LLCODE ;
seqStore - > mlCode = SEQUENCE_MLCODE ;
seqStore - > ofCode = SEQUENCE_OFCODE ;
ZSTD_resetSeqStore ( seqStore ) ;
}
/* Randomly generate sequence commands */
static U32 generateSequences ( U32 * seed , frame_t * frame , seqStore_t * seqStore ,
2017-06-19 23:23:00 +00:00
size_t contentSize , size_t literalsSize , dictInfo info )
2017-02-22 23:59:15 +00:00
{
/* The total length of all the matches */
size_t const remainingMatch = contentSize - literalsSize ;
2017-02-24 18:53:16 +00:00
size_t excessMatch = 0 ;
U32 numSequences = 0 ;
2017-03-20 19:38:19 +00:00
2017-02-22 23:59:15 +00:00
U32 i ;
const BYTE * literals = LITERAL_BUFFER ;
BYTE * srcPtr = frame - > src ;
2017-02-24 18:53:16 +00:00
if ( literalsSize ! = contentSize ) {
2017-02-22 23:59:15 +00:00
/* each match must be at least MIN_SEQ_LEN, so this is the maximum
* number of sequences we can have */
U32 const maxSequences = ( U32 ) remainingMatch / MIN_SEQ_LEN ;
numSequences = ( RAND ( seed ) % maxSequences ) + 1 ;
/* the extra match lengths we have to allocate to each sequence */
excessMatch = remainingMatch - numSequences * MIN_SEQ_LEN ;
}
2017-03-20 19:38:19 +00:00
DISPLAYLEVEL ( 5 , " total match lengths: %u \n " , ( U32 ) remainingMatch ) ;
2017-02-22 23:59:15 +00:00
for ( i = 0 ; i < numSequences ; i + + ) {
/* Generate match and literal lengths by exponential distribution to
* ensure nice numbers */
U32 matchLen =
MIN_SEQ_LEN +
ROUND ( RAND_exp ( seed , excessMatch / ( double ) ( numSequences - i ) ) ) ;
U32 literalLen =
( RAND ( seed ) & 7 )
? ROUND ( RAND_exp ( seed ,
literalsSize /
( double ) ( numSequences - i ) ) )
: 0 ;
/* actual offset, code to send, and point to copy up to when shifting
* codes in the repeat offsets history */
U32 offset , offsetCode , repIndex ;
/* bounds checks */
2017-03-20 23:02:24 +00:00
matchLen = ( U32 ) MIN ( matchLen , excessMatch + MIN_SEQ_LEN ) ;
literalLen = MIN ( literalLen , ( U32 ) literalsSize ) ;
2017-02-22 23:59:15 +00:00
if ( i = = 0 & & srcPtr = = frame - > srcStart & & literalLen = = 0 ) literalLen = 1 ;
2017-03-20 23:02:24 +00:00
if ( i + 1 = = numSequences ) matchLen = MIN_SEQ_LEN + ( U32 ) excessMatch ;
2017-02-22 23:59:15 +00:00
memcpy ( srcPtr , literals , literalLen ) ;
srcPtr + = literalLen ;
do {
if ( RAND ( seed ) & 7 ) {
/* do a normal offset */
2017-06-23 22:54:51 +00:00
U32 const dataDecompressed = ( U32 ) ( ( BYTE * ) srcPtr - ( BYTE * ) frame - > srcStart ) ;
2017-02-22 23:59:15 +00:00
offset = ( RAND ( seed ) %
MIN ( frame - > header . windowSize ,
2017-02-23 23:53:44 +00:00
( size_t ) ( ( BYTE * ) srcPtr - ( BYTE * ) frame - > srcStart ) ) ) +
2017-02-22 23:59:15 +00:00
1 ;
2017-06-23 22:54:51 +00:00
if ( info . useDict & & ( RAND ( seed ) & 1 ) & & i + 1 ! = numSequences & & dataDecompressed < frame - > header . windowSize ) {
2017-06-09 16:48:56 +00:00
/* need to occasionally generate offsets that go past the start */
2017-06-23 20:44:24 +00:00
/* including i+1 != numSequences because the last sequences has to adhere to predetermined contentSize */
2017-06-23 18:22:29 +00:00
U32 lenPastStart = ( RAND ( seed ) % info . dictContentSize ) + 1 ;
2017-06-23 19:00:48 +00:00
offset = ( U32 ) ( ( BYTE * ) srcPtr - ( BYTE * ) frame - > srcStart ) + lenPastStart ;
2017-06-22 21:43:44 +00:00
if ( offset > frame - > header . windowSize ) {
2017-06-23 18:22:29 +00:00
if ( lenPastStart < MIN_SEQ_LEN ) {
/* when offset > windowSize, matchLen bound by end of dictionary (lenPastStart) */
/* this also means that lenPastStart must be greater than MIN_SEQ_LEN */
/* make sure lenPastStart does not go past dictionary start though */
2017-06-23 19:00:48 +00:00
lenPastStart = MIN ( lenPastStart + MIN_SEQ_LEN , ( U32 ) info . dictContentSize ) ;
offset = ( U32 ) ( ( BYTE * ) srcPtr - ( BYTE * ) frame - > srcStart ) + lenPastStart ;
2017-06-23 18:22:29 +00:00
}
{
U32 const matchLenBound = MIN ( frame - > header . windowSize , lenPastStart ) ;
matchLen = MIN ( matchLen , matchLenBound ) ;
}
2017-06-22 21:43:44 +00:00
}
2017-06-09 16:48:56 +00:00
}
2017-02-22 23:59:15 +00:00
offsetCode = offset + ZSTD_REP_MOVE ;
repIndex = 2 ;
} else {
/* do a repeat offset */
offsetCode = RAND ( seed ) % 3 ;
if ( literalLen > 0 ) {
offset = frame - > stats . rep [ offsetCode ] ;
repIndex = offsetCode ;
} else {
/* special case */
offset = offsetCode = = 2 ? frame - > stats . rep [ 0 ] - 1
: frame - > stats . rep [ offsetCode + 1 ] ;
repIndex = MIN ( 2 , offsetCode + 1 ) ;
}
}
2017-06-19 23:23:00 +00:00
} while ( ( ( ! info . useDict ) & & ( offset > ( size_t ) ( ( BYTE * ) srcPtr - ( BYTE * ) frame - > srcStart ) ) ) | | offset = = 0 ) ;
2017-02-22 23:59:15 +00:00
2017-06-20 16:50:28 +00:00
{
size_t j ;
BYTE * const dictEnd = info . dictContent + info . dictContentSize ;
2017-02-22 23:59:15 +00:00
for ( j = 0 ; j < matchLen ; j + + ) {
2017-06-23 19:00:48 +00:00
if ( ( U32 ) ( ( BYTE * ) srcPtr - ( BYTE * ) frame - > srcStart ) < offset ) {
2017-06-13 18:54:43 +00:00
/* copy from dictionary instead of literals */
2017-06-20 18:27:39 +00:00
size_t const dictOffset = offset - ( srcPtr - ( BYTE * ) frame - > srcStart ) ;
2017-06-20 16:50:28 +00:00
* srcPtr = * ( dictEnd - dictOffset ) ;
2017-06-13 18:54:43 +00:00
}
2017-06-19 21:07:56 +00:00
else {
2017-06-13 18:54:43 +00:00
* srcPtr = * ( srcPtr - offset ) ;
}
2017-02-22 23:59:15 +00:00
srcPtr + + ;
}
}
{ int r ;
for ( r = repIndex ; r > 0 ; r - - ) {
frame - > stats . rep [ r ] = frame - > stats . rep [ r - 1 ] ;
}
frame - > stats . rep [ 0 ] = offset ;
}
DISPLAYLEVEL ( 6 , " LL: %5u OF: %5u ML: %5u " , literalLen , offset , matchLen ) ;
2017-03-20 19:38:19 +00:00
DISPLAYLEVEL ( 7 , " srcPos: %8u seqNb: %3u " ,
( U32 ) ( ( BYTE * ) srcPtr - ( BYTE * ) frame - > srcStart ) , i ) ;
2017-02-22 23:59:15 +00:00
DISPLAYLEVEL ( 6 , " \n " ) ;
if ( offsetCode < 3 ) {
DISPLAYLEVEL ( 7 , " repeat offset: %d \n " , repIndex ) ;
}
/* use libzstd sequence handling */
ZSTD_storeSeq ( seqStore , literalLen , literals , offsetCode ,
matchLen - MINMATCH ) ;
literalsSize - = literalLen ;
excessMatch - = ( matchLen - MIN_SEQ_LEN ) ;
literals + = literalLen ;
}
memcpy ( srcPtr , literals , literalsSize ) ;
srcPtr + = literalsSize ;
2017-03-20 19:38:19 +00:00
DISPLAYLEVEL ( 6 , " excess literals: %5u " , ( U32 ) literalsSize ) ;
DISPLAYLEVEL ( 7 , " srcPos: %8u " , ( U32 ) ( ( BYTE * ) srcPtr - ( BYTE * ) frame - > srcStart ) ) ;
2017-02-22 23:59:15 +00:00
DISPLAYLEVEL ( 6 , " \n " ) ;
return numSequences ;
}
static void initSymbolSet ( const BYTE * symbols , size_t len , BYTE * set , BYTE maxSymbolValue )
{
size_t i ;
memset ( set , 0 , ( size_t ) maxSymbolValue + 1 ) ;
for ( i = 0 ; i < len ; i + + ) {
set [ symbols [ i ] ] = 1 ;
}
}
static int isSymbolSubset ( const BYTE * symbols , size_t len , const BYTE * set , BYTE maxSymbolValue )
{
size_t i ;
for ( i = 0 ; i < len ; i + + ) {
if ( symbols [ i ] > maxSymbolValue | | ! set [ symbols [ i ] ] ) {
return 0 ;
}
}
return 1 ;
}
static size_t writeSequences ( U32 * seed , frame_t * frame , seqStore_t * seqStorePtr ,
size_t nbSeq )
{
/* This code is mostly copied from ZSTD_compressSequences in zstd_compress.c */
U32 count [ MaxSeq + 1 ] ;
S16 norm [ MaxSeq + 1 ] ;
FSE_CTable * CTable_LitLength = frame - > stats . litlengthCTable ;
FSE_CTable * CTable_OffsetBits = frame - > stats . offcodeCTable ;
FSE_CTable * CTable_MatchLength = frame - > stats . matchlengthCTable ;
U32 LLtype , Offtype , MLtype ; /* compressed, raw or rle */
const seqDef * const sequences = seqStorePtr - > sequencesStart ;
const BYTE * const ofCodeTable = seqStorePtr - > ofCode ;
const BYTE * const llCodeTable = seqStorePtr - > llCode ;
const BYTE * const mlCodeTable = seqStorePtr - > mlCode ;
BYTE * const oend = ( BYTE * ) frame - > dataEnd ;
BYTE * op = ( BYTE * ) frame - > data ;
BYTE * seqHead ;
BYTE scratchBuffer [ 1 < < MAX ( MLFSELog , LLFSELog ) ] ;
/* literals compressing block removed so that can be done separately */
/* Sequences Header */
if ( ( oend - op ) < 3 /*max nbSeq Size*/ + 1 /*seqHead */ ) return ERROR ( dstSize_tooSmall ) ;
if ( nbSeq < 0x7F ) * op + + = ( BYTE ) nbSeq ;
else if ( nbSeq < LONGNBSEQ ) op [ 0 ] = ( BYTE ) ( ( nbSeq > > 8 ) + 0x80 ) , op [ 1 ] = ( BYTE ) nbSeq , op + = 2 ;
else op [ 0 ] = 0xFF , MEM_writeLE16 ( op + 1 , ( U16 ) ( nbSeq - LONGNBSEQ ) ) , op + = 3 ;
if ( nbSeq = = 0 ) {
frame - > data = op ;
return 0 ;
}
2018-10-21 01:53:02 +00:00
/* seqHead : flags for FSE encoding type */
seqHead = op + + ;
2017-02-22 23:59:15 +00:00
/* convert length/distances into codes */
ZSTD_seqToCodes ( seqStorePtr ) ;
/* CTable for Literal Lengths */
{ U32 max = MaxLL ;
2018-10-26 20:21:37 +00:00
size_t const mostFrequent = HIST_countFast_wksp ( count , & max , llCodeTable , nbSeq , WKSP , sizeof ( WKSP ) ) ; /* cannot fail */
2018-06-13 23:49:31 +00:00
assert ( ! HIST_isError ( mostFrequent ) ) ;
2017-02-22 23:59:15 +00:00
if ( mostFrequent = = nbSeq ) {
/* do RLE if we have the chance */
* op + + = llCodeTable [ 0 ] ;
FSE_buildCTable_rle ( CTable_LitLength , ( BYTE ) max ) ;
LLtype = set_rle ;
} else if ( frame - > stats . fseInit & & ! ( RAND ( seed ) & 3 ) & &
isSymbolSubset ( llCodeTable , nbSeq ,
frame - > stats . litlengthSymbolSet , 35 ) ) {
/* maybe do repeat mode if we're allowed to */
LLtype = set_repeat ;
} else if ( ! ( RAND ( seed ) & 3 ) ) {
/* maybe use the default distribution */
FSE_buildCTable_wksp ( CTable_LitLength , LL_defaultNorm , MaxLL , LL_defaultNormLog , scratchBuffer , sizeof ( scratchBuffer ) ) ;
LLtype = set_basic ;
} else {
/* fall back on a full table */
size_t nbSeq_1 = nbSeq ;
const U32 tableLog = FSE_optimalTableLog ( LLFSELog , nbSeq , max ) ;
if ( count [ llCodeTable [ nbSeq - 1 ] ] > 1 ) { count [ llCodeTable [ nbSeq - 1 ] ] - - ; nbSeq_1 - - ; }
FSE_normalizeCount ( norm , tableLog , count , nbSeq_1 , max ) ;
{ size_t const NCountSize = FSE_writeNCount ( op , oend - op , norm , max , tableLog ) ; /* overflow protected */
if ( FSE_isError ( NCountSize ) ) return ERROR ( GENERIC ) ;
op + = NCountSize ; }
FSE_buildCTable_wksp ( CTable_LitLength , norm , max , tableLog , scratchBuffer , sizeof ( scratchBuffer ) ) ;
LLtype = set_compressed ;
} }
/* CTable for Offsets */
/* see Literal Lengths for descriptions of mode choices */
{ U32 max = MaxOff ;
2018-10-26 20:21:37 +00:00
size_t const mostFrequent = HIST_countFast_wksp ( count , & max , ofCodeTable , nbSeq , WKSP , sizeof ( WKSP ) ) ; /* cannot fail */
2018-06-13 23:49:31 +00:00
assert ( ! HIST_isError ( mostFrequent ) ) ;
2017-02-22 23:59:15 +00:00
if ( mostFrequent = = nbSeq ) {
* op + + = ofCodeTable [ 0 ] ;
FSE_buildCTable_rle ( CTable_OffsetBits , ( BYTE ) max ) ;
Offtype = set_rle ;
} else if ( frame - > stats . fseInit & & ! ( RAND ( seed ) & 3 ) & &
isSymbolSubset ( ofCodeTable , nbSeq ,
frame - > stats . offsetSymbolSet , 28 ) ) {
Offtype = set_repeat ;
} else if ( ! ( RAND ( seed ) & 3 ) ) {
2017-09-18 23:54:53 +00:00
FSE_buildCTable_wksp ( CTable_OffsetBits , OF_defaultNorm , DefaultMaxOff , OF_defaultNormLog , scratchBuffer , sizeof ( scratchBuffer ) ) ;
2017-02-22 23:59:15 +00:00
Offtype = set_basic ;
} else {
size_t nbSeq_1 = nbSeq ;
const U32 tableLog = FSE_optimalTableLog ( OffFSELog , nbSeq , max ) ;
if ( count [ ofCodeTable [ nbSeq - 1 ] ] > 1 ) { count [ ofCodeTable [ nbSeq - 1 ] ] - - ; nbSeq_1 - - ; }
FSE_normalizeCount ( norm , tableLog , count , nbSeq_1 , max ) ;
{ size_t const NCountSize = FSE_writeNCount ( op , oend - op , norm , max , tableLog ) ; /* overflow protected */
if ( FSE_isError ( NCountSize ) ) return ERROR ( GENERIC ) ;
op + = NCountSize ; }
FSE_buildCTable_wksp ( CTable_OffsetBits , norm , max , tableLog , scratchBuffer , sizeof ( scratchBuffer ) ) ;
Offtype = set_compressed ;
} }
/* CTable for MatchLengths */
/* see Literal Lengths for descriptions of mode choices */
{ U32 max = MaxML ;
2018-10-26 20:21:37 +00:00
size_t const mostFrequent = HIST_countFast_wksp ( count , & max , mlCodeTable , nbSeq , WKSP , sizeof ( WKSP ) ) ; /* cannot fail */
2018-06-13 23:49:31 +00:00
assert ( ! HIST_isError ( mostFrequent ) ) ;
2017-02-22 23:59:15 +00:00
if ( mostFrequent = = nbSeq ) {
* op + + = * mlCodeTable ;
FSE_buildCTable_rle ( CTable_MatchLength , ( BYTE ) max ) ;
MLtype = set_rle ;
} else if ( frame - > stats . fseInit & & ! ( RAND ( seed ) & 3 ) & &
isSymbolSubset ( mlCodeTable , nbSeq ,
frame - > stats . matchlengthSymbolSet , 52 ) ) {
MLtype = set_repeat ;
} else if ( ! ( RAND ( seed ) & 3 ) ) {
/* sometimes do default distribution */
FSE_buildCTable_wksp ( CTable_MatchLength , ML_defaultNorm , MaxML , ML_defaultNormLog , scratchBuffer , sizeof ( scratchBuffer ) ) ;
MLtype = set_basic ;
} else {
/* fall back on table */
size_t nbSeq_1 = nbSeq ;
const U32 tableLog = FSE_optimalTableLog ( MLFSELog , nbSeq , max ) ;
if ( count [ mlCodeTable [ nbSeq - 1 ] ] > 1 ) { count [ mlCodeTable [ nbSeq - 1 ] ] - - ; nbSeq_1 - - ; }
FSE_normalizeCount ( norm , tableLog , count , nbSeq_1 , max ) ;
{ size_t const NCountSize = FSE_writeNCount ( op , oend - op , norm , max , tableLog ) ; /* overflow protected */
if ( FSE_isError ( NCountSize ) ) return ERROR ( GENERIC ) ;
op + = NCountSize ; }
FSE_buildCTable_wksp ( CTable_MatchLength , norm , max , tableLog , scratchBuffer , sizeof ( scratchBuffer ) ) ;
MLtype = set_compressed ;
} }
frame - > stats . fseInit = 1 ;
initSymbolSet ( llCodeTable , nbSeq , frame - > stats . litlengthSymbolSet , 35 ) ;
initSymbolSet ( ofCodeTable , nbSeq , frame - > stats . offsetSymbolSet , 28 ) ;
initSymbolSet ( mlCodeTable , nbSeq , frame - > stats . matchlengthSymbolSet , 52 ) ;
DISPLAYLEVEL ( 5 , " LL type: %d OF type: %d ML type: %d \n " , LLtype , Offtype , MLtype ) ;
* seqHead = ( BYTE ) ( ( LLtype < < 6 ) + ( Offtype < < 4 ) + ( MLtype < < 2 ) ) ;
/* Encoding Sequences */
{ BIT_CStream_t blockStream ;
FSE_CState_t stateMatchLength ;
FSE_CState_t stateOffsetBits ;
FSE_CState_t stateLitLength ;
CHECK_E ( BIT_initCStream ( & blockStream , op , oend - op ) , dstSize_tooSmall ) ; /* not enough space remaining */
/* first symbols */
FSE_initCState2 ( & stateMatchLength , CTable_MatchLength , mlCodeTable [ nbSeq - 1 ] ) ;
FSE_initCState2 ( & stateOffsetBits , CTable_OffsetBits , ofCodeTable [ nbSeq - 1 ] ) ;
FSE_initCState2 ( & stateLitLength , CTable_LitLength , llCodeTable [ nbSeq - 1 ] ) ;
BIT_addBits ( & blockStream , sequences [ nbSeq - 1 ] . litLength , LL_bits [ llCodeTable [ nbSeq - 1 ] ] ) ;
if ( MEM_32bits ( ) ) BIT_flushBits ( & blockStream ) ;
BIT_addBits ( & blockStream , sequences [ nbSeq - 1 ] . matchLength , ML_bits [ mlCodeTable [ nbSeq - 1 ] ] ) ;
if ( MEM_32bits ( ) ) BIT_flushBits ( & blockStream ) ;
BIT_addBits ( & blockStream , sequences [ nbSeq - 1 ] . offset , ofCodeTable [ nbSeq - 1 ] ) ;
BIT_flushBits ( & blockStream ) ;
{ size_t n ;
for ( n = nbSeq - 2 ; n < nbSeq ; n - - ) { /* intentional underflow */
BYTE const llCode = llCodeTable [ n ] ;
BYTE const ofCode = ofCodeTable [ n ] ;
BYTE const mlCode = mlCodeTable [ n ] ;
U32 const llBits = LL_bits [ llCode ] ;
U32 const ofBits = ofCode ; /* 32b*/ /* 64b*/
U32 const mlBits = ML_bits [ mlCode ] ;
/* (7)*/ /* (7)*/
FSE_encodeSymbol ( & blockStream , & stateOffsetBits , ofCode ) ; /* 15 */ /* 15 */
FSE_encodeSymbol ( & blockStream , & stateMatchLength , mlCode ) ; /* 24 */ /* 24 */
if ( MEM_32bits ( ) ) BIT_flushBits ( & blockStream ) ; /* (7)*/
FSE_encodeSymbol ( & blockStream , & stateLitLength , llCode ) ; /* 16 */ /* 33 */
if ( MEM_32bits ( ) | | ( ofBits + mlBits + llBits > = 64 - 7 - ( LLFSELog + MLFSELog + OffFSELog ) ) )
BIT_flushBits ( & blockStream ) ; /* (7)*/
BIT_addBits ( & blockStream , sequences [ n ] . litLength , llBits ) ;
if ( MEM_32bits ( ) & & ( ( llBits + mlBits ) > 24 ) ) BIT_flushBits ( & blockStream ) ;
BIT_addBits ( & blockStream , sequences [ n ] . matchLength , mlBits ) ;
if ( MEM_32bits ( ) ) BIT_flushBits ( & blockStream ) ; /* (7)*/
BIT_addBits ( & blockStream , sequences [ n ] . offset , ofBits ) ; /* 31 */
BIT_flushBits ( & blockStream ) ; /* (7)*/
} }
FSE_flushCState ( & blockStream , & stateMatchLength ) ;
FSE_flushCState ( & blockStream , & stateOffsetBits ) ;
FSE_flushCState ( & blockStream , & stateLitLength ) ;
{ size_t const streamSize = BIT_closeCStream ( & blockStream ) ;
if ( streamSize = = 0 ) return ERROR ( dstSize_tooSmall ) ; /* not enough space */
op + = streamSize ;
} }
frame - > data = op ;
return 0 ;
}
static size_t writeSequencesBlock ( U32 * seed , frame_t * frame , size_t contentSize ,
2017-06-19 23:23:00 +00:00
size_t literalsSize , dictInfo info )
2017-02-22 23:59:15 +00:00
{
seqStore_t seqStore ;
size_t numSequences ;
initSeqStore ( & seqStore ) ;
/* randomly generate sequences */
2017-06-19 23:23:00 +00:00
numSequences = generateSequences ( seed , frame , & seqStore , contentSize , literalsSize , info ) ;
2017-02-22 23:59:15 +00:00
/* write them out to the frame data */
CHECKERR ( writeSequences ( seed , frame , & seqStore , numSequences ) ) ;
return numSequences ;
}
2017-06-19 23:23:00 +00:00
static size_t writeCompressedBlock ( U32 * seed , frame_t * frame , size_t contentSize , dictInfo info )
2017-02-22 23:59:15 +00:00
{
BYTE * const blockStart = ( BYTE * ) frame - > data ;
size_t literalsSize ;
size_t nbSeq ;
DISPLAYLEVEL ( 4 , " compressed block: \n " ) ;
literalsSize = writeLiteralsBlock ( seed , frame , contentSize ) ;
2017-03-20 19:38:19 +00:00
DISPLAYLEVEL ( 4 , " literals size: %u \n " , ( U32 ) literalsSize ) ;
2017-02-22 23:59:15 +00:00
2017-06-19 23:23:00 +00:00
nbSeq = writeSequencesBlock ( seed , frame , contentSize , literalsSize , info ) ;
2017-02-22 23:59:15 +00:00
2017-03-20 19:38:19 +00:00
DISPLAYLEVEL ( 4 , " number of sequences: %u \n " , ( U32 ) nbSeq ) ;
2017-02-22 23:59:15 +00:00
return ( BYTE * ) frame - > data - blockStart ;
}
static void writeBlock ( U32 * seed , frame_t * frame , size_t contentSize ,
2017-06-19 23:23:00 +00:00
int lastBlock , dictInfo info )
2017-02-22 23:59:15 +00:00
{
int const blockTypeDesc = RAND ( seed ) % 8 ;
size_t blockSize ;
int blockType ;
BYTE * const header = ( BYTE * ) frame - > data ;
BYTE * op = header + 3 ;
2017-10-07 22:19:52 +00:00
DISPLAYLEVEL ( 4 , " block: \n " ) ;
DISPLAYLEVEL ( 4 , " block content size: %u \n " , ( U32 ) contentSize ) ;
DISPLAYLEVEL ( 4 , " last block: %s \n " , lastBlock ? " yes " : " no " ) ;
2017-02-22 23:59:15 +00:00
if ( blockTypeDesc = = 0 ) {
/* Raw data frame */
RAND_buffer ( seed , frame - > src , contentSize ) ;
memcpy ( op , frame - > src , contentSize ) ;
op + = contentSize ;
blockType = 0 ;
blockSize = contentSize ;
} else if ( blockTypeDesc = = 1 ) {
/* RLE */
BYTE const symbol = RAND ( seed ) & 0xff ;
op [ 0 ] = symbol ;
memset ( frame - > src , symbol , contentSize ) ;
op + + ;
blockType = 1 ;
blockSize = contentSize ;
} else {
/* compressed, most common */
size_t compressedSize ;
blockType = 2 ;
frame - > oldStats = frame - > stats ;
frame - > data = op ;
2017-06-19 23:23:00 +00:00
compressedSize = writeCompressedBlock ( seed , frame , contentSize , info ) ;
2017-10-07 22:19:52 +00:00
if ( compressedSize > = contentSize ) { /* compressed block must be strictly smaller than uncompressed one */
2017-02-22 23:59:15 +00:00
blockType = 0 ;
memcpy ( op , frame - > src , contentSize ) ;
op + = contentSize ;
blockSize = contentSize ; /* fall back on raw block if data doesn't
compress */
frame - > stats = frame - > oldStats ; /* don't update the stats */
} else {
op + = compressedSize ;
blockSize = compressedSize ;
}
}
frame - > src = ( BYTE * ) frame - > src + contentSize ;
2017-10-07 22:19:52 +00:00
DISPLAYLEVEL ( 4 , " block type: %s \n " , BLOCK_TYPES [ blockType ] ) ;
DISPLAYLEVEL ( 4 , " block size field: %u \n " , ( U32 ) blockSize ) ;
2017-02-22 23:59:15 +00:00
2017-03-20 23:02:24 +00:00
header [ 0 ] = ( BYTE ) ( ( lastBlock | ( blockType < < 1 ) | ( blockSize < < 3 ) ) & 0xff ) ;
MEM_writeLE16 ( header + 1 , ( U16 ) ( blockSize > > 5 ) ) ;
2017-02-22 23:59:15 +00:00
frame - > data = op ;
}
2017-06-19 23:23:00 +00:00
static void writeBlocks ( U32 * seed , frame_t * frame , dictInfo info )
2017-02-22 23:59:15 +00:00
{
size_t contentLeft = frame - > header . contentSize ;
2017-09-13 22:07:03 +00:00
size_t const maxBlockSize = MIN ( g_maxBlockSize , frame - > header . windowSize ) ;
2017-02-22 23:59:15 +00:00
while ( 1 ) {
/* 1 in 4 chance of ending frame */
int const lastBlock = contentLeft > maxBlockSize ? 0 : ! ( RAND ( seed ) & 3 ) ;
size_t blockContentSize ;
if ( lastBlock ) {
blockContentSize = contentLeft ;
} else {
if ( contentLeft > 0 & & ( RAND ( seed ) & 7 ) ) {
2017-10-07 22:19:52 +00:00
/* some variable size block */
2017-02-22 23:59:15 +00:00
blockContentSize = RAND ( seed ) % ( MIN ( maxBlockSize , contentLeft ) + 1 ) ;
} else if ( contentLeft > maxBlockSize & & ( RAND ( seed ) & 1 ) ) {
2017-10-07 22:19:52 +00:00
/* some full size block */
2017-02-22 23:59:15 +00:00
blockContentSize = maxBlockSize ;
} else {
2017-10-07 22:19:52 +00:00
/* some empty block */
2017-02-22 23:59:15 +00:00
blockContentSize = 0 ;
}
}
2017-06-19 23:23:00 +00:00
writeBlock ( seed , frame , blockContentSize , lastBlock , info ) ;
2017-02-22 23:59:15 +00:00
contentLeft - = blockContentSize ;
if ( lastBlock ) break ;
}
}
static void writeChecksum ( frame_t * frame )
{
/* write checksum so implementations can verify their output */
U64 digest = XXH64 ( frame - > srcStart , ( BYTE * ) frame - > src - ( BYTE * ) frame - > srcStart , 0 ) ;
2017-10-07 22:19:52 +00:00
DISPLAYLEVEL ( 3 , " checksum: %08x \n " , ( U32 ) digest ) ;
2017-02-22 23:59:15 +00:00
MEM_writeLE32 ( frame - > data , ( U32 ) digest ) ;
frame - > data = ( BYTE * ) frame - > data + 4 ;
}
static void outputBuffer ( const void * buf , size_t size , const char * const path )
{
/* write data out to file */
const BYTE * ip = ( const BYTE * ) buf ;
FILE * out ;
if ( path ) {
out = fopen ( path , " wb " ) ;
} else {
out = stdout ;
}
if ( ! out ) {
fprintf ( stderr , " Failed to open file at %s: " , path ) ;
perror ( NULL ) ;
exit ( 1 ) ;
}
2017-10-07 22:19:52 +00:00
{ size_t fsize = size ;
2017-02-22 23:59:15 +00:00
size_t written = 0 ;
while ( written < fsize ) {
written + = fwrite ( ip + written , 1 , fsize - written , out ) ;
if ( ferror ( out ) ) {
fprintf ( stderr , " Failed to write to file at %s: " , path ) ;
perror ( NULL ) ;
exit ( 1 ) ;
}
}
}
if ( path ) {
fclose ( out ) ;
}
}
static void initFrame ( frame_t * fr )
{
memset ( fr , 0 , sizeof ( * fr ) ) ;
fr - > data = fr - > dataStart = FRAME_BUFFER ;
fr - > dataEnd = FRAME_BUFFER + sizeof ( FRAME_BUFFER ) ;
fr - > src = fr - > srcStart = CONTENT_BUFFER ;
fr - > srcEnd = CONTENT_BUFFER + sizeof ( CONTENT_BUFFER ) ;
/* init repeat codes */
fr - > stats . rep [ 0 ] = 1 ;
fr - > stats . rep [ 1 ] = 4 ;
fr - > stats . rep [ 2 ] = 8 ;
}
2017-09-12 21:29:39 +00:00
/**
* Generated a single zstd compressed block with no block / frame header .
* Returns the final seed .
*/
static U32 generateCompressedBlock ( U32 seed , frame_t * frame , dictInfo info )
{
size_t blockContentSize ;
int blockWritten = 0 ;
BYTE * op ;
2017-10-07 22:19:52 +00:00
DISPLAYLEVEL ( 4 , " block seed: %u \n " , seed ) ;
2017-09-12 21:29:39 +00:00
initFrame ( frame ) ;
op = ( BYTE * ) frame - > data ;
while ( ! blockWritten ) {
size_t cSize ;
/* generate window size */
2017-10-07 22:19:52 +00:00
{ int const exponent = RAND ( & seed ) % ( MAX_WINDOW_LOG - 10 ) ;
2017-09-12 21:29:39 +00:00
int const mantissa = RAND ( & seed ) % 8 ;
frame - > header . windowSize = ( 1U < < ( exponent + 10 ) ) ;
frame - > header . windowSize + = ( frame - > header . windowSize / 8 ) * mantissa ;
}
/* generate content size */
2017-10-07 22:19:52 +00:00
{ size_t const maxBlockSize = MIN ( g_maxBlockSize , frame - > header . windowSize ) ;
2017-09-12 21:29:39 +00:00
if ( RAND ( & seed ) & 15 ) {
/* some full size blocks */
blockContentSize = maxBlockSize ;
2017-09-13 22:07:03 +00:00
} else if ( RAND ( & seed ) & 7 & & g_maxBlockSize > = ( 1U < < 7 ) ) {
2017-09-12 21:29:39 +00:00
/* some small blocks <= 128 bytes*/
blockContentSize = RAND ( & seed ) % ( 1U < < 7 ) ;
} else {
/* some variable size blocks */
blockContentSize = RAND ( & seed ) % maxBlockSize ;
}
}
/* try generating a compressed block */
frame - > oldStats = frame - > stats ;
frame - > data = op ;
cSize = writeCompressedBlock ( & seed , frame , blockContentSize , info ) ;
2017-10-07 22:19:52 +00:00
if ( cSize > = blockContentSize ) { /* compressed size must be strictly smaller than decompressed size : https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#blocks */
2017-09-12 21:29:39 +00:00
/* data doesn't compress -- try again */
frame - > stats = frame - > oldStats ; /* don't update the stats */
2017-10-07 22:19:52 +00:00
DISPLAYLEVEL ( 5 , " can't compress block : try again \n " ) ;
2017-09-12 21:29:39 +00:00
} else {
blockWritten = 1 ;
2017-10-07 22:19:52 +00:00
DISPLAYLEVEL ( 4 , " block size: %u \n " , ( U32 ) cSize ) ;
2017-09-12 21:29:39 +00:00
frame - > src = ( BYTE * ) frame - > src + blockContentSize ;
}
}
return seed ;
}
2017-02-22 23:59:15 +00:00
/* Return the final seed */
2017-06-19 23:23:00 +00:00
static U32 generateFrame ( U32 seed , frame_t * fr , dictInfo info )
2017-02-22 23:59:15 +00:00
{
/* generate a complete frame */
2017-10-07 22:19:52 +00:00
DISPLAYLEVEL ( 3 , " frame seed: %u \n " , seed ) ;
2017-02-22 23:59:15 +00:00
initFrame ( fr ) ;
2017-06-19 23:23:00 +00:00
writeFrameHeader ( & seed , fr , info ) ;
writeBlocks ( & seed , fr , info ) ;
2017-02-22 23:59:15 +00:00
writeChecksum ( fr ) ;
return seed ;
}
2017-06-19 23:23:00 +00:00
/*_*******************************************************
* Dictionary Helper Functions
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* returns 0 if successful, otherwise returns 1 upon error */
2017-10-07 22:19:52 +00:00
static int genRandomDict ( U32 dictID , U32 seed , size_t dictSize , BYTE * fullDict )
{
2017-06-20 17:12:13 +00:00
/* allocate space for samples */
2017-06-22 01:24:19 +00:00
int ret = 0 ;
2017-06-20 17:12:13 +00:00
unsigned const numSamples = 4 ;
2017-06-20 22:31:54 +00:00
size_t sampleSizes [ 4 ] ;
2017-06-22 22:52:33 +00:00
BYTE * const samples = malloc ( 5000 * sizeof ( BYTE ) ) ;
2017-06-22 01:24:19 +00:00
if ( samples = = NULL ) {
DISPLAY ( " Error: could not allocate space for samples \n " ) ;
return 1 ;
}
2017-06-19 23:23:00 +00:00
2017-06-20 17:12:13 +00:00
/* generate samples */
2017-10-07 22:19:52 +00:00
{ unsigned literalValue = 1 ;
2017-06-22 01:30:27 +00:00
unsigned samplesPos = 0 ;
2017-06-20 17:12:13 +00:00
size_t currSize = 1 ;
2017-06-22 01:30:27 +00:00
while ( literalValue < = 4 ) {
sampleSizes [ literalValue - 1 ] = currSize ;
2017-10-07 22:19:52 +00:00
{ size_t k ;
2017-06-20 22:31:54 +00:00
for ( k = 0 ; k < currSize ; k + + ) {
2017-06-22 22:36:28 +00:00
* ( samples + ( samplesPos + + ) ) = ( BYTE ) literalValue ;
2017-10-07 22:19:52 +00:00
} }
2017-06-22 01:30:27 +00:00
literalValue + + ;
2017-06-20 17:12:13 +00:00
currSize * = 16 ;
2017-10-07 22:19:52 +00:00
} }
2017-06-19 23:23:00 +00:00
2017-10-07 22:19:52 +00:00
{ size_t dictWriteSize = 0 ;
2017-06-20 17:12:13 +00:00
ZDICT_params_t zdictParams ;
2017-06-28 19:19:37 +00:00
size_t const headerSize = MAX ( dictSize / 4 , 256 ) ;
size_t const dictContentSize = dictSize - headerSize ;
2017-06-20 17:12:13 +00:00
BYTE * const dictContent = fullDict + headerSize ;
if ( dictContentSize < ZDICT_CONTENTSIZE_MIN | | dictSize < ZDICT_DICTSIZE_MIN ) {
DISPLAY ( " Error: dictionary size is too small \n " ) ;
2017-06-22 01:24:19 +00:00
ret = 1 ;
goto exitGenRandomDict ;
2017-06-19 23:23:00 +00:00
}
2017-06-20 17:12:13 +00:00
/* init dictionary params */
memset ( & zdictParams , 0 , sizeof ( zdictParams ) ) ;
zdictParams . dictID = dictID ;
zdictParams . notificationLevel = 1 ;
/* fill in dictionary content */
RAND_buffer ( & seed , ( void * ) dictContent , dictContentSize ) ;
/* finalize dictionary with random samples */
dictWriteSize = ZDICT_finalizeDictionary ( fullDict , dictSize ,
dictContent , dictContentSize ,
samples , sampleSizes , numSamples ,
zdictParams ) ;
2017-06-19 23:23:00 +00:00
2017-06-20 17:14:03 +00:00
if ( ZDICT_isError ( dictWriteSize ) ) {
2017-06-19 23:23:00 +00:00
DISPLAY ( " Could not finalize dictionary: %s \n " , ZDICT_getErrorName ( dictWriteSize ) ) ;
2017-06-22 01:24:19 +00:00
ret = 1 ;
2017-06-19 23:23:00 +00:00
}
}
2017-06-20 17:12:13 +00:00
2017-06-22 01:24:19 +00:00
exitGenRandomDict :
free ( samples ) ;
return ret ;
2017-06-19 23:23:00 +00:00
}
static dictInfo initDictInfo ( int useDict , size_t dictContentSize , BYTE * dictContent , U32 dictID ) {
/* allocate space statically */
dictInfo dictOp ;
2017-06-22 00:46:37 +00:00
memset ( & dictOp , 0 , sizeof ( dictOp ) ) ;
2017-06-19 23:23:00 +00:00
dictOp . useDict = useDict ;
dictOp . dictContentSize = dictContentSize ;
dictOp . dictContent = dictContent ;
dictOp . dictID = dictID ;
return dictOp ;
}
2017-06-09 00:06:30 +00:00
2017-02-22 23:59:15 +00:00
/*-*******************************************************
* Test Mode
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
BYTE DECOMPRESSED_BUFFER [ MAX_DECOMPRESSED_SIZE ] ;
static size_t testDecodeSimple ( frame_t * fr )
{
/* test decoding the generated data with the simple API */
size_t const ret = ZSTD_decompress ( DECOMPRESSED_BUFFER , MAX_DECOMPRESSED_SIZE ,
fr - > dataStart , ( BYTE * ) fr - > data - ( BYTE * ) fr - > dataStart ) ;
if ( ZSTD_isError ( ret ) ) return ret ;
if ( memcmp ( DECOMPRESSED_BUFFER , fr - > srcStart ,
( BYTE * ) fr - > src - ( BYTE * ) fr - > srcStart ) ! = 0 ) {
return ERROR ( corruption_detected ) ;
}
return ret ;
}
static size_t testDecodeStreaming ( frame_t * fr )
{
/* test decoding the generated data with the streaming API */
ZSTD_DStream * zd = ZSTD_createDStream ( ) ;
ZSTD_inBuffer in ;
ZSTD_outBuffer out ;
size_t ret ;
if ( ! zd ) return ERROR ( memory_allocation ) ;
in . src = fr - > dataStart ;
in . pos = 0 ;
in . size = ( BYTE * ) fr - > data - ( BYTE * ) fr - > dataStart ;
out . dst = DECOMPRESSED_BUFFER ;
out . pos = 0 ;
out . size = ZSTD_DStreamOutSize ( ) ;
ZSTD_initDStream ( zd ) ;
while ( 1 ) {
ret = ZSTD_decompressStream ( zd , & out , & in ) ;
if ( ZSTD_isError ( ret ) ) goto cleanup ; /* error */
if ( ret = = 0 ) break ; /* frame is done */
/* force decoding to be done in chunks */
out . size + = MIN ( ZSTD_DStreamOutSize ( ) , MAX_DECOMPRESSED_SIZE - out . size ) ;
}
ret = out . pos ;
if ( memcmp ( out . dst , fr - > srcStart , out . pos ) ! = 0 ) {
return ERROR ( corruption_detected ) ;
}
cleanup :
ZSTD_freeDStream ( zd ) ;
return ret ;
}
2017-09-12 21:29:39 +00:00
static size_t testDecodeWithDict ( U32 seed , genType_e genType )
2017-06-20 00:13:00 +00:00
{
2017-06-20 17:17:38 +00:00
/* create variables */
2017-06-28 19:19:37 +00:00
size_t const dictSize = RAND ( & seed ) % ( 10 < < 20 ) + ZDICT_DICTSIZE_MIN + ZDICT_CONTENTSIZE_MIN ;
2017-06-20 00:13:00 +00:00
U32 const dictID = RAND ( & seed ) ;
2017-06-20 00:18:43 +00:00
size_t errorDetected = 0 ;
2017-06-20 00:13:00 +00:00
BYTE * const fullDict = malloc ( dictSize ) ;
if ( fullDict = = NULL ) {
return ERROR ( GENERIC ) ;
}
2017-06-20 17:17:38 +00:00
/* generate random dictionary */
2017-10-07 22:19:52 +00:00
if ( genRandomDict ( dictID , seed , dictSize , fullDict ) ) { /* return 0 on success */
errorDetected = ERROR ( GENERIC ) ;
goto dictTestCleanup ;
2017-06-20 00:13:00 +00:00
}
2017-06-20 17:17:38 +00:00
2017-10-07 22:19:52 +00:00
{ frame_t fr ;
2017-09-12 21:29:39 +00:00
dictInfo info ;
ZSTD_DCtx * const dctx = ZSTD_createDCtx ( ) ;
size_t ret ;
2017-06-20 17:17:38 +00:00
2017-09-12 21:29:39 +00:00
/* get dict info */
2017-10-07 22:19:52 +00:00
{ size_t const headerSize = MAX ( dictSize / 4 , 256 ) ;
2017-06-28 19:19:37 +00:00
size_t const dictContentSize = dictSize - headerSize ;
BYTE * const dictContent = fullDict + headerSize ;
2017-09-12 21:29:39 +00:00
info = initDictInfo ( 1 , dictContentSize , dictContent , dictID ) ;
2017-06-20 00:43:16 +00:00
}
2017-06-20 00:13:00 +00:00
2017-06-20 17:17:38 +00:00
/* manually decompress and check difference */
2017-09-12 21:29:39 +00:00
if ( genType = = gt_frame ) {
/* Test frame */
2017-10-07 22:19:52 +00:00
generateFrame ( seed , & fr , info ) ;
2017-09-12 21:29:39 +00:00
ret = ZSTD_decompress_usingDict ( dctx , DECOMPRESSED_BUFFER , MAX_DECOMPRESSED_SIZE ,
fr . dataStart , ( BYTE * ) fr . data - ( BYTE * ) fr . dataStart ,
fullDict , dictSize ) ;
} else {
/* Test block */
2017-10-07 22:19:52 +00:00
generateCompressedBlock ( seed , & fr , info ) ;
2017-09-12 21:29:39 +00:00
ret = ZSTD_decompressBegin_usingDict ( dctx , fullDict , dictSize ) ;
if ( ZSTD_isError ( ret ) ) {
errorDetected = ret ;
ZSTD_freeDCtx ( dctx ) ;
2017-06-20 00:43:16 +00:00
goto dictTestCleanup ;
}
2017-09-12 21:29:39 +00:00
ret = ZSTD_decompressBlock ( dctx , DECOMPRESSED_BUFFER , MAX_DECOMPRESSED_SIZE ,
fr . dataStart , ( BYTE * ) fr . data - ( BYTE * ) fr . dataStart ) ;
}
ZSTD_freeDCtx ( dctx ) ;
if ( ZSTD_isError ( ret ) ) {
errorDetected = ret ;
goto dictTestCleanup ;
}
if ( memcmp ( DECOMPRESSED_BUFFER , fr . srcStart , ( BYTE * ) fr . src - ( BYTE * ) fr . srcStart ) ! = 0 ) {
errorDetected = ERROR ( corruption_detected ) ;
goto dictTestCleanup ;
2017-06-20 00:13:00 +00:00
}
}
dictTestCleanup :
free ( fullDict ) ;
return errorDetected ;
}
2017-09-12 21:29:39 +00:00
static size_t testDecodeRawBlock ( frame_t * fr )
{
ZSTD_DCtx * dctx = ZSTD_createDCtx ( ) ;
size_t ret = ZSTD_decompressBegin ( dctx ) ;
if ( ZSTD_isError ( ret ) ) return ret ;
ret = ZSTD_decompressBlock (
dctx ,
DECOMPRESSED_BUFFER , MAX_DECOMPRESSED_SIZE ,
fr - > dataStart , ( BYTE * ) fr - > data - ( BYTE * ) fr - > dataStart ) ;
ZSTD_freeDCtx ( dctx ) ;
if ( ZSTD_isError ( ret ) ) return ret ;
if ( memcmp ( DECOMPRESSED_BUFFER , fr - > srcStart ,
( BYTE * ) fr - > src - ( BYTE * ) fr - > srcStart ) ! = 0 ) {
return ERROR ( corruption_detected ) ;
}
return ret ;
}
static int runBlockTest ( U32 * seed )
{
frame_t fr ;
U32 const seedCopy = * seed ;
2017-10-07 22:19:52 +00:00
{ dictInfo const info = initDictInfo ( 0 , 0 , NULL , 0 ) ;
2017-09-12 21:29:39 +00:00
* seed = generateCompressedBlock ( * seed , & fr , info ) ;
}
{ size_t const r = testDecodeRawBlock ( & fr ) ;
if ( ZSTD_isError ( r ) ) {
DISPLAY ( " Error in block mode on test seed %u: %s \n " , seedCopy ,
ZSTD_getErrorName ( r ) ) ;
return 1 ;
}
}
2017-10-07 22:19:52 +00:00
{ size_t const r = testDecodeWithDict ( * seed , gt_block ) ;
2017-09-12 21:29:39 +00:00
if ( ZSTD_isError ( r ) ) {
DISPLAY ( " Error in block mode with dictionary on test seed %u: %s \n " ,
seedCopy , ZSTD_getErrorName ( r ) ) ;
return 1 ;
}
}
return 0 ;
}
static int runFrameTest ( U32 * seed )
{
frame_t fr ;
U32 const seedCopy = * seed ;
2017-10-07 22:19:52 +00:00
{ dictInfo const info = initDictInfo ( 0 , 0 , NULL , 0 ) ;
2017-09-12 21:29:39 +00:00
* seed = generateFrame ( * seed , & fr , info ) ;
}
{ size_t const r = testDecodeSimple ( & fr ) ;
if ( ZSTD_isError ( r ) ) {
2017-10-07 22:19:52 +00:00
DISPLAY ( " Error in simple mode on test seed %u: %s \n " ,
seedCopy , ZSTD_getErrorName ( r ) ) ;
2017-09-12 21:29:39 +00:00
return 1 ;
}
}
{ size_t const r = testDecodeStreaming ( & fr ) ;
if ( ZSTD_isError ( r ) ) {
2017-10-07 22:19:52 +00:00
DISPLAY ( " Error in streaming mode on test seed %u: %s \n " ,
seedCopy , ZSTD_getErrorName ( r ) ) ;
2017-09-12 21:29:39 +00:00
return 1 ;
}
}
2017-10-07 22:19:52 +00:00
{ size_t const r = testDecodeWithDict ( * seed , gt_frame ) ; /* avoid big dictionaries */
2017-09-12 21:29:39 +00:00
if ( ZSTD_isError ( r ) ) {
2017-10-07 22:19:52 +00:00
DISPLAY ( " Error in dictionary mode on test seed %u: %s \n " ,
seedCopy , ZSTD_getErrorName ( r ) ) ;
2017-09-12 21:29:39 +00:00
return 1 ;
}
}
return 0 ;
}
static int runTestMode ( U32 seed , unsigned numFiles , unsigned const testDurationS ,
genType_e genType )
2017-02-22 23:59:15 +00:00
{
unsigned fnum ;
2017-11-30 03:11:12 +00:00
UTIL_time_t const startClock = UTIL_getTime ( ) ;
U64 const maxClockSpan = testDurationS * SEC_TO_MICRO ;
2017-02-22 23:59:15 +00:00
if ( numFiles = = 0 & & ! testDurationS ) numFiles = 1 ;
DISPLAY ( " seed: %u \n " , seed ) ;
2017-11-30 03:11:12 +00:00
for ( fnum = 0 ; fnum < numFiles | | UTIL_clockSpanMicro ( startClock ) < maxClockSpan ; fnum + + ) {
2017-02-22 23:59:15 +00:00
if ( fnum < numFiles )
DISPLAYUPDATE ( " \r %u/%u " , fnum , numFiles ) ;
else
DISPLAYUPDATE ( " \r %u " , fnum ) ;
2017-10-07 22:19:52 +00:00
{ int const ret = ( genType = = gt_frame ) ?
runFrameTest ( & seed ) :
runBlockTest ( & seed ) ;
if ( ret ) return ret ;
2017-06-20 00:29:15 +00:00
}
2017-02-22 23:59:15 +00:00
}
DISPLAY ( " \r %u tests completed: " , fnum ) ;
DISPLAY ( " OK \n " ) ;
return 0 ;
}
/*-*******************************************************
* File I / O
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static int generateFile ( U32 seed , const char * const path ,
2017-09-12 21:29:39 +00:00
const char * const origPath , genType_e genType )
2017-02-22 23:59:15 +00:00
{
frame_t fr ;
2017-02-23 21:06:50 +00:00
DISPLAY ( " seed: %u \n " , seed ) ;
2017-10-07 22:19:52 +00:00
{ dictInfo const info = initDictInfo ( 0 , 0 , NULL , 0 ) ;
2017-09-12 21:29:39 +00:00
if ( genType = = gt_frame ) {
generateFrame ( seed , & fr , info ) ;
} else {
generateCompressedBlock ( seed , & fr , info ) ;
}
2017-06-19 23:23:00 +00:00
}
2017-02-22 23:59:15 +00:00
outputBuffer ( fr . dataStart , ( BYTE * ) fr . data - ( BYTE * ) fr . dataStart , path ) ;
if ( origPath ) {
outputBuffer ( fr . srcStart , ( BYTE * ) fr . src - ( BYTE * ) fr . srcStart , origPath ) ;
}
return 0 ;
}
static int generateCorpus ( U32 seed , unsigned numFiles , const char * const path ,
2017-09-12 21:29:39 +00:00
const char * const origPath , genType_e genType )
2017-02-22 23:59:15 +00:00
{
char outPath [ MAX_PATH ] ;
unsigned fnum ;
DISPLAY ( " seed: %u \n " , seed ) ;
for ( fnum = 0 ; fnum < numFiles ; fnum + + ) {
frame_t fr ;
DISPLAYUPDATE ( " \r %u/%u " , fnum , numFiles ) ;
2017-10-07 22:19:52 +00:00
{ dictInfo const info = initDictInfo ( 0 , 0 , NULL , 0 ) ;
2017-09-12 21:29:39 +00:00
if ( genType = = gt_frame ) {
seed = generateFrame ( seed , & fr , info ) ;
} else {
seed = generateCompressedBlock ( seed , & fr , info ) ;
}
2017-06-19 23:23:00 +00:00
}
2017-02-22 23:59:15 +00:00
if ( snprintf ( outPath , MAX_PATH , " %s/z%06u.zst " , path , fnum ) + 1 > MAX_PATH ) {
DISPLAY ( " Error: path too long \n " ) ;
return 1 ;
}
outputBuffer ( fr . dataStart , ( BYTE * ) fr . data - ( BYTE * ) fr . dataStart , outPath ) ;
if ( origPath ) {
if ( snprintf ( outPath , MAX_PATH , " %s/z%06u " , origPath , fnum ) + 1 > MAX_PATH ) {
DISPLAY ( " Error: path too long \n " ) ;
return 1 ;
}
outputBuffer ( fr . srcStart , ( BYTE * ) fr . src - ( BYTE * ) fr . srcStart , outPath ) ;
}
}
DISPLAY ( " \r %u/%u \n " , fnum , numFiles ) ;
return 0 ;
}
2017-06-09 00:06:30 +00:00
static int generateCorpusWithDict ( U32 seed , unsigned numFiles , const char * const path ,
2017-09-12 21:29:39 +00:00
const char * const origPath , const size_t dictSize ,
genType_e genType )
2017-06-09 00:06:30 +00:00
{
char outPath [ MAX_PATH ] ;
2017-06-13 18:54:43 +00:00
BYTE * fullDict ;
2017-06-19 21:20:35 +00:00
U32 const dictID = RAND ( & seed ) ;
2017-06-19 22:55:01 +00:00
int errorDetected = 0 ;
2017-06-16 01:06:32 +00:00
if ( snprintf ( outPath , MAX_PATH , " %s/dictionary " , path ) + 1 > MAX_PATH ) {
2017-06-09 00:06:30 +00:00
DISPLAY ( " Error: path too long \n " ) ;
return 1 ;
}
2017-06-19 22:55:01 +00:00
/* allocate space for the dictionary */
2017-06-13 18:54:43 +00:00
fullDict = malloc ( dictSize ) ;
2017-06-19 21:02:27 +00:00
if ( fullDict = = NULL ) {
DISPLAY ( " Error: could not allocate space for full dictionary. \n " ) ;
return 1 ;
}
2017-06-19 18:27:50 +00:00
2017-06-19 22:55:01 +00:00
/* randomly generate the dictionary */
2017-10-07 22:19:52 +00:00
{ int const ret = genRandomDict ( dictID , seed , dictSize , fullDict ) ;
2017-06-19 22:55:01 +00:00
if ( ret ! = 0 ) {
errorDetected = ret ;
goto dictCleanup ;
2017-06-13 21:19:35 +00:00
}
2017-06-09 00:06:30 +00:00
}
2017-06-19 22:55:01 +00:00
/* write out dictionary */
2017-06-20 18:07:38 +00:00
if ( numFiles ! = 0 ) {
if ( snprintf ( outPath , MAX_PATH , " %s/dictionary " , path ) + 1 > MAX_PATH ) {
DISPLAY ( " Error: dictionary path too long \n " ) ;
errorDetected = 1 ;
goto dictCleanup ;
}
outputBuffer ( fullDict , dictSize , outPath ) ;
}
else {
outputBuffer ( fullDict , dictSize , " dictionary " ) ;
2017-06-19 21:02:27 +00:00
}
2017-06-14 17:54:47 +00:00
2017-06-09 00:06:30 +00:00
/* generate random compressed/decompressed files */
2017-10-07 22:19:52 +00:00
{ unsigned fnum ;
2017-06-20 18:07:38 +00:00
for ( fnum = 0 ; fnum < MAX ( numFiles , 1 ) ; fnum + + ) {
2017-06-19 23:38:51 +00:00
frame_t fr ;
DISPLAYUPDATE ( " \r %u/%u " , fnum , numFiles ) ;
{
2017-06-28 19:19:37 +00:00
size_t const headerSize = MAX ( dictSize / 4 , 256 ) ;
size_t const dictContentSize = dictSize - headerSize ;
BYTE * const dictContent = fullDict + headerSize ;
2017-06-19 23:38:51 +00:00
dictInfo const info = initDictInfo ( 1 , dictContentSize , dictContent , dictID ) ;
2017-09-12 21:29:39 +00:00
if ( genType = = gt_frame ) {
seed = generateFrame ( seed , & fr , info ) ;
} else {
seed = generateCompressedBlock ( seed , & fr , info ) ;
}
2017-06-19 23:38:51 +00:00
}
2017-06-09 00:06:30 +00:00
2017-06-20 18:07:38 +00:00
if ( numFiles ! = 0 ) {
if ( snprintf ( outPath , MAX_PATH , " %s/z%06u.zst " , path , fnum ) + 1 > MAX_PATH ) {
2017-06-19 23:38:51 +00:00
DISPLAY ( " Error: path too long \n " ) ;
errorDetected = 1 ;
goto dictCleanup ;
2017-06-19 23:32:38 +00:00
}
2017-06-20 18:07:38 +00:00
outputBuffer ( fr . dataStart , ( BYTE * ) fr . data - ( BYTE * ) fr . dataStart , outPath ) ;
if ( origPath ) {
if ( snprintf ( outPath , MAX_PATH , " %s/z%06u " , origPath , fnum ) + 1 > MAX_PATH ) {
DISPLAY ( " Error: path too long \n " ) ;
errorDetected = 1 ;
goto dictCleanup ;
}
outputBuffer ( fr . srcStart , ( BYTE * ) fr . src - ( BYTE * ) fr . srcStart , outPath ) ;
}
}
else {
outputBuffer ( fr . dataStart , ( BYTE * ) fr . data - ( BYTE * ) fr . dataStart , path ) ;
if ( origPath ) {
outputBuffer ( fr . srcStart , ( BYTE * ) fr . src - ( BYTE * ) fr . srcStart , origPath ) ;
}
2017-06-19 22:55:01 +00:00
}
2017-06-19 20:21:50 +00:00
}
2017-06-09 00:06:30 +00:00
}
2017-06-19 22:55:01 +00:00
dictCleanup :
2017-06-19 21:02:27 +00:00
free ( fullDict ) ;
2017-06-19 22:55:01 +00:00
return errorDetected ;
2017-06-09 00:06:30 +00:00
}
2017-02-22 23:59:15 +00:00
/*_*******************************************************
* Command line
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
static U32 makeSeed ( void )
{
2017-03-20 23:02:24 +00:00
U32 t = ( U32 ) time ( NULL ) ;
2017-02-22 23:59:15 +00:00
return XXH32 ( & t , sizeof ( t ) , 0 ) % 65536 ;
}
static unsigned readInt ( const char * * argument )
{
unsigned val = 0 ;
while ( ( * * argument > = ' 0 ' ) & & ( * * argument < = ' 9 ' ) ) {
val * = 10 ;
val + = * * argument - ' 0 ' ;
( * argument ) + + ;
}
return val ;
}
static void usage ( const char * programName )
{
DISPLAY ( " Usage : \n " ) ;
DISPLAY ( " %s [args] \n " , programName ) ;
DISPLAY ( " \n " ) ;
DISPLAY ( " Arguments : \n " ) ;
DISPLAY ( " -p<path> : select output path (default:stdout) \n " ) ;
DISPLAY ( " in multiple files mode this should be a directory \n " ) ;
DISPLAY ( " -o<path> : select path to output original file (default:no output) \n " ) ;
DISPLAY ( " in multiple files mode this should be a directory \n " ) ;
DISPLAY ( " -s# : select seed (default:random based on time) \n " ) ;
DISPLAY ( " -n# : number of files to generate (default:1) \n " ) ;
DISPLAY ( " -t : activate test mode (test files against libzstd instead of outputting them) \n " ) ;
DISPLAY ( " -T# : length of time to run tests for \n " ) ;
DISPLAY ( " -v : increase verbosity level (default:0, max:7) \n " ) ;
2017-02-23 21:06:50 +00:00
DISPLAY ( " -h/H : display help/long help and exit \n " ) ;
}
static void advancedUsage ( const char * programName )
{
usage ( programName ) ;
DISPLAY ( " \n " ) ;
2017-09-13 22:07:03 +00:00
DISPLAY ( " Advanced arguments : \n " ) ;
DISPLAY ( " --content-size : always include the content size in the frame header \n " ) ;
DISPLAY ( " --use-dict=# : include a dictionary used to decompress the corpus \n " ) ;
DISPLAY ( " --gen-blocks : generate raw compressed blocks without block/frame headers \n " ) ;
DISPLAY ( " --max-block-size-log=# : max block size log, must be in range [2, 17] \n " ) ;
DISPLAY ( " --max-content-size-log=# : max content size log, must be <= 20 \n " ) ;
DISPLAY ( " (this is ignored with gen-blocks) \n " ) ;
2017-06-22 17:04:14 +00:00
}
/*! readU32FromChar() :
@ return : unsigned integer value read from input in ` char ` format
allows and interprets K , KB , KiB , M , MB and MiB suffix .
Will also modify ` * stringPtr ` , advancing it to position where it stopped reading .
Note : function result can overflow if digit string > MAX_UINT */
static unsigned readU32FromChar ( const char * * stringPtr )
{
unsigned result = 0 ;
while ( ( * * stringPtr > = ' 0 ' ) & & ( * * stringPtr < = ' 9 ' ) )
result * = 10 , result + = * * stringPtr - ' 0 ' , ( * stringPtr ) + + ;
if ( ( * * stringPtr = = ' K ' ) | | ( * * stringPtr = = ' M ' ) ) {
result < < = 10 ;
if ( * * stringPtr = = ' M ' ) result < < = 10 ;
( * stringPtr ) + + ;
if ( * * stringPtr = = ' i ' ) ( * stringPtr ) + + ;
if ( * * stringPtr = = ' B ' ) ( * stringPtr ) + + ;
}
return result ;
}
/** longCommandWArg() :
* check if * stringPtr is the same as longCommand .
* If yes , @ return 1 and advances * stringPtr to the position which immediately follows longCommand .
* @ return 0 and doesn ' t modify * stringPtr otherwise .
*/
static unsigned longCommandWArg ( const char * * stringPtr , const char * longCommand )
{
size_t const comSize = strlen ( longCommand ) ;
int const result = ! strncmp ( * stringPtr , longCommand , comSize ) ;
if ( result ) * stringPtr + = comSize ;
return result ;
2017-02-22 23:59:15 +00:00
}
int main ( int argc , char * * argv )
{
U32 seed = 0 ;
int seedset = 0 ;
unsigned numFiles = 0 ;
unsigned testDuration = 0 ;
int testMode = 0 ;
const char * path = NULL ;
const char * origPath = NULL ;
2017-06-19 23:23:00 +00:00
int useDict = 0 ;
2017-06-09 00:06:30 +00:00
unsigned dictSize = ( 10 < < 10 ) ; /* 10 kB default */
2017-09-12 21:29:39 +00:00
genType_e genType = gt_frame ;
2017-02-22 23:59:15 +00:00
int argNb ;
/* Check command line */
for ( argNb = 1 ; argNb < argc ; argNb + + ) {
const char * argument = argv [ argNb ] ;
if ( ! argument ) continue ; /* Protection if argument empty */
/* Handle commands. Aggregated commands are allowed */
if ( argument [ 0 ] = = ' - ' ) {
argument + + ;
while ( * argument ! = 0 ) {
switch ( * argument )
{
case ' h ' :
usage ( argv [ 0 ] ) ;
return 0 ;
2017-02-23 21:06:50 +00:00
case ' H ' :
advancedUsage ( argv [ 0 ] ) ;
return 0 ;
2017-02-22 23:59:15 +00:00
case ' v ' :
argument + + ;
g_displayLevel + + ;
break ;
case ' s ' :
argument + + ;
seedset = 1 ;
seed = readInt ( & argument ) ;
break ;
case ' n ' :
argument + + ;
numFiles = readInt ( & argument ) ;
break ;
case ' T ' :
argument + + ;
testDuration = readInt ( & argument ) ;
if ( * argument = = ' m ' ) {
testDuration * = 60 ;
argument + + ;
if ( * argument = = ' n ' ) argument + + ;
}
break ;
case ' o ' :
argument + + ;
origPath = argument ;
argument + = strlen ( argument ) ;
break ;
case ' p ' :
argument + + ;
path = argument ;
argument + = strlen ( argument ) ;
break ;
case ' t ' :
argument + + ;
testMode = 1 ;
break ;
2017-02-23 21:06:50 +00:00
case ' - ' :
argument + + ;
if ( strcmp ( argument , " content-size " ) = = 0 ) {
opts . contentSize = 1 ;
2017-06-22 17:04:14 +00:00
} else if ( longCommandWArg ( & argument , " use-dict= " ) ) {
dictSize = readU32FromChar ( & argument ) ;
2017-06-19 23:23:00 +00:00
useDict = 1 ;
2017-09-12 21:29:39 +00:00
} else if ( strcmp ( argument , " gen-blocks " ) = = 0 ) {
genType = gt_block ;
2017-09-13 22:07:03 +00:00
} else if ( longCommandWArg ( & argument , " max-block-size-log= " ) ) {
U32 value = readU32FromChar ( & argument ) ;
2017-10-07 22:19:52 +00:00
if ( value > = 2 & & value < = ZSTD_BLOCKSIZE_MAX ) {
2017-09-13 22:07:03 +00:00
g_maxBlockSize = 1U < < value ;
}
} else if ( longCommandWArg ( & argument , " max-content-size-log= " ) ) {
U32 value = readU32FromChar ( & argument ) ;
g_maxDecompressedSizeLog =
MIN ( MAX_DECOMPRESSED_SIZE_LOG , value ) ;
2017-02-23 21:06:50 +00:00
} else {
advancedUsage ( argv [ 0 ] ) ;
return 1 ;
}
argument + = strlen ( argument ) ;
break ;
2017-02-22 23:59:15 +00:00
default :
usage ( argv [ 0 ] ) ;
return 1 ;
} } } } /* for (argNb=1; argNb<argc; argNb++) */
if ( ! seedset ) {
seed = makeSeed ( ) ;
}
if ( testMode ) {
2017-09-12 21:29:39 +00:00
return runTestMode ( seed , numFiles , testDuration , genType ) ;
2017-02-22 23:59:15 +00:00
} else {
if ( testDuration ) {
DISPLAY ( " Error: -T requires test mode (-t) \n \n " ) ;
usage ( argv [ 0 ] ) ;
return 1 ;
}
}
2017-02-23 21:06:50 +00:00
if ( ! path ) {
DISPLAY ( " Error: path is required in file generation mode \n " ) ;
usage ( argv [ 0 ] ) ;
return 1 ;
}
2017-06-19 23:23:00 +00:00
if ( numFiles = = 0 & & useDict = = 0 ) {
2017-09-12 21:29:39 +00:00
return generateFile ( seed , path , origPath , genType ) ;
2017-06-19 23:23:00 +00:00
} else if ( useDict = = 0 ) {
2017-09-12 21:29:39 +00:00
return generateCorpus ( seed , numFiles , path , origPath , genType ) ;
2017-06-16 01:06:32 +00:00
} else {
/* should generate files with a dictionary */
2017-09-12 21:29:39 +00:00
return generateCorpusWithDict ( seed , numFiles , path , origPath , dictSize , genType ) ;
2017-02-22 23:59:15 +00:00
}
2017-06-09 00:06:30 +00:00
2017-02-22 23:59:15 +00:00
}