Added code for randomly generating dictionary and test files. Still need to make sure the dictionary ID matches.
This commit is contained in:
parent
2a39ac5486
commit
c2d909e396
@ -1121,11 +1121,10 @@ static void initFrame(frame_t* fr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Return the final seed */
|
/* Return the final seed */
|
||||||
static U32 generateFrame(U32 seed, frame_t* fr)
|
static U32 generateFrame(U32 seed, frame_t* fr, int genDict, size_t dictSize)
|
||||||
{
|
{
|
||||||
/* generate a complete frame */
|
/* generate a complete frame */
|
||||||
DISPLAYLEVEL(1, "frame seed: %u\n", seed);
|
DISPLAYLEVEL(1, "frame seed: %u\n", seed);
|
||||||
|
|
||||||
initFrame(fr);
|
initFrame(fr);
|
||||||
|
|
||||||
writeFrameHeader(&seed, fr);
|
writeFrameHeader(&seed, fr);
|
||||||
@ -1135,6 +1134,7 @@ static U32 generateFrame(U32 seed, frame_t* fr)
|
|||||||
return seed;
|
return seed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*-*******************************************************
|
/*-*******************************************************
|
||||||
* Test Mode
|
* Test Mode
|
||||||
*********************************************************/
|
*********************************************************/
|
||||||
@ -1215,7 +1215,7 @@ static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS
|
|||||||
else
|
else
|
||||||
DISPLAYUPDATE("\r%u ", fnum);
|
DISPLAYUPDATE("\r%u ", fnum);
|
||||||
|
|
||||||
seed = generateFrame(seed, &fr);
|
seed = generateFrame(seed, &fr, 0, 0);
|
||||||
|
|
||||||
{ size_t const r = testDecodeSimple(&fr);
|
{ size_t const r = testDecodeSimple(&fr);
|
||||||
if (ZSTD_isError(r)) {
|
if (ZSTD_isError(r)) {
|
||||||
@ -1250,7 +1250,7 @@ static int generateFile(U32 seed, const char* const path,
|
|||||||
|
|
||||||
DISPLAY("seed: %u\n", seed);
|
DISPLAY("seed: %u\n", seed);
|
||||||
|
|
||||||
generateFrame(seed, &fr);
|
generateFrame(seed, &fr, 0, 0);
|
||||||
|
|
||||||
outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
|
outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path);
|
||||||
if (origPath) {
|
if (origPath) {
|
||||||
@ -1272,7 +1272,7 @@ static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
|
|||||||
|
|
||||||
DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
|
DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);
|
||||||
|
|
||||||
seed = generateFrame(seed, &fr);
|
seed = generateFrame(seed, &fr, 0, 0);
|
||||||
|
|
||||||
if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
|
if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
|
||||||
DISPLAY("Error: path too long\n");
|
DISPLAY("Error: path too long\n");
|
||||||
@ -1294,6 +1294,81 @@ static int generateCorpus(U32 seed, unsigned numFiles, const char* const path,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* generateCorpusWithDict() :
 * Writes a randomly generated dictionary to "<path>/dictionary", then
 * generates `numFiles` frames with generateFrame() and writes each one to
 * "<path>/zNNNNNN.zst". When `origPath` is non-NULL, the source buffer of each
 * frame is also written to "<origPath>/zNNNNNN".
 * Every frame is then decompressed with the dictionary as a round-trip check.
 *
 * The dictionary is `dictSize` bytes : a 4-byte magic number, a 4-byte random
 * dictionary ID, and random bytes for the remainder.
 *
 * @return : 0 on success, 1 on error (path too long, dictionary too small,
 *           or allocation failure).
 */
static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path,
                                  const char* const origPath, const size_t dictSize)
{
    const size_t minDictSize = 8;   /* magic number (4 bytes) + dictionary ID (4 bytes) */
    char outPath[MAX_PATH];
    BYTE* dictStart = NULL;
    BYTE* decompressedPtr = NULL;
    ZSTD_DCtx* dctx = NULL;
    unsigned fnum;
    int result = 1;

    /* validate arguments before acquiring any resource */
    if (snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) {
        DISPLAY("Error: path too long\n");
        return 1;
    }
    if (dictSize < minDictSize) {
        DISPLAY("Error: dictionary size (%zu) is too small\n", dictSize);
        return 1;
    }

    /* the decompression buffer is allocated once and reused for every frame */
    dictStart = malloc(dictSize);
    decompressedPtr = malloc(MAX_DECOMPRESSED_SIZE);
    dctx = ZSTD_createDCtx();
    if (dictStart == NULL || decompressedPtr == NULL || dctx == NULL) {
        DISPLAY("Error: allocation failed\n");
        goto cleanup;
    }

    /* generate the dictionary randomly first */
    {   U32 const dictID = RAND(&seed) + 1;
        size_t pos = 0;

        /* write dictionary magic number */
        MEM_writeLE32(dictStart + pos, ZSTD_DICT_MAGIC);
        pos += 4;

        /* write random dictionary ID */
        MEM_writeLE32(dictStart + pos, dictID);
        pos += 4;

        /* randomly generate the rest of the dictionary */
        RAND_buffer(&seed, dictStart + pos, dictSize - pos);
        outputBuffer(dictStart, dictSize, outPath);
    }

    /* generate random compressed/decompressed files */
    for (fnum = 0; fnum < numFiles; fnum++) {
        frame_t fr;

        DISPLAYUPDATE("\r%u/%u ", fnum, numFiles);

        seed = generateFrame(seed, &fr, 1, dictSize);

        if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) {
            DISPLAY("Error: path too long\n");
            goto cleanup;
        }
        outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath);

        /* if asked, supply the decompressed version */
        if (origPath) {
            if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) {
                DISPLAY("Error: path too long\n");
                goto cleanup;
            }
            outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath);
        }

        /* Round-trip check : the decoder must be fed the compressed frame
         * (fr.dataStart), not the original source (fr.srcStart).
         * NOTE(review): the generated frames are not yet guaranteed to match
         * this dictionary's ID, so a failure is reported but not fatal;
         * make it fatal once the dictionary ID is wired into the frames. */
        {   size_t const r = ZSTD_decompress_usingDict(dctx,
                                    decompressedPtr, MAX_DECOMPRESSED_SIZE,
                                    fr.dataStart, (size_t)((BYTE*)fr.data - (BYTE*)fr.dataStart),
                                    dictStart, dictSize);
            if (ZSTD_isError(r)) {
                DISPLAY("Warning: z%06u.zst failed to decompress with dictionary: %s\n",
                        fnum, ZSTD_getErrorName(r));
            }
        }
    }

    /* debug trace; origPath may be NULL, which must not be passed to %s */
    DISPLAY("This is origPath: %s\nAnd this is numFiles: %u\n",
            origPath ? origPath : "(null)", numFiles);
    result = 0;

cleanup:
    /* both release functions accept NULL, so partial initialization is fine */
    ZSTD_freeDCtx(dctx);
    free(decompressedPtr);
    free(dictStart);
    return result;
}
|
||||||
|
|
||||||
|
|
||||||
/*_*******************************************************
|
/*_*******************************************************
|
||||||
* Command line
|
* Command line
|
||||||
@ -1350,6 +1425,8 @@ int main(int argc, char** argv)
|
|||||||
int testMode = 0;
|
int testMode = 0;
|
||||||
const char* path = NULL;
|
const char* path = NULL;
|
||||||
const char* origPath = NULL;
|
const char* origPath = NULL;
|
||||||
|
int genDict = 0;
|
||||||
|
unsigned dictSize = (10 << 10); /* 10 kB default */
|
||||||
|
|
||||||
int argNb;
|
int argNb;
|
||||||
|
|
||||||
@ -1410,6 +1487,10 @@ int main(int argc, char** argv)
|
|||||||
argument++;
|
argument++;
|
||||||
if (strcmp(argument, "content-size") == 0) {
|
if (strcmp(argument, "content-size") == 0) {
|
||||||
opts.contentSize = 1;
|
opts.contentSize = 1;
|
||||||
|
} else if(strcmp(argument, "train-dict") == 0){
|
||||||
|
argument += 11;
|
||||||
|
dictSize = readInt(&argument);
|
||||||
|
genDict = 1;
|
||||||
} else {
|
} else {
|
||||||
advancedUsage(argv[0]);
|
advancedUsage(argv[0]);
|
||||||
return 1;
|
return 1;
|
||||||
@ -1441,9 +1522,16 @@ int main(int argc, char** argv)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (numFiles == 0) {
|
if (numFiles == 0 && genDict == 0) {
|
||||||
return generateFile(seed, path, origPath);
|
return generateFile(seed, path, origPath);
|
||||||
} else {
|
} else if (genDict == 0){
|
||||||
return generateCorpus(seed, numFiles, path, origPath);
|
return generateCorpus(seed, numFiles, path, origPath);
|
||||||
|
} else if (numFiles == 0){
|
||||||
|
/* should generate a single file with a dictionary */
|
||||||
|
return generateCorpusWithDict(seed, 1, path, origPath, dictSize);
|
||||||
|
} else{
|
||||||
|
/* should generate multiple files with a dictionary */
|
||||||
|
return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user