zstd/lib/dictBuilder/cover.h
Jennifer Liu 9d6ed9def3 Merge fastCover into DictBuilder (#1274)
* Minor fix

* Run non-optimize FASTCOVER 5 times in benchmark

* Merge fastCover into dictBuilder

* Fix mixed declaration issue

* Add fastcover to symbol.c

* Add fastCover.c and cover.h to build

* Change fastCover.c to fastcover.c

* Update benchmark to run FASTCOVER in dictBuilder

* Undo spliting fastcover_param into cover_param and f

* Remove convert param functions

* Assign f to parameter

* Add zdict.h to Makefile in lib

* Add cover.h to BUCK

* Cast 1 to U64 before shifting

* Remove trimming of zero freq head and tail in selectSegment and rebenchmark

* Remove f as a separate parameter of tryParam

* Read 8 bytes when d is 6

* Add trimming off zero frequency head and tail

* Use best functions from COVER and remove trimming part(which leads to worse compression ratio after previous bugs were fixed)

* Add finalize= argument to FASTCOVER to specify percentage of training samples passed to ZDICT_finalizeDictionary

* Change nbDmer to always read 8 bytes even when d=6

* Add skip=# argument to allow skipping dmers in computeFrequency in FASTCOVER

* Update comments and benchmarking result

* Change default method of ZDICT_trainFromBuffer to ZDICT_optimizeTrainFromBuffer_fastCover

* Add dictType enum and fix bug about passing zParam when converting to coverParam

* Combine finalize and skip into a single parameter

* Update acceleration parameters and benchmark on 3 sample sets

* Change default splitPoint of FASTCOVER to 0.75 and benchmark first 3 sample sets

* Initialize variables outside of for loop in benchmark.c

* Update benchmark result for hg-manifest

* Remove cover.h from install-includes

* Add explanation of f

* Set default compression level for trainFromBuffer to 3

* Add assertion of fastCoverParams in DiB_trainFromFiles

* Add checkTotalCompressedSize function + some minor fixes

* Add test for multithreading fastCovr

* Initialize segmentFreqs in every FASTCOVER_selectSegment and move mutex_unnlock to end of COVER_best_finish

* Free segmentFreqs

* Initialize segmentFreqs before calling FASTCOVER_buildDictionary instead of in FASTCOVER_selectSegment

* Add FASTCOVER_MEMMULT

* Minor fix

* Update benchmarking result
2018-08-23 12:06:20 -07:00

84 lines
2.3 KiB
C

#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "mem.h" /* read */
#include "pool.h"
#include "threading.h"
#include "zstd_internal.h" /* includes zstd.h */
#ifndef ZDICT_STATIC_LINKING_ONLY
#define ZDICT_STATIC_LINKING_ONLY
#endif
#include "zdict.h"
/**
* COVER_best_t is used for two purposes:
* 1. Synchronizing threads.
* 2. Saving the best parameters and dictionary.
*
* All of the methods except COVER_best_init() are thread safe if zstd is
* compiled with multithreaded support.
*/
typedef struct COVER_best_s {
ZSTD_pthread_mutex_t mutex;
ZSTD_pthread_cond_t cond;
size_t liveJobs;
void *dict;
size_t dictSize;
ZDICT_cover_params_t parameters;
size_t compressedSize;
} COVER_best_t;
/**
* A segment is a range in the source as well as the score of the segment.
*/
typedef struct {
U32 begin;
U32 end;
U32 score;
} COVER_segment_t;
/**
* Checks total compressed size of a dictionary
*/
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
const size_t *samplesSizes, const BYTE *samples,
size_t *offsets,
size_t nbTrainSamples, size_t nbSamples,
BYTE *const dict, size_t dictBufferCapacity);
/**
* Returns the sum of the sample sizes.
*/
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;
/**
* Initialize the `COVER_best_t`.
*/
void COVER_best_init(COVER_best_t *best);
/**
* Wait until liveJobs == 0.
*/
void COVER_best_wait(COVER_best_t *best);
/**
* Call COVER_best_wait() and then destroy the COVER_best_t.
*/
void COVER_best_destroy(COVER_best_t *best);
/**
* Called when a thread is about to be launched.
* Increments liveJobs.
*/
void COVER_best_start(COVER_best_t *best);
/**
* Called when a thread finishes executing, both on error or success.
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
*/
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
ZDICT_cover_params_t parameters, void *dict,
size_t dictSize);