Add more statistics
This commit is contained in:
parent
55f960e8db
commit
2d8e6c6608
@ -37,6 +37,7 @@ typedef struct LDM_hashTable {
|
||||
// TODO: Scanning speed
|
||||
// TODO: Memory usage
|
||||
struct LDM_compressStats {
|
||||
U32 windowSizeLog, hashTableSizeLog;
|
||||
U32 numMatches;
|
||||
U64 totalMatchLength;
|
||||
U64 totalLiteralLength;
|
||||
@ -73,7 +74,9 @@ struct LDM_CCtx {
|
||||
|
||||
LDM_compressStats stats; /* Compression statistics */
|
||||
|
||||
LDM_hashEntry hashTable[LDM_HASHTABLESIZE_U32];
|
||||
LDM_hashEntry *hashTable;
|
||||
|
||||
// LDM_hashEntry hashTable[LDM_HASHTABLESIZE_U32];
|
||||
|
||||
const BYTE *lastPosHashed; /* Last position hashed */
|
||||
hash_t lastHash; /* Hash corresponding to lastPosHashed */
|
||||
@ -90,7 +93,7 @@ struct LDM_CCtx {
|
||||
const BYTE *DEBUG_setNextHash;
|
||||
};
|
||||
|
||||
void LDM_outputHashtableOccupancy(
|
||||
void LDM_outputHashTableOccupancy(
|
||||
const LDM_hashEntry *hashTable, U32 hashTableSize) {
|
||||
U32 i = 0;
|
||||
U32 ctr = 0;
|
||||
@ -104,9 +107,8 @@ void LDM_outputHashtableOccupancy(
|
||||
100.0 * (double)(ctr) / (double)hashTableSize);
|
||||
}
|
||||
|
||||
// TODO: This can be done more efficienctly but is not that important as it
|
||||
// is only used for computing stats.
|
||||
//
|
||||
// TODO: This can be done more efficiently (but it is not that important as it
|
||||
// is only used for computing stats).
|
||||
static int intLog2(U32 x) {
|
||||
int ret = 0;
|
||||
while (x >>= 1) {
|
||||
@ -115,30 +117,57 @@ static int intLog2(U32 x) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// TODO: Maybe we would eventually prefer to have linear rather than
|
||||
// exponential buckets.
|
||||
void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx) {
|
||||
int i = 0;
|
||||
int buckets[32] = { 0 };
|
||||
|
||||
printf("\n");
|
||||
printf("Hash table histogram\n");
|
||||
for (; i < LDM_HASHTABLESIZE_U32; i++) {
|
||||
int offset = (cctx->ip - cctx->ibase) - cctx->hashTable[i].offset;
|
||||
buckets[intLog2(offset)]++;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for (; i < 32; i++) {
|
||||
printf("2^%*d: %10u %6.3f%%\n", 2, i,
|
||||
buckets[i],
|
||||
100.0 * (double) buckets[i] /
|
||||
(double) LDM_HASHTABLESIZE_U32);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void LDM_printCompressStats(const LDM_compressStats *stats) {
|
||||
int i = 0;
|
||||
printf("=====================\n");
|
||||
printf("Compression statistics\n");
|
||||
//TODO: compute percentage matched?
|
||||
printf("Window size, hash table size (bytes): 2^%u, 2^%u\n",
|
||||
stats->windowSizeLog, stats->hashTableSizeLog);
|
||||
printf("num matches, total match length: %u, %llu\n",
|
||||
stats->numMatches,
|
||||
stats->totalMatchLength);
|
||||
printf("avg match length: %.1f\n", ((double)stats->totalMatchLength) /
|
||||
(double)stats->numMatches);
|
||||
printf("avg literal length: %.1f\n",
|
||||
((double)stats->totalLiteralLength) / (double)stats->numMatches);
|
||||
printf("avg literal length, total literalLength: %.1f, %llu\n",
|
||||
((double)stats->totalLiteralLength) / (double)stats->numMatches,
|
||||
stats->totalLiteralLength);
|
||||
printf("avg offset length: %.1f\n",
|
||||
((double)stats->totalOffset) / (double)stats->numMatches);
|
||||
printf("min offset, max offset: %u %u\n",
|
||||
printf("min offset, max offset: %u, %u\n",
|
||||
stats->minOffset, stats->maxOffset);
|
||||
|
||||
printf("\n");
|
||||
printf("offset histogram\n");
|
||||
printf("offset histogram: offset, num matches, %% of matches\n");
|
||||
|
||||
for (; i <= intLog2(stats->maxOffset); i++) {
|
||||
printf("2^%*d: %10u %6.3f%%\n", 2, i,
|
||||
stats->offsetHistogram[i],
|
||||
100.0 * (double) stats->offsetHistogram[i] /
|
||||
(double)stats->numMatches);
|
||||
(double) stats->numMatches);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
@ -379,8 +408,12 @@ void LDM_initializeCCtx(LDM_CCtx *cctx,
|
||||
cctx->anchor = cctx->ibase;
|
||||
|
||||
memset(&(cctx->stats), 0, sizeof(cctx->stats));
|
||||
memset(cctx->hashTable, 0, sizeof(cctx->hashTable));
|
||||
cctx->hashTable = calloc(LDM_HASHTABLESIZE_U32, sizeof(LDM_hashEntry));
|
||||
// memset(cctx->hashTable, 0, sizeof(cctx->hashTable));
|
||||
cctx->stats.minOffset = UINT_MAX;
|
||||
cctx->stats.windowSizeLog = LDM_WINDOW_SIZE_LOG;
|
||||
cctx->stats.hashTableSizeLog = LDM_MEMORY_USAGE;
|
||||
|
||||
|
||||
cctx->lastPosHashed = NULL;
|
||||
|
||||
@ -417,7 +450,7 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match) {
|
||||
*match = getPositionOnHash(cctx, h);
|
||||
putHashOfCurrentPositionFromHash(cctx, h, sum);
|
||||
|
||||
} while (cctx->ip - *match > WINDOW_SIZE ||
|
||||
} while (cctx->ip - *match > LDM_WINDOW_SIZE ||
|
||||
!LDM_isValidMatch(cctx->ip, *match));
|
||||
setNextHash(cctx);
|
||||
return 0;
|
||||
@ -550,6 +583,8 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||
LDM_updateLastHashFromNextHash(&cctx);
|
||||
}
|
||||
|
||||
// LDM_outputHashTableOffsetHistogram(&cctx);
|
||||
|
||||
/* Encode the last literals (no more matches). */
|
||||
{
|
||||
const size_t lastRun = cctx.iend - cctx.anchor;
|
||||
@ -559,7 +594,7 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
||||
|
||||
#ifdef COMPUTE_STATS
|
||||
LDM_printCompressStats(&cctx.stats);
|
||||
LDM_outputHashtableOccupancy(cctx.hashTable, LDM_HASHTABLESIZE_U32);
|
||||
LDM_outputHashTableOccupancy(cctx.hashTable, LDM_HASHTABLESIZE_U32);
|
||||
#endif
|
||||
|
||||
return cctx.op - cctx.obase;
|
||||
|
@ -11,16 +11,17 @@
|
||||
#define LDM_OFFSET_SIZE 4
|
||||
|
||||
// Defines the size of the hash table.
|
||||
#define LDM_MEMORY_USAGE 20
|
||||
#define LDM_MEMORY_USAGE 16
|
||||
#define LDM_HASHLOG (LDM_MEMORY_USAGE-2)
|
||||
#define LDM_HASHTABLESIZE (1 << (LDM_MEMORY_USAGE))
|
||||
#define LDM_HASHTABLESIZE_U32 ((LDM_HASHTABLESIZE) >> 2)
|
||||
|
||||
#define WINDOW_SIZE (1 << 25)
|
||||
#define LDM_WINDOW_SIZE_LOG 25
|
||||
#define LDM_WINDOW_SIZE (1 << (LDM_WINDOW_SIZE_LOG))
|
||||
|
||||
//These should be multiples of four.
|
||||
#define LDM_MIN_MATCH_LENGTH 1024
|
||||
#define LDM_HASH_LENGTH 1024
|
||||
#define LDM_MIN_MATCH_LENGTH 4
|
||||
#define LDM_HASH_LENGTH 4
|
||||
|
||||
typedef U32 offset_t;
|
||||
typedef U32 hash_t;
|
||||
@ -70,9 +71,17 @@ void LDM_initializeCCtx(LDM_CCtx *cctx,
|
||||
* Prints the percentage of the hash table occupied (where occupied is defined
|
||||
* as the entry being non-zero).
|
||||
*/
|
||||
void LDM_outputHashtableOccupancy(const LDM_hashEntry *hashTable,
|
||||
void LDM_outputHashTableOccupancy(const LDM_hashEntry *hashTable,
|
||||
U32 hashTableSize);
|
||||
|
||||
/**
|
||||
* Prints the distribution of offsets in the hash table.
|
||||
*
|
||||
* The offsets are defined as the distance of the hash table entry from the
|
||||
* current input position of the cctx.
|
||||
*/
|
||||
void LDM_outputHashTableOffsetHistogram(const LDM_CCtx *cctx);
|
||||
|
||||
/**
|
||||
* Outputs compression statistics to stdout.
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user