Fix overflow bug when calculating hash
This commit is contained in:
parent
0b8fb1703b
commit
1a188fe864
@ -25,7 +25,7 @@ LDFLAGS += -lzstd
|
|||||||
|
|
||||||
default: all
|
default: all
|
||||||
|
|
||||||
all: main-circular-buffer main-integrated main-64
|
all: main-circular-buffer main-integrated main-64
|
||||||
|
|
||||||
#main-basic : basic_table.c ldm.c main-ldm.c
|
#main-basic : basic_table.c ldm.c main-ldm.c
|
||||||
# $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
# $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
|
||||||
@ -41,6 +41,6 @@ main-integrated: ldm_with_table.c main-ldm.c
|
|||||||
|
|
||||||
clean:
|
clean:
|
||||||
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
|
@rm -f core *.o tmp* result* *.ldm *.ldm.dec \
|
||||||
main-basic main-circular-buffer main-integrated main-64
|
main-circular-buffer main-integrated main-64
|
||||||
@echo Cleaning completed
|
@echo Cleaning completed
|
||||||
|
|
||||||
|
@ -520,8 +520,8 @@ size_t LDM_compress(const void *src, size_t srcSize,
|
|||||||
void *dst, size_t maxDstSize) {
|
void *dst, size_t maxDstSize) {
|
||||||
LDM_CCtx cctx;
|
LDM_CCtx cctx;
|
||||||
const BYTE *match = NULL;
|
const BYTE *match = NULL;
|
||||||
U32 forwardMatchLength = 0;
|
U64 forwardMatchLength = 0;
|
||||||
U32 backwardsMatchLength = 0;
|
U64 backwardsMatchLength = 0;
|
||||||
|
|
||||||
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
LDM_initializeCCtx(&cctx, src, srcSize, dst, maxDstSize);
|
||||||
LDM_outputConfiguration();
|
LDM_outputConfiguration();
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
// Note that this is not the number of buckets.
|
// Note that this is not the number of buckets.
|
||||||
// Currently this should be less than WINDOW_SIZE_LOG + 4?
|
// Currently this should be less than WINDOW_SIZE_LOG + 4?
|
||||||
#define LDM_MEMORY_USAGE 22
|
#define LDM_MEMORY_USAGE 22
|
||||||
#define HASH_BUCKET_SIZE_LOG 1 // MAX is 4 for now
|
#define HASH_BUCKET_SIZE_LOG 2 // MAX is 4 for now
|
||||||
|
|
||||||
// Defines the lag in inserting elements into the hash table.
|
// Defines the lag in inserting elements into the hash table.
|
||||||
#define LDM_LAG 0
|
#define LDM_LAG 0
|
||||||
@ -26,7 +26,8 @@
|
|||||||
#define LDM_MIN_MATCH_LENGTH 64
|
#define LDM_MIN_MATCH_LENGTH 64
|
||||||
#define LDM_HASH_LENGTH 64
|
#define LDM_HASH_LENGTH 64
|
||||||
|
|
||||||
|
#define TMP_EVICTION
|
||||||
|
#define TMP_TAG_INSERT
|
||||||
typedef struct LDM_compressStats LDM_compressStats;
|
typedef struct LDM_compressStats LDM_compressStats;
|
||||||
typedef struct LDM_CCtx LDM_CCtx;
|
typedef struct LDM_CCtx LDM_CCtx;
|
||||||
typedef struct LDM_DCtx LDM_DCtx;
|
typedef struct LDM_DCtx LDM_DCtx;
|
||||||
@ -99,12 +100,6 @@ void LDM_printCompressStats(const LDM_compressStats *stats);
|
|||||||
*/
|
*/
|
||||||
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch);
|
int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch);
|
||||||
|
|
||||||
/**
|
|
||||||
* Counts the number of bytes that match from pIn and pMatch,
|
|
||||||
* up to pInLimit.
|
|
||||||
*/
|
|
||||||
U32 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
|
|
||||||
const BYTE *pInLimit);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encode the literal length followed by the literals.
|
* Encode the literal length followed by the literals.
|
||||||
|
@ -89,21 +89,16 @@ struct LDM_CCtx {
|
|||||||
|
|
||||||
const BYTE *lastPosHashed; /* Last position hashed */
|
const BYTE *lastPosHashed; /* Last position hashed */
|
||||||
hash_t lastHash; /* Hash corresponding to lastPosHashed */
|
hash_t lastHash; /* Hash corresponding to lastPosHashed */
|
||||||
U32 lastSum;
|
U64 lastSum;
|
||||||
|
|
||||||
const BYTE *nextIp; // TODO: this is redundant (ip + step)
|
const BYTE *nextIp; // TODO: this is redundant (ip + step)
|
||||||
const BYTE *nextPosHashed;
|
const BYTE *nextPosHashed;
|
||||||
U64 nextSum;
|
U64 nextSum;
|
||||||
|
|
||||||
// hash_t nextHash; /* Hash corresponding to nextPosHashed */
|
|
||||||
// U32 nextSum;
|
|
||||||
|
|
||||||
unsigned step; // ip step, should be 1.
|
unsigned step; // ip step, should be 1.
|
||||||
|
|
||||||
const BYTE *lagIp;
|
const BYTE *lagIp;
|
||||||
U64 lagSum;
|
U64 lagSum;
|
||||||
// hash_t lagHash;
|
|
||||||
// U32 lagSum;
|
|
||||||
|
|
||||||
U64 numHashInserts;
|
U64 numHashInserts;
|
||||||
// DEBUG
|
// DEBUG
|
||||||
@ -273,15 +268,15 @@ LDM_hashEntry *HASH_getBestEntry(const LDM_CCtx *cctx,
|
|||||||
LDM_hashEntry *bucket = getBucket(table, hash);
|
LDM_hashEntry *bucket = getBucket(table, hash);
|
||||||
LDM_hashEntry *cur = bucket;
|
LDM_hashEntry *cur = bucket;
|
||||||
LDM_hashEntry *bestEntry = NULL;
|
LDM_hashEntry *bestEntry = NULL;
|
||||||
U32 bestMatchLength = 0;
|
U64 bestMatchLength = 0;
|
||||||
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
|
for (; cur < bucket + HASH_BUCKET_SIZE; ++cur) {
|
||||||
const BYTE *pMatch = cur->offset + cctx->ibase;
|
const BYTE *pMatch = cur->offset + cctx->ibase;
|
||||||
|
|
||||||
// Check checksum for faster check.
|
// Check checksum for faster check.
|
||||||
if (cur->checksum == checksum &&
|
if (cur->checksum == checksum &&
|
||||||
cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
|
cctx->ip - pMatch <= LDM_WINDOW_SIZE) {
|
||||||
U32 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend);
|
U64 forwardMatchLength = ZSTD_count(cctx->ip, pMatch, cctx->iend);
|
||||||
U32 backwardMatchLength, totalMatchLength;
|
U64 backwardMatchLength, totalMatchLength;
|
||||||
|
|
||||||
// For speed.
|
// For speed.
|
||||||
if (forwardMatchLength < LDM_MIN_MATCH_LENGTH) {
|
if (forwardMatchLength < LDM_MIN_MATCH_LENGTH) {
|
||||||
@ -313,6 +308,8 @@ LDM_hashEntry *HASH_getBestEntry(const LDM_CCtx *cctx,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef TMP_EVICTION
|
||||||
|
|
||||||
void HASH_insert(LDM_hashTable *table,
|
void HASH_insert(LDM_hashTable *table,
|
||||||
const hash_t hash, const LDM_hashEntry entry) {
|
const hash_t hash, const LDM_hashEntry entry) {
|
||||||
*(getBucket(table, hash) + table->bucketOffsets[hash]) = entry;
|
*(getBucket(table, hash) + table->bucketOffsets[hash]) = entry;
|
||||||
@ -320,6 +317,17 @@ void HASH_insert(LDM_hashTable *table,
|
|||||||
table->bucketOffsets[hash] &= HASH_BUCKET_SIZE - 1;
|
table->bucketOffsets[hash] &= HASH_BUCKET_SIZE - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
void HASH_insert(LDM_hashTable *table,
|
||||||
|
const hash_t hash, const LDM_hashEntry entry) {
|
||||||
|
*(getBucket(table, hash) + table->bucketOffsets[hash]) = entry;
|
||||||
|
table->bucketOffsets[hash]++;
|
||||||
|
table->bucketOffsets[hash] &= HASH_BUCKET_SIZE - 1;
|
||||||
|
}
|
||||||
|
#endif // TMP_EVICTION
|
||||||
|
|
||||||
|
|
||||||
U32 HASH_getSize(const LDM_hashTable *table) {
|
U32 HASH_getSize(const LDM_hashTable *table) {
|
||||||
return table->numBuckets;
|
return table->numBuckets;
|
||||||
}
|
}
|
||||||
@ -349,7 +357,7 @@ void HASH_outputTableOccupancy(const LDM_hashTable *table) {
|
|||||||
|
|
||||||
// TODO: This can be done more efficiently (but it is not that important as it
|
// TODO: This can be done more efficiently (but it is not that important as it
|
||||||
// is only used for computing stats).
|
// is only used for computing stats).
|
||||||
static int intLog2(U32 x) {
|
static int intLog2(U64 x) {
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
while (x >>= 1) {
|
while (x >>= 1) {
|
||||||
ret++;
|
ret++;
|
||||||
@ -357,32 +365,6 @@ static int intLog2(U32 x) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maybe we would eventually prefer to have linear rather than
|
|
||||||
// exponential buckets.
|
|
||||||
/**
|
|
||||||
void HASH_outputTableOffsetHistogram(const LDM_CCtx *cctx) {
|
|
||||||
U32 i = 0;
|
|
||||||
int buckets[32] = { 0 };
|
|
||||||
|
|
||||||
printf("\n");
|
|
||||||
printf("Hash table histogram\n");
|
|
||||||
for (; i < HASH_getSize(cctx->hashTable); i++) {
|
|
||||||
int offset = (cctx->ip - cctx->ibase) -
|
|
||||||
HASH_getEntryFromHash(cctx->hashTable, i)->offset;
|
|
||||||
buckets[intLog2(offset)]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
i = 0;
|
|
||||||
for (; i < 32; i++) {
|
|
||||||
printf("2^%*d: %10u %6.3f%%\n", 2, i,
|
|
||||||
buckets[i],
|
|
||||||
100.0 * (double) buckets[i] /
|
|
||||||
(double) HASH_getSize(cctx->hashTable));
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
void LDM_printCompressStats(const LDM_compressStats *stats) {
|
void LDM_printCompressStats(const LDM_compressStats *stats) {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
printf("=====================\n");
|
printf("=====================\n");
|
||||||
@ -436,22 +418,6 @@ int LDM_isValidMatch(const BYTE *pIn, const BYTE *pMatch) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
hash_t HASH_hashU32(U32 value) {
|
|
||||||
return ((value * 2654435761U) >> (32 - LDM_HASHLOG));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Convert a sum computed from getChecksum to a hash value in the range
|
|
||||||
* of the hash table.
|
|
||||||
*/
|
|
||||||
#if 0
|
|
||||||
static hash_t checksumToHash(U32 sum) {
|
|
||||||
return HASH_hashU32(sum);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Upper LDM_HASH_LOG bits.
|
// Upper LDM_HASH_LOG bits.
|
||||||
static hash_t checksumToHash(U64 sum) {
|
static hash_t checksumToHash(U64 sum) {
|
||||||
return sum >> (64 - LDM_HASHLOG);
|
return sum >> (64 - LDM_HASHLOG);
|
||||||
@ -462,69 +428,19 @@ static U32 checksumFromHfHash(U64 hfHash) {
|
|||||||
return (hfHash >> (64 - 32 - LDM_HASHLOG)) & 0xFFFFFFFF;
|
return (hfHash >> (64 - 32 - LDM_HASHLOG)) & 0xFFFFFFFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
/**
|
|
||||||
* Computes a checksum based on rsync's checksum.
|
|
||||||
*
|
|
||||||
* a(k,l) = \sum_{i = k}^l x_i (mod M)
|
|
||||||
* b(k,l) = \sum_{i = k}^l ((l - i + 1) * x_i) (mod M)
|
|
||||||
* checksum(k,l) = a(k,l) + 2^{16} * b(k,l)
|
|
||||||
*/
|
|
||||||
static U32 getChecksum(const BYTE *buf, U32 len) {
|
|
||||||
U32 i;
|
|
||||||
U32 s1, s2;
|
|
||||||
|
|
||||||
s1 = s2 = 0;
|
|
||||||
for (i = 0; i < (len - 4); i += 4) {
|
|
||||||
s2 += (4 * (s1 + buf[i])) + (3 * buf[i + 1]) +
|
|
||||||
(2 * buf[i + 2]) + (buf[i + 3]) +
|
|
||||||
(10 * CHECKSUM_CHAR_OFFSET);
|
|
||||||
s1 += buf[i] + buf[i + 1] + buf[i + 2] + buf[i + 3] +
|
|
||||||
+ (4 * CHECKSUM_CHAR_OFFSET);
|
|
||||||
|
|
||||||
}
|
|
||||||
for(; i < len; i++) {
|
|
||||||
s1 += buf[i] + CHECKSUM_CHAR_OFFSET;
|
|
||||||
s2 += s1;
|
|
||||||
}
|
|
||||||
return (s1 & 0xffff) + (s2 << 16);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static U64 getChecksum(const BYTE *buf, U32 len) {
|
static U64 getChecksum(const BYTE *buf, U32 len) {
|
||||||
static const U64 prime8bytes = 11400714785074694791ULL;
|
static const U64 prime8bytes = 11400714785074694791ULL;
|
||||||
|
|
||||||
// static const U64 prime8bytes = 5;
|
|
||||||
U64 ret = 0;
|
U64 ret = 0;
|
||||||
U32 i;
|
U32 i;
|
||||||
for (i = 0; i < len; i++) {
|
for (i = 0; i < len; i++) {
|
||||||
ret *= prime8bytes;
|
ret *= prime8bytes;
|
||||||
ret += buf[i] + CHECKSUM_CHAR_OFFSET;
|
ret += buf[i] + CHECKSUM_CHAR_OFFSET;
|
||||||
// printf("HERE %llu\n", ret);
|
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
/**
|
|
||||||
* Update a checksum computed from getChecksum(data, len).
|
|
||||||
*
|
|
||||||
* The checksum can be updated along its ends as follows:
|
|
||||||
* a(k+1, l+1) = (a(k,l) - x_k + x_{l+1}) (mod M)
|
|
||||||
* b(k+1, l+1) = (b(k,l) - (l-k+1)*x_k + (a(k+1,l+1)) (mod M)
|
|
||||||
*
|
|
||||||
* Thus toRemove should correspond to data[0].
|
|
||||||
*/
|
|
||||||
static U32 updateChecksum(U32 sum, U32 len,
|
|
||||||
BYTE toRemove, BYTE toAdd) {
|
|
||||||
U32 s1 = (sum & 0xffff) - toRemove + toAdd;
|
|
||||||
U32 s2 = (sum >> 16) - ((toRemove + CHECKSUM_CHAR_OFFSET) * len) + s1;
|
|
||||||
|
|
||||||
return (s1 & 0xffff) + (s2 << 16);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static U64 ipow(U64 base, U64 exp) {
|
static U64 ipow(U64 base, U64 exp) {
|
||||||
U64 ret = 1;
|
U64 ret = 1;
|
||||||
while (exp) {
|
while (exp) {
|
||||||
@ -542,6 +458,8 @@ static U64 updateChecksum(U64 sum, U32 len,
|
|||||||
// TODO: deduplicate.
|
// TODO: deduplicate.
|
||||||
static const U64 prime8bytes = 11400714785074694791ULL;
|
static const U64 prime8bytes = 11400714785074694791ULL;
|
||||||
|
|
||||||
|
// TODO: relying on compiler optimization here.
|
||||||
|
// The exponential can be calculated explicitly.
|
||||||
sum -= ((toRemove + CHECKSUM_CHAR_OFFSET) *
|
sum -= ((toRemove + CHECKSUM_CHAR_OFFSET) *
|
||||||
ipow(prime8bytes, len - 1));
|
ipow(prime8bytes, len - 1));
|
||||||
sum *= prime8bytes;
|
sum *= prime8bytes;
|
||||||
@ -558,7 +476,7 @@ static U64 updateChecksum(U64 sum, U32 len,
|
|||||||
*/
|
*/
|
||||||
static void setNextHash(LDM_CCtx *cctx) {
|
static void setNextHash(LDM_CCtx *cctx) {
|
||||||
#ifdef RUN_CHECKS
|
#ifdef RUN_CHECKS
|
||||||
U32 check;
|
U64 check;
|
||||||
if ((cctx->nextIp - cctx->ibase != 1) &&
|
if ((cctx->nextIp - cctx->ibase != 1) &&
|
||||||
(cctx->nextIp - cctx->DEBUG_setNextHash != 1)) {
|
(cctx->nextIp - cctx->DEBUG_setNextHash != 1)) {
|
||||||
printf("CHECK debug fail: %zu %zu\n", cctx->nextIp - cctx->ibase,
|
printf("CHECK debug fail: %zu %zu\n", cctx->nextIp - cctx->ibase,
|
||||||
@ -568,16 +486,11 @@ static void setNextHash(LDM_CCtx *cctx) {
|
|||||||
cctx->DEBUG_setNextHash = cctx->nextIp;
|
cctx->DEBUG_setNextHash = cctx->nextIp;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// cctx->nextSum = getChecksum((const char *)cctx->nextIp, LDM_HASH_LENGTH);
|
|
||||||
cctx->nextSum = updateChecksum(
|
cctx->nextSum = updateChecksum(
|
||||||
cctx->lastSum, LDM_HASH_LENGTH,
|
cctx->lastSum, LDM_HASH_LENGTH,
|
||||||
cctx->lastPosHashed[0],
|
cctx->lastPosHashed[0],
|
||||||
cctx->lastPosHashed[LDM_HASH_LENGTH]);
|
cctx->lastPosHashed[LDM_HASH_LENGTH]);
|
||||||
cctx->nextPosHashed = cctx->nextIp;
|
cctx->nextPosHashed = cctx->nextIp;
|
||||||
#if 0
|
|
||||||
cctx->nextHash = checksumToHash(cctx->nextSum);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#if LDM_LAG
|
#if LDM_LAG
|
||||||
// printf("LDM_LAG %zu\n", cctx->ip - cctx->lagIp);
|
// printf("LDM_LAG %zu\n", cctx->ip - cctx->lagIp);
|
||||||
@ -586,9 +499,6 @@ static void setNextHash(LDM_CCtx *cctx) {
|
|||||||
cctx->lagSum, LDM_HASH_LENGTH,
|
cctx->lagSum, LDM_HASH_LENGTH,
|
||||||
cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]);
|
cctx->lagIp[0], cctx->lagIp[LDM_HASH_LENGTH]);
|
||||||
cctx->lagIp++;
|
cctx->lagIp++;
|
||||||
#if 0
|
|
||||||
cctx->lagHash = checksumToHash(cctx->lagSum);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -596,7 +506,7 @@ static void setNextHash(LDM_CCtx *cctx) {
|
|||||||
check = getChecksum(cctx->nextIp, LDM_HASH_LENGTH);
|
check = getChecksum(cctx->nextIp, LDM_HASH_LENGTH);
|
||||||
|
|
||||||
if (check != cctx->nextSum) {
|
if (check != cctx->nextSum) {
|
||||||
printf("CHECK: setNextHash failed %u %u\n", check, cctx->nextSum);
|
printf("CHECK: setNextHash failed %llu %llu\n", check, cctx->nextSum);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((cctx->nextIp - cctx->lastPosHashed) != 1) {
|
if ((cctx->nextIp - cctx->lastPosHashed) != 1) {
|
||||||
@ -610,8 +520,6 @@ static void setNextHash(LDM_CCtx *cctx) {
|
|||||||
static void putHashOfCurrentPositionFromHash(LDM_CCtx *cctx, U64 hfHash) {
|
static void putHashOfCurrentPositionFromHash(LDM_CCtx *cctx, U64 hfHash) {
|
||||||
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
|
// Hash only every HASH_ONLY_EVERY times, based on cctx->ip.
|
||||||
// Note: this works only when cctx->step is 1.
|
// Note: this works only when cctx->step is 1.
|
||||||
U32 hash = checksumToHash(hfHash);
|
|
||||||
U32 sum = checksumFromHfHash(hfHash);
|
|
||||||
// printf("TMP %u %u %llu\n", hash, sum, hfHash);
|
// printf("TMP %u %u %llu\n", hash, sum, hfHash);
|
||||||
|
|
||||||
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
|
if (((cctx->ip - cctx->ibase) & HASH_ONLY_EVERY) == HASH_ONLY_EVERY) {
|
||||||
@ -619,22 +527,26 @@ static void putHashOfCurrentPositionFromHash(LDM_CCtx *cctx, U64 hfHash) {
|
|||||||
#if LDM_LAG
|
#if LDM_LAG
|
||||||
// TODO: off by 1, but whatever
|
// TODO: off by 1, but whatever
|
||||||
if (cctx->lagIp - cctx->ibase > 0) {
|
if (cctx->lagIp - cctx->ibase > 0) {
|
||||||
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, cctx->lagSum };
|
U32 hash = checksumToHash(cctx->lagSum);
|
||||||
HASH_insert(cctx->hashTable, cctx->lagHash, entry);
|
U32 sum = checksumFromHfHash(cctx->lagSum);
|
||||||
|
const LDM_hashEntry entry = { cctx->lagIp - cctx->ibase, sum };
|
||||||
|
HASH_insert(cctx->hashTable, hash, entry);
|
||||||
} else {
|
} else {
|
||||||
|
U32 hash = checksumToHash(hfHash);
|
||||||
|
U32 sum = checksumFromHfHash(hfHash);
|
||||||
|
|
||||||
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
|
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
|
||||||
HASH_insert(cctx->hashTable, hash, entry);
|
HASH_insert(cctx->hashTable, hash, entry);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
U32 hash = checksumToHash(hfHash);
|
||||||
|
U32 sum = checksumFromHfHash(hfHash);
|
||||||
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
|
const LDM_hashEntry entry = { cctx->ip - cctx->ibase, sum };
|
||||||
HASH_insert(cctx->hashTable, hash, entry);
|
HASH_insert(cctx->hashTable, hash, entry);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
cctx->lastPosHashed = cctx->ip;
|
cctx->lastPosHashed = cctx->ip;
|
||||||
#if 0
|
|
||||||
cctx->lastHash = hash;
|
|
||||||
#endif
|
|
||||||
cctx->lastSum = hfHash;
|
cctx->lastSum = hfHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -650,9 +562,6 @@ static void LDM_updateLastHashFromNextHash(LDM_CCtx *cctx) {
|
|||||||
printf("CHECK failed: updateLastHashFromNextHash %zu\n",
|
printf("CHECK failed: updateLastHashFromNextHash %zu\n",
|
||||||
cctx->ip - cctx->ibase);
|
cctx->ip - cctx->ibase);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
#if 0
|
|
||||||
putHashOfCurrentPositionFromHash(cctx, cctx->nextHash, cctx->nextSum);
|
|
||||||
#endif
|
#endif
|
||||||
putHashOfCurrentPositionFromHash(cctx, cctx->nextSum);
|
putHashOfCurrentPositionFromHash(cctx, cctx->nextSum);
|
||||||
}
|
}
|
||||||
@ -661,10 +570,7 @@ static void LDM_updateLastHashFromNextHash(LDM_CCtx *cctx) {
|
|||||||
* Insert hash of the current position into the hash table.
|
* Insert hash of the current position into the hash table.
|
||||||
*/
|
*/
|
||||||
static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) {
|
static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) {
|
||||||
U32 sum = getChecksum(cctx->ip, LDM_HASH_LENGTH);
|
U64 sum = getChecksum(cctx->ip, LDM_HASH_LENGTH);
|
||||||
#if 0
|
|
||||||
hash_t hash = checksumToHash(sum);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef RUN_CHECKS
|
#ifdef RUN_CHECKS
|
||||||
if (cctx->nextPosHashed != cctx->ip && (cctx->ip != cctx->ibase)) {
|
if (cctx->nextPosHashed != cctx->ip && (cctx->ip != cctx->ibase)) {
|
||||||
@ -672,13 +578,11 @@ static void LDM_putHashOfCurrentPosition(LDM_CCtx *cctx) {
|
|||||||
cctx->ip - cctx->ibase);
|
cctx->ip - cctx->ibase);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#if 0
|
|
||||||
putHashOfCurrentPositionFromHash(cctx, hash, sum);
|
|
||||||
#endif
|
|
||||||
putHashOfCurrentPositionFromHash(cctx, sum);
|
putHashOfCurrentPositionFromHash(cctx, sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
|
U64 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
|
||||||
const BYTE *pInLimit) {
|
const BYTE *pInLimit) {
|
||||||
const BYTE * const pStart = pIn;
|
const BYTE * const pStart = pIn;
|
||||||
while (pIn < pInLimit - 1) {
|
while (pIn < pInLimit - 1) {
|
||||||
@ -688,9 +592,9 @@ U32 LDM_countMatchLength(const BYTE *pIn, const BYTE *pMatch,
|
|||||||
pMatch++;
|
pMatch++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
return (U32)(pIn - pStart);
|
return (U64)(pIn - pStart);
|
||||||
}
|
}
|
||||||
return (U32)(pIn - pStart);
|
return (U64)(pIn - pStart);
|
||||||
}
|
}
|
||||||
|
|
||||||
void LDM_outputConfiguration(void) {
|
void LDM_outputConfiguration(void) {
|
||||||
@ -773,10 +677,6 @@ static int LDM_findBestMatch(LDM_CCtx *cctx, const BYTE **match,
|
|||||||
U64 hash;
|
U64 hash;
|
||||||
U32 sum;
|
U32 sum;
|
||||||
setNextHash(cctx);
|
setNextHash(cctx);
|
||||||
#if 0
|
|
||||||
h = cctx->nextHash;
|
|
||||||
sum = cctx->nextSum;
|
|
||||||
#endif
|
|
||||||
hash = cctx->nextSum;
|
hash = cctx->nextSum;
|
||||||
h = checksumToHash(hash);
|
h = checksumToHash(hash);
|
||||||
sum = checksumFromHfHash(hash);
|
sum = checksumFromHfHash(hash);
|
||||||
|
@ -94,9 +94,10 @@ static int compress(const char *fname, const char *oname) {
|
|||||||
// Truncate file to compressedSize.
|
// Truncate file to compressedSize.
|
||||||
ftruncate(fdout, compressedSize);
|
ftruncate(fdout, compressedSize);
|
||||||
|
|
||||||
printf("%25s : %10lu -> %10lu - %s (%.1f%%)\n", fname,
|
printf("%25s : %10lu -> %10lu - %s (%.2fx --- %.1f%%)\n", fname,
|
||||||
(size_t)statbuf.st_size, (size_t)compressedSize, oname,
|
(size_t)statbuf.st_size, (size_t)compressedSize, oname,
|
||||||
(double)compressedSize / (statbuf.st_size) * 100);
|
(statbuf.st_size) / (double)compressedSize,
|
||||||
|
(double)compressedSize / (double)(statbuf.st_size) * 100.0);
|
||||||
|
|
||||||
timeTaken = (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
|
timeTaken = (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 +
|
||||||
(double) (tv2.tv_sec - tv1.tv_sec),
|
(double) (tv2.tv_sec - tv1.tv_sec),
|
||||||
|
Loading…
Reference in New Issue
Block a user