Compare commits

...

3 Commits
dev ... hcdc

Author SHA1 Message Date
Yann Collet
64334afd8f fixed search
but slower than dev
and some traces are still present in new code
2021-04-27 01:07:27 -07:00
Yann Collet
182b2ab3e3 Merge branch 'dev' into hcdc 2021-04-26 22:41:09 -07:00
Yann Collet
4905e57e41 initial experiment
just simplify hash chain search
2021-04-26 22:33:08 -07:00

View File

@ -1289,6 +1289,119 @@ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen)
}
int
LZ4HC_InsertAndGetLongerMatch (
LZ4HC_CCtx_internal* hc4,
const BYTE* const ip,
const BYTE* const iHighLimit,
int longest,
const BYTE** matchpos,
const int maxNbAttempts)
{
U16* const chainTable = hc4->chainTable;
U32* const HashTable = hc4->hashTable;
const BYTE* const base = hc4->base;
const U32 ipIndex = (U32)(ip - base);
const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
const U32 maxOff = ipIndex - lowestMatchIndex;
int nbAttempts = maxNbAttempts;
U32 const pattern = LZ4_read32(ip);
U32 matchIndex;
DEBUGLOG(6, "LZ4HC_InsertAndGetLongerMatch (ipIndex = %u)", ipIndex);
LZ4HC_Insert(hc4, ip); /* Update chains up to ip (excluded) */
matchIndex = HashTable[LZ4HC_hashPtr(ip)];
DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)",
matchIndex, lowestMatchIndex);
#define printf(...) // DEBUGLOG(2, __VA_ARGS__)
/* single chain (head) */
while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
printf("candidate at pos: %u \n", matchIndex);
nbAttempts--;
assert(matchIndex < ipIndex);
{ /* within current Prefix */
const BYTE* const matchPtr = base + matchIndex;
assert(matchPtr < ip);
assert(longest >= 1);
if ( (matchPtr[longest] == ip[longest])
&& (LZ4_read32(matchPtr) == pattern) ) {
int const matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
if (matchLength > longest) {
printf("found better match of len %i at offset %u \n", matchLength, ipIndex - matchIndex);
longest = matchLength;
*matchpos = matchPtr;
if (matchIndex + (U32)matchLength < ipIndex) break; /* non-overlapping match -> let's move to 2-chains*/
} } }
matchIndex -= DELTANEXTU16(chainTable, matchIndex);
}
#if 1
if (nbAttempts == 0) return longest;
if (matchIndex < lowestMatchIndex) return longest;
printf("non-overlapping candidate at pos %u has length %i \n", matchIndex, longest);
U32 offset = ipIndex - matchIndex;
U32 headOff = offset + DELTANEXTU16(chainTable, matchIndex);
U32 tailOff = offset + DELTANEXTU16(chainTable, matchIndex + (U32)longest - (MINMATCH-0));
printf("initial headOff / tailOff = %u / %u \n", headOff, tailOff);
while ( (headOff <= maxOff) && (tailOff <= maxOff) && (headOff != tailOff) ) {
if (tailOff < headOff) {
assert(DELTANEXTU16(chainTable, ipIndex - tailOff + (U32)longest - (MINMATCH-0)) > 0);
tailOff = tailOff + DELTANEXTU16(chainTable, ipIndex - tailOff + (U32)longest - (MINMATCH-0));
printf("new tailOff = %u \n", tailOff);
} else {
headOff = headOff + DELTANEXTU16(chainTable, ipIndex - headOff);
printf("new headOff = %u \n", headOff);
} }
/* double chain (head & tail) */
printf("resulting headOff / tailOff = %u / %u \n", headOff, tailOff);
if (tailOff > maxOff) return longest;
if (tailOff != headOff) return longest;
printf("longest = %i \n", longest);
matchIndex = ipIndex - tailOff;
while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) {
nbAttempts--;
assert(matchIndex < ipIndex);
{ /* within current Prefix */
const BYTE* const matchPtr = base + matchIndex;
assert(matchPtr < ip);
assert(longest >= 1);
printf("matchIndex = %u \n", matchIndex);
if ( (matchPtr[longest] == ip[longest])
&& (LZ4_read32(matchPtr) == pattern) ) {
int const matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit);
if (matchLength > longest) {
printf("found better match of len %i at offset %u \n", matchLength, ipIndex - matchIndex);
longest = matchLength;
*matchpos = matchPtr;
} } }
/* update */
offset = ipIndex - matchIndex;
headOff = offset + DELTANEXTU16(chainTable, matchIndex);
tailOff = offset + DELTANEXTU16(chainTable, matchIndex + (U32)longest - (MINMATCH-0));
while ( (headOff <= maxOff) && (tailOff <= maxOff) && (headOff != tailOff) ) {
if (tailOff < headOff) {
tailOff = tailOff + DELTANEXTU16(chainTable, ipIndex - tailOff + (U32)longest - (MINMATCH-0));
} else {
headOff = headOff + DELTANEXTU16(chainTable, ipIndex - headOff);
} }
if (tailOff != headOff) break;
if (tailOff > maxOff) break;
matchIndex = ipIndex - tailOff;
}
#endif
return longest;
}
typedef struct {
int off;
int len;
@ -1306,7 +1419,8 @@ LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx,
/* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos),
* but this won't be the case here, as we define iLowLimit==ip,
* so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */
int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
//int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed);
int matchLength = LZ4HC_InsertAndGetLongerMatch(ctx, ip, iHighLimit, minLen, &matchPtr, nbSearches); (void)dict;
if (matchLength <= minLen) return match;
if (favorDecSpeed) {
if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */