Fix SkOpts::hash_fn slightly

We were folding together a string of 4 byte values as 8 byte values,
causing us to CRC in quite a few zeros. This appears to really poison
the algorithm, resulting in frequent hash collisions.

Change-Id: I1e363088d821d2fc0bbc392b78d1e24690fdc70a
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/432938
Commit-Queue: Herb Derby <herb@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Brian Osman 2021-07-26 12:04:33 -04:00 committed by SkCQ
parent 2df03e64bf
commit 0074706b80
2 changed files with 16 additions and 6 deletions

View File

@ -19,6 +19,7 @@
#if 1 && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
#include <immintrin.h>
static uint32_t crc32c_1(uint32_t seed, uint8_t v) { return _mm_crc32_u8(seed, v); }
static uint32_t crc32c_4(uint32_t seed, uint32_t v) { return _mm_crc32_u32(seed, v); }
static uint32_t crc32c_8(uint32_t seed, uint64_t v) {
#if 1 && (defined(__x86_64__) || defined(_M_X64))
return _mm_crc32_u64(seed, v);
@ -30,6 +31,7 @@
#elif 1 && defined(SK_ARM_HAS_CRC32)
#include <arm_acle.h>
static uint32_t crc32c_1(uint32_t seed, uint8_t v) { return __crc32cb(seed, v); }
static uint32_t crc32c_4(uint32_t seed, uint32_t v) { return __crc32cw(seed, v); }
static uint32_t crc32c_8(uint32_t seed, uint64_t v) { return __crc32cd(seed, v); }
#else
// See https://www.w3.org/TR/PNG/#D-CRCAppendix,
@ -87,6 +89,14 @@
return crc32c_table[(seed ^ v) & 0xff]
^ (seed >> 8);
}
static uint32_t crc32c_4(uint32_t seed, uint32_t v) {
// Nothing special... just crc32c_1() each byte.
for (int i = 0; i < 4; i++) {
seed = crc32c_1(seed, (uint8_t)v);
v >>= 8;
}
return seed;
}
static uint32_t crc32c_8(uint32_t seed, uint64_t v) {
// Nothing special... just crc32c_1() each byte.
for (int i = 0; i < 8; i++) {
@ -115,7 +125,7 @@ namespace SK_OPTS_NS {
ptr += 24;
len -= 24;
}
seed = crc32c_8(a, crc32c_8(b,c));
seed = crc32c_4(a, crc32c_4(b,c));
}
while (len >= 8) {
seed = crc32c_8(seed, sk_unaligned_load<uint64_t>(ptr));

View File

@ -82,9 +82,9 @@ DEF_TEST(ChecksumConsistent, r) {
REPORTER_ASSERT(r, SkOpts::hash(bytes, 1) == 0x00000000, "%08x", SkOpts::hash(bytes, 1));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 2) == 0xf26b8303, "%08x", SkOpts::hash(bytes, 2));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 7) == 0x18678721, "%08x", SkOpts::hash(bytes, 7));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 32) == 0x2d3617af, "%08x", SkOpts::hash(bytes, 32));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 63) == 0xd482f6b1, "%08x", SkOpts::hash(bytes, 63));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 64) == 0x2e5a06a9, "%08x", SkOpts::hash(bytes, 64));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 99) == 0x5214485b, "%08x", SkOpts::hash(bytes, 99));
REPORTER_ASSERT(r, SkOpts::hash(bytes,255) == 0xce206bd3, "%08x", SkOpts::hash(bytes,255));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 32) == 0x9d1ef96b, "%08x", SkOpts::hash(bytes, 32));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 63) == 0xc4b07d3a, "%08x", SkOpts::hash(bytes, 63));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 64) == 0x3535a461, "%08x", SkOpts::hash(bytes, 64));
REPORTER_ASSERT(r, SkOpts::hash(bytes, 99) == 0x3f98a130, "%08x", SkOpts::hash(bytes, 99));
REPORTER_ASSERT(r, SkOpts::hash(bytes,255) == 0x3b9ceab2, "%08x", SkOpts::hash(bytes,255));
}