Revert "restore murmur3 for older iOS devices"
This reverts commit 3c161467f0.
Reason for revert: need to keep iOS simulator (Build-Mac-Clang-x64-Release-iOS) in mind
Original change's description:
> restore murmur3 for older iOS devices
>
> For reference, the relative costs are roughly,
> - our hash with CRC32c instructions 1x
> - Murmur3 11x
> - our hash with CRC32c fallback 23x
>
> So this should be a ~2x speedup for those
> older iOS devices not using an arm64e slice.
>
> Bug: skia:11001
> Cq-Include-Trybots: luci.skia.skia.primary:Test-iOS-Clang-iPadPro-GPU-PowerVRGT7800-arm64-Debug-All,Test-iOS-Clang-iPhone11-GPU-AppleA13-arm64-Debug-All,Test-iOS-Clang-iPhone6-GPU-PowerVRGX6450-arm64-Debug-All,Test-iOS-Clang-iPhone7-GPU-PowerVRGT7600-arm64-Debug-All,Test-iOS-Clang-iPhone8-GPU-AppleA11-arm64-Debug-All
> Change-Id: Ib56195ddc0c522380d263d56e767331d9f635728
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/340178
> Commit-Queue: Mike Klein <mtklein@google.com>
> Reviewed-by: Brian Osman <brianosman@google.com>
TBR=mtklein@google.com,brianosman@google.com
Change-Id: I0e3b23e63c33910e482031d7475feb624bd6e1f6
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:11001
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/340396
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Klein <mtklein@google.com>
This commit is contained in:
parent
469d67e7d9
commit
5334e8976d
@ -12,22 +12,11 @@
|
||||
#include "include/private/SkChecksum.h"
|
||||
#include "src/core/SkUtils.h" // sk_unaligned_load
|
||||
|
||||
// hash_fn() delivers consistent results on a given machine no matter the CPU features we detect,
|
||||
// but it does not guarantee consistent results across different machines.
|
||||
//
|
||||
// Most modern CPUs have hardware CRC32c instructions, and we've designed hash_fn() to favor the
|
||||
// case where we can detect those instructions, sometimes at compile time, sometimes at runtime.
|
||||
// If we don't detect those CRC32c instructions (ARMv8.0, ≤SSE4.2) then we use a software fallback
|
||||
// to emulate those instructions, guaranteeing identical results.
|
||||
//
|
||||
// iOS has made runtime CPU feature detection impossible, and older iOS devices don't support
|
||||
// CRC32c instructions, so we won't see support at compile time. In this case we don't bother
|
||||
// with CRC32c or fallback at all there, instead using Murmur3 which ends up faster overall.
|
||||
// iOS builds that include an `-arch arm64e` slice and that run on a device that supports it
|
||||
// (iPhone XS and up) will detect CRC32c support at compile time in that slice and use the
|
||||
// previous paragraph's faster approach.
|
||||
// This function is designed primarily to deliver consistent results no matter the platform,
|
||||
// but then also is optimized for speed on modern machines with CRC32c instructions.
|
||||
// (ARM supports both CRC32 and CRC32c, but Intel only CRC32c, so we use CRC32c.)
|
||||
|
||||
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
|
||||
#if 1 && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
|
||||
#include <immintrin.h>
|
||||
static uint32_t crc32c_1(uint32_t seed, uint8_t v) { return _mm_crc32_u8(seed, v); }
|
||||
static uint32_t crc32c_8(uint32_t seed, uint64_t v) {
|
||||
@ -38,11 +27,11 @@
|
||||
return _mm_crc32_u32(seed, (uint32_t)(v >> 32));
|
||||
#endif
|
||||
}
|
||||
#elif defined(SK_ARM_HAS_CRC32)
|
||||
#elif 1 && defined(SK_ARM_HAS_CRC32)
|
||||
#include <arm_acle.h>
|
||||
static uint32_t crc32c_1(uint32_t seed, uint8_t v) { return __crc32cb(seed, v); }
|
||||
static uint32_t crc32c_8(uint32_t seed, uint64_t v) { return __crc32cd(seed, v); }
|
||||
#elif !defined(SK_BUILD_FOR_IOS)
|
||||
#else
|
||||
// See https://www.w3.org/TR/PNG/#D-CRCAppendix,
|
||||
// but this is CRC32c, so built with 0x82f63b78, not 0xedb88320 like you'll see there.
|
||||
#if 0
|
||||
@ -110,46 +99,6 @@
|
||||
|
||||
namespace SK_OPTS_NS {
|
||||
|
||||
#if defined(SK_BUILD_FOR_IOS) && !defined(SK_ARM_HAS_CRC32)
|
||||
|
||||
// This is Murmur3, our iOS-without-CRC32 approach mentioned in the third paragraph at the top.
|
||||
inline uint32_t hash_fn(const void* data, size_t len, uint32_t seed) {
|
||||
auto ptr = (const uint8_t*)data;
|
||||
const size_t original_len = len;
|
||||
|
||||
while (len >= 4) {
|
||||
uint32_t k = sk_unaligned_load<uint32_t>(ptr);
|
||||
k *= 0xcc9e2d51;
|
||||
k = (k << 15) | (k >> 17);
|
||||
k *= 0x1b873593;
|
||||
|
||||
seed ^= k;
|
||||
seed = (seed << 13) | (seed >> 19);
|
||||
seed *= 5;
|
||||
seed += 0xe6546b64;
|
||||
|
||||
len -= 4;
|
||||
ptr += 4;
|
||||
}
|
||||
|
||||
uint32_t k = 0;
|
||||
switch (len & 3) {
|
||||
case 3: k ^= ptr[2] << 16; [[fallthrough]];
|
||||
case 2: k ^= ptr[1] << 8; [[fallthrough]];
|
||||
case 1: k ^= ptr[0] << 0;
|
||||
k *= 0xcc9e2d51;
|
||||
k = (k << 15) | (k >> 17);
|
||||
k *= 0x1b873593;
|
||||
seed ^= k;
|
||||
}
|
||||
|
||||
seed ^= original_len;
|
||||
return SkChecksum::Mix(seed);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// This is our main fast CRC32c approach.
|
||||
inline uint32_t hash_fn(const void* data, size_t len, uint32_t seed) {
|
||||
auto ptr = (const uint8_t*)data;
|
||||
|
||||
@ -181,8 +130,6 @@ namespace SK_OPTS_NS {
|
||||
return seed;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace SK_OPTS_NS
|
||||
|
||||
#endif//SkChecksum_opts_DEFINED
|
||||
|
@ -73,25 +73,11 @@ DEF_TEST(ChecksumCollisions, r) {
|
||||
}
|
||||
|
||||
DEF_TEST(ChecksumConsistent, r) {
|
||||
// We've decided to make SkOpts::hash() always return consistent results on a given machine,
|
||||
// so spot check a few:
|
||||
// We've decided to make SkOpts::hash() always return consistent results, so spot check a few:
|
||||
uint8_t bytes[256];
|
||||
for (int i = 0; i < 256; i++) {
|
||||
bytes[i] = i;
|
||||
}
|
||||
#if defined(SK_BUILD_FOR_IOS) && !defined(SK_ARM_HAS_CRC32)
|
||||
// Test Murmur3.
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 0) == 0x00000000, "%08x", SkOpts::hash(bytes, 0));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 1) == 0x514e28b7, "%08x", SkOpts::hash(bytes, 1));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 2) == 0x70e1a2c0, "%08x", SkOpts::hash(bytes, 2));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 7) == 0x8d7e4914, "%08x", SkOpts::hash(bytes, 7));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 32) == 0xcac37638, "%08x", SkOpts::hash(bytes, 32));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 63) == 0x14acd341, "%08x", SkOpts::hash(bytes, 63));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 64) == 0x894ea70b, "%08x", SkOpts::hash(bytes, 64));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 99) == 0xb183f8ad, "%08x", SkOpts::hash(bytes, 99));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes,255) == 0x6334b600, "%08x", SkOpts::hash(bytes,255));
|
||||
#else
|
||||
// Test our custom CRC32c-using hash.
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 0) == 0x00000000, "%08x", SkOpts::hash(bytes, 0));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 1) == 0x00000000, "%08x", SkOpts::hash(bytes, 1));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 2) == 0xf26b8303, "%08x", SkOpts::hash(bytes, 2));
|
||||
@ -101,5 +87,4 @@ DEF_TEST(ChecksumConsistent, r) {
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 64) == 0x2e5a06a9, "%08x", SkOpts::hash(bytes, 64));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes, 99) == 0x5214485b, "%08x", SkOpts::hash(bytes, 99));
|
||||
REPORTER_ASSERT(r, SkOpts::hash(bytes,255) == 0xce206bd3, "%08x", SkOpts::hash(bytes,255));
|
||||
#endif
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user