Use ARMv8 CRC32 instructions for SkOpts::hash().
For large inputs, this runs ~11x faster than Murmur3. My bench drops from 1µs to 88ns. Like x86-64, this runs fastest if we work in 24-byte chunks. 16-byte chunks run at about 0.75x this speed, 8-byte chunks at about 0.4x (which would still be about 5x faster than Murmur3). This'll require plumbing support for opts_crc32 into Chrome first before it can roll. perf.skia.org charts we want to watch: https://perf.skia.org/#5490 Search for compute_hash in these logs to see the difference: baseline: https://luci-milo.appspot.com/swarming/task/30ba22f3dfe30e10/steps/nanobench/0/stdout trybot: https://luci-milo.appspot.com/swarming/task/30bbc406cbf62d10/steps/nanobench/0/stdout BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2260823002 CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review-Url: https://codereview.chromium.org/2260823002
This commit is contained in:
parent
dd3259eb95
commit
78559a78f9
@ -30,7 +30,7 @@
|
||||
'<(skia_src_path)/opts/SkBlitRow_opts_arm_neon.cpp',
|
||||
],
|
||||
'crc32_sources': [
|
||||
'<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp',
|
||||
'<(skia_src_path)/opts/SkOpts_crc32.cpp',
|
||||
],
|
||||
|
||||
'mips_dsp_sources': [
|
||||
|
@ -81,15 +81,22 @@ namespace SkOpts {
|
||||
void Init_sse42();
|
||||
void Init_avx();
|
||||
void Init_avx2() {}
|
||||
void Init_crc32();
|
||||
|
||||
// Runs the per-feature Init_*() hooks that the current CPU supports, as
// reported by SkCpu::Supports().
//
// The whole body is compiled out when SK_BUILD_NO_OPTS is defined. Otherwise
// exactly one architecture branch is compiled in: the x86 SIMD levels, or the
// ARM64 CRC32 extension. On any other architecture this function does nothing.
//
// NOTE: the guard used to be `defined(SK_CPU_X86) && !defined(SK_BUILD_NO_OPTS)`.
// It must stay split into two levels as below; folding SK_CPU_X86 back into the
// outer condition would make the ARM64 branch unreachable.
static void init() {
#if !defined(SK_BUILD_NO_OPTS)
    #if defined(SK_CPU_X86)
        // Checked from the oldest feature level to the newest.
        if (SkCpu::Supports(SkCpu::SSSE3)) { Init_ssse3(); }
        if (SkCpu::Supports(SkCpu::SSE41)) { Init_sse41(); }
        if (SkCpu::Supports(SkCpu::SSE42)) { Init_sse42(); }
        if (SkCpu::Supports(SkCpu::AVX  )) { Init_avx();   }
        if (SkCpu::Supports(SkCpu::AVX2 )) { Init_avx2();  }

    #elif defined(SK_CPU_ARM64)
        // Runtime check: the hook is only called when SkCpu reports CRC32 support.
        if (SkCpu::Supports(SkCpu::CRC32)) { Init_crc32(); }

    #endif
#endif
}
|
||||
|
||||
void Init() {
|
||||
|
@ -13,10 +13,10 @@
|
||||
|
||||
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42
|
||||
#include <immintrin.h>
|
||||
#elif defined(SK_CPU_ARM64) && defined(__ARM_FEATURE_CRC32)
|
||||
#include <arm_acle.h>
|
||||
#endif
|
||||
|
||||
// ARMv8 has optional CRC instructions similar to SSE 4.2; they are used below when __ARM_FEATURE_CRC32 is defined.
|
||||
|
||||
namespace SK_OPTS_NS {
|
||||
|
||||
template <typename T>
|
||||
@ -127,6 +127,50 @@ static inline T unaligned_load(const uint8_t* src) {
|
||||
return hash;
|
||||
}
|
||||
|
||||
#elif defined(SK_CPU_ARM64) && defined(__ARM_FEATURE_CRC32)
|
||||
static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
|
||||
auto data = (const uint8_t*)vdata;
|
||||
if (bytes >= 24) {
|
||||
uint32_t a = hash,
|
||||
b = hash,
|
||||
c = hash;
|
||||
size_t steps = bytes/24;
|
||||
while (steps --> 0) {
|
||||
a = __crc32d(a, unaligned_load<uint64_t>(data+ 0));
|
||||
b = __crc32d(b, unaligned_load<uint64_t>(data+ 8));
|
||||
c = __crc32d(c, unaligned_load<uint64_t>(data+16));
|
||||
data += 24;
|
||||
}
|
||||
bytes %= 24;
|
||||
hash = a^b^c;
|
||||
}
|
||||
|
||||
SkASSERT(bytes < 24);
|
||||
if (bytes >= 16) {
|
||||
hash = __crc32d(hash, unaligned_load<uint64_t>(data));
|
||||
bytes -= 8;
|
||||
data += 8;
|
||||
}
|
||||
|
||||
SkASSERT(bytes < 16);
|
||||
if (bytes & 8) {
|
||||
hash = __crc32d(hash, unaligned_load<uint64_t>(data));
|
||||
data += 8;
|
||||
}
|
||||
if (bytes & 4) {
|
||||
hash = __crc32w(hash, unaligned_load<uint32_t>(data));
|
||||
data += 4;
|
||||
}
|
||||
if (bytes & 2) {
|
||||
hash = __crc32h(hash, unaligned_load<uint16_t>(data));
|
||||
data += 2;
|
||||
}
|
||||
if (bytes & 1) {
|
||||
hash = __crc32b(hash, unaligned_load<uint8_t>(data));
|
||||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
#else
|
||||
// This is Murmur3.
|
||||
static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t hash) {
|
||||
|
17
src/opts/SkOpts_crc32.cpp
Normal file
17
src/opts/SkOpts_crc32.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
/*
|
||||
* Copyright 2016 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "SkOpts.h"
|
||||
|
||||
#define SK_OPTS_NS crc32
|
||||
#include "SkChecksum_opts.h"
|
||||
|
||||
namespace SkOpts {
|
||||
void Init_crc32() {
|
||||
hash_fn = crc32::hash_fn;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user