diff --git a/BUILD.gn b/BUILD.gn index 76b04d59b6..ff3c492ffd 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -158,6 +158,13 @@ source_set("opts_sse41") { cflags = [ "-msse4.1" ] } +source_set("opts_sse42") { + configs += skia_library_configs + + sources = opts_gypi.sse42_sources + cflags = [ "-msse4.2" ] +} + source_set("opts_avx") { configs += skia_library_configs @@ -172,6 +179,7 @@ component("skia") { deps = [ ":opts_avx", ":opts_sse41", + ":opts_sse42", ":opts_ssse3", "//third_party/expat", "//third_party/giflib", diff --git a/bench/ChecksumBench.cpp b/bench/ChecksumBench.cpp index 4c2ac1a396..f84cd3d3bc 100644 --- a/bench/ChecksumBench.cpp +++ b/bench/ChecksumBench.cpp @@ -7,13 +7,14 @@ #include "Benchmark.h" #include "SkCanvas.h" #include "SkChecksum.h" +#include "SkOpts.h" #include "SkMD5.h" #include "SkRandom.h" #include "SkTemplates.h" enum ChecksumType { kMD5_ChecksumType, - kMurmur3_ChecksumType, + kHash_ChecksumType, }; class ComputeChecksumBench : public Benchmark { @@ -40,7 +41,7 @@ protected: const char* onGetName() override { switch (fType) { case kMD5_ChecksumType: return "compute_md5"; - case kMurmur3_ChecksumType: return "compute_murmur3"; + case kHash_ChecksumType: return "compute_hash"; default: SK_ABORT("Invalid Type"); return ""; } @@ -56,9 +57,9 @@ protected: md5.finish(digest); } } break; - case kMurmur3_ChecksumType: { + case kHash_ChecksumType: { for (int i = 0; i < loops; i++) { - volatile uint32_t result = SkChecksum::Murmur3(fData, sizeof(fData)); + volatile uint32_t result = SkOpts::hash(fData, sizeof(fData)); sk_ignore_unused_variable(result); } }break; @@ -73,4 +74,4 @@ private: /////////////////////////////////////////////////////////////////////////////// DEF_BENCH( return new ComputeChecksumBench(kMD5_ChecksumType); ) -DEF_BENCH( return new ComputeChecksumBench(kMurmur3_ChecksumType); ) +DEF_BENCH( return new ComputeChecksumBench(kHash_ChecksumType); ) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index b18510ebd4..fd1b98cc56 
100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -130,12 +130,14 @@ endif() # Certain files must be compiled with support for SSSE3, SSE4.1, AVX, or AVX2 intrinsics. file (GLOB_RECURSE ssse3_srcs ../src/*ssse3*.cpp ../src/*SSSE3*.cpp) -file (GLOB_RECURSE sse41_srcs ../src/*sse4*.cpp ../src/*SSE4*.cpp) +file (GLOB_RECURSE sse41_srcs ../src/*sse41*.cpp ../src/*SSE41*.cpp) +file (GLOB_RECURSE sse42_srcs ../src/*sse42*.cpp ../src/*SSE42*.cpp) file (GLOB_RECURSE avx_srcs ../src/*_avx.cpp) file (GLOB_RECURSE avx2_srcs ../src/*_avx2.cpp) if (NOT WIN32) set_source_files_properties(${ssse3_srcs} PROPERTIES COMPILE_FLAGS -mssse3) set_source_files_properties(${sse41_srcs} PROPERTIES COMPILE_FLAGS -msse4.1) + set_source_files_properties(${sse42_srcs} PROPERTIES COMPILE_FLAGS -msse4.2) set_source_files_properties(${avx_srcs} PROPERTIES COMPILE_FLAGS -mavx) set_source_files_properties(${avx2_srcs} PROPERTIES COMPILE_FLAGS -mavx2) endif() diff --git a/gyp/core.gypi b/gyp/core.gypi index 306c90f5bc..76ed86bc91 100644 --- a/gyp/core.gypi +++ b/gyp/core.gypi @@ -65,7 +65,6 @@ '<(skia_src_path)/core/SkCachedData.cpp', '<(skia_src_path)/core/SkCanvas.cpp', '<(skia_src_path)/core/SkCanvasPriv.h', - '<(skia_src_path)/core/SkChecksum.cpp', '<(skia_src_path)/core/SkChunkAlloc.cpp', '<(skia_src_path)/core/SkClipStack.cpp', '<(skia_src_path)/core/SkColor.cpp', diff --git a/gyp/opts.gypi b/gyp/opts.gypi index 6db9d0ced8..e89334d216 100644 --- a/gyp/opts.gypi +++ b/gyp/opts.gypi @@ -49,13 +49,13 @@ 'sse41_sources': [ '<(skia_src_path)/opts/SkOpts_sse41.cpp', ], + 'sse42_sources': [ + '<(skia_src_path)/opts/SkOpts_sse42.cpp', + ], 'avx_sources': [ '<(skia_src_path)/opts/SkOpts_avx.cpp', ], # These targets are empty, but XCode doesn't like that, so add an empty file to each. 
- 'sse42_sources': [ - '<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp', - ], 'avx2_sources': [ '<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp', ], diff --git a/include/private/SkChecksum.h b/include/private/SkChecksum.h index 6289a444ae..8a04c89ae7 100644 --- a/include/private/SkChecksum.h +++ b/include/private/SkChecksum.h @@ -12,6 +12,12 @@ #include "SkTLogic.h" #include "SkTypes.h" +// #include "SkOpts.h" +// It's sort of pesky to be able to include SkOpts.h here, so we'll just re-declare what we need. +namespace SkOpts { + extern uint32_t (*hash_fn)(const void*, size_t, uint32_t); +} + class SkChecksum : SkNoncopyable { public: /** @@ -41,17 +47,6 @@ public: hash ^= hash >> 16; return hash; } - - /** - * Calculate 32-bit Murmur hash (murmur3). - * See en.wikipedia.org/wiki/MurmurHash. - * - * @param data Memory address of the data block to be processed. - * @param size Size of the data block in bytes. - * @param seed Initial hash seed. (optional) - * @return hash result - */ - static uint32_t Murmur3(const void* data, size_t bytes, uint32_t seed=0); }; // SkGoodHash should usually be your first choice in hashing data. @@ -64,11 +59,11 @@ struct SkGoodHash { template <typename K> SK_WHEN(sizeof(K) != 4, uint32_t) operator()(const K& k) const { - return SkChecksum::Murmur3(&k, sizeof(K)); + return SkOpts::hash_fn(&k, sizeof(K), 0); } uint32_t operator()(const SkString& k) const { - return SkChecksum::Murmur3(k.c_str(), k.size()); + return SkOpts::hash_fn(k.c_str(), k.size(), 0); } }; diff --git a/src/core/SkChecksum.cpp b/src/core/SkChecksum.cpp deleted file mode 100644 index 4457eb4f99..0000000000 --- a/src/core/SkChecksum.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. 
- */ - -#include "SkChecksum.h" - -uint32_t SkChecksum::Murmur3(const void* data, size_t bytes, uint32_t seed) { - // Use may_alias to remind the compiler we're intentionally violating strict aliasing, - // and so not to apply strict-aliasing-based optimizations. - typedef uint32_t SK_ATTRIBUTE(may_alias) aliased_uint32_t; - typedef uint8_t SK_ATTRIBUTE(may_alias) aliased_uint8_t; - - // Handle 4 bytes at a time while possible. - const aliased_uint32_t* safe_data = (const aliased_uint32_t*)data; - const size_t words = bytes/4; - uint32_t hash = seed; - for (size_t i = 0; i < words; i++) { - uint32_t k = safe_data[i]; - k *= 0xcc9e2d51; - k = (k << 15) | (k >> 17); - k *= 0x1b873593; - - hash ^= k; - hash = (hash << 13) | (hash >> 19); - hash *= 5; - hash += 0xe6546b64; - } - - // Handle last 0-3 bytes. - const aliased_uint8_t* safe_tail = (const uint8_t*)(safe_data + words); - uint32_t k = 0; - switch (bytes & 3) { - case 3: k ^= safe_tail[2] << 16; - case 2: k ^= safe_tail[1] << 8; - case 1: k ^= safe_tail[0] << 0; - k *= 0xcc9e2d51; - k = (k << 15) | (k >> 17); - k *= 0x1b873593; - hash ^= k; - } - - hash ^= bytes; - return SkChecksum::Mix(hash); -} diff --git a/src/core/SkDescriptor.h b/src/core/SkDescriptor.h index 71f71336fd..efa02783be 100644 --- a/src/core/SkDescriptor.h +++ b/src/core/SkDescriptor.h @@ -9,7 +9,7 @@ #ifndef SkDescriptor_DEFINED #define SkDescriptor_DEFINED -#include "SkChecksum.h" +#include "SkOpts.h" #include "SkTypes.h" class SkDescriptor : SkNoncopyable { @@ -123,7 +123,7 @@ private: static uint32_t ComputeChecksum(const SkDescriptor* desc) { const uint32_t* ptr = (const uint32_t*)desc + 1; // skip the checksum field size_t len = desc->fLength - sizeof(uint32_t); - return SkChecksum::Murmur3(ptr, len); + return SkOpts::hash(ptr, len); } // private so no one can create one except our factories diff --git a/src/core/SkImageFilterCache.cpp b/src/core/SkImageFilterCache.cpp index ba8a32c889..c7104def37 100644 --- 
a/src/core/SkImageFilterCache.cpp +++ b/src/core/SkImageFilterCache.cpp @@ -7,9 +7,9 @@ #include "SkImageFilterCache.h" -#include "SkChecksum.h" #include "SkMutex.h" #include "SkOnce.h" +#include "SkOpts.h" #include "SkRefCnt.h" #include "SkSpecialImage.h" #include "SkTDynamicHash.h" @@ -47,7 +47,7 @@ public: return v.fKey; } static uint32_t Hash(const Key& key) { - return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(&key), sizeof(Key)); + return SkOpts::hash(reinterpret_cast<const uint32_t*>(&key), sizeof(Key)); } SK_DECLARE_INTERNAL_LLIST_INTERFACE(Value); }; diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 5263fe46e0..a4da111630 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -26,6 +26,7 @@ #include "SkBlitMask_opts.h" #include "SkBlitRow_opts.h" #include "SkBlurImageFilter_opts.h" +#include "SkChecksum_opts.h" #include "SkColorCubeFilter_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkSwizzler_opts.h" @@ -70,12 +71,14 @@ namespace SkOpts { DEFINE_DEFAULT(inverted_CMYK_to_BGR1); DEFINE_DEFAULT(srcover_srgb_srgb); + + DEFINE_DEFAULT(hash_fn); #undef DEFINE_DEFAULT // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. void Init_ssse3(); void Init_sse41(); - void Init_sse42() {} + void Init_sse42(); void Init_avx(); void Init_avx2() {} diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 7c6cfb0dfb..44e337d950 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -65,6 +65,12 @@ namespace SkOpts { // Blend ndst src pixels over dst, where both src and dst point to sRGB pixels (RGBA or BGRA). // If nsrc < ndst, we loop over src to create a pattern. extern void (*srcover_srgb_srgb)(uint32_t* dst, const uint32_t* src, int ndst, int nsrc); + + // The fastest high quality 32-bit hash we can provide on this platform. 
+ extern uint32_t (*hash_fn)(const void*, size_t, uint32_t seed); + static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) { + return hash_fn(data, bytes, seed); + } } #endif//SkOpts_DEFINED diff --git a/src/core/SkPaint.cpp b/src/core/SkPaint.cpp index 07c10d5a78..44b2928c25 100644 --- a/src/core/SkPaint.cpp +++ b/src/core/SkPaint.cpp @@ -7,7 +7,6 @@ #include "SkPaint.h" #include "SkAutoKern.h" -#include "SkChecksum.h" #include "SkColorFilter.h" #include "SkData.h" #include "SkDraw.h" @@ -19,6 +18,7 @@ #include "SkMutex.h" #include "SkReadBuffer.h" #include "SkWriteBuffer.h" +#include "SkOpts.h" #include "SkPaintDefaults.h" #include "SkPathEffect.h" #include "SkRasterizer.h" @@ -2388,6 +2388,6 @@ uint32_t SkPaint::getHash() const { // so fBitfields should be 10 pointers and 6 32-bit values from the start. static_assert(offsetof(SkPaint, fBitfields) == 9 * sizeof(void*) + 6 * sizeof(uint32_t), "SkPaint_notPackedTightly"); - return SkChecksum::Murmur3(reinterpret_cast<const uint32_t*>(this), - offsetof(SkPaint, fBitfields) + sizeof(fBitfields)); + return SkOpts::hash(reinterpret_cast<const uint32_t*>(this), + offsetof(SkPaint, fBitfields) + sizeof(fBitfields)); } diff --git a/src/core/SkResourceCache.cpp b/src/core/SkResourceCache.cpp index e465132288..4bdc8dd9e0 100644 --- a/src/core/SkResourceCache.cpp +++ b/src/core/SkResourceCache.cpp @@ -5,10 +5,10 @@ * found in the LICENSE file. 
*/ -#include "SkChecksum.h" #include "SkMessageBus.h" #include "SkMipMap.h" #include "SkMutex.h" +#include "SkOpts.h" #include "SkPixelRef.h" #include "SkResourceCache.h" #include "SkTraceMemoryDump.h" @@ -46,9 +46,9 @@ void SkResourceCache::Key::init(void* nameSpace, uint64_t sharedID, size_t dataS fSharedID_lo = (uint32_t)sharedID; fSharedID_hi = (uint32_t)(sharedID >> 32); fNamespace = nameSpace; - // skip unhashed fields when computing the murmur - fHash = SkChecksum::Murmur3(this->as32() + kUnhashedLocal32s, - (fCount32 - kUnhashedLocal32s) << 2); + // skip unhashed fields when computing the hash + fHash = SkOpts::hash(this->as32() + kUnhashedLocal32s, + (fCount32 - kUnhashedLocal32s) << 2); } #include "SkTDynamicHash.h" diff --git a/src/gpu/GrProgramDesc.h b/src/gpu/GrProgramDesc.h index ec6447d622..b17d146a8c 100644 --- a/src/gpu/GrProgramDesc.h +++ b/src/gpu/GrProgramDesc.h @@ -10,7 +10,8 @@ #include "GrColor.h" #include "GrTypesPriv.h" -#include "SkChecksum.h" +#include "SkOpts.h" +#include "SkTArray.h" /** This class describes a program to generate. It also serves as a program cache key. Very little of this is GL-specific. The GL-specific parts could be factored out into a subclass. */ @@ -112,7 +113,7 @@ protected: uint32_t* checksum = this->atOffset(); *checksum = 0; // We'll hash through these bytes, so make sure they're initialized. 
- *checksum = SkChecksum::Murmur3(fKey.begin(), keyLength); + *checksum = SkOpts::hash(fKey.begin(), keyLength); } // The key, stored in fKey, is composed of four parts: diff --git a/src/gpu/GrResourceCache.cpp b/src/gpu/GrResourceCache.cpp index 71e40f1a53..62360ed535 100644 --- a/src/gpu/GrResourceCache.cpp +++ b/src/gpu/GrResourceCache.cpp @@ -11,9 +11,9 @@ #include "GrCaps.h" #include "GrGpuResourceCacheAccess.h" #include "GrTracing.h" -#include "SkChecksum.h" #include "SkGr.h" #include "SkMessageBus.h" +#include "SkOpts.h" #include "SkTSort.h" DECLARE_SKMESSAGEBUS_MESSAGE(GrUniqueKeyInvalidatedMessage); @@ -43,7 +43,7 @@ GrUniqueKey::Domain GrUniqueKey::GenerateDomain() { } uint32_t GrResourceKeyHash(const uint32_t* data, size_t size) { - return SkChecksum::Murmur3(data, size); + return SkOpts::hash(data, size); } ////////////////////////////////////////////////////////////////////////////// @@ -687,7 +687,7 @@ void GrResourceCache::validate() const { SkASSERT(SkBudgeted::kNo == resource->resourcePriv().isBudgeted() || uniqueKey.isValid()); if (!uniqueKey.isValid()) { - ++fCouldBeScratch; + ++fCouldBeScratch; SkASSERT(fScratchMap->countForKey(scratchKey)); } SkASSERT(!resource->resourcePriv().refsWrappedObjects()); diff --git a/src/gpu/batches/GrAADistanceFieldPathRenderer.h b/src/gpu/batches/GrAADistanceFieldPathRenderer.h index db17b07d36..985b2f1537 100755 --- a/src/gpu/batches/GrAADistanceFieldPathRenderer.h +++ b/src/gpu/batches/GrAADistanceFieldPathRenderer.h @@ -13,7 +13,7 @@ #include "GrRect.h" #include "GrShape.h" -#include "SkChecksum.h" +#include "SkOpts.h" #include "SkTDynamicHash.h" class GrContext; @@ -81,7 +81,7 @@ private: } static inline uint32_t Hash(Key key) { - return SkChecksum::Murmur3(key.data(), sizeof(uint32_t) * key.count32()); + return SkOpts::hash(key.data(), sizeof(uint32_t) * key.count32()); } }; diff --git a/src/gpu/effects/GrTextureStripAtlas.h b/src/gpu/effects/GrTextureStripAtlas.h index 91ce61c232..5b90a342d7 100644 --- 
a/src/gpu/effects/GrTextureStripAtlas.h +++ b/src/gpu/effects/GrTextureStripAtlas.h @@ -9,7 +9,7 @@ #define GrTextureStripAtlas_DEFINED #include "SkBitmap.h" -#include "SkChecksum.h" +#include "SkOpts.h" #include "SkGr.h" #include "SkTDArray.h" #include "SkTDynamicHash.h" @@ -142,7 +142,7 @@ private: public: // for SkTDynamicHash static const Desc& GetKey(const AtlasEntry& entry) { return entry.fDesc; } - static uint32_t Hash(const Desc& desc) { return SkChecksum::Murmur3(&desc, sizeof(Desc)); } + static uint32_t Hash(const Desc& desc) { return SkOpts::hash(&desc, sizeof(Desc)); } // AtlasEntry proper AtlasEntry() : fAtlas(nullptr) {} diff --git a/src/gpu/text/GrAtlasTextBlob.h b/src/gpu/text/GrAtlasTextBlob.h index f76d02644f..afc11a9bd4 100644 --- a/src/gpu/text/GrAtlasTextBlob.h +++ b/src/gpu/text/GrAtlasTextBlob.h @@ -14,6 +14,7 @@ #include "GrMemoryPool.h" #include "SkDescriptor.h" #include "SkMaskFilter.h" +#include "SkOpts.h" #include "SkPathEffect.h" #include "SkRasterizer.h" #include "SkSurfaceProps.h" @@ -89,7 +90,7 @@ public: } static uint32_t Hash(const Key& key) { - return SkChecksum::Murmur3(&key, sizeof(Key)); + return SkOpts::hash(&key, sizeof(Key)); } void operator delete(void* p) { diff --git a/src/gpu/text/GrStencilAndCoverTextContext.h b/src/gpu/text/GrStencilAndCoverTextContext.h index 9b29719cb7..d4f1abd38a 100644 --- a/src/gpu/text/GrStencilAndCoverTextContext.h +++ b/src/gpu/text/GrStencilAndCoverTextContext.h @@ -11,6 +11,7 @@ #include "GrDrawContext.h" #include "GrStyle.h" #include "SkDrawFilter.h" +#include "SkOpts.h" #include "SkTextBlob.h" #include "SkTHash.h" #include "SkTInternalLList.h" @@ -120,7 +121,7 @@ private: static uint32_t Hash(const Key& key) { SkASSERT(key.count() > 1); // 1-length keys should be using the blob-id hash map. 
- return SkChecksum::Murmur3(key.begin(), sizeof(uint32_t) * key.count()); + return SkOpts::hash(key.begin(), sizeof(uint32_t) * key.count()); } TextBlob(uint32_t blobId, const SkTextBlob* skBlob, const SkPaint& skPaint) diff --git a/src/gpu/vk/GrVkPipelineStateCache.cpp b/src/gpu/vk/GrVkPipelineStateCache.cpp index 5e4013df14..d404a8d444 100644 --- a/src/gpu/vk/GrVkPipelineStateCache.cpp +++ b/src/gpu/vk/GrVkPipelineStateCache.cpp @@ -11,6 +11,7 @@ #include "GrProcessor.h" #include "GrVkPipelineState.h" #include "GrVkPipelineStateBuilder.h" +#include "SkOpts.h" #include "glsl/GrGLSLFragmentProcessor.h" #include "glsl/GrGLSLProgramDataManager.h" @@ -112,8 +113,8 @@ sk_sp GrVkResourceProvider::PipelineStateCache::refPipelineSt int keyLength = desc.fStateKey.count(); SkASSERT(0 == (keyLength % 4)); // Seed the checksum with the checksum of the programDesc then add the vulkan key to it. - desc.fChecksum = SkChecksum::Murmur3(desc.fStateKey.begin(), keyLength, - desc.fProgramDesc.getChecksum()); + desc.fChecksum = SkOpts::hash(desc.fStateKey.begin(), keyLength, + desc.fProgramDesc.getChecksum()); Entry* entry = nullptr; if (Entry** entryptr = fHashTable.find(desc)) { diff --git a/src/opts/SkChecksum_opts.h b/src/opts/SkChecksum_opts.h new file mode 100644 index 0000000000..346b16b3f5 --- /dev/null +++ b/src/opts/SkChecksum_opts.h @@ -0,0 +1,130 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkChecksum_opts_DEFINED +#define SkChecksum_opts_DEFINED + +#include "SkChecksum.h" +#include "SkTypes.h" + +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 + #include <immintrin.h> +#endif + +// TODO: ARMv8 has optional CRC instructions similar to SSE 4.2 +// TODO: 32-bit x86 version: same sort of idea using only _mm_crc32_u32() and smaller + +namespace SK_OPTS_NS { + +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE42 && (defined(__x86_64__) || defined(_M_X64)) + template <typename T> + static inline T unaligned_load(const uint8_t* src) { + T val; + memcpy(&val, src, sizeof(val)); + return val; + } + + static uint32_t hash_fn(const void* vdata, size_t bytes, uint32_t seed) { + auto data = (const uint8_t*)vdata; + + // _mm_crc32_u64() operates on 64-bit registers, so we use uint64_t for a while. + uint64_t hash = seed; + if (bytes >= 24) { + // We'll create 3 independent hashes, each using _mm_crc32_u64() + // to hash 8 bytes per step. Both 3 and independent are important: + // we can execute 3 of these instructions in parallel on a single core. + uint64_t a = hash, + b = hash, + c = hash; + size_t steps = bytes/24; + while (steps --> 0) { + a = _mm_crc32_u64(a, unaligned_load<uint64_t>(data+ 0)); + b = _mm_crc32_u64(b, unaligned_load<uint64_t>(data+ 8)); + c = _mm_crc32_u64(c, unaligned_load<uint64_t>(data+16)); + data += 24; + } + bytes %= 24; + hash = a^b^c; + } + + SkASSERT(bytes < 24); + if (bytes >= 16) { + hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data)); + bytes -= 8; + data += 8; + } + + SkASSERT(bytes < 16); + if (bytes & 8) { + hash = _mm_crc32_u64(hash, unaligned_load<uint64_t>(data)); + data += 8; + } + + // The remainder of these _mm_crc32_u*() operate on a 32-bit register. + // We don't lose anything here: only the bottom 32-bits were populated. 
+ auto hash32 = (uint32_t)hash; + + if (bytes & 4) { + hash32 = _mm_crc32_u32(hash32, unaligned_load<uint32_t>(data)); + data += 4; + } + if (bytes & 2) { + hash32 = _mm_crc32_u16(hash32, unaligned_load<uint16_t>(data)); + data += 2; + } + if (bytes & 1) { + hash32 = _mm_crc32_u8(hash32, unaligned_load<uint8_t>(data)); + } + return hash32; + } + +#else + static uint32_t hash_fn(const void* data, size_t bytes, uint32_t seed) { + // This is Murmur3. + + // Use may_alias to remind the compiler we're intentionally violating strict aliasing, + // and so not to apply strict-aliasing-based optimizations. + typedef uint32_t SK_ATTRIBUTE(may_alias) aliased_uint32_t; + typedef uint8_t SK_ATTRIBUTE(may_alias) aliased_uint8_t; + + // Handle 4 bytes at a time while possible. + const aliased_uint32_t* safe_data = (const aliased_uint32_t*)data; + const size_t words = bytes/4; + uint32_t hash = seed; + for (size_t i = 0; i < words; i++) { + uint32_t k = safe_data[i]; + k *= 0xcc9e2d51; + k = (k << 15) | (k >> 17); + k *= 0x1b873593; + + hash ^= k; + hash = (hash << 13) | (hash >> 19); + hash *= 5; + hash += 0xe6546b64; + } + + // Handle last 0-3 bytes. + const aliased_uint8_t* safe_tail = (const uint8_t*)(safe_data + words); + uint32_t k = 0; + switch (bytes & 3) { + case 3: k ^= safe_tail[2] << 16; + case 2: k ^= safe_tail[1] << 8; + case 1: k ^= safe_tail[0] << 0; + k *= 0xcc9e2d51; + k = (k << 15) | (k >> 17); + k *= 0x1b873593; + hash ^= k; + } + + hash ^= bytes; + return SkChecksum::Mix(hash); + } +#endif + +} // namespace SK_OPTS_NS + +#endif//SkChecksum_opts_DEFINED diff --git a/src/opts/SkOpts_sse42.cpp b/src/opts/SkOpts_sse42.cpp new file mode 100644 index 0000000000..1883182192 --- /dev/null +++ b/src/opts/SkOpts_sse42.cpp @@ -0,0 +1,18 @@ +/* + * Copyright 2016 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#include "SkOpts.h" + +#define SK_OPTS_NS sse42 +#include "SkChecksum_opts.h" + +namespace SkOpts { + void Init_sse42() { + hash_fn = sse42::hash_fn; + } +} + diff --git a/src/pdf/SkPDFGraphicState.h b/src/pdf/SkPDFGraphicState.h index 84491ba0d3..49723bd723 100644 --- a/src/pdf/SkPDFGraphicState.h +++ b/src/pdf/SkPDFGraphicState.h @@ -10,7 +10,7 @@ #define SkPDFGraphicState_DEFINED #include "SkPDFTypes.h" -#include "SkChecksum.h" +#include "SkOpts.h" class SkPaint; class SkPDFCanon; @@ -63,7 +63,7 @@ public: bool operator==(const SkPDFGraphicState& rhs) const { return 0 == memcmp(&fStrokeWidth, &rhs.fStrokeWidth, 12); } - uint32_t hash() const { return SkChecksum::Murmur3(&fStrokeWidth, 12); } + uint32_t hash() const { return SkOpts::hash(&fStrokeWidth, 12); } private: const SkScalar fStrokeWidth; diff --git a/src/utils/SkWhitelistTypefaces.cpp b/src/utils/SkWhitelistTypefaces.cpp index ac82f7c6cb..139e697ca0 100644 --- a/src/utils/SkWhitelistTypefaces.cpp +++ b/src/utils/SkWhitelistTypefaces.cpp @@ -5,8 +5,8 @@ * found in the LICENSE file. */ -#include "SkChecksum.h" #include "SkFontDescriptor.h" +#include "SkOpts.h" #include "SkStream.h" #include "SkString.h" #include "SkTypeface.h" @@ -80,7 +80,7 @@ static uint32_t compute_checksum(const SkTypeface* tf) { if (!fontStream->peek(data.begin(), length)) { return 0; } - return SkChecksum::Murmur3(data.begin(), length); + return SkOpts::hash(data.begin(), length); } static void serialize_sub(const char* fontName, SkFontStyle style, SkWStream* wstream) { diff --git a/tests/ChecksumTest.cpp b/tests/ChecksumTest.cpp index cf9d65c59e..2aaf2688b7 100644 --- a/tests/ChecksumTest.cpp +++ b/tests/ChecksumTest.cpp @@ -5,21 +5,11 @@ * found in the LICENSE file. */ -#include "SkChecksum.h" +#include "SkOpts.h" #include "SkRandom.h" #include "Test.h" - -// Murmur3 has an optional third seed argument, so we wrap it to fit a uniform type. 
-static uint32_t murmur_noseed(const uint32_t* d, size_t l) { return SkChecksum::Murmur3(d, l); } - -#define ASSERT(x) REPORTER_ASSERT(r, x) - DEF_TEST(Checksum, r) { - // Algorithms to test. They're currently all uint32_t(const uint32_t*, size_t). - typedef uint32_t(*algorithmProc)(const uint32_t*, size_t); - const algorithmProc kAlgorithms[] = { &murmur_noseed }; - // Put 128 random bytes into two identical buffers. Any multiple of 4 will do. const size_t kBytes = SkAlign4(128); SkRandom rand; @@ -28,38 +18,26 @@ DEF_TEST(Checksum, r) { data[i] = tweaked[i] = rand.nextU(); } - // Test each algorithm. - for (size_t i = 0; i < SK_ARRAY_COUNT(kAlgorithms); ++i) { - const algorithmProc algorithm = kAlgorithms[i]; + // Hash of nullptr is always 0. + REPORTER_ASSERT(r, SkOpts::hash(nullptr, 0) == 0); - // Hash of nullptr is always 0. - ASSERT(algorithm(nullptr, 0) == 0); + const uint32_t hash = SkOpts::hash(data, kBytes); + // Should be deterministic. + REPORTER_ASSERT(r, hash == SkOpts::hash(data, kBytes)); - const uint32_t hash = algorithm(data, kBytes); - // Should be deterministic. - ASSERT(hash == algorithm(data, kBytes)); - - // Changing any single element should change the hash. - for (size_t j = 0; j < SK_ARRAY_COUNT(tweaked); ++j) { - const uint32_t saved = tweaked[j]; - tweaked[j] = rand.nextU(); - const uint32_t tweakedHash = algorithm(tweaked, kBytes); - ASSERT(tweakedHash != hash); - ASSERT(tweakedHash == algorithm(tweaked, kBytes)); - tweaked[j] = saved; - } + // Changing any single element should change the hash. + for (size_t j = 0; j < SK_ARRAY_COUNT(tweaked); ++j) { + const uint32_t saved = tweaked[j]; + tweaked[j] = rand.nextU(); + const uint32_t tweakedHash = SkOpts::hash(tweaked, kBytes); + REPORTER_ASSERT(r, tweakedHash != hash); + REPORTER_ASSERT(r, tweakedHash == SkOpts::hash(tweaked, kBytes)); + tweaked[j] = saved; } } DEF_TEST(GoodHash, r) { - ASSERT(SkGoodHash()(( int32_t)4) == 614249093); // 4 bytes. Hits SkChecksum::Mix fast path. 
- ASSERT(SkGoodHash()((uint32_t)4) == 614249093); // (Ditto) - - // None of these are 4 byte sized, so they use SkChecksum::Murmur3, not SkChecksum::Mix. - ASSERT(SkGoodHash()((uint64_t)4) == 3491892518); - ASSERT(SkGoodHash()((uint16_t)4) == 899251846); - ASSERT(SkGoodHash()( (uint8_t)4) == 962700458); - - // Tests SkString is correctly specialized. - ASSERT(SkGoodHash()(SkString("Hi")) == 55667557); + // 4 bytes --> hits SkChecksum::Mix fast path. + REPORTER_ASSERT(r, SkGoodHash()(( int32_t)4) == 614249093); + REPORTER_ASSERT(r, SkGoodHash()((uint32_t)4) == 614249093); } diff --git a/tools/UrlDataManager.h b/tools/UrlDataManager.h index 49646ca607..bae1dc7047 100644 --- a/tools/UrlDataManager.h +++ b/tools/UrlDataManager.h @@ -8,8 +8,8 @@ #ifndef SkUrlDataManager_DEFINED #define SkUrlDataManager_DEFINED -#include "SkChecksum.h" #include "SkData.h" +#include "SkOpts.h" #include "SkString.h" #include "SkTDynamicHash.h" @@ -52,7 +52,7 @@ private: } static uint32_t Hash(const SkData& key) { - return SkChecksum::Murmur3(key.bytes(), key.size()); + return SkOpts::hash(key.bytes(), key.size()); } }; @@ -62,7 +62,7 @@ private: } static uint32_t Hash(const SkString& key) { - return SkChecksum::Murmur3(key.c_str(), strlen(key.c_str())); + return SkOpts::hash(key.c_str(), strlen(key.c_str())); } };