Replace SkConsistentChecksum with SkCityHash (now including CityHash via DEPS)

Alternative to https://codereview.appspot.com/6847087/ ('Change SkConsistentChecksum to use SuperFastHash')
Review URL: https://codereview.appspot.com/6867060

git-svn-id: http://skia.googlecode.com/svn/trunk@6701 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
epoger@google.com 2012-12-07 15:12:01 +00:00
parent d21444aab7
commit 0bba6bd78a
8 changed files with 166 additions and 144 deletions

1
DEPS
View File

@ -9,6 +9,7 @@ use_relative_paths = True
# #
deps = { deps = {
"third_party/externals/angle" : "http://angleproject.googlecode.com/svn/trunk@1268", "third_party/externals/angle" : "http://angleproject.googlecode.com/svn/trunk@1268",
"third_party/externals/cityhash" : "http://cityhash.googlecode.com/svn/trunk@11",
"third_party/externals/freetype" : "https://android.googlesource.com/platform/external/freetype.git", "third_party/externals/freetype" : "https://android.googlesource.com/platform/external/freetype.git",
"third_party/externals/gyp" : "http://gyp.googlecode.com/svn/trunk@1517", "third_party/externals/gyp" : "http://gyp.googlecode.com/svn/trunk@1517",
"third_party/externals/libjpeg" : "http://src.chromium.org/svn/trunk/src/third_party/libjpeg@125399", "third_party/externals/libjpeg" : "http://src.chromium.org/svn/trunk/src/third_party/libjpeg@125399",

View File

@ -5,6 +5,9 @@
'product_name': 'skia_utils', 'product_name': 'skia_utils',
'type': 'static_library', 'type': 'static_library',
'standalone_static_library': 1, 'standalone_static_library': 1,
'dependencies': [
'cityhash',
],
'include_dirs': [ 'include_dirs': [
'../include/config', '../include/config',
'../include/core', '../include/core',
@ -23,6 +26,7 @@
'../include/utils/SkCountdown.h', '../include/utils/SkCountdown.h',
'../include/utils/SkRunnable.h', '../include/utils/SkRunnable.h',
'../include/utils/SkThreadPool.h', '../include/utils/SkThreadPool.h',
'../src/utils/SkCityHash.cpp',
'../src/utils/SkCondVar.cpp', '../src/utils/SkCondVar.cpp',
'../src/utils/SkCountdown.cpp', '../src/utils/SkCountdown.cpp',
'../src/utils/SkThreadPool.cpp', '../src/utils/SkThreadPool.cpp',
@ -193,6 +197,25 @@
], ],
}, },
}, },
# Static-library target that compiles the third-party CityHash source
# (city.cc) from third_party/externals/cityhash.
{
'target_name': 'cityhash',
'type': 'static_library',
'standalone_static_library': 1,
# Include paths used while compiling city.cc itself.
# NOTE(review): '../src/utils/cityhash' is presumably where the hand-written
# replacement for autoconf's config.h lives -- confirm against the tree.
'include_dirs': [
'../include/config',
'../include/core',
'../src/utils/cityhash',
'../third_party/externals/cityhash/src',
],
'sources': [
'../third_party/externals/cityhash/src/city.cc',
],
# Targets that list 'cityhash' in their 'dependencies' get the CityHash
# source directory added to their own include path.
'direct_dependent_settings': {
'include_dirs': [
'../third_party/externals/cityhash/src',
],
},
},
], ],
} }

23
src/utils/SkCityHash.cpp Normal file
View File

@ -0,0 +1,23 @@
/*
* Copyright 2012 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
/**
* Pass any calls through to the CityHash library.
* This is the only source file that accesses the CityHash code directly.
*/
#include "SkCityHash.h"
#include "SkTypes.h"
#include "city.h"
/** Forwards the byte block to CityHash32() and returns its 32-bit result. */
uint32_t SkCityHash::Compute32(const char *data, size_t size) {
    const uint32_t checksum = CityHash32(data, size);
    return checksum;
}
/** Forwards the byte block to CityHash64() and returns its 64-bit result. */
uint64_t SkCityHash::Compute64(const char *data, size_t size) {
    const uint64_t checksum = CityHash64(data, size);
    return checksum;
}

47
src/utils/SkCityHash.h Normal file
View File

@ -0,0 +1,47 @@
/*
* Copyright 2012 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
/**
* Hash functions, using the CityHash algorithm.
*
* Results are guaranteed to be:
* 1. consistent across revisions of the library (for a given set
* of bytes, the checksum generated at one revision of the Skia
* library will match the one generated on any other revision of
* the Skia library)
* 2. consistent across platforms (for a given
* set of bytes, the checksum generated on one platform will
* match the one generated on any other platform)
*/
#ifndef SkCityHash_DEFINED
#define SkCityHash_DEFINED
#include "SkTypes.h"
/**
 *  Static-only entry points for hashing a block of bytes.
 *  Exposes one 32-bit and one 64-bit variant; both take the same arguments.
 */
class SkCityHash : SkNoncopyable {
public:
    /**
     *  Hash a block of bytes down to a 32-bit value.
     *
     *  @param data  Address of the first byte of the block.
     *  @param size  Length of the block, in bytes.
     *  @return The 32-bit checksum of the block.
     */
    static uint32_t Compute32(const char *data, size_t size);

    /**
     *  Hash a block of bytes down to a 64-bit value.
     *
     *  @param data  Address of the first byte of the block.
     *  @param size  Length of the block, in bytes.
     *  @return The 64-bit checksum of the block.
     */
    static uint64_t Compute64(const char *data, size_t size);
};
#endif

View File

@ -1,95 +0,0 @@
/*
* Copyright 2012 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkConsistentChecksum_DEFINED
#define SkConsistentChecksum_DEFINED
#include "SkTypes.h"
/**
* Rotate-and-XOR checksum over a block of 32-bit words, exposed through the
* single static entry point Compute().
*/
class SkConsistentChecksum : SkNoncopyable {
private:
/*
* Shift amounts for the Rotate and Mash helpers, expressed in terms of
* sizeof(uintptr_t) so they are meant to do the right thing whether
* uintptr_t is 4 or 8 bytes wide:
*   ROTR     - right-shift amount of the rotation
*   ROTL     - complementary left-shift amount (bit width minus ROTR)
*   HALFBITS - half the bit width of uintptr_t
*/
enum {
ROTR = 17,
ROTL = sizeof(uintptr_t) * 8 - ROTR,
HALFBITS = sizeof(uintptr_t) * 4
};
/*
* Rotate 'total' right by ROTR bits (within the width of uintptr_t) and
* XOR 'value' into the rotated result.
*/
static inline uintptr_t Mash(uintptr_t total, uintptr_t value) {
return ((total >> ROTR) | (total << ROTL)) ^ value;
}
public:
/**
* Compute a 32-bit checksum for a given data block
*
* WARNING: As of 1 Nov 2012, this algorithm is still in
* flux... but once we get it doing what we want, it will be:
* 1. consistent across revisions of the library (for a given set
* of bytes, the checksum generated at one revision of the Skia
* library will match the one generated on any other revision of
* the Skia library)
* 2. consistent across platforms (for a given
* set of bytes, the checksum generated on one platform will
* match the one generated on any other platform)
*
* NOTE(review): the main loop below reads the input sizeof(uintptr_t)
* bytes at a time, so blocks long enough to enter that loop are consumed
* in differently sized words on 4-byte and 8-byte uintptr_t builds; this
* looks at odds with goal 2 above -- confirm.
*
* @param data Memory address of the data block to be processed. Must be
* 32-bit aligned.
* @param size Size of the data block in bytes. Must be a multiple of 4.
* @return checksum result
*/
static uint32_t Compute(const uint32_t* data, size_t size) {
/* Enforces the "multiple of 4" contract on size. */
SkASSERT(SkIsAlign4(size));
/*
* We want to let the compiler use 32bit or 64bit addressing and math
* so we use uintptr_t as our magic type. This makes the code a little
* more obscure (we can't hard-code 32 or 64 anywhere, but have to use
* sizeof()).
*
* NOTE(review): 'data' is only documented as 32-bit aligned, yet it is
* dereferenced through a const uintptr_t*; when uintptr_t is 8 bytes
* this may be a misaligned read -- confirm.
*/
uintptr_t result = 0;
const uintptr_t* ptr = reinterpret_cast<const uintptr_t*>(data);
/*
* count the number of quad element chunks. This takes into account
* if we're on a 32bit or 64bit arch, since we use sizeof(uintptr_t)
* to compute how much to shift-down the size.
*/
size_t n4 = size / (sizeof(uintptr_t) << 2);
/* Main loop: four uintptr_t-sized words are mashed in per iteration. */
for (size_t i = 0; i < n4; ++i) {
result = Mash(result, *ptr++);
result = Mash(result, *ptr++);
result = Mash(result, *ptr++);
result = Mash(result, *ptr++);
}
/*
* Tail: keep only the byte count left over after the quad chunks, then
* mash in the remaining data one 32-bit word at a time.
*/
size &= ((sizeof(uintptr_t) << 2) - 1);
data = reinterpret_cast<const uint32_t*>(ptr);
const uint32_t* stop = data + (size >> 2);
while (data < stop) {
result = Mash(result, *data++);
}
/*
* smash us down to 32bits if we were 64. Note that when uintptr_t is
* 32bits, this code-path should go away, but I still got a warning
* when I wrote
* result ^= result >> 32;
* since >>32 is undefined for 32bit ints, hence the wacky HALFBITS
* define.
*/
if (8 == sizeof(result)) {
result ^= result >> HALFBITS;
}
return static_cast<uint32_t>(result);
}
};
#endif

View File

@ -0,0 +1,2 @@
This directory contains files needed to build third_party/externals/cityhash
(such as the config.h file that would normally be created by autoconf)

View File

@ -0,0 +1,17 @@
/*
* Copyright 2012 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
/**
* Converts from Skia build flags to the macro definitions cityhash normally
* gets from autoconf.
*/
#include "SkTypes.h"
/*
 * Map Skia's big-endian build flag onto WORDS_BIGENDIAN, one of the macros
 * cityhash would otherwise get from autoconf. When SK_CPU_BENDIAN is not
 * defined, WORDS_BIGENDIAN is left undefined.
 * NOTE(review): SK_CPU_BENDIAN is presumably provided via the SkTypes.h
 * include above -- confirm.
 */
#ifdef SK_CPU_BENDIAN
#define WORDS_BIGENDIAN 1
#endif

View File

@ -7,7 +7,10 @@
*/ */
#include "Test.h" #include "Test.h"
#include "SkChecksum.h" #include "SkChecksum.h"
#include "SkConsistentChecksum.h" #include "SkCityHash.h"
// Word size that is large enough to hold results of any checksum type.
typedef uint64_t checksum_result;
namespace skiatest { namespace skiatest {
class ChecksumTestClass : public Test { class ChecksumTestClass : public Test {
@ -22,26 +25,25 @@ namespace skiatest {
private: private:
enum Algorithm { enum Algorithm {
kSkChecksum, kSkChecksum,
kSkConsistentChecksum kSkCityHash32,
kSkCityHash64
}; };
// Call Compute(data, size) on the appropriate checksum algorithm, // Call Compute(data, size) on the appropriate checksum algorithm,
// depending on this->fWhichAlgorithm. // depending on this->fWhichAlgorithm.
uint32_t ComputeChecksum(uint32_t* data, size_t size) { checksum_result ComputeChecksum(const char *data, size_t size) {
// Our checksum algorithms require 32-bit aligned data. switch(fWhichAlgorithm) {
// If either of these tests fail, then the algorithm case kSkChecksum:
// doesn't have a chance.
REPORTER_ASSERT_MESSAGE(fReporter, REPORTER_ASSERT_MESSAGE(fReporter,
reinterpret_cast<uintptr_t>(data) % 4 == 0, reinterpret_cast<uintptr_t>(data) % 4 == 0,
"test data pointer is not 32-bit aligned"); "test data pointer is not 32-bit aligned");
REPORTER_ASSERT_MESSAGE(fReporter, SkIsAlign4(size), REPORTER_ASSERT_MESSAGE(fReporter, SkIsAlign4(size),
"test data size is not 32-bit aligned"); "test data size is not 32-bit aligned");
return SkChecksum::Compute(reinterpret_cast<const uint32_t *>(data), size);
switch(fWhichAlgorithm) { case kSkCityHash32:
case kSkChecksum: return SkCityHash::Compute32(data, size);
return SkChecksum::Compute(data, size); case kSkCityHash64:
case kSkConsistentChecksum: return SkCityHash::Compute64(data, size);
return SkConsistentChecksum::Compute(data, size);
default: default:
SkString message("fWhichAlgorithm has unknown value "); SkString message("fWhichAlgorithm has unknown value ");
message.appendf("%d", fWhichAlgorithm); message.appendf("%d", fWhichAlgorithm);
@ -55,16 +57,22 @@ namespace skiatest {
// generates the same results if called twice over the same data. // generates the same results if called twice over the same data.
void TestChecksumSelfConsistency(size_t buf_size) { void TestChecksumSelfConsistency(size_t buf_size) {
SkAutoMalloc storage(buf_size); SkAutoMalloc storage(buf_size);
uint32_t* ptr = (uint32_t*)storage.get(); char* ptr = reinterpret_cast<char *>(storage.get());
char* cptr = (char*)ptr;
REPORTER_ASSERT(fReporter,
GetTestDataChecksum(8, 0) ==
GetTestDataChecksum(8, 0));
REPORTER_ASSERT(fReporter,
GetTestDataChecksum(8, 0) !=
GetTestDataChecksum(8, 1));
sk_bzero(ptr, buf_size); sk_bzero(ptr, buf_size);
uint32_t prev = 0; checksum_result prev = 0;
// assert that as we change values (from 0 to non-zero) in // assert that as we change values (from 0 to non-zero) in
// our buffer, we get a different value // our buffer, we get a different value
for (size_t i = 0; i < buf_size; ++i) { for (size_t i = 0; i < buf_size; ++i) {
cptr[i] = (i & 0x7f) + 1; // need some non-zero value here ptr[i] = (i & 0x7f) + 1; // need some non-zero value here
// Try checksums of different-sized chunks, but always // Try checksums of different-sized chunks, but always
// 32-bit aligned and big enough to contain all the // 32-bit aligned and big enough to contain all the
@ -73,9 +81,9 @@ namespace skiatest {
size_t checksum_size = (((i/4)+1)*4); size_t checksum_size = (((i/4)+1)*4);
REPORTER_ASSERT(fReporter, checksum_size <= buf_size); REPORTER_ASSERT(fReporter, checksum_size <= buf_size);
uint32_t curr = ComputeChecksum(ptr, checksum_size); checksum_result curr = ComputeChecksum(ptr, checksum_size);
REPORTER_ASSERT(fReporter, prev != curr); REPORTER_ASSERT(fReporter, prev != curr);
uint32_t again = ComputeChecksum(ptr, checksum_size); checksum_result again = ComputeChecksum(ptr, checksum_size);
REPORTER_ASSERT(fReporter, again == curr); REPORTER_ASSERT(fReporter, again == curr);
prev = curr; prev = curr;
} }
@ -84,48 +92,49 @@ namespace skiatest {
// Return the checksum of a buffer of bytes 'len' long. // Return the checksum of a buffer of bytes 'len' long.
// The pattern of values within the buffer will be consistent // The pattern of values within the buffer will be consistent
// for every call, based on 'seed'. // for every call, based on 'seed'.
uint32_t GetTestDataChecksum(size_t len, char seed=0) { checksum_result GetTestDataChecksum(size_t len, char seed=0) {
SkAutoMalloc storage(len); SkAutoMalloc storage(len);
uint32_t* start = (uint32_t *)storage.get(); char* start = reinterpret_cast<char *>(storage.get());
char* ptr = (char *)start; char* ptr = start;
for (size_t i = 0; i < len; ++i) { for (size_t i = 0; i < len; ++i) {
*ptr++ = ((seed+i) & 0x7f); *ptr++ = ((seed+i) & 0x7f);
} }
uint32_t result = ComputeChecksum(start, len); checksum_result result = ComputeChecksum(start, len);
return result; return result;
} }
void RunTest() { void RunTest() {
// Test self-consistency of checksum algorithms. // Test self-consistency of checksum algorithms.
fWhichAlgorithm = kSkChecksum; fWhichAlgorithm = kSkChecksum;
REPORTER_ASSERT(fReporter,
GetTestDataChecksum(8, 0) ==
GetTestDataChecksum(8, 0));
REPORTER_ASSERT(fReporter,
GetTestDataChecksum(8, 0) !=
GetTestDataChecksum(8, 1));
TestChecksumSelfConsistency(128); TestChecksumSelfConsistency(128);
fWhichAlgorithm = kSkConsistentChecksum; fWhichAlgorithm = kSkCityHash32;
REPORTER_ASSERT(fReporter, TestChecksumSelfConsistency(128);
GetTestDataChecksum(8, 0) == fWhichAlgorithm = kSkCityHash64;
GetTestDataChecksum(8, 0));
REPORTER_ASSERT(fReporter,
GetTestDataChecksum(8, 0) !=
GetTestDataChecksum(8, 1));
TestChecksumSelfConsistency(128); TestChecksumSelfConsistency(128);
// Test checksum results that should be consistent across // Test checksum results that should be consistent across
// versions and platforms. // versions and platforms.
fWhichAlgorithm = kSkChecksum; fWhichAlgorithm = kSkChecksum;
REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0); REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0);
fWhichAlgorithm = kSkConsistentChecksum; fWhichAlgorithm = kSkCityHash32;
REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0); REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0xdc56d17a);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(4) == 0x03020100); REPORTER_ASSERT(fReporter, GetTestDataChecksum(4) == 0x616e1132);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(8) == 0x07860485); REPORTER_ASSERT(fReporter, GetTestDataChecksum(8) == 0xeb0fd2d6);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(128) == 0x5321e430);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(132) == 0x924a10e4);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(256) == 0xd4de9dc9);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(260) == 0xecf0325d);
fWhichAlgorithm = kSkCityHash64;
REPORTER_ASSERT(fReporter, ComputeChecksum(NULL, 0) == 0x9ae16a3b2f90404f);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(4) == 0x82bffd898958e540);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(8) == 0xad5a13e1e8e93b98);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(128) == 0x10b153630af1f395);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(132) == 0x7db71dc4adcc6647);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(256) == 0xeee763519b91b010);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(260) == 0x2fe19e0b2239bc23);
// TODO: note the weakness exposed by these collisions... // TODO: note the weakness exposed by these collisions...
// We need to improve the SkConsistentChecksum algorithm // We need to improve the SkChecksum algorithm.
// (and maybe SkChecksum too?)
// We would prefer that these asserts FAIL! // We would prefer that these asserts FAIL!
// Filed as https://code.google.com/p/skia/issues/detail?id=981 // Filed as https://code.google.com/p/skia/issues/detail?id=981
// ('SkChecksum algorithm allows for way too many collisions') // ('SkChecksum algorithm allows for way too many collisions')
@ -134,11 +143,6 @@ namespace skiatest {
GetTestDataChecksum(128) == GetTestDataChecksum(256)); GetTestDataChecksum(128) == GetTestDataChecksum(256));
REPORTER_ASSERT(fReporter, REPORTER_ASSERT(fReporter,
GetTestDataChecksum(132) == GetTestDataChecksum(260)); GetTestDataChecksum(132) == GetTestDataChecksum(260));
fWhichAlgorithm = kSkConsistentChecksum;
REPORTER_ASSERT(fReporter, GetTestDataChecksum(128) == 0);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(132) == 0x03020100);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(256) == 0);
REPORTER_ASSERT(fReporter, GetTestDataChecksum(260) == 0x03020100);
} }
Reporter* fReporter; Reporter* fReporter;