Clean up some benches that answer questions we're no longer asking.

NOTREECHECKS=true

BUG=skia:
R=reed@google.com, mtklein@google.com

Author: mtklein@chromium.org

Review URL: https://codereview.chromium.org/512503002
This commit is contained in:
mtklein 2014-08-27 06:34:16 -07:00 committed by Commit bot
parent 5e8dbd31de
commit 4473be874f
4 changed files with 0 additions and 370 deletions

View File

@ -67,87 +67,6 @@ BENCH(memcpy32_memcpy, 1000)
BENCH(memcpy32_memcpy, 10000)
BENCH(memcpy32_memcpy, 100000)
// Plain element-by-element copy of `count` 32-bit values from src to dst; any vectorization is
// left entirely to the compiler.  A count of zero or less copies nothing.
static void memcpy32_autovectorize(uint32_t* dst, const uint32_t* src, int count) {
    for (int i = 0; i < count; ++i) {
        dst[i] = src[i];
    }
}
// Benchmark memcpy32_autovectorize at five sizes, one per power of ten from 10 to 100000.
// NOTE(review): BENCH is defined outside this hunk; the second argument is presumably the
// number of 32-bit values copied per call -- confirm against the macro.
BENCH(memcpy32_autovectorize, 10)
BENCH(memcpy32_autovectorize, 100)
BENCH(memcpy32_autovectorize, 1000)
BENCH(memcpy32_autovectorize, 10000)
BENCH(memcpy32_autovectorize, 100000)
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
// Align dst to 16 bytes, then use aligned stores.  src isn't aligned, so use unaligned loads.
//
// Copies `count` 32-bit values from src to dst.  The vector path is only taken when at least
// 16 values are requested; everything else (and the leftover tail) is copied one value at a time.
static void memcpy32_sse2_align(uint32_t* dst, const uint32_t* src, int count) {
    if (count >= 16) {
        // Scalar copies until dst sits on a 16-byte boundary.
        for (; uintptr_t(dst) & 0xF; --count) {
            *dst++ = *src++;
        }

        __m128i* out = reinterpret_cast<__m128i*>(dst);
        const __m128i* in = reinterpret_cast<const __m128i*>(src);

        // Move the scalar cursors past everything the vector loop below will copy.
        const int vectorized = 16 * (count / 16);
        dst += vectorized;
        src += vectorized;

        // Each pass moves four 128-bit vectors, i.e. 16 values: all loads first, then all stores.
        for (; count >= 16; count -= 16, in += 4, out += 4) {
            const __m128i v0 = _mm_loadu_si128(in + 0);
            const __m128i v1 = _mm_loadu_si128(in + 1);
            const __m128i v2 = _mm_loadu_si128(in + 2);
            const __m128i v3 = _mm_loadu_si128(in + 3);
            _mm_store_si128(out + 0, v0);
            _mm_store_si128(out + 1, v1);
            _mm_store_si128(out + 2, v2);
            _mm_store_si128(out + 3, v3);
        }
    }

    // Scalar tail (or the whole copy when fewer than 16 values were requested).
    for (int i = 0; i < count; ++i) {
        dst[i] = src[i];
    }
}
// Benchmark memcpy32_sse2_align at five sizes, one per power of ten from 10 to 100000.
// NOTE(review): BENCH is defined outside this hunk; the second argument is presumably the
// number of 32-bit values copied per call -- confirm against the macro.
BENCH(memcpy32_sse2_align, 10)
BENCH(memcpy32_sse2_align, 100)
BENCH(memcpy32_sse2_align, 1000)
BENCH(memcpy32_sse2_align, 10000)
BENCH(memcpy32_sse2_align, 100000)
// Leave both dst and src unaligned, and so use unaligned stores for dst and unaligned loads for src.
//
// Copies `count` 32-bit values from src to dst: groups of 16 values go through SSE2 vectors,
// the remainder is copied one value at a time.
static void memcpy32_sse2_unalign(uint32_t* dst, const uint32_t* src, int count) {
    __m128i* out = reinterpret_cast<__m128i*>(dst);
    const __m128i* in = reinterpret_cast<const __m128i*>(src);

    // Move the scalar cursors past everything the vector loop below will copy.
    const int vectorized = 16 * (count / 16);
    dst += vectorized;
    src += vectorized;

    // Each pass moves four 128-bit vectors, i.e. 16 values: all loads first, then all stores.
    for (; count >= 16; count -= 16, in += 4, out += 4) {
        const __m128i v0 = _mm_loadu_si128(in + 0);
        const __m128i v1 = _mm_loadu_si128(in + 1);
        const __m128i v2 = _mm_loadu_si128(in + 2);
        const __m128i v3 = _mm_loadu_si128(in + 3);
        _mm_storeu_si128(out + 0, v0);
        _mm_storeu_si128(out + 1, v1);
        _mm_storeu_si128(out + 2, v2);
        _mm_storeu_si128(out + 3, v3);
    }

    // Scalar tail.
    for (int i = 0; i < count; ++i) {
        dst[i] = src[i];
    }
}
// Benchmark memcpy32_sse2_unalign at five sizes, one per power of ten from 10 to 100000.
// NOTE(review): BENCH is defined outside this hunk; the second argument is presumably the
// number of 32-bit values copied per call -- confirm against the macro.
BENCH(memcpy32_sse2_unalign, 10)
BENCH(memcpy32_sse2_unalign, 100)
BENCH(memcpy32_sse2_unalign, 1000)
BENCH(memcpy32_sse2_unalign, 10000)
BENCH(memcpy32_sse2_unalign, 100000)
// Test our chosen best, from SkUtils.h
BENCH(sk_memcpy32, 10)
BENCH(sk_memcpy32, 100)
@ -155,6 +74,4 @@ BENCH(sk_memcpy32, 1000)
BENCH(sk_memcpy32, 10000)
BENCH(sk_memcpy32, 100000)
#endif // SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
#undef BENCH

View File

@ -56,110 +56,3 @@ private:
DEF_BENCH( return new ChunkAllocBench(64); )
DEF_BENCH( return new ChunkAllocBench(8*1024); )
// Allocates room for `num` ints through sk_calloc_throw() and returns it as an int*.
// NOTE(review): the memory is presumably zero-filled, going by the allocator's name -- confirm.
static int* calloc(size_t num) {
    const size_t byteCount = sizeof(int) * num;
    void* block = sk_calloc_throw(byteCount);
    return (int*)block;
}
// Allocates room for `num` ints through sk_malloc_throw(), clears the whole allocation with
// sk_bzero(), and returns it as an int*.
static int* malloc_bzero(size_t num) {
    const size_t byteCount = sizeof(int) * num;
    void* block = sk_malloc_throw(byteCount);
    sk_bzero(block, byteCount);
    return (int*)block;
}
// Benchmark that, on every loop, obtains fNum ints from either calloc() or malloc_bzero(),
// optionally reads and/or writes every element, and then frees the buffer.
//
// The benchmark name is built as memory_<allocator>[_r|_w|_rw]_<num>.
class ZerosBench : public Benchmark {
    size_t fNum;         // number of ints allocated per loop
    bool fRead;          // read every element after allocating?
    bool fWrite;         // write every element after allocating?
    bool fUseCalloc;     // calloc() when true, malloc_bzero() otherwise
    SkString fName;

public:
    ZerosBench(size_t num, bool read, bool write, bool useCalloc)
        : fNum(num)
        , fRead(read)
        , fWrite(write)
        , fUseCalloc(useCalloc) {
        if (useCalloc) {
            fName.printf("memory_%s", "calloc");
        } else {
            fName.printf("memory_%s", "malloc_bzero");
        }

        // Access-pattern suffix; nothing is appended when neither read nor write is requested.
        if (read) {
            if (write) {
                fName.appendf("_rw");
            } else {
                fName.appendf("_r");
            }
        } else if (write) {
            fName.appendf("_w");
        }

        fName.appendf("_" SK_SIZE_T_SPECIFIER, num);
    }

    virtual bool isSuitableFor(Backend backend) SK_OVERRIDE {
        return kNonRendering_Backend == backend;
    }

protected:
    virtual const char* onGetName() SK_OVERRIDE {
        return fName.c_str();
    }

    virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE {
        int remaining = loops;
        while (remaining-- > 0) {
            int* buffer;
            if (fUseCalloc) {
                buffer = calloc(fNum);
            } else {
                buffer = malloc_bzero(fNum);
            }

            if (fRead) {
                // Fold every element into a volatile accumulator.
                volatile int sink = 15;
                for (size_t idx = 0; idx != fNum; ++idx) {
                    sink ^= buffer[idx];
                }
            }

            if (fWrite) {
                for (size_t idx = 0; idx != fNum; ++idx) {
                    buffer[idx] = 15;
                }
            }

            sk_free(buffer);
        }
    }
};
// Register ZerosBench for all eight read/write/allocator combinations at each of five sizes
// (1024*1024, 256*1024, 4*1024, 300 and 4 ints).
// Constructor arguments, in order: number of ints, read, write, useCalloc.
DEF_BENCH(return new ZerosBench(1024*1024, 0, 0, 0))
DEF_BENCH(return new ZerosBench(1024*1024, 0, 0, 1))
DEF_BENCH(return new ZerosBench(1024*1024, 0, 1, 0))
DEF_BENCH(return new ZerosBench(1024*1024, 0, 1, 1))
DEF_BENCH(return new ZerosBench(1024*1024, 1, 0, 0))
DEF_BENCH(return new ZerosBench(1024*1024, 1, 0, 1))
DEF_BENCH(return new ZerosBench(1024*1024, 1, 1, 0))
DEF_BENCH(return new ZerosBench(1024*1024, 1, 1, 1))
DEF_BENCH(return new ZerosBench(256*1024, 0, 0, 0))
DEF_BENCH(return new ZerosBench(256*1024, 0, 0, 1))
DEF_BENCH(return new ZerosBench(256*1024, 0, 1, 0))
DEF_BENCH(return new ZerosBench(256*1024, 0, 1, 1))
DEF_BENCH(return new ZerosBench(256*1024, 1, 0, 0))
DEF_BENCH(return new ZerosBench(256*1024, 1, 0, 1))
DEF_BENCH(return new ZerosBench(256*1024, 1, 1, 0))
DEF_BENCH(return new ZerosBench(256*1024, 1, 1, 1))
DEF_BENCH(return new ZerosBench(4*1024, 0, 0, 0))
DEF_BENCH(return new ZerosBench(4*1024, 0, 0, 1))
DEF_BENCH(return new ZerosBench(4*1024, 0, 1, 0))
DEF_BENCH(return new ZerosBench(4*1024, 0, 1, 1))
DEF_BENCH(return new ZerosBench(4*1024, 1, 0, 0))
DEF_BENCH(return new ZerosBench(4*1024, 1, 0, 1))
DEF_BENCH(return new ZerosBench(4*1024, 1, 1, 0))
DEF_BENCH(return new ZerosBench(4*1024, 1, 1, 1))
DEF_BENCH(return new ZerosBench(300, 0, 0, 0))
DEF_BENCH(return new ZerosBench(300, 0, 0, 1))
DEF_BENCH(return new ZerosBench(300, 0, 1, 0))
DEF_BENCH(return new ZerosBench(300, 0, 1, 1))
DEF_BENCH(return new ZerosBench(300, 1, 0, 0))
DEF_BENCH(return new ZerosBench(300, 1, 0, 1))
DEF_BENCH(return new ZerosBench(300, 1, 1, 0))
DEF_BENCH(return new ZerosBench(300, 1, 1, 1))
DEF_BENCH(return new ZerosBench(4, 0, 0, 0))
DEF_BENCH(return new ZerosBench(4, 0, 0, 1))
DEF_BENCH(return new ZerosBench(4, 0, 1, 0))
DEF_BENCH(return new ZerosBench(4, 0, 1, 1))
DEF_BENCH(return new ZerosBench(4, 1, 0, 0))
DEF_BENCH(return new ZerosBench(4, 1, 0, 1))
DEF_BENCH(return new ZerosBench(4, 1, 1, 0))
DEF_BENCH(return new ZerosBench(4, 1, 1, 1))

View File

@ -1,179 +0,0 @@
/*
* Copyright 2014 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "Benchmark.h"
#include "SkRandom.h"
#include "SkChunkAlloc.h"
#include "SkDeque.h"
#include "SkTArray.h"
#include "SkTDArray.h"
// This file has several benchmarks using various data structures to do stack-like things:
// - push
// - push, immediately pop
// - push many, pop all of them
// - serial access
// - random access
// When a data structure doesn't support an operation efficiently, we leave that combination out.
// Where possible we hint to the data structure to allocate in 4K pages.
//
// These benchmarks may help you decide which data structure to use for a dynamically allocated
// ordered list of allocations that grows on one end.
//
// Current overall winner (01/2014): SkTDArray.
// It wins every benchmark on every machine I tried (Desktop, Nexus S, Laptop).
// Adapts an Impl type -- anything providing a static kName string and a static bench(int) --
// into a Benchmark that only runs on the non-rendering backend.
template <typename Impl>
struct StackBench : public Benchmark {
    virtual bool isSuitableFor(Backend b) SK_OVERRIDE {
        return kNonRendering_Backend == b;
    }

    // The benchmark is named after the Impl.
    virtual const char* onGetName() SK_OVERRIDE {
        return Impl::kName;
    }

    // All of the work is delegated to the Impl; the canvas is unused.
    virtual void onDraw(const int loops, SkCanvas*) SK_OVERRIDE {
        Impl::bench(loops);
    }
};
// BENCH(name) { body } defines one stack benchmark:
//  - declares a struct `name` holding a kName string and a static bench(int) function,
//  - sets kName to the stringified `name`,
//  - registers a StackBench<name> through DEF_BENCH,
//  - and finally opens the definition of name::bench(int loops), so the braces that follow the
//    macro invocation become that function's body (with `loops` in scope).
#define BENCH(name) \
struct name { static const char* const kName; static void bench(int); }; \
const char* const name::kName = #name; \
DEF_BENCH(return new StackBench<name>();) \
void name::bench(int loops)
// Number of items the benchmarks below work with: the serial and random-access benchmarks fill
// their container with K items, and the push/pop and random-access loops run K*loops times.
static const int K = 2049;
// Add K items, then iterate through them serially many times.
BENCH(Deque_Serial) {
    // Fill the deque with 0..K-1.
    SkDeque deque(sizeof(int), 1024);
    for (int value = 0; value < K; ++value) {
        int* slot = (int*)deque.push_back();
        *slot = value;
    }

    // Walk the deque front to back once per loop, summing into a volatile accumulator.
    volatile int sink = 0;
    for (int pass = 0; pass < loops; ++pass) {
        SkDeque::Iter iter(deque, SkDeque::Iter::kFront_IterStart);
        for (void* item = iter.next(); item; item = iter.next()) {
            sink += *(int*)item;
        }
    }
}
BENCH(TArray_Serial) {
    // Fill the array with 0..K-1.
    SkTArray<int, true> array;
    for (int value = 0; value < K; ++value) {
        array.push_back(value);
    }

    // Index through the whole array once per loop, summing into a volatile accumulator.
    volatile int sink = 0;
    for (int pass = 0; pass < loops; ++pass) {
        int index = 0;
        while (index < array.count()) {
            sink += array[index];
            ++index;
        }
    }
}
BENCH(TDArray_Serial) {
    // Fill the array with 0..K-1.
    SkTDArray<int> array;
    for (int value = 0; value < K; ++value) {
        array.push(value);
    }

    // Index through the whole array once per loop, summing into a volatile accumulator.
    volatile int sink = 0;
    for (int pass = 0; pass < loops; ++pass) {
        int index = 0;
        while (index < array.count()) {
            sink += array[index];
            ++index;
        }
    }
}
// Add K items, then randomly access them many times.
BENCH(TArray_RandomAccess) {
    // Fill the array with 0..K-1.
    SkTArray<int, true> array;
    for (int value = 0; value < K; ++value) {
        array.push_back(value);
    }

    // Read K*loops elements at indices drawn from [0, K), summing into a volatile accumulator.
    SkRandom rand;
    volatile int sink = 0;
    const int accesses = K * loops;
    for (int n = 0; n < accesses; ++n) {
        sink += array[rand.nextULessThan(K)];
    }
}
BENCH(TDArray_RandomAccess) {
    // Fill the array with 0..K-1.
    SkTDArray<int> array;
    for (int value = 0; value < K; ++value) {
        array.push(value);
    }

    // Read K*loops elements at indices drawn from [0, K), summing into a volatile accumulator.
    SkRandom rand;
    volatile int sink = 0;
    const int accesses = K * loops;
    for (int n = 0; n < accesses; ++n) {
        sink += array[rand.nextULessThan(K)];
    }
}
// Push many times.
BENCH(ChunkAlloc_Push) {
    // K*loops int-sized allocations from a chunk allocator constructed with 4096.
    SkChunkAlloc chunks(4096);
    const int total = K * loops;
    for (int n = 0; n < total; ++n) {
        chunks.allocThrow(sizeof(int));
    }
}
BENCH(Deque_Push) {
    // Push the values 0..K*loops-1 onto the back of a deque.
    SkDeque deque(sizeof(int), 1024);
    const int total = K * loops;
    for (int value = 0; value < total; ++value) {
        int* slot = (int*)deque.push_back();
        *slot = value;
    }
}
BENCH(TArray_Push) {
    // Push the values 0..K*loops-1 onto the back of an SkTArray.
    SkTArray<int, true> array;
    const int total = K * loops;
    for (int value = 0; value < total; ++value) {
        array.push_back(value);
    }
}
BENCH(TDArray_Push) {
    // Push the values 0..K*loops-1 onto an SkTDArray.
    SkTDArray<int> array;
    const int total = K * loops;
    for (int value = 0; value < total; ++value) {
        array.push(value);
    }
}
// Push then immediately pop many times.
BENCH(ChunkAlloc_PushPop) {
    // K*loops rounds of: allocate one int-sized block, then immediately hand it back.
    SkChunkAlloc chunks(4096);
    const int total = K * loops;
    for (int n = 0; n < total; ++n) {
        void* block = chunks.allocThrow(sizeof(int));
        chunks.unalloc(block);
    }
}
BENCH(Deque_PushPop) {
    // K*loops rounds of: push one value on the back, then immediately pop it.
    SkDeque deque(sizeof(int), 1024);
    const int total = K * loops;
    for (int value = 0; value < total; ++value) {
        int* slot = (int*)deque.push_back();
        *slot = value;
        deque.pop_back();
    }
}
BENCH(TArray_PushPop) {
    // K*loops rounds of: push one value on the back, then immediately pop it.
    SkTArray<int, true> array;
    const int total = K * loops;
    for (int value = 0; value < total; ++value) {
        array.push_back(value);
        array.pop_back();
    }
}
BENCH(TDArray_PushPop) {
    // K*loops rounds of: push one value, then immediately pop it.
    SkTDArray<int> array;
    const int total = K * loops;
    for (int value = 0; value < total; ++value) {
        array.push(value);
        array.pop();
    }
}
// Push many items, then pop them all.
BENCH(Deque_PushAllPopAll) {
    SkDeque deque(sizeof(int), 1024);
    const int total = K * loops;

    // Phase 1: push 0..total-1 onto the back.
    for (int value = 0; value < total; ++value) {
        int* slot = (int*)deque.push_back();
        *slot = value;
    }

    // Phase 2: pop every item back off.
    for (int n = 0; n < total; ++n) {
        deque.pop_back();
    }
}
BENCH(TArray_PushAllPopAll) {
    SkTArray<int, true> array;
    const int total = K * loops;

    // Phase 1: push 0..total-1 onto the back.
    for (int value = 0; value < total; ++value) {
        array.push_back(value);
    }

    // Phase 2: pop every item back off.
    for (int n = 0; n < total; ++n) {
        array.pop_back();
    }
}
BENCH(TDArray_PushAllPopAll) {
    SkTDArray<int> array;
    const int total = K * loops;

    // Phase 1: push 0..total-1.
    for (int value = 0; value < total; ++value) {
        array.push(value);
    }

    // Phase 2: pop every item back off.
    for (int n = 0; n < total; ++n) {
        array.pop();
    }
}

View File

@ -92,7 +92,6 @@
'../bench/ShaderMaskBench.cpp',
'../bench/SkipZeroesBench.cpp',
'../bench/SortBench.cpp',
'../bench/StackBench.cpp',
'../bench/StrokeBench.cpp',
'../bench/TableBench.cpp',
'../bench/TextBench.cpp',