skia2/bench/SkBlend_optsBench.cpp
mtklein 0358a6ac00 Update SkOpts namespaces.
If we make sure all SkOpts functions are static, we can give the namespaces any
name we like.  This lets us drop the sk_ prefix and give a real indication of
the default SIMD instruction set rather than just saying sk_default.

Both of these changes help debugger, profiler, and crash report readability.
Perhaps more importantly, keeping these functions static helps prevent
accidentally linking in unused versions of functions, as you see here with
sk_avx::srcover_srgb_srgb().

This requires we update SkBlend_opts tests and benches to call SkOpts functions
through SkOpts rather than declaring the methods externally.  In practice this
drops testing of the SSE2 version on machines with SSE4.  If we still really
need to test/bench the compile time best SIMD level version of this method
against the runtime detected best, we can include SkBlend_opts.h into the tests
or benches directly, similar to what we do for the trivial, brute-force, or best
non-SIMD versions.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2145833002
CQ_INCLUDE_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review-Url: https://codereview.chromium.org/2145833002
2016-07-13 08:02:20 -07:00

187 lines
5.7 KiB
C++

/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include <tuple>
#include "Benchmark.h"
#include "Resources.h"
#include "SkCpu.h"
#include "SkImage.h"
#include "SkImage_Base.h"
#include "SkNx.h"
#include "SkOpts.h"
#include "SkPM4fPriv.h"
#include "SkString.h"
// Multiplier applied to the harness-supplied loop count: LinearSrcOverBench::onDraw() blends the
// whole image `loops * INNER_LOOPS` times per call.
#define INNER_LOOPS 10
// Reference blend used as the slowest baseline: every source pixel is expanded with to_4f(),
// passed through srgb_to_linear(), and handed to srcover_blend_srgb8888_srgb_1() together with
// the next destination pixel.
//
// Writes ndst destination pixels in total. The source run of nsrc pixels is replayed from its
// first pixel whenever it is exhausted before the destination is full.
//
// nsrc must be positive: with nsrc <= 0 the remaining destination count never shrinks and the
// loop does not terminate.
static void brute_force_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    int remaining = ndst;
    while (remaining > 0) {
        // Never blend more than one full source run, nor past the end of the destination.
        const int run = SkTMin(remaining, nsrc);
        for (int px = 0; px < run; ++px, ++dst) {
            const uint32_t srcPixel = src[px];
            srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(srcPixel)));
        }
        remaining -= run;
    }
}
// Scalar (non-SIMD) src-over blend that inspects two pixels at a time through a uint64_t view.
//
// For each pair, the top byte of both packed 32-bit pixels is tested (presumably the alpha
// channel of the N32 layout -- confirm against SkColorPriv):
//   * both top bytes 0xFF -> the source pair is copied straight over the destination;
//   * both top bytes 0x00 -> the destination pair is left untouched;
//   * anything else       -> both pixels go through srcover_srgb8888_srgb_1().
// Runs of consecutive copy/skip pairs are consumed in tight inner loops.
//
// Writes ndst destination pixels in total, replaying the nsrc source pixels from the start
// whenever the source run is exhausted. nsrc must be positive, otherwise the outer loop cannot
// make progress.
//
// NOTE(review): the uint32_t buffers are accessed through uint64_t pointers, so this relies on
// the platform tolerating 4-byte-aligned 64-bit loads/stores, as the original code did.
static void best_non_simd_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    // Selects the top byte of each of the two pixels packed in one uint64_t.
    constexpr uint64_t kTopBytes = 0xFF000000FF000000;

    // Blends two adjacent pixels one after the other.
    auto srcover_srgb_srgb_2 = [](uint32_t* d, const uint32_t* s) {
        srcover_srgb8888_srgb_1(d++, *s++);
        srcover_srgb8888_srgb_1(d, *s);
    };

    while (ndst > 0) {
        const int count = SkTMin(ndst, nsrc);
        ndst -= count;

        // Re-derive the 64-bit cursors on every pass so that an odd-length pass leaves the next
        // one starting exactly `count` pixels further into dst.
        uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
        const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
        const uint64_t* const end = dsrc + (count >> 1);

        // A plain while loop (not do/while): when count < 2 there is no complete pair and
        // nothing may be read or written here.
        while (dsrc < end) {
            if ((~*dsrc & kTopBytes) == 0) {
                // Run of pairs whose top bytes are all ones: copy source over destination.
                do {
                    *ddst++ = *dsrc++;
                } while (dsrc < end && (~*dsrc & kTopBytes) == 0);
            } else if ((*dsrc & kTopBytes) == 0) {
                // Run of pairs whose top bytes are all zero: destination stays as it is.
                do {
                    dsrc++;
                    ddst++;
                } while (dsrc < end && (*dsrc & kTopBytes) == 0);
            } else {
                // Mixed pair: blend both pixels individually.
                srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
                                    reinterpret_cast<const uint32_t*>(dsrc++));
            }
        }

        // Odd pixel left over after the last complete pair.
        if ((count & 1) != 0) {
            srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst),
                                    *reinterpret_cast<const uint32_t*>(dsrc));
        }

        dst += count;
    }
}
// Simplest baseline: calls srcover_srgb8888_srgb_1() once per pixel with no special-casing.
//
// Writes ndst destination pixels in total. The source run of nsrc pixels is replayed from its
// first pixel whenever it is exhausted before the destination is full.
//
// nsrc must be positive: with nsrc <= 0 the remaining destination count never shrinks and the
// loop does not terminate.
static void trivial_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    int remaining = ndst;
    while (remaining > 0) {
        // Never blend more than one full source run, nor past the end of the destination.
        const int run = SkTMin(remaining, nsrc);
        for (int px = 0; px < run; ++px, ++dst) {
            srcover_srgb8888_srgb_1(dst, src[px]);
        }
        remaining -= run;
    }
}
// Blender policy for LinearSrcOverBench that routes to brute_force_srcover_srgb_srgb().
class SrcOverVSkOptsBruteForce {
public:
    // Suffix appended to the benchmark name.
    static SkString Name() {
        SkString suffix("VSkOptsBruteForce");
        return suffix;
    }

    // Blends `count` source pixels over `count` destination pixels.
    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
        const int ndst = count;
        const int nsrc = count;
        brute_force_srcover_srgb_srgb(dst, src, ndst, nsrc);
    }
};
// Blender policy for LinearSrcOverBench that routes to trivial_srcover_srgb_srgb().
class SrcOverVSkOptsTrivial {
public:
    // Suffix appended to the benchmark name.
    static SkString Name() {
        SkString suffix("VSkOptsTrivial");
        return suffix;
    }

    // Blends `count` source pixels over `count` destination pixels.
    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
        const int ndst = count;
        const int nsrc = count;
        trivial_srcover_srgb_srgb(dst, src, ndst, nsrc);
    }
};
// Blender policy for LinearSrcOverBench that routes to best_non_simd_srcover_srgb_srgb().
class SrcOverVSkOptsNonSimdCore {
public:
    // Suffix appended to the benchmark name.
    static SkString Name() {
        SkString suffix("VSkOptsNonSimdCore");
        return suffix;
    }

    // Blends `count` source pixels over `count` destination pixels.
    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
        const int ndst = count;
        const int nsrc = count;
        best_non_simd_srcover_srgb_srgb(dst, src, ndst, nsrc);
    }
};
// Blender policy for LinearSrcOverBench that routes to SkOpts::srcover_srgb_srgb(), i.e.
// whichever implementation SkOpts currently exposes under that name.
class SrcOverVSkOptsDefault {
public:
    // Suffix appended to the benchmark name.
    static SkString Name() {
        SkString suffix("VSkOptsDefault");
        return suffix;
    }

    // Blends `count` source pixels over `count` destination pixels.
    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
        const int ndst = count;
        const int nsrc = count;
        SkOpts::srcover_srgb_srgb(dst, src, ndst, nsrc);
    }
};
///////////////////////////////////////////////////////////////////////////////////////////////////
template <typename Blender>
class LinearSrcOverBench : public Benchmark {
public:
LinearSrcOverBench(const char* fileName) : fFileName(fileName) {
fName = "LinearSrcOver_";
fName.append(fileName);
fName.append(Blender::Name());
}
protected:
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
const char* onGetName() override { return fName.c_str(); }
void onPreDraw(SkCanvas*) override {
if (!fPixmap.addr()) {
sk_sp<SkImage> image = GetResourceAsImage(fFileName.c_str());
SkBitmap bm;
if (!as_IB(image)->getROPixels(&bm)) {
SkFAIL("Could not read resource");
}
bm.peekPixels(&fPixmap);
fCount = fPixmap.rowBytesAsPixels();
fDst.reset(fCount);
sk_bzero(fDst.get(), fPixmap.rowBytes());
}
}
void onDraw(int loops, SkCanvas*) override {
SkASSERT(fPixmap.colorType() == kN32_SkColorType);
const int width = fPixmap.rowBytesAsPixels();
for (int i = 0; i < loops * INNER_LOOPS; ++i) {
const uint32_t* src = fPixmap.addr32();
for (int y = 0; y < fPixmap.height(); y++) {
Blender::BlendN(fDst.get(), src, width);
src += width;
}
}
}
void onPostDraw(SkCanvas*) override {
// Make sure the compiler does not optimize away the operation.
volatile uint32_t v = 0;
for (int i = 0; i < fCount; i++) {
v ^= fDst[i];
}
}
private:
int fCount;
SkAutoTArray<uint32_t> fDst;
SkString fFileName;
SkString fName;
SkPixmap fPixmap;
typedef Benchmark INHERITED;
};
// Registers one LinearSrcOverBench per blender policy (brute force, trivial, best non-SIMD, and
// SkOpts) for the given resource file name.
#define BENCHES(resource)                                                               \
    DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsBruteForce>(resource); )     \
    DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsTrivial>(resource); )        \
    DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsNonSimdCore>(resource); )    \
    DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsDefault>(resource); )
// Resource images to benchmark; each BENCHES() line registers every blender variant for that
// file.
BENCHES("yellow_rose.png")
BENCHES("baby_tux.png")
BENCHES("plane.png")
BENCHES("mandrill_512.png")
BENCHES("iconstrip.png")