skia2/bench/SkBlend_optsBench.cpp
mtklein 0c902473d6 Correct sRGB <-> linear everywhere.
This trims the SkPM4fPriv methods down to just foolproof methods.
(Anything trying to build these itself is probably wrong.)

Things like Sk4f srgb_to_linear(Sk4f) can't really exist anymore,
at least not efficiently, so this refactor is somewhat more invasive
than you might think.  Generally this means things using to_4f() are
also making a misstep... that's gone too.

It also does not make sense to try to play games with linear floats
with 255 bias any more.  That hack can't work with real sRGB coding.

Rather than update them, I've removed a couple of L32 xfermode fast
paths.  I'd even rather drop it entirely...

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2163683002
CQ_INCLUDE_TRYBOTS=master.client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review-Url: https://codereview.chromium.org/2163683002
2016-07-20 18:10:07 -07:00

201 lines
6.0 KiB
C++

/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include <tuple>
#include "Benchmark.h"
#include "Resources.h"
#include "SkCpu.h"
#include "SkImage.h"
#include "SkImage_Base.h"
#include "SkNx.h"
#include "SkOpts.h"
#include "SkPM4fPriv.h"
#include "SkString.h"
// Multiplier applied to the harness-provided `loops` count in
// LinearSrcOverBench::onDraw: each loop iteration blends the whole image this many times.
#define INNER_LOOPS 10
// Blends a single packed 32-bit source pixel over *dst with no shortcuts.
//
// Both pixels are expanded with Sk4f_fromS32, combined as
//     result = s + d * (1 - s[3])
// and the result is packed back into *dst with Sk4f_toS32.
// NOTE(review): lane 3 is presumably alpha and the S32 helpers presumably handle
// the sRGB <-> linear conversion; confirm against SkPM4fPriv.h.
static inline void brute_srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
    auto srcPx = Sk4f_fromS32(src);
    auto dstPx = Sk4f_fromS32(*dst);

    // Fraction of the destination that remains visible under the source.
    auto dstScale = 1.0f - srcPx[3];

    *dst = Sk4f_toS32(srcPx + dstPx * dstScale);
}
// Blends a single source pixel over *dst, with a fast path for sources whose
// top byte is 0xFF: those simply overwrite the destination. Everything else is
// forwarded to the full float blend in brute_srcover_srgb_srgb_1.
static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
    const bool topByteIsFF = (src >= 0xFF000000);
    if (!topByteIsFF) {
        brute_srcover_srgb_srgb_1(dst, src);
        return;
    }
    // Fully covering source: the blend result is the source itself.
    *dst = src;
}
// Blends `ndst` destination pixels using the shortcut-free per-pixel blend.
//
// The source span of `nsrc` pixels is replayed from its start for every run of
// up to `nsrc` destination pixels, so a short source is tiled across a longer
// destination.
//   dst  - destination pixels, updated in place
//   src  - source pixels (never advanced past src + nsrc - 1)
//   ndst - number of destination pixels to blend
//   nsrc - number of source pixels available per run
static void brute_force_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    while (ndst > 0) {
        const int run = SkTMin(ndst, nsrc);
        const uint32_t* s = src;
        for (int left = run; left > 0; --left) {
            brute_srcover_srgb_srgb_1(dst++, *s++);
        }
        ndst -= run;
    }
}
// Blends `ndst` destination pixels one at a time through srcover_srgb_srgb_1
// (the per-pixel blend that has the 0xFF-top-byte overwrite shortcut).
//
// The `nsrc` source pixels are reused from the beginning for each successive
// run of up to `nsrc` destination pixels.
//   dst  - destination pixels, updated in place
//   src  - start of the source span
//   ndst - number of destination pixels to blend
//   nsrc - length of the source span
static void trivial_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    while (ndst > 0) {
        const int chunk = SkTMin(ndst, nsrc);
        int idx = 0;
        while (idx < chunk) {
            srcover_srgb_srgb_1(dst, src[idx]);
            ++dst;
            ++idx;
        }
        ndst -= chunk;
    }
}
// Scalar src-over that inspects two pixels at a time through 64-bit loads.
//
// For each pair of source pixels the top byte of both 32-bit halves is tested
// (the same byte srcover_srgb_srgb_1 uses for its overwrite shortcut):
//   * both 0xFF -> the pair is copied straight to the destination,
//   * both 0x00 -> the pair is skipped and the destination is left untouched,
//   * otherwise -> each pixel goes through srcover_srgb_srgb_1.
// Runs of consecutive "both 0xFF" / "both 0x00" pairs are consumed in tight
// inner loops. A trailing odd pixel is blended individually.
//
// As with the other reference blenders in this file, the `nsrc` source pixels
// are replayed from the start for every run of up to `nsrc` destination pixels.
//   dst  - destination pixels, updated in place
//   src  - start of the source span
//   ndst - number of destination pixels to blend
//   nsrc - length of the source span; nothing is blended when it is not positive
//
// NOTE(review): pixels are accessed through uint64_t pointers, so this relies on
// the platform tolerating that aliasing and (for odd row starts) unaligned
// 64-bit access, exactly as the previous implementation did.
static void best_non_simd_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    // Without source pixels the tiling loop below could never make progress.
    if (nsrc <= 0) {
        return;
    }

    auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
        srcover_srgb_srgb_1(dst++, *src++);
        srcover_srgb_srgb_1(dst, *src);
    };

    // Selects the top byte of each of the two packed 32-bit pixels.
    const uint64_t kTopBytes = 0xFF000000FF000000;

    while (ndst > 0) {
        const int count = SkTMin(ndst, nsrc);
        ndst -= count;

        // Re-derive the 64-bit cursor from `dst` for every tile so that an odd
        // tail pixel from the previous tile is accounted for.
        uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
        const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
        const uint64_t* end = dsrc + (count >> 1);

        // Pre-checked loop: with fewer than two pixels there is no full pair,
        // and touching one would read and write past the valid range.
        while (dsrc < end) {
            if ((~*dsrc & kTopBytes) == 0) {
                // Both top bytes are 0xFF: copy pairs verbatim.
                do {
                    *ddst++ = *dsrc++;
                } while (dsrc < end && (~*dsrc & kTopBytes) == 0);
            } else if ((*dsrc & kTopBytes) == 0) {
                // Both top bytes are 0x00: leave the destination as it is.
                do {
                    dsrc++;
                    ddst++;
                } while (dsrc < end && (*dsrc & kTopBytes) == 0);
            } else {
                srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
                                    reinterpret_cast<const uint32_t*>(dsrc++));
            }
        }

        if ((count & 1) != 0) {
            srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
                                *reinterpret_cast<const uint32_t*>(dsrc));
        }

        // Step past everything this tile covered, including the odd pixel.
        dst += count;
    }
}
// Blender policy for LinearSrcOverBench that routes every row through
// brute_force_srcover_srgb_srgb.
class SrcOverVSkOptsBruteForce {
public:
    // Suffix appended to the benchmark's name.
    static SkString Name() {
        return SkString("VSkOptsBruteForce");
    }

    // Blends `count` source pixels over `count` destination pixels.
    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
        const int ndst = count;
        const int nsrc = count;
        brute_force_srcover_srgb_srgb(dst, src, ndst, nsrc);
    }
};
// Blender policy for LinearSrcOverBench that routes every row through
// trivial_srcover_srgb_srgb.
class SrcOverVSkOptsTrivial {
public:
    // Suffix appended to the benchmark's name.
    static SkString Name() {
        return SkString("VSkOptsTrivial");
    }

    // Blends `count` source pixels over `count` destination pixels.
    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
        const int ndst = count;
        const int nsrc = count;
        trivial_srcover_srgb_srgb(dst, src, ndst, nsrc);
    }
};
// Blender policy for LinearSrcOverBench that routes every row through
// best_non_simd_srcover_srgb_srgb.
class SrcOverVSkOptsNonSimdCore {
public:
    // Suffix appended to the benchmark's name.
    static SkString Name() {
        return SkString("VSkOptsNonSimdCore");
    }

    // Blends `count` source pixels over `count` destination pixels.
    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
        const int ndst = count;
        const int nsrc = count;
        best_non_simd_srcover_srgb_srgb(dst, src, ndst, nsrc);
    }
};
// Blender policy for LinearSrcOverBench that routes every row through
// SkOpts::srcover_srgb_srgb, i.e. whatever implementation SkOpts provides.
class SrcOverVSkOptsDefault {
public:
    // Suffix appended to the benchmark's name.
    static SkString Name() {
        return SkString("VSkOptsDefault");
    }

    // Blends `count` source pixels over `count` destination pixels.
    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
        const int ndst = count;
        const int nsrc = count;
        SkOpts::srcover_srgb_srgb(dst, src, ndst, nsrc);
    }
};
///////////////////////////////////////////////////////////////////////////////////////////////////
template <typename Blender>
class LinearSrcOverBench : public Benchmark {
public:
LinearSrcOverBench(const char* fileName) : fFileName(fileName) {
fName = "LinearSrcOver_";
fName.append(fileName);
fName.append(Blender::Name());
}
protected:
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
const char* onGetName() override { return fName.c_str(); }
void onPreDraw(SkCanvas*) override {
if (!fPixmap.addr()) {
sk_sp<SkImage> image = GetResourceAsImage(fFileName.c_str());
SkBitmap bm;
if (!as_IB(image)->getROPixels(&bm)) {
SkFAIL("Could not read resource");
}
bm.peekPixels(&fPixmap);
fCount = fPixmap.rowBytesAsPixels();
fDst.reset(fCount);
sk_bzero(fDst.get(), fPixmap.rowBytes());
}
}
void onDraw(int loops, SkCanvas*) override {
SkASSERT(fPixmap.colorType() == kN32_SkColorType);
const int width = fPixmap.rowBytesAsPixels();
for (int i = 0; i < loops * INNER_LOOPS; ++i) {
const uint32_t* src = fPixmap.addr32();
for (int y = 0; y < fPixmap.height(); y++) {
Blender::BlendN(fDst.get(), src, width);
src += width;
}
}
}
void onPostDraw(SkCanvas*) override {
// Make sure the compiler does not optimize away the operation.
volatile uint32_t v = 0;
for (int i = 0; i < fCount; i++) {
v ^= fDst[i];
}
}
private:
int fCount;
SkAutoTArray<uint32_t> fDst;
SkString fFileName;
SkString fName;
SkPixmap fPixmap;
typedef Benchmark INHERITED;
};
// Registers four LinearSrcOverBench instances for one resource image, one per
// blender policy (brute force, trivial, non-SIMD core, SkOpts default), so the
// implementations can be compared on identical input.
#define BENCHES(fileName) \
DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsBruteForce>(fileName); ) \
DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsTrivial>(fileName); ) \
DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsNonSimdCore>(fileName); ) \
DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsDefault>(fileName); )
// Resource images the benchmarks are instantiated for.
BENCHES("yellow_rose.png")
BENCHES("baby_tux.png")
BENCHES("plane.png")
BENCHES("mandrill_512.png")
BENCHES("iconstrip.png")