0358a6ac00
If we make sure all SkOpts functions are static, we can give the namespaces any name we like. This lets us drop the sk_ prefix and give a real indication of the default SIMD instruction set rather than just saying sk_default. Both of these changes help debugger, profiler, and crash report readability. Perhaps more importantly, keeping these functions static helps prevent accidentally linking in unused versions of functions, as you see here with sk_avx::srcover_srgb_srgb(). This requires we update SkBlend_opts tests and benches to call SkOpts functions through SkOpts rather than declaring the methods externally. In practice this drops testing of the SSE2 version on machines with SSE4. If we still really need to test/bench the compile time best SIMD level version of this method against the runtime detected best, we can include SkBlend_opts.h into the tests or benches directly, similar to what we do for the trivial, brute-force, or best non-SIMD versions. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2145833002 CQ_INCLUDE_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review-Url: https://codereview.chromium.org/2145833002
187 lines
5.7 KiB
C++
187 lines
5.7 KiB
C++
/*
|
|
* Copyright 2016 Google Inc.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
* found in the LICENSE file.
|
|
*/
|
|
|
|
#include <tuple>
|
|
|
|
#include "Benchmark.h"
|
|
#include "Resources.h"
|
|
#include "SkCpu.h"
|
|
#include "SkImage.h"
|
|
#include "SkImage_Base.h"
|
|
#include "SkNx.h"
|
|
#include "SkOpts.h"
|
|
#include "SkPM4fPriv.h"
|
|
#include "SkString.h"
|
|
|
|
#define INNER_LOOPS 10
|
|
|
|
static void brute_force_srcover_srgb_srgb(
|
|
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
|
|
while (ndst > 0) {
|
|
int n = SkTMin(ndst, nsrc);
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
|
|
}
|
|
ndst -= n;
|
|
}
|
|
}
|
|
|
|
static void best_non_simd_srcover_srgb_srgb(
|
|
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
|
|
uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
|
|
|
|
auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
|
|
srcover_srgb8888_srgb_1(dst++, *src++);
|
|
srcover_srgb8888_srgb_1(dst, *src);
|
|
};
|
|
|
|
while (ndst >0) {
|
|
int count = SkTMin(ndst, nsrc);
|
|
ndst -= count;
|
|
const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
|
|
const uint64_t* end = dsrc + (count >> 1);
|
|
do {
|
|
if ((~*dsrc & 0xFF000000FF000000) == 0) {
|
|
do {
|
|
*ddst++ = *dsrc++;
|
|
} while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0);
|
|
} else if ((*dsrc & 0xFF000000FF000000) == 0) {
|
|
do {
|
|
dsrc++;
|
|
ddst++;
|
|
} while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0);
|
|
} else {
|
|
srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
|
|
reinterpret_cast<const uint32_t*>(dsrc++));
|
|
}
|
|
} while (dsrc < end);
|
|
|
|
if ((count & 1) != 0) {
|
|
srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst),
|
|
*reinterpret_cast<const uint32_t*>(dsrc));
|
|
}
|
|
}
|
|
}
|
|
|
|
static void trivial_srcover_srgb_srgb(
|
|
uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
|
|
while (ndst > 0) {
|
|
int n = SkTMin(ndst, nsrc);
|
|
|
|
for (int i = 0; i < n; i++) {
|
|
srcover_srgb8888_srgb_1(dst++, src[i]);
|
|
}
|
|
ndst -= n;
|
|
}
|
|
}
|
|
|
|
class SrcOverVSkOptsBruteForce {
|
|
public:
|
|
static SkString Name() { return SkString{"VSkOptsBruteForce"}; }
|
|
static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
|
|
brute_force_srcover_srgb_srgb(dst, src, count, count);
|
|
}
|
|
};
|
|
|
|
class SrcOverVSkOptsTrivial {
|
|
public:
|
|
static SkString Name() { return SkString{"VSkOptsTrivial"}; }
|
|
static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
|
|
trivial_srcover_srgb_srgb(dst, src, count, count);
|
|
}
|
|
};
|
|
|
|
class SrcOverVSkOptsNonSimdCore {
|
|
public:
|
|
static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; }
|
|
static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
|
|
best_non_simd_srcover_srgb_srgb(dst, src, count, count);
|
|
}
|
|
};
|
|
|
|
class SrcOverVSkOptsDefault {
|
|
public:
|
|
static SkString Name() { return SkString{"VSkOptsDefault"}; }
|
|
static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
|
|
SkOpts::srcover_srgb_srgb(dst, src, count, count);
|
|
}
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename Blender>
|
|
class LinearSrcOverBench : public Benchmark {
|
|
public:
|
|
LinearSrcOverBench(const char* fileName) : fFileName(fileName) {
|
|
fName = "LinearSrcOver_";
|
|
fName.append(fileName);
|
|
fName.append(Blender::Name());
|
|
}
|
|
|
|
protected:
|
|
bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
|
|
const char* onGetName() override { return fName.c_str(); }
|
|
|
|
void onPreDraw(SkCanvas*) override {
|
|
if (!fPixmap.addr()) {
|
|
sk_sp<SkImage> image = GetResourceAsImage(fFileName.c_str());
|
|
SkBitmap bm;
|
|
if (!as_IB(image)->getROPixels(&bm)) {
|
|
SkFAIL("Could not read resource");
|
|
}
|
|
bm.peekPixels(&fPixmap);
|
|
fCount = fPixmap.rowBytesAsPixels();
|
|
fDst.reset(fCount);
|
|
sk_bzero(fDst.get(), fPixmap.rowBytes());
|
|
}
|
|
}
|
|
|
|
void onDraw(int loops, SkCanvas*) override {
|
|
SkASSERT(fPixmap.colorType() == kN32_SkColorType);
|
|
|
|
const int width = fPixmap.rowBytesAsPixels();
|
|
|
|
for (int i = 0; i < loops * INNER_LOOPS; ++i) {
|
|
const uint32_t* src = fPixmap.addr32();
|
|
for (int y = 0; y < fPixmap.height(); y++) {
|
|
Blender::BlendN(fDst.get(), src, width);
|
|
src += width;
|
|
}
|
|
}
|
|
}
|
|
|
|
void onPostDraw(SkCanvas*) override {
|
|
// Make sure the compiler does not optimize away the operation.
|
|
volatile uint32_t v = 0;
|
|
for (int i = 0; i < fCount; i++) {
|
|
v ^= fDst[i];
|
|
}
|
|
}
|
|
|
|
private:
|
|
int fCount;
|
|
SkAutoTArray<uint32_t> fDst;
|
|
SkString fFileName;
|
|
SkString fName;
|
|
SkPixmap fPixmap;
|
|
|
|
typedef Benchmark INHERITED;
|
|
};
|
|
|
|
#define BENCHES(fileName) \
|
|
DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsBruteForce>(fileName); ) \
|
|
DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsTrivial>(fileName); ) \
|
|
DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsNonSimdCore>(fileName); ) \
|
|
DEF_BENCH( return new LinearSrcOverBench<SrcOverVSkOptsDefault>(fileName); )
|
|
|
|
BENCHES("yellow_rose.png")
|
|
BENCHES("baby_tux.png")
|
|
BENCHES("plane.png")
|
|
BENCHES("mandrill_512.png")
|
|
BENCHES("iconstrip.png")
|