skia2/bench/MemsetBench.cpp
Mike Klein 7ac2be2020 Reland "add ERMS (enhanced rep mov/sto) SkOpts slice"
This is a reland of 26ad8ccdec
... now with MSAN support.

Original change's description:
> add ERMS (enhanced rep mov/sto) SkOpts slice
>
> Intel's got two CPUID bits indicating the speed of rep mov/sto
> (memcpy/memset),
>
>     - ERMS, Enhanced Rep Mov/Sto, older, large copies are fast?
>     - FSRM, Fast Short Rep Mov, newer, small copies are fast?
>
> ERMS has been around a long time on Intel, but is relatively recent on
> Ryzen, and FSRM is new across the board.  The startup cost for
> ERMS-but-not-FSRM copies really is noticeable, so we cut over to the
> previous SSE/AVX routines when N is small.
>
> I've left the memset benchmarks as I found them most useful when
> tuning the small/large cutoff in this CL.
>
> Change-Id: I3ac4e3f34796aba0ea86aabbe9dda7526919456a
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/332580
> Reviewed-by: Herb Derby <herb@google.com>
> Commit-Queue: Mike Klein <mtklein@google.com>

Cq-Include-Trybots: luci.skia.skia.primary:Test-Debian10-Clang-GCE-CPU-AVX2-x86_64-Release-All-MSAN
Change-Id: Ia293bba90022c48c884599331ef35aa67644729b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/334343
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Herb Derby <herb@google.com>
2020-11-12 18:00:39 +00:00

78 lines
2.6 KiB
C++

/*
* Copyright 2013 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "bench/Benchmark.h"
#include "include/private/SkTemplates.h"
#include "src/core/SkUtils.h"
// Benchmark for the sk_memset16/32/64 routines, templated on the element type T.
//
// The constructor takes the buffer size in bytes; the element count handed to
// the memset routine is bytes / sizeof(T). The reported name has the form
// "memset<bits-per-element>_<bytes>", e.g. "memset32_1024".
//
// onDraw() is only declared here; it is provided per element type through the
// explicit specializations that follow this class in the file.
template <typename T>
class MemsetBench : public Benchmark {
public:
    explicit MemsetBench(size_t bytes)
        : fN(bytes / sizeof(T))
        , fBuffer(fN)
        , fName(SkStringPrintf("memset%zu_%zu", sizeof(T)*8, bytes)) {}

    // Accepts only the non-rendering backend.
    bool isSuitableFor(Backend backend) override {
        return kNonRendering_Backend == backend;
    }

    const char* onGetName() override {
        return fName.c_str();
    }

    void onDraw(int loops, SkCanvas*) override;

private:
    // Declaration order matters: fBuffer is constructed from fN, so fN must
    // be declared (and therefore initialized) first.
    int              fN;       // element count: bytes / sizeof(T)
    SkAutoTMalloc<T> fBuffer;  // destination passed to sk_memsetNN
    SkString         fName;    // name returned by onGetName()
};
// 64-bit elements: calls sk_memset64 on fBuffer with count fN, 1000 times per
// requested loop.
template <>
void MemsetBench<uint64_t>::onDraw(int loops, SkCanvas*) {
    const int reps = 1000*loops;
    for (int rep = 0; rep < reps; ++rep) {
        sk_memset64(fBuffer.get(), 0xFACEFACEFACEFACE, fN);
    }
}
// 32-bit elements: calls sk_memset32 on fBuffer with count fN, 1000 times per
// requested loop.
template <>
void MemsetBench<uint32_t>::onDraw(int loops, SkCanvas*) {
    const int reps = 1000*loops;
    for (int rep = 0; rep < reps; ++rep) {
        sk_memset32(fBuffer.get(), 0xFACEB004, fN);
    }
}
// 16-bit elements: calls sk_memset16 on fBuffer with count fN, 1000 times per
// requested loop.
template <>
void MemsetBench<uint16_t>::onDraw(int loops, SkCanvas*) {
    const int reps = 1000*loops;
    for (int rep = 0; rep < reps; ++rep) {
        sk_memset16(fBuffer.get(), 0x4973, fN);
    }
}
// Benchmark registrations: every element width is run over the same set of
// buffer sizes (the constructor argument is the size in bytes, 16 .. 65536).
// NOTE(review): each DEF_BENCH is kept on its own line in case the macro
// derives unique identifiers from the line number -- confirm in Benchmark.h.

// 64-bit elements (sk_memset64).
DEF_BENCH(return new MemsetBench<uint64_t>(16));
DEF_BENCH(return new MemsetBench<uint64_t>(64));
DEF_BENCH(return new MemsetBench<uint64_t>(256));
DEF_BENCH(return new MemsetBench<uint64_t>(512));
DEF_BENCH(return new MemsetBench<uint64_t>(768));
DEF_BENCH(return new MemsetBench<uint64_t>(1024));
DEF_BENCH(return new MemsetBench<uint64_t>(2048));
DEF_BENCH(return new MemsetBench<uint64_t>(4096));
DEF_BENCH(return new MemsetBench<uint64_t>(65536));

// 32-bit elements (sk_memset32).
DEF_BENCH(return new MemsetBench<uint32_t>(16));
DEF_BENCH(return new MemsetBench<uint32_t>(64));
DEF_BENCH(return new MemsetBench<uint32_t>(256));
DEF_BENCH(return new MemsetBench<uint32_t>(512));
DEF_BENCH(return new MemsetBench<uint32_t>(768));
DEF_BENCH(return new MemsetBench<uint32_t>(1024));
DEF_BENCH(return new MemsetBench<uint32_t>(2048));
DEF_BENCH(return new MemsetBench<uint32_t>(4096));
DEF_BENCH(return new MemsetBench<uint32_t>(65536));

// 16-bit elements (sk_memset16).
DEF_BENCH(return new MemsetBench<uint16_t>(16));
DEF_BENCH(return new MemsetBench<uint16_t>(64));
DEF_BENCH(return new MemsetBench<uint16_t>(256));
DEF_BENCH(return new MemsetBench<uint16_t>(512));
DEF_BENCH(return new MemsetBench<uint16_t>(768));
DEF_BENCH(return new MemsetBench<uint16_t>(1024));
DEF_BENCH(return new MemsetBench<uint16_t>(2048));
DEF_BENCH(return new MemsetBench<uint16_t>(4096));
DEF_BENCH(return new MemsetBench<uint16_t>(65536));