Remove sk_memcpy32
It's only implemented on x86, where the existing benchmark says memcpy() is faster for all cases:

Timer overhead: 24ns
curr/maxrss loops min median mean max stddev samples config bench
10/10 MB 1 35.9µs 36.2µs 36.2µs 36.6µs 1% ▁▂▄▅▅▃█▄▄▅ nonrendering sk_memcpy32_100000
10/10 MB 13 2.27µs 2.28µs 2.28µs 2.29µs 0% █▄▃▅▃▁▃▅▁▄ nonrendering sk_memcpy32_10000
11/11 MB 677 91.6ns 95.9ns 94.5ns 99.4ns 3% ▅▅▅▅▅█▁▁▁▁ nonrendering sk_memcpy32_1000
11/11 MB 1171 20ns 20.9ns 21.3ns 23.4ns 6% ▁▁▇▃▃▃█▇▃▃ nonrendering sk_memcpy32_100
11/11 MB 1952 14ns 14ns 14.3ns 15.2ns 3% ▁▁██▁▁▁▁▁▁ nonrendering sk_memcpy32_10
11/11 MB 5 33.6µs 33.7µs 34.1µs 35.2µs 2% ▆▇█▁▁▁▁▁▁▁ nonrendering memcpy32_memcpy_100000
11/11 MB 18 2.12µs 2.22µs 2.24µs 2.39µs 5% ▂█▄▇█▄▇▁▁▁ nonrendering memcpy32_memcpy_10000
11/11 MB 1112 87.3ns 87.3ns 89.1ns 93.7ns 3% ▄██▄▁▁▁▁▁▁ nonrendering memcpy32_memcpy_1000
11/11 MB 2124 12.8ns 13.3ns 13.5ns 14.8ns 6% ▁▁▁█▃▃█▇▃▃ nonrendering memcpy32_memcpy_100
11/11 MB 3077 9ns 9.41ns 9.52ns 10.2ns 4% ▃█▁█▃▃▃▃▃▃ nonrendering memcpy32_memcpy_10

(Why? One fewer thing to port to SkOpts.)

BUG=skia:4117

Review URL: https://codereview.chromium.org/1256763003
This commit is contained in:
parent
ce2c5055ce
commit
58fd2c8af4
@ -1,77 +0,0 @@
|
||||
/*
|
||||
* Copyright 2014 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "Benchmark.h"
|
||||
#include "SkRandom.h"
|
||||
#include "SkTemplates.h"
|
||||
#include "SkUtils.h"
|
||||
|
||||
template <typename Memcpy32>
|
||||
class Memcpy32Bench : public Benchmark {
|
||||
public:
|
||||
explicit Memcpy32Bench(int count, Memcpy32 memcpy32, const char* name)
|
||||
: fCount(count)
|
||||
, fMemcpy32(memcpy32)
|
||||
, fName(SkStringPrintf("%s_%d", name, count)) {}
|
||||
|
||||
const char* onGetName() override {
|
||||
return fName.c_str();
|
||||
}
|
||||
|
||||
bool isSuitableFor(Backend backend) override {
|
||||
return backend == kNonRendering_Backend;
|
||||
}
|
||||
|
||||
void onPreDraw() override {
|
||||
fDst.reset(fCount);
|
||||
fSrc.reset(fCount);
|
||||
|
||||
SkRandom rand;
|
||||
for (int i = 0; i < fCount; i++) {
|
||||
fSrc[i] = rand.nextU();
|
||||
}
|
||||
}
|
||||
|
||||
void onDraw(const int loops, SkCanvas*) override {
|
||||
for (int i = 0; i < loops; i++) {
|
||||
fMemcpy32(fDst, fSrc, fCount);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
SkAutoTMalloc<uint32_t> fDst, fSrc;
|
||||
|
||||
int fCount;
|
||||
Memcpy32 fMemcpy32;
|
||||
const SkString fName;
|
||||
};
|
||||
|
||||
template <typename Memcpy32>
|
||||
static Memcpy32Bench<Memcpy32>* Bench(int count, Memcpy32 memcpy32, const char* name) {
|
||||
return new Memcpy32Bench<Memcpy32>(count, memcpy32, name);
|
||||
}
|
||||
#define BENCH(memcpy32, count) DEF_BENCH(return Bench(count, memcpy32, #memcpy32); )
|
||||
|
||||
|
||||
// Let the libc developers do what they think is best.
|
||||
static void memcpy32_memcpy(uint32_t* dst, const uint32_t* src, int count) {
|
||||
memcpy(dst, src, sizeof(uint32_t) * count);
|
||||
}
|
||||
BENCH(memcpy32_memcpy, 10)
|
||||
BENCH(memcpy32_memcpy, 100)
|
||||
BENCH(memcpy32_memcpy, 1000)
|
||||
BENCH(memcpy32_memcpy, 10000)
|
||||
BENCH(memcpy32_memcpy, 100000)
|
||||
|
||||
// Test our chosen best, from SkUtils.h
|
||||
BENCH(sk_memcpy32, 10)
|
||||
BENCH(sk_memcpy32, 100)
|
||||
BENCH(sk_memcpy32, 1000)
|
||||
BENCH(sk_memcpy32, 10000)
|
||||
BENCH(sk_memcpy32, 100000)
|
||||
|
||||
#undef BENCH
|
@ -61,15 +61,6 @@ SkMemset32Proc SkMemset32GetPlatformProc();
|
||||
|
||||
#undef SK_SMALL_MEMSET
|
||||
|
||||
/** Similar to memcpy(), but it copies count 32bit values from src to dst.
|
||||
@param dst The memory to have value copied into it
|
||||
@param src The memory to have value copied from it
|
||||
@param count The number of values should be copied.
|
||||
*/
|
||||
void sk_memcpy32(uint32_t dst[], const uint32_t src[], int count);
|
||||
typedef void (*SkMemcpy32Proc)(uint32_t dst[], const uint32_t src[], int count);
|
||||
SkMemcpy32Proc SkMemcpy32GetPlatformProc();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define kMaxBytesInUTF8Sequence 4
|
||||
|
@ -41,7 +41,7 @@ static inline void copy_color_table(const SkImageInfo& dstInfo, SkColorTable* co
|
||||
SkASSERT(NULL != inputColorPtr);
|
||||
SkASSERT(NULL != inputColorCount);
|
||||
SkASSERT(NULL != colorTable);
|
||||
sk_memcpy32(inputColorPtr, colorTable->readColors(), *inputColorCount);
|
||||
memcpy(inputColorPtr, colorTable->readColors(), *inputColorCount * 4);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,7 @@ static void S32_Opaque_BlitRow32(SkPMColor* SK_RESTRICT dst,
|
||||
const SkPMColor* SK_RESTRICT src,
|
||||
int count, U8CPU alpha) {
|
||||
SkASSERT(255 == alpha);
|
||||
sk_memcpy32(dst, src, count);
|
||||
memcpy(dst, src, count * 4);
|
||||
}
|
||||
|
||||
static void S32_Blend_BlitRow32(SkPMColor* SK_RESTRICT dst,
|
||||
|
@ -109,10 +109,6 @@ static void sk_memset32_portable(uint32_t dst[], uint32_t value, int count) {
|
||||
}
|
||||
}
|
||||
|
||||
static void sk_memcpy32_portable(uint32_t dst[], const uint32_t src[], int count) {
|
||||
memcpy(dst, src, count * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
namespace {
|
||||
// These three methods technically need external linkage to be passed as template parameters.
|
||||
// Since they can't be static, we hide them in an anonymous namespace instead.
|
||||
@ -127,11 +123,6 @@ SkMemset32Proc choose_memset32() {
|
||||
return proc ? proc : sk_memset32_portable;
|
||||
}
|
||||
|
||||
SkMemcpy32Proc choose_memcpy32() {
|
||||
SkMemcpy32Proc proc = SkMemcpy32GetPlatformProc();
|
||||
return proc ? proc : sk_memcpy32_portable;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void sk_memset16_large(uint16_t dst[], uint16_t value, int count) {
|
||||
@ -144,11 +135,6 @@ void sk_memset32_large(uint32_t dst[], uint32_t value, int count) {
|
||||
proc.get()(dst, value, count);
|
||||
}
|
||||
|
||||
void sk_memcpy32(uint32_t dst[], const uint32_t src[], int count) {
|
||||
SK_DECLARE_STATIC_LAZY_FN_PTR(SkMemcpy32Proc, proc, choose_memcpy32);
|
||||
proc.get()(dst, src, count);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/* 0xxxxxxx 1 total
|
||||
|
@ -67,33 +67,3 @@ void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)
|
||||
--count;
|
||||
}
|
||||
}
|
||||
|
||||
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)
|
||||
{
|
||||
if (count >= 16) {
|
||||
while (((size_t)dst) & 0x0F) {
|
||||
*dst++ = *src++;
|
||||
--count;
|
||||
}
|
||||
__m128i *dst128 = reinterpret_cast<__m128i*>(dst);
|
||||
const __m128i *src128 = reinterpret_cast<const __m128i*>(src);
|
||||
while (count >= 16) {
|
||||
__m128i a = _mm_loadu_si128(src128++);
|
||||
__m128i b = _mm_loadu_si128(src128++);
|
||||
__m128i c = _mm_loadu_si128(src128++);
|
||||
__m128i d = _mm_loadu_si128(src128++);
|
||||
|
||||
_mm_store_si128(dst128++, a);
|
||||
_mm_store_si128(dst128++, b);
|
||||
_mm_store_si128(dst128++, c);
|
||||
_mm_store_si128(dst128++, d);
|
||||
count -= 16;
|
||||
}
|
||||
dst = reinterpret_cast<uint32_t*>(dst128);
|
||||
src = reinterpret_cast<const uint32_t*>(src128);
|
||||
}
|
||||
while (count > 0) {
|
||||
*dst++ = *src++;
|
||||
--count;
|
||||
}
|
||||
}
|
||||
|
@ -12,6 +12,5 @@
|
||||
|
||||
void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count);
|
||||
void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count);
|
||||
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count);
|
||||
|
||||
#endif
|
||||
|
@ -30,7 +30,3 @@ SkMemset32Proc SkMemset32GetPlatformProc() {
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -16,7 +16,3 @@ SkMemset16Proc SkMemset16GetPlatformProc() {
|
||||
SkMemset32Proc SkMemset32GetPlatformProc() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -317,14 +317,6 @@ SkMemset32Proc SkMemset32GetPlatformProc() {
|
||||
}
|
||||
}
|
||||
|
||||
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
|
||||
if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
|
||||
return sk_memcpy32_SSE2;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
|
||||
|
Loading…
Reference in New Issue
Block a user