Remove sk_memcpy32
It's only implemented on x86, where the existing benchmark says memcpy() is faster for all cases:

Timer overhead: 24ns
curr/maxrss loops min median mean max stddev samples config bench
10/10 MB 1 35.9µs 36.2µs 36.2µs 36.6µs 1% ▁▂▄▅▅▃█▄▄▅ nonrendering sk_memcpy32_100000
10/10 MB 13 2.27µs 2.28µs 2.28µs 2.29µs 0% █▄▃▅▃▁▃▅▁▄ nonrendering sk_memcpy32_10000
11/11 MB 677 91.6ns 95.9ns 94.5ns 99.4ns 3% ▅▅▅▅▅█▁▁▁▁ nonrendering sk_memcpy32_1000
11/11 MB 1171 20ns 20.9ns 21.3ns 23.4ns 6% ▁▁▇▃▃▃█▇▃▃ nonrendering sk_memcpy32_100
11/11 MB 1952 14ns 14ns 14.3ns 15.2ns 3% ▁▁██▁▁▁▁▁▁ nonrendering sk_memcpy32_10
11/11 MB 5 33.6µs 33.7µs 34.1µs 35.2µs 2% ▆▇█▁▁▁▁▁▁▁ nonrendering memcpy32_memcpy_100000
11/11 MB 18 2.12µs 2.22µs 2.24µs 2.39µs 5% ▂█▄▇█▄▇▁▁▁ nonrendering memcpy32_memcpy_10000
11/11 MB 1112 87.3ns 87.3ns 89.1ns 93.7ns 3% ▄██▄▁▁▁▁▁▁ nonrendering memcpy32_memcpy_1000
11/11 MB 2124 12.8ns 13.3ns 13.5ns 14.8ns 6% ▁▁▁█▃▃█▇▃▃ nonrendering memcpy32_memcpy_100
11/11 MB 3077 9ns 9.41ns 9.52ns 10.2ns 4% ▃█▁█▃▃▃▃▃▃ nonrendering memcpy32_memcpy_10

(Why? One fewer thing to port to SkOpts.)

BUG=skia:4117

Review URL: https://codereview.chromium.org/1256763003
This commit is contained in:
parent
ce2c5055ce
commit
58fd2c8af4
@ -1,77 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright 2014 Google Inc.
|
|
||||||
*
|
|
||||||
* Use of this source code is governed by a BSD-style license that can be
|
|
||||||
* found in the LICENSE file.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "Benchmark.h"
|
|
||||||
#include "SkRandom.h"
|
|
||||||
#include "SkTemplates.h"
|
|
||||||
#include "SkUtils.h"
|
|
||||||
|
|
||||||
template <typename Memcpy32>
|
|
||||||
class Memcpy32Bench : public Benchmark {
|
|
||||||
public:
|
|
||||||
explicit Memcpy32Bench(int count, Memcpy32 memcpy32, const char* name)
|
|
||||||
: fCount(count)
|
|
||||||
, fMemcpy32(memcpy32)
|
|
||||||
, fName(SkStringPrintf("%s_%d", name, count)) {}
|
|
||||||
|
|
||||||
const char* onGetName() override {
|
|
||||||
return fName.c_str();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isSuitableFor(Backend backend) override {
|
|
||||||
return backend == kNonRendering_Backend;
|
|
||||||
}
|
|
||||||
|
|
||||||
void onPreDraw() override {
|
|
||||||
fDst.reset(fCount);
|
|
||||||
fSrc.reset(fCount);
|
|
||||||
|
|
||||||
SkRandom rand;
|
|
||||||
for (int i = 0; i < fCount; i++) {
|
|
||||||
fSrc[i] = rand.nextU();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void onDraw(const int loops, SkCanvas*) override {
|
|
||||||
for (int i = 0; i < loops; i++) {
|
|
||||||
fMemcpy32(fDst, fSrc, fCount);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
SkAutoTMalloc<uint32_t> fDst, fSrc;
|
|
||||||
|
|
||||||
int fCount;
|
|
||||||
Memcpy32 fMemcpy32;
|
|
||||||
const SkString fName;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Memcpy32>
|
|
||||||
static Memcpy32Bench<Memcpy32>* Bench(int count, Memcpy32 memcpy32, const char* name) {
|
|
||||||
return new Memcpy32Bench<Memcpy32>(count, memcpy32, name);
|
|
||||||
}
|
|
||||||
#define BENCH(memcpy32, count) DEF_BENCH(return Bench(count, memcpy32, #memcpy32); )
|
|
||||||
|
|
||||||
|
|
||||||
// Let the libc developers do what they think is best.
|
|
||||||
static void memcpy32_memcpy(uint32_t* dst, const uint32_t* src, int count) {
|
|
||||||
memcpy(dst, src, sizeof(uint32_t) * count);
|
|
||||||
}
|
|
||||||
BENCH(memcpy32_memcpy, 10)
|
|
||||||
BENCH(memcpy32_memcpy, 100)
|
|
||||||
BENCH(memcpy32_memcpy, 1000)
|
|
||||||
BENCH(memcpy32_memcpy, 10000)
|
|
||||||
BENCH(memcpy32_memcpy, 100000)
|
|
||||||
|
|
||||||
// Test our chosen best, from SkUtils.h
|
|
||||||
BENCH(sk_memcpy32, 10)
|
|
||||||
BENCH(sk_memcpy32, 100)
|
|
||||||
BENCH(sk_memcpy32, 1000)
|
|
||||||
BENCH(sk_memcpy32, 10000)
|
|
||||||
BENCH(sk_memcpy32, 100000)
|
|
||||||
|
|
||||||
#undef BENCH
|
|
@ -61,15 +61,6 @@ SkMemset32Proc SkMemset32GetPlatformProc();
|
|||||||
|
|
||||||
#undef SK_SMALL_MEMSET
|
#undef SK_SMALL_MEMSET
|
||||||
|
|
||||||
/** Similar to memcpy(), but it copies count 32bit values from src to dst.
|
|
||||||
@param dst The memory to have value copied into it
|
|
||||||
@param src The memory to have value copied from it
|
|
||||||
@param count The number of values should be copied.
|
|
||||||
*/
|
|
||||||
void sk_memcpy32(uint32_t dst[], const uint32_t src[], int count);
|
|
||||||
typedef void (*SkMemcpy32Proc)(uint32_t dst[], const uint32_t src[], int count);
|
|
||||||
SkMemcpy32Proc SkMemcpy32GetPlatformProc();
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define kMaxBytesInUTF8Sequence 4
|
#define kMaxBytesInUTF8Sequence 4
|
||||||
|
@ -41,7 +41,7 @@ static inline void copy_color_table(const SkImageInfo& dstInfo, SkColorTable* co
|
|||||||
SkASSERT(NULL != inputColorPtr);
|
SkASSERT(NULL != inputColorPtr);
|
||||||
SkASSERT(NULL != inputColorCount);
|
SkASSERT(NULL != inputColorCount);
|
||||||
SkASSERT(NULL != colorTable);
|
SkASSERT(NULL != colorTable);
|
||||||
sk_memcpy32(inputColorPtr, colorTable->readColors(), *inputColorCount);
|
memcpy(inputColorPtr, colorTable->readColors(), *inputColorCount * 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,7 +16,7 @@ static void S32_Opaque_BlitRow32(SkPMColor* SK_RESTRICT dst,
|
|||||||
const SkPMColor* SK_RESTRICT src,
|
const SkPMColor* SK_RESTRICT src,
|
||||||
int count, U8CPU alpha) {
|
int count, U8CPU alpha) {
|
||||||
SkASSERT(255 == alpha);
|
SkASSERT(255 == alpha);
|
||||||
sk_memcpy32(dst, src, count);
|
memcpy(dst, src, count * 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void S32_Blend_BlitRow32(SkPMColor* SK_RESTRICT dst,
|
static void S32_Blend_BlitRow32(SkPMColor* SK_RESTRICT dst,
|
||||||
|
@ -109,10 +109,6 @@ static void sk_memset32_portable(uint32_t dst[], uint32_t value, int count) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void sk_memcpy32_portable(uint32_t dst[], const uint32_t src[], int count) {
|
|
||||||
memcpy(dst, src, count * sizeof(uint32_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
// These three methods technically need external linkage to be passed as template parameters.
|
// These three methods technically need external linkage to be passed as template parameters.
|
||||||
// Since they can't be static, we hide them in an anonymous namespace instead.
|
// Since they can't be static, we hide them in an anonymous namespace instead.
|
||||||
@ -127,11 +123,6 @@ SkMemset32Proc choose_memset32() {
|
|||||||
return proc ? proc : sk_memset32_portable;
|
return proc ? proc : sk_memset32_portable;
|
||||||
}
|
}
|
||||||
|
|
||||||
SkMemcpy32Proc choose_memcpy32() {
|
|
||||||
SkMemcpy32Proc proc = SkMemcpy32GetPlatformProc();
|
|
||||||
return proc ? proc : sk_memcpy32_portable;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void sk_memset16_large(uint16_t dst[], uint16_t value, int count) {
|
void sk_memset16_large(uint16_t dst[], uint16_t value, int count) {
|
||||||
@ -144,11 +135,6 @@ void sk_memset32_large(uint32_t dst[], uint32_t value, int count) {
|
|||||||
proc.get()(dst, value, count);
|
proc.get()(dst, value, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sk_memcpy32(uint32_t dst[], const uint32_t src[], int count) {
|
|
||||||
SK_DECLARE_STATIC_LAZY_FN_PTR(SkMemcpy32Proc, proc, choose_memcpy32);
|
|
||||||
proc.get()(dst, src, count);
|
|
||||||
}
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
/* 0xxxxxxx 1 total
|
/* 0xxxxxxx 1 total
|
||||||
|
@ -67,33 +67,3 @@ void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)
|
|||||||
--count;
|
--count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)
|
|
||||||
{
|
|
||||||
if (count >= 16) {
|
|
||||||
while (((size_t)dst) & 0x0F) {
|
|
||||||
*dst++ = *src++;
|
|
||||||
--count;
|
|
||||||
}
|
|
||||||
__m128i *dst128 = reinterpret_cast<__m128i*>(dst);
|
|
||||||
const __m128i *src128 = reinterpret_cast<const __m128i*>(src);
|
|
||||||
while (count >= 16) {
|
|
||||||
__m128i a = _mm_loadu_si128(src128++);
|
|
||||||
__m128i b = _mm_loadu_si128(src128++);
|
|
||||||
__m128i c = _mm_loadu_si128(src128++);
|
|
||||||
__m128i d = _mm_loadu_si128(src128++);
|
|
||||||
|
|
||||||
_mm_store_si128(dst128++, a);
|
|
||||||
_mm_store_si128(dst128++, b);
|
|
||||||
_mm_store_si128(dst128++, c);
|
|
||||||
_mm_store_si128(dst128++, d);
|
|
||||||
count -= 16;
|
|
||||||
}
|
|
||||||
dst = reinterpret_cast<uint32_t*>(dst128);
|
|
||||||
src = reinterpret_cast<const uint32_t*>(src128);
|
|
||||||
}
|
|
||||||
while (count > 0) {
|
|
||||||
*dst++ = *src++;
|
|
||||||
--count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -12,6 +12,5 @@
|
|||||||
|
|
||||||
void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count);
|
void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count);
|
||||||
void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count);
|
void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count);
|
||||||
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -30,7 +30,3 @@ SkMemset32Proc SkMemset32GetPlatformProc() {
|
|||||||
return nullptr;
|
return nullptr;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
@ -16,7 +16,3 @@ SkMemset16Proc SkMemset16GetPlatformProc() {
|
|||||||
SkMemset32Proc SkMemset32GetPlatformProc() {
|
SkMemset32Proc SkMemset32GetPlatformProc() {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
@ -317,14 +317,6 @@ SkMemset32Proc SkMemset32GetPlatformProc() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
|
|
||||||
if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
|
|
||||||
return sk_memcpy32_SSE2;
|
|
||||||
} else {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
|
SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
|
||||||
|
Loading…
Reference in New Issue
Block a user