Remove sk_memcpy32

It's only implemented on x86, where the existing benchmark says memcpy() is
faster for all cases:

Timer overhead: 24ns
curr/maxrss    loops    min    median    mean    max    stddev    samples       config    bench
  10/10  MB    1    35.9µs    36.2µs    36.2µs    36.6µs    1%    ▁▂▄▅▅▃█▄▄▅    nonrendering    sk_memcpy32_100000
  10/10  MB    13    2.27µs    2.28µs    2.28µs    2.29µs    0%    █▄▃▅▃▁▃▅▁▄    nonrendering    sk_memcpy32_10000
  11/11  MB    677    91.6ns    95.9ns    94.5ns    99.4ns    3%    ▅▅▅▅▅█▁▁▁▁    nonrendering    sk_memcpy32_1000
  11/11  MB    1171    20ns    20.9ns    21.3ns    23.4ns    6%    ▁▁▇▃▃▃█▇▃▃    nonrendering    sk_memcpy32_100
  11/11  MB    1952    14ns    14ns    14.3ns    15.2ns    3%    ▁▁██▁▁▁▁▁▁    nonrendering    sk_memcpy32_10
  11/11  MB    5    33.6µs    33.7µs    34.1µs    35.2µs    2%    ▆▇█▁▁▁▁▁▁▁    nonrendering    memcpy32_memcpy_100000
  11/11  MB    18    2.12µs    2.22µs    2.24µs    2.39µs    5%    ▂█▄▇█▄▇▁▁▁    nonrendering    memcpy32_memcpy_10000
  11/11  MB    1112    87.3ns    87.3ns    89.1ns    93.7ns    3%    ▄██▄▁▁▁▁▁▁    nonrendering    memcpy32_memcpy_1000
  11/11  MB    2124    12.8ns    13.3ns    13.5ns    14.8ns    6%    ▁▁▁█▃▃█▇▃▃    nonrendering    memcpy32_memcpy_100
  11/11  MB    3077    9ns    9.41ns    9.52ns    10.2ns    4%    ▃█▁█▃▃▃▃▃▃    nonrendering    memcpy32_memcpy_10

(Why?  One fewer thing to port to SkOpts.)

BUG=skia:4117

Review URL: https://codereview.chromium.org/1256763003
This commit is contained in:
mtklein 2015-07-27 11:08:28 -07:00 committed by Commit bot
parent ce2c5055ce
commit 58fd2c8af4
10 changed files with 2 additions and 149 deletions

View File

@ -1,77 +0,0 @@
/*
* Copyright 2014 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "Benchmark.h"
#include "SkRandom.h"
#include "SkTemplates.h"
#include "SkUtils.h"
// Benchmark that repeatedly invokes a 32-bit copy routine over fCount words.
// Memcpy32 is any callable invocable as memcpy32(dst, src, count).
template <typename Memcpy32>
class Memcpy32Bench : public Benchmark {
public:
    // The reported name is "<name>_<count>".
    explicit Memcpy32Bench(int count, Memcpy32 memcpy32, const char* name)
        : fCount(count)
        , fMemcpy32(memcpy32)
        , fName(SkStringPrintf("%s_%d", name, count)) {}

    const char* onGetName() override { return fName.c_str(); }

    // Only runs on the non-rendering backend.
    bool isSuitableFor(Backend backend) override {
        return kNonRendering_Backend == backend;
    }

    // Sizes both buffers to fCount words and fills the source from SkRandom.
    void onPreDraw() override {
        fDst.reset(fCount);
        fSrc.reset(fCount);

        SkRandom rand;
        for (int word = 0; word < fCount; ++word) {
            fSrc[word] = rand.nextU();
        }
    }

    // Performs `loops` copies of the whole source buffer into the destination.
    void onDraw(const int loops, SkCanvas*) override {
        for (int remaining = loops; remaining > 0; --remaining) {
            fMemcpy32(fDst, fSrc, fCount);
        }
    }

private:
    SkAutoTMalloc<uint32_t> fDst, fSrc;
    int fCount;
    Memcpy32 fMemcpy32;
    const SkString fName;
};
// Factory that lets the compiler deduce Memcpy32 from the argument, so callers
// do not have to spell out the template parameter. Returns a raw pointer to a
// newly allocated benchmark.
template <typename Memcpy32>
static Memcpy32Bench<Memcpy32>* Bench(int count, Memcpy32 memcpy32, const char* name) {
    typedef Memcpy32Bench<Memcpy32> BenchType;
    BenchType* bench = new BenchType(count, memcpy32, name);
    return bench;
}
// Registers one benchmark through DEF_BENCH. #memcpy32 stringifies the
// routine's identifier so that it is passed to Bench() as the name argument.
#define BENCH(memcpy32, count) DEF_BENCH(return Bench(count, memcpy32, #memcpy32); )
// Baseline: defer entirely to libc's memcpy(), copying `count` 32-bit words
// from src to dst.
static void memcpy32_memcpy(uint32_t* dst, const uint32_t* src, int count) {
    const size_t byteCount = sizeof(uint32_t) * count;
    memcpy(dst, src, byteCount);
}
BENCH(memcpy32_memcpy, 10)
BENCH(memcpy32_memcpy, 100)
BENCH(memcpy32_memcpy, 1000)
BENCH(memcpy32_memcpy, 10000)
BENCH(memcpy32_memcpy, 100000)
// Test our chosen best, from SkUtils.h
BENCH(sk_memcpy32, 10)
BENCH(sk_memcpy32, 100)
BENCH(sk_memcpy32, 1000)
BENCH(sk_memcpy32, 10000)
BENCH(sk_memcpy32, 100000)
#undef BENCH

View File

@ -61,15 +61,6 @@ SkMemset32Proc SkMemset32GetPlatformProc();
#undef SK_SMALL_MEMSET
/** Similar to memcpy(), but copies count 32-bit values from src to dst.
@param dst The memory the values are copied into
@param src The memory the values are copied from
@param count The number of 32-bit values to copy
*/
void sk_memcpy32(uint32_t dst[], const uint32_t src[], int count);
/** Function-pointer type with the same signature as sk_memcpy32(). */
typedef void (*SkMemcpy32Proc)(uint32_t dst[], const uint32_t src[], int count);
/** Returns a platform-specific copy routine.
NOTE(review): implementations appear to return NULL when no specialized
routine exists; confirm that callers handle a null result.
*/
SkMemcpy32Proc SkMemcpy32GetPlatformProc();
///////////////////////////////////////////////////////////////////////////////
#define kMaxBytesInUTF8Sequence 4

View File

@ -41,7 +41,7 @@ static inline void copy_color_table(const SkImageInfo& dstInfo, SkColorTable* co
SkASSERT(NULL != inputColorPtr);
SkASSERT(NULL != inputColorCount);
SkASSERT(NULL != colorTable);
sk_memcpy32(inputColorPtr, colorTable->readColors(), *inputColorCount);
memcpy(inputColorPtr, colorTable->readColors(), *inputColorCount * 4);
}
}

View File

@ -16,7 +16,7 @@ static void S32_Opaque_BlitRow32(SkPMColor* SK_RESTRICT dst,
const SkPMColor* SK_RESTRICT src,
int count, U8CPU alpha) {
SkASSERT(255 == alpha);
sk_memcpy32(dst, src, count);
memcpy(dst, src, count * 4);
}
static void S32_Blend_BlitRow32(SkPMColor* SK_RESTRICT dst,

View File

@ -109,10 +109,6 @@ static void sk_memset32_portable(uint32_t dst[], uint32_t value, int count) {
}
}
// Portable fallback: a plain memcpy() of `count` 32-bit words from src to dst.
static void sk_memcpy32_portable(uint32_t dst[], const uint32_t src[], int count) {
    const size_t byteCount = count * sizeof(uint32_t);
    memcpy(dst, src, byteCount);
}
namespace {
// These three methods technically need external linkage to be passed as template parameters.
// Since they can't be static, we hide them in an anonymous namespace instead.
@ -127,11 +123,6 @@ SkMemset32Proc choose_memset32() {
return proc ? proc : sk_memset32_portable;
}
// Prefers the platform-specific copy routine when one is reported; otherwise
// falls back to the portable implementation.
SkMemcpy32Proc choose_memcpy32() {
    if (SkMemcpy32Proc platformProc = SkMemcpy32GetPlatformProc()) {
        return platformProc;
    }
    return sk_memcpy32_portable;
}
} // namespace
void sk_memset16_large(uint16_t dst[], uint16_t value, int count) {
@ -144,11 +135,6 @@ void sk_memset32_large(uint32_t dst[], uint32_t value, int count) {
proc.get()(dst, value, count);
}
// Public entry point: forwards (dst, src, count) to the function pointer held
// by `proc`, which is declared from choose_memcpy32.
// NOTE(review): SK_DECLARE_STATIC_LAZY_FN_PTR presumably resolves the pointer
// once on first use and caches it; confirm against the macro's definition.
void sk_memcpy32(uint32_t dst[], const uint32_t src[], int count) {
SK_DECLARE_STATIC_LAZY_FN_PTR(SkMemcpy32Proc, proc, choose_memcpy32);
proc.get()(dst, src, count);
}
///////////////////////////////////////////////////////////////////////////////
/* 0xxxxxxx 1 total

View File

@ -67,33 +67,3 @@ void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count)
--count;
}
}
// Copies `count` 32-bit words from src to dst using SSE2.
// For count >= 16: scalar copies until dst is 16-byte aligned, then batches of
// sixteen words (four unaligned 128-bit loads, four aligned 128-bit stores).
// Whatever is left over, or everything when count < 16, is copied word by word.
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count)
{
    if (count >= 16) {
        // Walk forward one word at a time until the destination address is a
        // multiple of 16, so the stores below may use the aligned form.
        for (; reinterpret_cast<size_t>(dst) & 0x0F; --count) {
            *dst++ = *src++;
        }

        __m128i *out = reinterpret_cast<__m128i*>(dst);
        const __m128i *in = reinterpret_cast<const __m128i*>(src);

        // src carries no alignment guarantee, hence the unaligned loads.
        for (; count >= 16; count -= 16, in += 4, out += 4) {
            const __m128i v0 = _mm_loadu_si128(in + 0);
            const __m128i v1 = _mm_loadu_si128(in + 1);
            const __m128i v2 = _mm_loadu_si128(in + 2);
            const __m128i v3 = _mm_loadu_si128(in + 3);
            _mm_store_si128(out + 0, v0);
            _mm_store_si128(out + 1, v1);
            _mm_store_si128(out + 2, v2);
            _mm_store_si128(out + 3, v3);
        }

        dst = reinterpret_cast<uint32_t*>(out);
        src = reinterpret_cast<const uint32_t*>(in);
    }

    // Scalar tail.
    for (; count > 0; --count) {
        *dst++ = *src++;
    }
}

View File

@ -12,6 +12,5 @@
void sk_memset16_SSE2(uint16_t *dst, uint16_t value, int count);
void sk_memset32_SSE2(uint32_t *dst, uint32_t value, int count);
void sk_memcpy32_SSE2(uint32_t *dst, const uint32_t *src, int count);
#endif

View File

@ -30,7 +30,3 @@ SkMemset32Proc SkMemset32GetPlatformProc() {
return nullptr;
#endif
}
// Always returns a null proc: no specialized 32-bit copy routine is offered here.
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
    return nullptr;
}

View File

@ -16,7 +16,3 @@ SkMemset16Proc SkMemset16GetPlatformProc() {
SkMemset32Proc SkMemset32GetPlatformProc() {
return NULL;
}
// Always returns NULL: no specialized 32-bit copy routine is offered here.
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
return NULL;
}

View File

@ -317,14 +317,6 @@ SkMemset32Proc SkMemset32GetPlatformProc() {
}
}
// Returns the SSE2 copy routine when supports_simd() reports SSE2 support;
// otherwise returns NULL.
SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
    if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
        return NULL;
    }
    return sk_memcpy32_SSE2;
}
////////////////////////////////////////////////////////////////////////////////
SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {