Implement a NEON version of morphology. This is good for ~2.2X speedup on Tegra3.

R=mtklein@google.com, mtklein, reed@google.com

Review URL: https://codereview.chromium.org/68123003

git-svn-id: http://skia.googlecode.com/svn/trunk@12219 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
senorblanco@chromium.org 2013-11-11 16:48:51 +00:00
parent 13f9e0e4b2
commit f376f5de93
4 changed files with 122 additions and 1 deletions

View File

@ -80,7 +80,6 @@
'../src/opts/SkBlitRow_opts_arm.cpp', '../src/opts/SkBlitRow_opts_arm.cpp',
'../src/opts/SkBlitRow_opts_arm.h', '../src/opts/SkBlitRow_opts_arm.h',
'../src/opts/SkBlurImage_opts_none.cpp', '../src/opts/SkBlurImage_opts_none.cpp',
'../src/opts/SkMorphology_opts_none.cpp',
'../src/opts/SkXfermode_opts_arm.cpp', '../src/opts/SkXfermode_opts_arm.cpp',
], ],
'conditions': [ 'conditions': [
@ -179,6 +178,7 @@
'../src/opts/SkBitmapProcState_matrix_clamp_neon.h', '../src/opts/SkBitmapProcState_matrix_clamp_neon.h',
'../src/opts/SkBitmapProcState_matrix_repeat_neon.h', '../src/opts/SkBitmapProcState_matrix_repeat_neon.h',
'../src/opts/SkBlitRow_opts_arm_neon.cpp', '../src/opts/SkBlitRow_opts_arm_neon.cpp',
'../src/opts/SkMorphology_opts_neon.cpp',
'../src/opts/SkXfermode_opts_arm_neon.cpp', '../src/opts/SkXfermode_opts_arm_neon.cpp',
], ],
}, },

View File

@ -0,0 +1,80 @@
/*
* Copyright 2013 The Android Open Source Project
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkColorPriv.h"
#include "SkMorphology_opts.h"
#include "SkMorphology_opts_neon.h"
#include <arm_neon.h>
/* NEON versions of dilateX, dilateY, erodeX and erodeY.
* The portable versions are in src/effects/SkMorphologyImageFilter.cpp.
*/
/* Operator applied by the kernel: kDilate keeps the per-byte maximum of the
 * window, kErode keeps the per-byte minimum.
 */
enum MorphType {
    kDilate,
    kErode
};

/* Axis the window runs along. With kX the window advances one pixel at a time;
 * with kY it advances by the caller-supplied stride.
 */
enum MorphDirection {
    kX,
    kY
};
template<MorphType type, MorphDirection direction>
static void SkMorph_neon(const SkPMColor* src, SkPMColor* dst, int radius,
int width, int height, int srcStride, int dstStride)
{
const int srcStrideX = direction == kX ? 1 : srcStride;
const int dstStrideX = direction == kX ? 1 : dstStride;
const int srcStrideY = direction == kX ? srcStride : 1;
const int dstStrideY = direction == kX ? dstStride : 1;
radius = SkMin32(radius, width - 1);
const SkPMColor* upperSrc = src + radius * srcStrideX;
for (int x = 0; x < width; ++x) {
const SkPMColor* lp = src;
const SkPMColor* up = upperSrc;
SkPMColor* dptr = dst;
for (int y = 0; y < height; ++y) {
uint8x8_t max = vdup_n_u8(type == kDilate ? 0 : 255);
for (const SkPMColor* p = lp; p <= up; p += srcStrideX) {
uint8x8_t src_pixel = vreinterpret_u8_u32(vdup_n_u32(*p));
max = type == kDilate ? vmax_u8(src_pixel, max) : vmin_u8(src_pixel, max);
}
*dptr = vget_lane_u32(vreinterpret_u32_u8(max), 0);
dptr += dstStrideY;
lp += srcStrideY;
up += srcStrideY;
}
if (x >= radius) src += srcStrideX;
if (x + radius < width - 1) upperSrc += srcStrideX;
dst += dstStrideX;
}
}
/* NEON dilate along X: forwards every argument unchanged to the shared kernel
 * instantiated for kDilate / kX.
 */
void SkDilateX_neon(const SkPMColor* src, SkPMColor* dst, int radius,
                    int width, int height, int srcStride, int dstStride) {
    SkMorph_neon<kDilate, kX>(src, dst, radius, width, height, srcStride, dstStride);
}
/* NEON erode along X: forwards every argument unchanged to the shared kernel
 * instantiated for kErode / kX.
 */
void SkErodeX_neon(const SkPMColor* src, SkPMColor* dst, int radius,
                   int width, int height, int srcStride, int dstStride) {
    SkMorph_neon<kErode, kX>(src, dst, radius, width, height, srcStride, dstStride);
}
/* NEON dilate along Y: forwards every argument unchanged to the shared kernel
 * instantiated for kDilate / kY.
 */
void SkDilateY_neon(const SkPMColor* src, SkPMColor* dst, int radius,
                    int width, int height, int srcStride, int dstStride) {
    SkMorph_neon<kDilate, kY>(src, dst, radius, width, height, srcStride, dstStride);
}
/* NEON erode along Y: forwards every argument unchanged to the shared kernel
 * instantiated for kErode / kY.
 */
void SkErodeY_neon(const SkPMColor* src, SkPMColor* dst, int radius,
                   int width, int height, int srcStride, int dstStride) {
    SkMorph_neon<kErode, kY>(src, dst, radius, width, height, srcStride, dstStride);
}

View File

@ -0,0 +1,15 @@
/*
* Copyright 2013 The Android Open Source Project
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
/* NEON morphology entry points (dilate/erode along X or Y).
 * All four share one signature: source and destination pixel pointers, the
 * morphology radius, the extents to process, and the source/destination strides.
 * NOTE(review): this header does not declare SkPMColor itself; it presumably
 * relies on the includer having pulled it in already -- confirm.
 */
/* Dilate along X. */
void SkDilateX_neon(const SkPMColor* src, SkPMColor* dst, int radius,
int width, int height, int srcStride, int dstStride);
/* Dilate along Y. */
void SkDilateY_neon(const SkPMColor* src, SkPMColor* dst, int radius,
int width, int height, int srcStride, int dstStride);
/* Erode along X. */
void SkErodeX_neon(const SkPMColor* src, SkPMColor* dst, int radius,
int width, int height, int srcStride, int dstStride);
/* Erode along Y. */
void SkErodeY_neon(const SkPMColor* src, SkPMColor* dst, int radius,
int width, int height, int srcStride, int dstStride);

View File

@ -17,6 +17,8 @@
#include "SkUtils.h" #include "SkUtils.h"
#include "SkUtilsArm.h" #include "SkUtilsArm.h"
#include "SkMorphology_opts.h"
#include "SkMorphology_opts_neon.h"
#if defined(SK_CPU_LENDIAN) && !SK_ARM_NEON_IS_NONE #if defined(SK_CPU_LENDIAN) && !SK_ARM_NEON_IS_NONE
extern "C" void memset16_neon(uint16_t dst[], uint16_t value, int count); extern "C" void memset16_neon(uint16_t dst[], uint16_t value, int count);
@ -65,3 +67,27 @@ SkMemset32Proc SkMemset32GetPlatformProc() {
SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
return NULL; return NULL;
} }
/* Returns the NEON routine for the requested morphology operation, or NULL
 * when no NEON routine is offered: the build has NEON disabled, a
 * dynamic-detection build finds no NEON on this CPU, or `type` is not one of
 * the four handled values.
 */
SkMorphologyProc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
#if !SK_ARM_NEON_IS_NONE
  #if SK_ARM_NEON_IS_DYNAMIC
    // NEON support is only known at run time in this configuration.
    if (!sk_cpu_arm_has_neon()) {
        return NULL;
    }
  #endif
    if (type == kDilateX_SkMorphologyProcType) {
        return SkDilateX_neon;
    }
    if (type == kDilateY_SkMorphologyProcType) {
        return SkDilateY_neon;
    }
    if (type == kErodeX_SkMorphologyProcType) {
        return SkErodeX_neon;
    }
    if (type == kErodeY_SkMorphologyProcType) {
        return SkErodeY_neon;
    }
    // Unrecognised operation type.
    return NULL;
#else
    // Built without NEON.
    return NULL;
#endif
}