Revert of ARM Skia NEON patches - 35 - First AArch64 support (https://codereview.chromium.org/143423004/)

Reason for revert:
GYP's failing on most (all?) bots.

Original issue's description:
> ARM Skia NEON patches - 35 - First AArch64 support
>
> Aarch64 support
>
> This change contains the necessary modifications to have Skia build and
> run properly on an ARMv8 processor in aarch64 execution state.
>
> Here's a list of the changes:
>
>  - add an arm64 target to the build system + SK_CPU_ARM64 flag
>
>  - MatrixTest was failing when built in Release mode. Fused MAC
>    instructions were generated which made some intermediate results
>    more accurate. As the test relies on result comparison, the more
>    precise results when compared to others led to a gap bigger than
>    what was tolerated. As I don't know if some actual skia code relies
>    on results being comparable, I've disabled fused MAC instruction
>    with -ffp-contract=off for arm64.
>
>  - Modify include/core/SkOnce.h to have barriers work.
>
>  - SK_CPU_ARM64 implies SK_ARM_NEON_MODE_ALWAYS.
>
>  - use existing Xfermode optimisations with modifications that can be
>    removed in the future when toolchains are ready. Also save a few
>    instructions is two Xfermodes (will apply to ARM too).
>
>  - use existing SkBoxBlur and SkMorphology optimisations.
>
>  - use existing SkBlitMask optimisations
>
>  - use existing BitmapProcState and Convolution optimisations.
>
> Future changes will include:
>
>  - Blitters (only partialy merged upstream)
>
>  - SkUtils (there's little value in sending asm optimisations without
>    having them benchmarked on real hardware).
>
> Signed-off-by: Kevin PETIT <kevin.petit@arm.com>
>
> BUG=skia:
>
> Committed: http://code.google.com/p/skia/source/detail?r=13980

R=djsollen@google.com, reed@google.com, halcanary@google.com, kevin.petit@arm.com
TBR=djsollen@google.com, halcanary@google.com, kevin.petit@arm.com, reed@google.com
NOTREECHECKS=true
NOTRY=true
BUG=skia:

Author: mtklein@google.com

Review URL: https://codereview.chromium.org/216113005

git-svn-id: http://skia.googlecode.com/svn/trunk@13983 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
commit-bot@chromium.org 2014-03-28 18:05:29 +00:00
parent 972f9cd7a0
commit d643a90ee2
9 changed files with 22 additions and 110 deletions

View File

@ -8,12 +8,6 @@
'SK_FORCE_DISTANCEFIELD_FONTS=<(skia_force_distancefield_fonts)', 'SK_FORCE_DISTANCEFIELD_FONTS=<(skia_force_distancefield_fonts)',
], ],
'conditions' : [ 'conditions' : [
[ 'skia_arch_type == "arm64"', {
'cflags': [
'-ffp-contract=off',
],
}],
[ 'skia_os == "win"', [ 'skia_os == "win"',
{ {
'defines': [ 'defines': [

View File

@ -72,7 +72,7 @@
}, { }, {
'skia_poppler_enabled%': 0, 'skia_poppler_enabled%': 0,
}], }],
[ 'skia_os in ["linux", "freebsd", "openbsd", "solaris", "mac"] or skia_arch_type == "arm64"', { [ 'skia_os in ["linux", "freebsd", "openbsd", "solaris", "mac"]', {
'skia_arch_width%': 64, 'skia_arch_width%': 64,
}, { }, {
'skia_arch_width%': 32, 'skia_arch_width%': 32,

View File

@ -122,23 +122,6 @@
'-mno-apcs-frame', '-mno-apcs-frame',
] ]
}], }],
[ 'skia_arch_type == "arm64"', {
'sources': [
'../src/opts/SkBitmapProcState_arm_neon.cpp',
'../src/opts/SkBitmapProcState_matrixProcs_neon.cpp',
'../src/opts/SkBitmapProcState_opts_arm.cpp',
'../src/opts/SkBlitMask_opts_arm.cpp',
'../src/opts/SkBlitMask_opts_arm_neon.cpp',
'../src/opts/SkBlitRow_opts_none.cpp',
'../src/opts/SkBlurImage_opts_arm.cpp',
'../src/opts/SkBlurImage_opts_neon.cpp',
'../src/opts/SkMorphology_opts_arm.cpp',
'../src/opts/SkMorphology_opts_neon.cpp',
'../src/opts/SkUtils_opts_none.cpp',
'../src/opts/SkXfermode_opts_arm.cpp',
'../src/opts/SkXfermode_opts_arm_neon.cpp',
],
}],
], ],
}, },
# For the same lame reasons as what is done for skia_opts, we have to # For the same lame reasons as what is done for skia_opts, we have to

View File

@ -85,10 +85,12 @@ inline static void compiler_barrier() {
#endif #endif
inline static void full_barrier_on_arm() { inline static void full_barrier_on_arm() {
#if (defined(SK_CPU_ARM) && SK_ARM_ARCH >= 7) || defined(SK_CPU_ARM64) #ifdef SK_CPU_ARM
asm volatile("dmb ish" : : : "memory"); # if SK_ARM_ARCH >= 7
#elif defined(SK_CPU_ARM) asm volatile("dmb" : : : "memory");
# else
asm volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory"); asm volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
# endif
#endif #endif
} }

View File

@ -174,10 +174,6 @@
#endif #endif
#endif #endif
#if defined(__aarch64__)
#define SK_CPU_ARM64
#endif
////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////
#if !defined(SKIA_IMPLEMENTATION) #if !defined(SKIA_IMPLEMENTATION)

View File

@ -7,7 +7,7 @@
usage() { usage() {
cat >&2 <<EOF cat >&2 <<EOF
arm64_make - this script builds a AArch64 version of skia that arm64_make - this script builds a ARMv7 Aarch64 version of skia that
does not depend on external libraries, perfect for putting in an does not depend on external libraries, perfect for putting in an
embedded system running Linux. embedded system running Linux.
@ -45,7 +45,12 @@ done
export GYP_DEFINES="${GYP_DEFINES} \ export GYP_DEFINES="${GYP_DEFINES} \
skia_gpu=0 \ skia_gpu=0 \
skia_arch_type=arm64 \ skia_arch_type=arm \
skia_arch_width=64 \
armv7=1 \
armv8=1 \
arm_neon=0 \
arm_thumb=0 \
" "
"$(dirname "$0")/barelinux_make" -t "$BUILD_TYPE" "$(dirname "$0")/barelinux_make" -t "$BUILD_TYPE"

View File

@ -23,7 +23,7 @@
#if defined(SK_CPU_ARM) && defined(__ARM_HAVE_OPTIONAL_NEON_SUPPORT) #if defined(SK_CPU_ARM) && defined(__ARM_HAVE_OPTIONAL_NEON_SUPPORT)
# define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_DYNAMIC # define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_DYNAMIC
#elif defined(SK_CPU_ARM) && defined(__ARM_HAVE_NEON) || defined(SK_CPU_ARM64) #elif defined(SK_CPU_ARM) && defined(__ARM_HAVE_NEON)
# define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_ALWAYS # define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_ALWAYS
#else #else
# define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_NONE # define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_NONE

View File

@ -15,7 +15,7 @@
#include "SkConvolver.h" #include "SkConvolver.h"
#if !defined(SK_CPU_ARM64) && SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) #if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
void SI8_D16_nofilter_DX_arm( void SI8_D16_nofilter_DX_arm(
const SkBitmapProcState& s, const SkBitmapProcState& s,
const uint32_t* SK_RESTRICT xy, const uint32_t* SK_RESTRICT xy,
@ -186,7 +186,7 @@ void SI8_opaque_D32_nofilter_DX_arm(const SkBitmapProcState& s,
s.fBitmap->getColorTable()->unlockColors(); s.fBitmap->getColorTable()->unlockColors();
} }
#endif // !defined(SK_CPU_ARM64) && SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN) #endif // SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@ -194,7 +194,6 @@ void SI8_opaque_D32_nofilter_DX_arm(const SkBitmapProcState& s,
otherwise the shader won't even look at the matrix/sampler otherwise the shader won't even look at the matrix/sampler
*/ */
void SkBitmapProcState::platformProcs() { void SkBitmapProcState::platformProcs() {
#if !defined(SK_CPU_ARM64) && SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
bool isOpaque = 256 == fAlphaScale; bool isOpaque = 256 == fAlphaScale;
bool justDx = false; bool justDx = false;
@ -204,6 +203,7 @@ void SkBitmapProcState::platformProcs() {
switch (fBitmap->config()) { switch (fBitmap->config()) {
case SkBitmap::kIndex8_Config: case SkBitmap::kIndex8_Config:
#if SK_ARM_ARCH >= 6 && !defined(SK_CPU_BENDIAN)
if (justDx && SkPaint::kNone_FilterLevel == fFilterLevel) { if (justDx && SkPaint::kNone_FilterLevel == fFilterLevel) {
#if 0 /* crashing on android device */ #if 0 /* crashing on android device */
fSampleProc16 = SI8_D16_nofilter_DX_arm; fSampleProc16 = SI8_D16_nofilter_DX_arm;
@ -215,11 +215,11 @@ void SkBitmapProcState::platformProcs() {
fShaderProc32 = NULL; fShaderProc32 = NULL;
} }
} }
#endif
break; break;
default: default:
break; break;
} }
#endif
} }
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////

View File

@ -41,13 +41,8 @@ static inline uint16x8_t SkAlphaMulAlpha_neon8_16(uint8x8_t color, uint8x8_t alp
static inline uint8x8_t SkDiv255Round_neon8_32_8(int32x4_t p1, int32x4_t p2) { static inline uint8x8_t SkDiv255Round_neon8_32_8(int32x4_t p1, int32x4_t p2) {
uint16x8_t tmp; uint16x8_t tmp;
#ifdef SK_CPU_ARM64
tmp = vmovn_high_u32(vmovn_u32(vreinterpretq_u32_s32(p1)),
vreinterpretq_u32_s32(p2));
#else
tmp = vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(p1)), tmp = vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(p1)),
vmovn_u32(vreinterpretq_u32_s32(p2))); vmovn_u32(vreinterpretq_u32_s32(p2)));
#endif
tmp += vdupq_n_u16(128); tmp += vdupq_n_u16(128);
tmp += vshrq_n_u16(tmp, 8); tmp += vshrq_n_u16(tmp, 8);
@ -71,11 +66,7 @@ static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1, int32x4_t val
// Test if <= 0 // Test if <= 0
cmp1 = vcleq_s32(val1, vdupq_n_s32(0)); cmp1 = vcleq_s32(val1, vdupq_n_s32(0));
cmp2 = vcleq_s32(val2, vdupq_n_s32(0)); cmp2 = vcleq_s32(val2, vdupq_n_s32(0));
#ifdef SK_CPU_ARM64
cmp16 = vmovn_high_u32(vmovn_u32(cmp1), cmp2);
#else
cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2)); cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2));
#endif
cmp8_1 = vmovn_u16(cmp16); cmp8_1 = vmovn_u16(cmp16);
// Init to zero // Init to zero
@ -84,11 +75,7 @@ static inline uint8x8_t clamp_div255round_simd8_32(int32x4_t val1, int32x4_t val
// Test if >= 255*255 // Test if >= 255*255
cmp1 = vcgeq_s32(val1, vdupq_n_s32(255*255)); cmp1 = vcgeq_s32(val1, vdupq_n_s32(255*255));
cmp2 = vcgeq_s32(val2, vdupq_n_s32(255*255)); cmp2 = vcgeq_s32(val2, vdupq_n_s32(255*255));
#ifdef SK_CPU_ARM64
cmp16 = vmovn_high_u32(vmovn_u32(cmp1), cmp2);
#else
cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2)); cmp16 = vcombine_u16(vmovn_u32(cmp1), vmovn_u32(cmp2));
#endif
cmp8 = vmovn_u16(cmp16); cmp8 = vmovn_u16(cmp16);
// Insert 255 where true // Insert 255 where true
@ -422,19 +409,11 @@ static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc,
if (overlay) { if (overlay) {
dc2 = vshll_n_u8(dc, 1); dc2 = vshll_n_u8(dc, 1);
scdc2_1 = vmull_u16(vget_low_u16(dc2), vget_low_u16(vmovl_u8(sc))); scdc2_1 = vmull_u16(vget_low_u16(dc2), vget_low_u16(vmovl_u8(sc)));
#ifdef SK_CPU_ARM64
scdc2_2 = vmull_high_u16(dc2, vmovl_u8(sc));
#else
scdc2_2 = vmull_u16(vget_high_u16(dc2), vget_high_u16(vmovl_u8(sc))); scdc2_2 = vmull_u16(vget_high_u16(dc2), vget_high_u16(vmovl_u8(sc)));
#endif
} else { } else {
sc2 = vshll_n_u8(sc, 1); sc2 = vshll_n_u8(sc, 1);
scdc2_1 = vmull_u16(vget_low_u16(sc2), vget_low_u16(vmovl_u8(dc))); scdc2_1 = vmull_u16(vget_low_u16(sc2), vget_low_u16(vmovl_u8(dc)));
#ifdef SK_CPU_ARM64
scdc2_2 = vmull_high_u16(sc2, vmovl_u8(dc));
#else
scdc2_2 = vmull_u16(vget_high_u16(sc2), vget_high_u16(vmovl_u8(dc))); scdc2_2 = vmull_u16(vget_high_u16(sc2), vget_high_u16(vmovl_u8(dc)));
#endif
} }
// Calc COM // Calc COM
@ -442,20 +421,12 @@ static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc,
com1 = vreinterpretq_s32_u32( com1 = vreinterpretq_s32_u32(
vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc))); vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc)));
com2 = vreinterpretq_s32_u32( com2 = vreinterpretq_s32_u32(
#ifdef SK_CPU_ARM64
vmull_high_u16(const255, sc_plus_dc));
#else
vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc))); vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc)));
#endif
// Calc SUB // Calc SUB
int32x4_t sub1, sub2; int32x4_t sub1, sub2;
sub1 = vreinterpretq_s32_u32(vaddl_u16(vget_low_u16(scda), vget_low_u16(dcsa))); sub1 = vreinterpretq_s32_u32(vaddl_u16(vget_low_u16(scda), vget_low_u16(dcsa)));
#ifdef SK_CPU_ARM64
sub2 = vreinterpretq_s32_u32(vaddl_high_u16(scda, dcsa));
#else
sub2 = vreinterpretq_s32_u32(vaddl_u16(vget_high_u16(scda), vget_high_u16(dcsa))); sub2 = vreinterpretq_s32_u32(vaddl_u16(vget_high_u16(scda), vget_high_u16(dcsa)));
#endif
sub1 = vsubq_s32(sub1, vreinterpretq_s32_u32(scdc2_1)); sub1 = vsubq_s32(sub1, vreinterpretq_s32_u32(scdc2_1));
sub2 = vsubq_s32(sub2, vreinterpretq_s32_u32(scdc2_2)); sub2 = vsubq_s32(sub2, vreinterpretq_s32_u32(scdc2_2));
@ -473,14 +444,10 @@ static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc,
int32x4_t val2_1, val2_2; int32x4_t val2_1, val2_2;
uint32x4_t cmp1, cmp2; uint32x4_t cmp1, cmp2;
// Doing a signed lengthening allows to save a few instructions cmp1 = vmovl_u16(vget_low_u16(cmp));
// thanks to sign extension. cmp1 |= vshlq_n_u32(cmp1, 16);
cmp1 = vreinterpretq_u32_s32(vmovl_s16(vreinterpret_s16_u16(vget_low_u16(cmp)))); cmp2 = vmovl_u16(vget_high_u16(cmp));
#ifdef SK_CPU_ARM64 cmp2 |= vshlq_n_u32(cmp2, 16);
cmp2 = vreinterpretq_u32_s32(vmovl_high_s16(vreinterpretq_s16_u16(cmp)));
#else
cmp2 = vreinterpretq_u32_s32(vmovl_s16(vreinterpret_s16_u16(vget_high_u16(cmp))));
#endif
// Calc COM - SUB // Calc COM - SUB
val1_1 = com1 - sub1; val1_1 = com1 - sub1;
@ -491,11 +458,7 @@ static inline uint8x8_t overlay_hardlight_color(uint8x8_t sc, uint8x8_t dc,
val2_2 = com2 + sub2; val2_2 = com2 + sub2;
val2_1 = vsubq_s32(val2_1, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(sada)))); val2_1 = vsubq_s32(val2_1, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(sada))));
#ifdef SK_CPU_ARM64
val2_2 = vsubq_s32(val2_2, vreinterpretq_s32_u32(vmovl_high_u16(sada)));
#else
val2_2 = vsubq_s32(val2_2, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(sada)))); val2_2 = vsubq_s32(val2_2, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(sada))));
#endif
// Insert where needed // Insert where needed
val1_1 = vbslq_s32(cmp1, val1_1, val2_1); val1_1 = vbslq_s32(cmp1, val1_1, val2_1);
@ -665,19 +628,11 @@ static inline uint8x8_t exclusion_color(uint8x8_t sc, uint8x8_t dc,
term1_1 = vreinterpretq_s32_u32( term1_1 = vreinterpretq_s32_u32(
vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc))); vmull_u16(vget_low_u16(const255), vget_low_u16(sc_plus_dc)));
term1_2 = vreinterpretq_s32_u32( term1_2 = vreinterpretq_s32_u32(
#ifdef SK_CPU_ARM64
vmull_high_u16(const255, sc_plus_dc));
#else
vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc))); vmull_u16(vget_high_u16(const255), vget_high_u16(sc_plus_dc)));
#endif
/* Calc the second term */ /* Calc the second term */
term2_1 = vreinterpretq_s32_u32(vshll_n_u16(vget_low_u16(scdc), 1)); term2_1 = vreinterpretq_s32_u32(vshll_n_u16(vget_low_u16(scdc), 1));
#ifdef SK_CPU_ARM64
term2_2 = vreinterpretq_s32_u32(vshll_high_n_u16(scdc, 1));
#else
term2_2 = vreinterpretq_s32_u32(vshll_n_u16(vget_high_u16(scdc), 1)); term2_2 = vreinterpretq_s32_u32(vshll_n_u16(vget_high_u16(scdc), 1));
#endif
return clamp_div255round_simd8_32(term1_1 - term2_1, term1_2 - term2_2); return clamp_div255round_simd8_32(term1_1 - term2_1, term1_2 - term2_2);
} }
@ -706,18 +661,10 @@ static inline uint8x8_t blendfunc_multiply_color(uint8x8_t sc, uint8x8_t dc,
scdc = vmull_u8(sc, dc); scdc = vmull_u8(sc, dc);
val1 = vaddl_u16(vget_low_u16(t1), vget_low_u16(t2)); val1 = vaddl_u16(vget_low_u16(t1), vget_low_u16(t2));
#ifdef SK_CPU_ARM64
val2 = vaddl_high_u16(t1, t2);
#else
val2 = vaddl_u16(vget_high_u16(t1), vget_high_u16(t2)); val2 = vaddl_u16(vget_high_u16(t1), vget_high_u16(t2));
#endif
val1 = vaddw_u16(val1, vget_low_u16(scdc)); val1 = vaddw_u16(val1, vget_low_u16(scdc));
#ifdef SK_CPU_ARM64
val2 = vaddw_high_u16(val2, scdc);
#else
val2 = vaddw_u16(val2, vget_high_u16(scdc)); val2 = vaddw_u16(val2, vget_high_u16(scdc));
#endif
return clamp_div255round_simd8_32( return clamp_div255round_simd8_32(
vreinterpretq_s32_u32(val1), vreinterpretq_s32_u32(val2)); vreinterpretq_s32_u32(val1), vreinterpretq_s32_u32(val2));
@ -761,10 +708,6 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
while (count >= 8) { while (count >= 8) {
uint8x8x4_t vsrc, vdst, vres; uint8x8x4_t vsrc, vdst, vres;
#ifdef SK_CPU_ARM64
vsrc = vld4_u8((uint8_t*)src);
vdst = vld4_u8((uint8_t*)dst);
#else
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
asm volatile ( asm volatile (
"vld4.u8 %h[vsrc], [%[src]]! \t\n" "vld4.u8 %h[vsrc], [%[src]]! \t\n"
@ -797,7 +740,6 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
vsrc.val[2] = d2; vdst.val[2] = d6; vsrc.val[2] = d2; vdst.val[2] = d6;
vsrc.val[3] = d3; vdst.val[3] = d7; vsrc.val[3] = d3; vdst.val[3] = d7;
#endif #endif
#endif // #ifdef SK_CPU_ARM64
vres = procSIMD(vsrc, vdst); vres = procSIMD(vsrc, vdst);
@ -805,9 +747,6 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
count -= 8; count -= 8;
dst += 8; dst += 8;
#ifdef SK_CPU_ARM64
src += 8;
#endif
} }
// Leftovers // Leftovers
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
@ -844,9 +783,6 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
vdst = vld1q_u16(dst); vdst = vld1q_u16(dst);
#ifdef SK_CPU_ARM64
vsrc = vld4_u8((uint8_t*)src);
#else
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6)) #if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
asm volatile ( asm volatile (
"vld4.u8 %h[vsrc], [%[src]]! \t\n" "vld4.u8 %h[vsrc], [%[src]]! \t\n"
@ -870,7 +806,6 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
vsrc.val[2] = d2; vsrc.val[2] = d2;
vsrc.val[3] = d3; vsrc.val[3] = d3;
#endif #endif
#endif // #ifdef SK_CPU_ARM64
vdst32 = SkPixel16ToPixel32_neon8(vdst); vdst32 = SkPixel16ToPixel32_neon8(vdst);
vres = procSIMD(vsrc, vdst32); vres = procSIMD(vsrc, vdst32);
@ -880,9 +815,6 @@ void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
count -= 8; count -= 8;
dst += 8; dst += 8;
#ifdef SK_CPU_ARM64
src += 8;
#endif
} }
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
SkPMColor dstC = SkPixel16ToPixel32(dst[i]); SkPMColor dstC = SkPixel16ToPixel32(dst[i]);