ARM Skia NEON patches - 31 - Xfermode: xfer16
Xfermode: xfer16 This adds support for 16bit Xfermodes. It also tunes the gcc test macros in xfer32() to add compatibility for gcc > 4. Signed-off-by: Kévin PETIT <kevin.petit@arm.com> BUG= R=djsollen@google.com, mtklein@google.com, reed@google.com Author: kevin.petit.arm@gmail.com Review URL: https://codereview.chromium.org/33063002 git-svn-id: http://skia.googlecode.com/svn/trunk@12192 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
42feaaf0a5
commit
bc25dfc798
@ -31,9 +31,7 @@ void S32_D565_Opaque_neon(uint16_t* SK_RESTRICT dst,
|
||||
vsrc = vld4_u8((uint8_t*)src);
|
||||
|
||||
// Convert src to 565
|
||||
vdst = vshll_n_u8(vsrc.val[NEON_R], 8);
|
||||
vdst = vsriq_n_u16(vdst, vshll_n_u8(vsrc.val[NEON_G], 8), 5);
|
||||
vdst = vsriq_n_u16(vdst, vshll_n_u8(vsrc.val[NEON_B], 8), 5+6);
|
||||
vdst = SkPixel32ToPixel16_neon8(vsrc);
|
||||
|
||||
// Store
|
||||
vst1q_u16(dst, vdst);
|
||||
|
@ -29,4 +29,40 @@ static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* This function expands 8 pixels from RGB565 (R, G, B from high to low) to
|
||||
* SkPMColor (all possible configurations supported) in the exact same way as
|
||||
* SkPixel16ToPixel32.
|
||||
*/
|
||||
static inline uint8x8x4_t SkPixel16ToPixel32_neon8(uint16x8_t vsrc) {
|
||||
|
||||
uint8x8x4_t ret;
|
||||
uint8x8_t vr, vg, vb;
|
||||
|
||||
vr = vmovn_u16(vshrq_n_u16(vsrc, SK_R16_SHIFT));
|
||||
vg = vmovn_u16(vshrq_n_u16(vshlq_n_u16(vsrc, SK_R16_BITS), SK_R16_BITS + SK_B16_BITS));
|
||||
vb = vmovn_u16(vsrc & vdupq_n_u16(SK_B16_MASK));
|
||||
|
||||
ret.val[NEON_A] = vdup_n_u8(0xFF);
|
||||
ret.val[NEON_R] = vshl_n_u8(vr, 8 - SK_R16_BITS) | vshr_n_u8(vr, 2 * SK_R16_BITS - 8);
|
||||
ret.val[NEON_G] = vshl_n_u8(vg, 8 - SK_G16_BITS) | vshr_n_u8(vg, 2 * SK_G16_BITS - 8);
|
||||
ret.val[NEON_B] = vshl_n_u8(vb, 8 - SK_B16_BITS) | vshr_n_u8(vb, 2 * SK_B16_BITS - 8);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* This function packs 8 pixels from SkPMColor (all possible configurations
|
||||
* supported) to RGB565 (R, G, B from high to low) in the exact same way as
|
||||
* SkPixel32ToPixel16.
|
||||
*/
|
||||
static inline uint16x8_t SkPixel32ToPixel16_neon8(uint8x8x4_t vsrc) {
|
||||
|
||||
uint16x8_t ret;
|
||||
|
||||
ret = vshll_n_u8(vsrc.val[NEON_R], 8);
|
||||
ret = vsriq_n_u16(ret, vshll_n_u8(vsrc.val[NEON_G], 8), SK_R16_BITS);
|
||||
ret = vsriq_n_u16(ret, vshll_n_u8(vsrc.val[NEON_B], 8), SK_R16_BITS + SK_G16_BITS);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif /* #ifndef SkColor_opts_neon_DEFINED */
|
||||
|
@ -574,13 +574,14 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
|
||||
|
||||
SkXfermodeProc proc = this->getProc();
|
||||
SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
|
||||
SkASSERT(procSIMD != NULL);
|
||||
|
||||
if (NULL == aa) {
|
||||
// Unrolled NEON code
|
||||
while (count >= 8) {
|
||||
uint8x8x4_t vsrc, vdst, vres;
|
||||
|
||||
#if (__GNUC__ == 4) && (__GNUC_MINOR__ > 6)
|
||||
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
|
||||
asm volatile (
|
||||
"vld4.u8 %h[vsrc], [%[src]]! \t\n"
|
||||
"vld4.u8 %h[vdst], [%[dst]] \t\n"
|
||||
@ -639,6 +640,74 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
|
||||
}
|
||||
}
|
||||
|
||||
void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
|
||||
const SkPMColor* SK_RESTRICT src, int count,
|
||||
const SkAlpha* SK_RESTRICT aa) const {
|
||||
SkASSERT(dst && src && count >= 0);
|
||||
|
||||
SkXfermodeProc proc = this->getProc();
|
||||
SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
|
||||
SkASSERT(procSIMD != NULL);
|
||||
|
||||
if (NULL == aa) {
|
||||
while(count >= 8) {
|
||||
uint16x8_t vdst, vres16;
|
||||
uint8x8x4_t vdst32, vsrc, vres;
|
||||
|
||||
vdst = vld1q_u16(dst);
|
||||
|
||||
#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
|
||||
asm volatile (
|
||||
"vld4.u8 %h[vsrc], [%[src]]! \t\n"
|
||||
: [vsrc] "=w" (vsrc), [src] "+&r" (src)
|
||||
: :
|
||||
);
|
||||
#else
|
||||
register uint8x8_t d0 asm("d0");
|
||||
register uint8x8_t d1 asm("d1");
|
||||
register uint8x8_t d2 asm("d2");
|
||||
register uint8x8_t d3 asm("d3");
|
||||
|
||||
asm volatile (
|
||||
"vld4.u8 {d0-d3},[%[src]]!;"
|
||||
: "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
|
||||
[src] "+&r" (src)
|
||||
: :
|
||||
);
|
||||
vsrc.val[0] = d0;
|
||||
vsrc.val[1] = d1;
|
||||
vsrc.val[2] = d2;
|
||||
vsrc.val[3] = d3;
|
||||
#endif
|
||||
|
||||
vdst32 = SkPixel16ToPixel32_neon8(vdst);
|
||||
vres = procSIMD(vsrc, vdst32);
|
||||
vres16 = SkPixel32ToPixel16_neon8(vres);
|
||||
|
||||
vst1q_u16(dst, vres16);
|
||||
|
||||
count -= 8;
|
||||
dst += 8;
|
||||
}
|
||||
for (int i = 0; i < count; i++) {
|
||||
SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
|
||||
dst[i] = SkPixel32ToPixel16_ToU16(proc(src[i], dstC));
|
||||
}
|
||||
} else {
|
||||
for (int i = count - 1; i >= 0; --i) {
|
||||
unsigned a = aa[i];
|
||||
if (0 != a) {
|
||||
SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
|
||||
SkPMColor C = proc(src[i], dstC);
|
||||
if (0xFF != a) {
|
||||
C = SkFourByteInterp(C, dstC, a);
|
||||
}
|
||||
dst[i] = SkPixel32ToPixel16_ToU16(C);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SK_DEVELOPER
|
||||
void SkNEONProcCoeffXfermode::toString(SkString* str) const {
|
||||
this->INHERITED::toString(str);
|
||||
|
@ -11,6 +11,8 @@ public:
|
||||
|
||||
virtual void xfer32(SkPMColor dst[], const SkPMColor src[], int count,
|
||||
const SkAlpha aa[]) const SK_OVERRIDE;
|
||||
virtual void xfer16(uint16_t* SK_RESTRICT dst, const SkPMColor* SK_RESTRICT src,
|
||||
int count, const SkAlpha* SK_RESTRICT aa) const SK_OVERRIDE;
|
||||
|
||||
SK_DEVELOPER_TO_STRING()
|
||||
SK_DECLARE_PUBLIC_FLATTENABLE_DESERIALIZATION_PROCS(SkNEONProcCoeffXfermode)
|
||||
|
Loading…
Reference in New Issue
Block a user