ARM Skia NEON patches - 13 - S32A_Opaque
Blitrow32: S32A_Opaque code cleaning and speed improvement

- The old way of calculating alpha doesn't seem to be used anymore, so remove the remaining code.
- Adding prefetching greatly improves performance in some cases, at the expense of a small trade-off:

+-------+-----------+------------+
| count | Cortex-A9 | Cortex-A15 |
+-------+-----------+------------+
| 1,2   | 0         | 0          |
+-------+-----------+------------+
| 4     | 0         | -3%        |
+-------+-----------+------------+
| 8     | 0         | -4%        |
+-------+-----------+------------+
| 16    | 0         | -5%        |
+-------+-----------+------------+
| 64    | +14%      | 0          |
+-------+-----------+------------+
| 256   | +14%      | +12%       |
+-------+-----------+------------+
| 1024  | +115%     | +15%       |
+-------+-----------+------------+

BUG=
R=djsollen@google.com
Author: kevin.petit.arm@gmail.com
Review URL: https://chromiumcodereview.appspot.com/18459008
git-svn-id: http://skia.googlecode.com/svn/trunk@10026 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
66ba9f978f
commit
0a5699ee48
@ -426,6 +426,13 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
|
||||
uint8x8_t src_raw, dst_raw, dst_final;
|
||||
uint8x8_t src_raw_2, dst_raw_2, dst_final_2;
|
||||
|
||||
/* The two prefetches below may make the code slightly
|
||||
* slower for small values of count but are worth having
|
||||
* in the general case.
|
||||
*/
|
||||
__builtin_prefetch(src+32);
|
||||
__builtin_prefetch(dst+32);
|
||||
|
||||
/* get the source */
|
||||
src_raw = vreinterpret_u8_u32(vld1_u32(src));
|
||||
#if UNROLL > 2
|
||||
@ -447,14 +454,7 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
|
||||
|
||||
/* get the alphas spread out properly */
|
||||
alpha_narrow = vtbl1_u8(src_raw, alpha_mask);
|
||||
#if 1
|
||||
/* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
|
||||
/* we collapsed (255-a)+1 ... */
|
||||
alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
|
||||
#else
|
||||
alpha_wide = vsubw_u8(vdupq_n_u16(255), alpha_narrow);
|
||||
alpha_wide = vaddq_u16(alpha_wide, vshrq_n_u16(alpha_wide,7));
|
||||
#endif
|
||||
|
||||
/* spread the dest */
|
||||
dst_wide = vmovl_u8(dst_raw);
|
||||
@ -476,14 +476,7 @@ void S32A_Opaque_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,
|
||||
uint16x8_t alpha_wide;
|
||||
|
||||
alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask);
|
||||
#if 1
|
||||
/* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */
|
||||
/* we collapsed (255-a)+1 ... */
|
||||
alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow);
|
||||
#else
|
||||
alpha_wide = vsubw_u8(vdupq_n_u16(255), alpha_narrow);
|
||||
alpha_wide = vaddq_u16(alpha_wide, vshrq_n_u16(alpha_wide,7));
|
||||
#endif
|
||||
|
||||
/* spread the dest */
|
||||
dst_wide = vmovl_u8(dst_raw_2);
|
||||
|
Loading…
Reference in New Issue
Block a user