Improve SkARGB32_A8_BlitMask_SSE2
With clang, this:

-   movzbl  -3(%rbx), %edx
-   pxor    %xmm5, %xmm5
-   pinsrw  $0, %edx, %xmm5
-   pinsrw  $1, %edx, %xmm5
-   movzbl  -2(%rbx), %edx
-   pinsrw  $2, %edx, %xmm5
-   pinsrw  $3, %edx, %xmm5
-   movzbl  -1(%rbx), %edx
-   pinsrw  $4, %edx, %xmm5
-   pinsrw  $5, %edx, %xmm5
-   movzbl  (%rbx), %edx
-   pinsrw  $6, %edx, %xmm5
-   pinsrw  $7, %edx, %xmm5

becomes:

+   movd       (%rbx), %xmm4
+   punpcklbw  %xmm9, %xmm4
+   punpcklwd  %xmm4, %xmm4

And clang already does better codegen than msvc 2013 on this.

BUG=skia:
Review URL: https://codereview.chromium.org/609823003
This commit is contained in:
parent
12b1831ea4
commit
60e4ad7b29
1
AUTHORS
1
AUTHORS
@ -15,6 +15,7 @@ ACCESS CO., LTD. <*@access-company.com>
|
||||
ARM <*@arm.com>
|
||||
Ehsan Akhgari <ehsan.akhgari@gmail.com>
|
||||
George Wright <george@mozilla.com>
|
||||
Jeff Muizelaar <jmuizelaar@mozilla.com>
|
||||
Google Inc. <*@google.com>
|
||||
Igalia <*@igalia.com>
|
||||
Intel <*@intel.com>
|
||||
|
@ -441,11 +441,10 @@ void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
|
||||
__m128i dst_pixel = _mm_load_si128(d);
|
||||
|
||||
//set the alpha value
|
||||
__m128i src_scale_wide = _mm_set_epi8(0, *(mask+3),\
|
||||
0, *(mask+3),0, \
|
||||
*(mask+2),0, *(mask+2),\
|
||||
0,*(mask+1), 0,*(mask+1),\
|
||||
0, *mask,0,*mask);
|
||||
__m128i src_scale_wide = _mm_cvtsi32_si128(*reinterpret_cast<const uint32_t*>(mask));
|
||||
src_scale_wide = _mm_unpacklo_epi8(src_scale_wide,
|
||||
_mm_setzero_si128());
|
||||
src_scale_wide = _mm_unpacklo_epi16(src_scale_wide, src_scale_wide);
|
||||
|
||||
//call SkAlpha255To256()
|
||||
src_scale_wide = _mm_add_epi16(src_scale_wide, c_1);
|
||||
|
Loading…
Reference in New Issue
Block a user