skip alpha scale if 256
It's free performance! Seriously, though, it's a perfectly predictable branch. On opaque content I'm seeing a small speedup and two extra instructions in the hot path:

    + cmpl $256, %regs
    + je <to write-it-out>
      pmullw %xmm, %xmm
      psrlw $8, %xmm
      pmullw %xmm, %xmm
      psrlw $8, %xmm
    write-it-out:

Change-Id: I1c19a9efac62df2bb901ca9eb2679c967729d863
Reviewed-on: https://skia-review.googlesource.com/c/170901
Auto-Submit: Mike Klein <mtklein@google.com>
Commit-Queue: Herb Derby <herb@google.com>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
parent
2f9a5ea639
commit
9341952756
@ -80,9 +80,12 @@ void S32_alpha_D32_filter_DX_SSSE3(const SkBitmapProcState& s,
|
||||
SkASSERT(s.fFilterQuality != kNone_SkFilterQuality);
|
||||
SkASSERT(kN32_SkColorType == s.fPixmap.colorType());
|
||||
|
||||
int alpha = s.fAlphaScale;
|
||||
|
||||
// Return (px * s.fAlphaScale) / 256. (s.fAlphaScale is in [0,256].)
|
||||
auto scale_by_alpha = [&](const __m128i& px) {
|
||||
return _mm_srli_epi16(_mm_mullo_epi16(px, _mm_set1_epi16(s.fAlphaScale)), 8);
|
||||
auto scale_by_alpha = [alpha](const __m128i& px) {
|
||||
return alpha == 256 ? px
|
||||
: _mm_srli_epi16(_mm_mullo_epi16(px, _mm_set1_epi16(alpha)), 8);
|
||||
};
|
||||
|
||||
// We're in _DX_ mode here, so we're only varying in X.
|
||||
|
Loading…
Reference in New Issue
Block a user