From 3e47d49b46b3ab62071218ef3dd44642c9713e04 Mon Sep 17 00:00:00 2001
From: mtklein
Date: Mon, 29 Jun 2015 08:40:30 -0700
Subject: [PATCH] SoftLight with SkPMFloat

SSE speeds up about 4.5x over existing integer SSE,
NEON speeds up about 3x over serial integer code.

We expect 1-2 bit component diffs in the usual GMs.

Still guarded by SK_SUPPORT_LEGACY_XFERMODES, which I'll now try to lift in Chrome.

BUG=skia:

Review URL: https://codereview.chromium.org/1221493002
---
 src/core/Sk4pxXfermode.h          | 26 ++++++++++++++++++++++++++
 src/opts/SkXfermode_opts_SSE2.cpp |  4 +---
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/core/Sk4pxXfermode.h b/src/core/Sk4pxXfermode.h
index b587183046..28fd9f1e7b 100644
--- a/src/core/Sk4pxXfermode.h
+++ b/src/core/Sk4pxXfermode.h
@@ -141,6 +141,31 @@ XFERMODE(ColorBurn) {
                                           otherwise));
     return srcover * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
 }
+XFERMODE(SoftLight) {
+    auto sa = s.alphas(),
+         da = d.alphas(),
+         isa = Sk4f(1)-sa,
+         ida = Sk4f(1)-da;
+
+    // Some common terms.
+    auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)),
+         s2 = Sk4f(2)*s,
+         m4 = Sk4f(4)*m;
+
+    // The logic forks three ways:
+    //    1. dark src?
+    //    2. light src, dark dst?
+    //    3. light src, light dst?
+    auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1.
+         darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2.
+         liteDst = m.sqrt() - m, // Used in case 3.
+         liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d < da).thenElse(darkDst, liteDst); // Case 2 or 3?
+
+    auto alpha = s + d*isa;
+    auto colors = s*ida + d*isa + (s2 < sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3?
+
+    return alpha * SkPMFloat(1,0,0,0) + colors * SkPMFloat(0,1,1,1);
+}
 #undef XFERMODE

 // A reasonable fallback mode for doing AA is to simply apply the transfermode first,
@@ -244,6 +269,7 @@ static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode

         case SkXfermode::kColorDodge_Mode: return SkTPMFloatXfermode<ColorDodge>::Create(rec);
         case SkXfermode::kColorBurn_Mode: return SkTPMFloatXfermode<ColorBurn>::Create(rec);
+        case SkXfermode::kSoftLight_Mode: return SkTPMFloatXfermode<SoftLight>::Create(rec);
     #endif
         default: break;
     }
diff --git a/src/opts/SkXfermode_opts_SSE2.cpp b/src/opts/SkXfermode_opts_SSE2.cpp
index ca26263727..2024a175a8 100644
--- a/src/opts/SkXfermode_opts_SSE2.cpp
+++ b/src/opts/SkXfermode_opts_SSE2.cpp
@@ -516,15 +516,13 @@ SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
                                                          SkXfermode::Mode mode) {
     SkXfermodeProcSIMD proc = nullptr;
     switch (mode) {
-        // TODO(mtklein): Sk4pxXfermode has these now. Clean up.
+        // TODO(mtklein): Sk4pxXfermode has these now. Clean up the whole file!
         case SkProcCoeffXfermode::kOverlay_Mode: proc = overlay_modeproc_SSE2; break;
         case SkProcCoeffXfermode::kDarken_Mode: proc = darken_modeproc_SSE2; break;
         case SkProcCoeffXfermode::kLighten_Mode: proc = lighten_modeproc_SSE2; break;
         case SkProcCoeffXfermode::kHardLight_Mode: proc = hardlight_modeproc_SSE2; break;
         case SkProcCoeffXfermode::kColorDodge_Mode: proc = colordodge_modeproc_SSE2; break;
         case SkProcCoeffXfermode::kColorBurn_Mode: proc = colorburn_modeproc_SSE2; break;
-
-        // TODO(mtklein): implement this with SkPMFloat.
         case SkProcCoeffXfermode::kSoftLight_Mode: proc = softlight_modeproc_SSE2; break;
         default: break;
     }