From 87185f7539f4e55fbf2e5eea458b8980c34a8d1d Mon Sep 17 00:00:00 2001 From: Mike Klein Date: Thu, 1 Dec 2016 18:22:26 -0500 Subject: [PATCH] use fma in store_8888 I think we just happened not to here. This improves Adobe -> sRGB pipeline conversion by about 3-4%. While at it, unify all the fma() lambdas into SkNf_fma(). I'd have called it fma(), but IIRC there was some sort of name conflict there with type-generic fma() functions from the C math.h or something silly like that. CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD Change-Id: Id176671fec27c984efa4703c5be2fb63b7f0b11f Reviewed-on: https://skia-review.googlesource.com/5474 Reviewed-by: Matt Sarett Commit-Queue: Mike Klein --- src/opts/SkRasterPipeline_opts.h | 50 +++++++++++++++----------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/opts/SkRasterPipeline_opts.h b/src/opts/SkRasterPipeline_opts.h index 962ef1568f..ddd1aaf9cd 100644 --- a/src/opts/SkRasterPipeline_opts.h +++ b/src/opts/SkRasterPipeline_opts.h @@ -250,6 +250,8 @@ SI void from_565(const SkNh& _565, SkNf* r, SkNf* g, SkNf* b) { *b = SkNx_cast(_32_bit & SK_B16_MASK_IN_PLACE) * (1.0f / SK_B16_MASK_IN_PLACE); } +SI SkNf SkNf_fma(const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); } + STAGE(trace) { SkDebugf("%s\n", (const char*)ctx); } @@ -387,7 +389,7 @@ STAGE(scale_u8) { } SI SkNf lerp(const SkNf& from, const SkNf& to, const SkNf& cov) { - return SkNx_fma(to-from, cov, from); + return SkNf_fma(to-from, cov, from); } // s' = d(1-c) + sc, for a scalar c. @@ -541,10 +543,10 @@ STAGE(load_8888_d) { } STAGE(store_8888) { auto ptr = *(uint32_t**)ctx + x; - store(tail, ( SkNx_cast(255.0f * r + 0.5f) << 0 - | SkNx_cast(255.0f * g + 0.5f) << 8 - | SkNx_cast(255.0f * b + 0.5f) << 16 - | SkNx_cast(255.0f * a + 0.5f) << 24 ), (int*)ptr); + store(tail, ( SkNx_cast(SkNf_fma(255.0f, r, 0.5f)) << 0 + | SkNx_cast(SkNf_fma(255.0f, g, 0.5f)) << 8 + | SkNx_cast(SkNf_fma(255.0f, b, 0.5f)) << 16 + | SkNx_cast(SkNf_fma(255.0f, a, 0.5f)) << 24 ), (int*)ptr); } STAGE(load_tables) { @@ -580,7 +582,7 @@ RGBA_XFERMODE(clear) { return 0.0f; } RGBA_XFERMODE(srcatop) { return s*da + d*inv(sa); } RGBA_XFERMODE(srcin) { return s * da; } RGBA_XFERMODE(srcout) { return s * inv(da); } -RGBA_XFERMODE(srcover) { return SkNx_fma(d, inv(sa), s); } +RGBA_XFERMODE(srcover) { return SkNf_fma(d, inv(sa), s); } RGBA_XFERMODE(dstatop) { return srcatop_kernel(d,da,s,sa); } RGBA_XFERMODE(dstin) { return srcin_kernel (d,da,s,sa); } RGBA_XFERMODE(dstout) { return srcout_kernel (d,da,s,sa); } @@ -635,19 +637,17 @@ STAGE(luminance_to_alpha) { STAGE(matrix_2x3) { auto m = (const float*)ctx; - auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); }; - auto R = fma(r,m[0], fma(g,m[2], m[4])), - G = fma(r,m[1], fma(g,m[3], m[5])); + auto R = SkNf_fma(r,m[0], SkNf_fma(g,m[2], m[4])), + G = SkNf_fma(r,m[1], SkNf_fma(g,m[3], m[5])); r = R; g = G; } STAGE(matrix_3x4) { auto m = (const float*)ctx; - auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); }; - auto R = fma(r,m[0], fma(g,m[3], fma(b,m[6], m[ 9]))), - G = fma(r,m[1], fma(g,m[4], fma(b,m[7], m[10]))), - B = fma(r,m[2], fma(g,m[5], fma(b,m[8], m[11]))); + auto R = SkNf_fma(r,m[0], SkNf_fma(g,m[3], SkNf_fma(b,m[6], m[ 9]))), + G = SkNf_fma(r,m[1], SkNf_fma(g,m[4], SkNf_fma(b,m[7], m[10]))), + B = SkNf_fma(r,m[2], SkNf_fma(g,m[5], SkNf_fma(b,m[8], m[11]))); r = R; g = G; b = B; @@ -655,11 +655,10 @@ STAGE(matrix_3x4) { STAGE(matrix_4x5) { auto m = (const float*)ctx; - auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); }; - auto R = fma(r,m[0], fma(g,m[4], fma(b,m[ 8], fma(a,m[12], m[16])))), - G = fma(r,m[1], fma(g,m[5], fma(b,m[ 9], fma(a,m[13], m[17])))), - B = fma(r,m[2], fma(g,m[6], fma(b,m[10], fma(a,m[14], m[18])))), - A = fma(r,m[3], fma(g,m[7], fma(b,m[11], fma(a,m[15], m[19])))); + auto R = SkNf_fma(r,m[0], SkNf_fma(g,m[4], SkNf_fma(b,m[ 8], SkNf_fma(a,m[12], m[16])))), + G = SkNf_fma(r,m[1], SkNf_fma(g,m[5], SkNf_fma(b,m[ 9], SkNf_fma(a,m[13], m[17])))), + B = SkNf_fma(r,m[2], SkNf_fma(g,m[6], SkNf_fma(b,m[10], SkNf_fma(a,m[14], m[18])))), + A = SkNf_fma(r,m[3], SkNf_fma(g,m[7], SkNf_fma(b,m[11], SkNf_fma(a,m[15], m[19])))); r = R; g = G; b = B; @@ -669,10 +668,9 @@ STAGE(matrix_perspective) { // N.B. unlike the matrix_NxM stages, this takes a row-major matrix. auto m = (const float*)ctx; - auto fma = [](const SkNf& f, const SkNf& m, const SkNf& a) { return SkNx_fma(f,m,a); }; - auto R = fma(r,m[0], fma(g,m[1], m[2])), - G = fma(r,m[3], fma(g,m[4], m[5])), - Z = fma(r,m[6], fma(g,m[7], m[8])); + auto R = SkNf_fma(r,m[0], SkNf_fma(g,m[1], m[2])), + G = SkNf_fma(r,m[3], SkNf_fma(g,m[4], m[5])), + Z = SkNf_fma(r,m[6], SkNf_fma(g,m[7], m[8])); r = R * Z.invert(); g = G * Z.invert(); } @@ -830,10 +828,10 @@ STAGE(accumulate) { auto sc = (const SkImageShaderContext*)ctx; auto scale = SkNf::Load(sc->scale); - dr = SkNx_fma(scale, r, dr); - dg = SkNx_fma(scale, g, dg); - db = SkNx_fma(scale, b, db); - da = SkNx_fma(scale, a, da); + dr = SkNf_fma(scale, r, dr); + dg = SkNf_fma(scale, g, dg); + db = SkNf_fma(scale, b, db); + da = SkNf_fma(scale, a, da); } template