From 6006f678e78af7b6f67a454cd4bc213048983f9d Mon Sep 17 00:00:00 2001 From: msarett Date: Mon, 11 Jul 2016 05:49:17 -0700 Subject: [PATCH] Make all color xforms 'fast' (step 1) This refactors opt code to handle arbitrary src and dst gammas that are specified by tables. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2130013002 CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot Review-Url: https://codereview.chromium.org/2130013002 --- src/core/SkColorSpaceXform.cpp | 403 ++++++++++++++++++++++----------- src/core/SkColorSpaceXform.h | 17 +- src/core/SkOpts.cpp | 14 +- src/core/SkOpts.h | 21 +- src/opts/SkColorXform_opts.h | 153 ++++++++----- src/opts/SkOpts_sse41.cpp | 7 +- 6 files changed, 398 insertions(+), 217 deletions(-) diff --git a/src/core/SkColorSpaceXform.cpp b/src/core/SkColorSpaceXform.cpp index e913c5555a..634fcb6325 100644 --- a/src/core/SkColorSpaceXform.cpp +++ b/src/core/SkColorSpaceXform.cpp @@ -11,144 +11,7 @@ #include "SkOpts.h" #include "SkSRGB.h" -static inline bool compute_gamut_xform(SkMatrix44* srcToDst, const SkMatrix44& srcToXYZ, - const SkMatrix44& dstToXYZ) { - if (!dstToXYZ.invert(srcToDst)) { - return false; - } - - srcToDst->postConcat(srcToXYZ); - return true; -} - -std::unique_ptr SkColorSpaceXform::New(const sk_sp& srcSpace, - const sk_sp& dstSpace) { - if (!srcSpace || !dstSpace) { - // Invalid input - return nullptr; - } - - if (as_CSB(dstSpace)->colorLUT()) { - // It would be really weird for a dst profile to have a color LUT. I don't think - // we need to support this. - return nullptr; - } - - SkMatrix44 srcToDst(SkMatrix44::kUninitialized_Constructor); - if (!compute_gamut_xform(&srcToDst, srcSpace->xyz(), dstSpace->xyz())) { - return nullptr; - } - - if (0.0f == srcToDst.getFloat(3, 0) && - 0.0f == srcToDst.getFloat(3, 1) && - 0.0f == srcToDst.getFloat(3, 2) && - !as_CSB(srcSpace)->colorLUT()) - { - switch (srcSpace->gammaNamed()) { - case SkColorSpace::kSRGB_GammaNamed: - if (SkColorSpace::kSRGB_GammaNamed == dstSpace->gammaNamed()) { - return std::unique_ptr( - new SkFastXform(srcToDst)); - } else if (SkColorSpace::k2Dot2Curve_GammaNamed == dstSpace->gammaNamed()) { - return std::unique_ptr( - new SkFastXform(srcToDst)); - } - break; - case SkColorSpace::k2Dot2Curve_GammaNamed: - if (SkColorSpace::kSRGB_GammaNamed == dstSpace->gammaNamed()) { - return std::unique_ptr( - new SkFastXform(srcToDst)); - } else if (SkColorSpace::k2Dot2Curve_GammaNamed == dstSpace->gammaNamed()) { - return std::unique_ptr( - new SkFastXform(srcToDst)); - } - break; - default: - break; - } - } - - return std::unique_ptr(new SkDefaultXform(srcSpace, srcToDst, dstSpace)); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -static void build_src_to_dst(float srcToDstArray[12], const SkMatrix44& srcToDstMatrix) { - // Build the following row major matrix: - // rX gX bX 0 - // rY gY bY 0 - // rZ gZ bZ 0 - // Swap R and B if necessary to make sure that we output SkPMColor order. -#ifdef SK_PMCOLOR_IS_BGRA - srcToDstArray[0] = srcToDstMatrix.getFloat(0, 2); - srcToDstArray[1] = srcToDstMatrix.getFloat(0, 1); - srcToDstArray[2] = srcToDstMatrix.getFloat(0, 0); - srcToDstArray[3] = 0.0f; - srcToDstArray[4] = srcToDstMatrix.getFloat(1, 2); - srcToDstArray[5] = srcToDstMatrix.getFloat(1, 1); - srcToDstArray[6] = srcToDstMatrix.getFloat(1, 0); - srcToDstArray[7] = 0.0f; - srcToDstArray[8] = srcToDstMatrix.getFloat(2, 2); - srcToDstArray[9] = srcToDstMatrix.getFloat(2, 1); - srcToDstArray[10] = srcToDstMatrix.getFloat(2, 0); - srcToDstArray[11] = 0.0f; -#else - srcToDstArray[0] = srcToDstMatrix.getFloat(0, 0); - srcToDstArray[1] = srcToDstMatrix.getFloat(0, 1); - srcToDstArray[2] = srcToDstMatrix.getFloat(0, 2); - srcToDstArray[3] = 0.0f; - srcToDstArray[4] = srcToDstMatrix.getFloat(1, 0); - srcToDstArray[5] = srcToDstMatrix.getFloat(1, 1); - srcToDstArray[6] = srcToDstMatrix.getFloat(1, 2); - srcToDstArray[7] = 0.0f; - srcToDstArray[8] = srcToDstMatrix.getFloat(2, 0); - srcToDstArray[9] = srcToDstMatrix.getFloat(2, 1); - srcToDstArray[10] = srcToDstMatrix.getFloat(2, 2); - srcToDstArray[11] = 0.0f; -#endif -} - -template -SkFastXform::SkFastXform(const SkMatrix44& srcToDst) -{ - build_src_to_dst(fSrcToDst, srcToDst); -} - -template <> -void SkFastXform -::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const -{ - SkOpts::color_xform_RGB1_srgb_to_srgb(dst, src, len, fSrcToDst); -} - -template <> -void SkFastXform -::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const -{ - SkOpts::color_xform_RGB1_srgb_to_2dot2(dst, src, len, fSrcToDst); -} - -template <> -void SkFastXform -::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const -{ - SkOpts::color_xform_RGB1_2dot2_to_srgb(dst, src, len, fSrcToDst); -} - -template <> -void SkFastXform -::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const -{ - SkOpts::color_xform_RGB1_2dot2_to_2dot2(dst, src, len, fSrcToDst); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -extern const float sk_linear_from_2dot2[256] = { +static constexpr float sk_linear_from_2dot2[256] = { 0.000000000000000000f, 0.000005077051900662f, 0.000023328004666099f, 0.000056921765712193f, 0.000107187362341244f, 0.000175123977503027f, 0.000261543754548491f, 0.000367136269815943f, 0.000492503787191433f, 0.000638182842167022f, 0.000804658499513058f, 0.000992374304074325f, @@ -255,6 +118,18 @@ static void build_table_linear_from_gamma(float* outTable, float g, float a, flo } } +static inline bool compute_gamut_xform(SkMatrix44* srcToDst, const SkMatrix44& srcToXYZ, + const SkMatrix44& dstToXYZ) { + if (!dstToXYZ.invert(srcToDst)) { + return false; + } + + srcToDst->postConcat(srcToXYZ); + return true; +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + static constexpr uint8_t linear_to_srgb[1024] = { 0, 3, 6, 10, 13, 15, 18, 20, 22, 23, 25, 27, 28, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 49, 50, 51, 52, @@ -473,6 +348,258 @@ static void build_table_linear_to_gamma(uint8_t* outTable, int outTableSize, flo } } +/////////////////////////////////////////////////////////////////////////////////////////////////// + +std::unique_ptr SkColorSpaceXform::New(const sk_sp& srcSpace, + const sk_sp& dstSpace) { + if (!srcSpace || !dstSpace) { + // Invalid input + return nullptr; + } + + if (as_CSB(dstSpace)->colorLUT()) { + // It would be really weird for a dst profile to have a color LUT. I don't think + // we need to support this. + return nullptr; + } + + SkMatrix44 srcToDst(SkMatrix44::kUninitialized_Constructor); + if (!compute_gamut_xform(&srcToDst, srcSpace->xyz(), dstSpace->xyz())) { + return nullptr; + } + + if (0.0f == srcToDst.getFloat(3, 0) && + 0.0f == srcToDst.getFloat(3, 1) && + 0.0f == srcToDst.getFloat(3, 2) && + !as_CSB(srcSpace)->colorLUT()) + { + switch (dstSpace->gammaNamed()) { + case SkColorSpace::kSRGB_GammaNamed: + return std::unique_ptr( + new SkFastXform(srcSpace, srcToDst, + dstSpace)); + case SkColorSpace::k2Dot2Curve_GammaNamed: + return std::unique_ptr( + new SkFastXform(srcSpace, srcToDst, + dstSpace)); + default: + return std::unique_ptr( + new SkFastXform(srcSpace, srcToDst, + dstSpace)); + } + } + + return std::unique_ptr(new SkDefaultXform(srcSpace, srcToDst, dstSpace)); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + +static void build_src_to_dst(float srcToDstArray[12], const SkMatrix44& srcToDstMatrix) { + // Build the following row major matrix: + // rX gX bX 0 + // rY gY bY 0 + // rZ gZ bZ 0 + // Swap R and B if necessary to make sure that we output SkPMColor order. +#ifdef SK_PMCOLOR_IS_BGRA + srcToDstArray[0] = srcToDstMatrix.getFloat(0, 2); + srcToDstArray[1] = srcToDstMatrix.getFloat(0, 1); + srcToDstArray[2] = srcToDstMatrix.getFloat(0, 0); + srcToDstArray[3] = 0.0f; + srcToDstArray[4] = srcToDstMatrix.getFloat(1, 2); + srcToDstArray[5] = srcToDstMatrix.getFloat(1, 1); + srcToDstArray[6] = srcToDstMatrix.getFloat(1, 0); + srcToDstArray[7] = 0.0f; + srcToDstArray[8] = srcToDstMatrix.getFloat(2, 2); + srcToDstArray[9] = srcToDstMatrix.getFloat(2, 1); + srcToDstArray[10] = srcToDstMatrix.getFloat(2, 0); + srcToDstArray[11] = 0.0f; +#else + srcToDstArray[0] = srcToDstMatrix.getFloat(0, 0); + srcToDstArray[1] = srcToDstMatrix.getFloat(0, 1); + srcToDstArray[2] = srcToDstMatrix.getFloat(0, 2); + srcToDstArray[3] = 0.0f; + srcToDstArray[4] = srcToDstMatrix.getFloat(1, 0); + srcToDstArray[5] = srcToDstMatrix.getFloat(1, 1); + srcToDstArray[6] = srcToDstMatrix.getFloat(1, 2); + srcToDstArray[7] = 0.0f; + srcToDstArray[8] = srcToDstMatrix.getFloat(2, 0); + srcToDstArray[9] = srcToDstMatrix.getFloat(2, 1); + srcToDstArray[10] = srcToDstMatrix.getFloat(2, 2); + srcToDstArray[11] = 0.0f; +#endif +} + +template +SkFastXform::SkFastXform(const sk_sp& srcSpace, const SkMatrix44& srcToDst, + const sk_sp& dstSpace) +{ + build_src_to_dst(fSrcToDst, srcToDst); + + // Build tables to transform src gamma to linear. + switch (srcSpace->gammaNamed()) { + case SkColorSpace::kSRGB_GammaNamed: + fSrcGammaTables[0] = fSrcGammaTables[1] = fSrcGammaTables[2] = sk_linear_from_srgb; + break; + case SkColorSpace::k2Dot2Curve_GammaNamed: + fSrcGammaTables[0] = fSrcGammaTables[1] = fSrcGammaTables[2] = sk_linear_from_2dot2; + break; + case SkColorSpace::kLinear_GammaNamed: + build_table_linear_from_gamma(fSrcGammaTableStorage, 1.0f); + fSrcGammaTables[0] = fSrcGammaTables[1] = fSrcGammaTables[2] = fSrcGammaTableStorage; + break; + default: { + const SkGammas* gammas = as_CSB(srcSpace)->gammas(); + SkASSERT(gammas); + + for (int i = 0; i < 3; i++) { + const SkGammaCurve& curve = (*gammas)[i]; + + if (i > 0) { + // Check if this curve matches the first curve. In this case, we can + // share the same table pointer. Logically, this should almost always + // be true. I've never seen a profile where all three gamma curves + // didn't match. But it is possible that they won't. + // TODO (msarett): + // This comparison won't catch the case where each gamma curve has a + // pointer to its own look-up table, but the tables actually match. + // Should we perform a deep compare of gamma tables here? Or should + // we catch this when parsing the profile? Or should we not worry + // about a bit of redundant work? + if (curve.quickEquals((*gammas)[0])) { + fSrcGammaTables[i] = fSrcGammaTables[0]; + continue; + } + } + + if (curve.isNamed()) { + switch (curve.fNamed) { + case SkColorSpace::kSRGB_GammaNamed: + fSrcGammaTables[i] = sk_linear_from_srgb; + break; + case SkColorSpace::k2Dot2Curve_GammaNamed: + fSrcGammaTables[i] = sk_linear_from_2dot2; + break; + case SkColorSpace::kLinear_GammaNamed: + build_table_linear_from_gamma(&fSrcGammaTableStorage[i * 256], 1.0f); + fSrcGammaTables[i] = &fSrcGammaTableStorage[i * 256]; + break; + default: + SkASSERT(false); + break; + } + } else if (curve.isValue()) { + build_table_linear_from_gamma(&fSrcGammaTableStorage[i * 256], curve.fValue); + fSrcGammaTables[i] = &fSrcGammaTableStorage[i * 256]; + } else if (curve.isTable()) { + build_table_linear_from_gamma(&fSrcGammaTableStorage[i * 256], + curve.fTable.get(), curve.fTableSize); + fSrcGammaTables[i] = &fSrcGammaTableStorage[i * 256]; + } else { + SkASSERT(curve.isParametric()); + build_table_linear_from_gamma(&fSrcGammaTableStorage[i * 256], curve.fG, + curve.fA, curve.fB, curve.fC, curve.fD, curve.fE, + curve.fF); + fSrcGammaTables[i] = &fSrcGammaTableStorage[i * 256]; + } + } + } + } + + // Build tables to transform linear to dst gamma. + switch (dstSpace->gammaNamed()) { + case SkColorSpace::kSRGB_GammaNamed: + case SkColorSpace::k2Dot2Curve_GammaNamed: + break; + case SkColorSpace::kLinear_GammaNamed: + build_table_linear_to_gamma(fDstGammaTableStorage, kDstGammaTableSize, 1.0f); + fDstGammaTables[0] = fDstGammaTables[1] = fDstGammaTables[2] = fDstGammaTableStorage; + break; + default: { + const SkGammas* gammas = as_CSB(dstSpace)->gammas(); + SkASSERT(gammas); + + for (int i = 0; i < 3; i++) { + const SkGammaCurve& curve = (*gammas)[i]; + + if (i > 0) { + // Check if this curve matches the first curve. In this case, we can + // share the same table pointer. Logically, this should almost always + // be true. I've never seen a profile where all three gamma curves + // didn't match. But it is possible that they won't. + // TODO (msarett): + // This comparison won't catch the case where each gamma curve has a + // pointer to its own look-up table (but the tables actually match). + // Should we perform a deep compare of gamma tables here? Or should + // we catch this when parsing the profile? Or should we not worry + // about a bit of redundant work? + if (curve.quickEquals((*gammas)[0])) { + fDstGammaTables[i] = fDstGammaTables[0]; + continue; + } + } + + if (curve.isNamed()) { + switch (curve.fNamed) { + case SkColorSpace::kSRGB_GammaNamed: + fDstGammaTables[i] = linear_to_srgb; + break; + case SkColorSpace::k2Dot2Curve_GammaNamed: + fDstGammaTables[i] = linear_to_2dot2; + break; + case SkColorSpace::kLinear_GammaNamed: + build_table_linear_to_gamma( + &fDstGammaTableStorage[i * kDstGammaTableSize], + kDstGammaTableSize, 1.0f); + fDstGammaTables[i] = &fDstGammaTableStorage[i * kDstGammaTableSize]; + break; + default: + SkASSERT(false); + break; + } + } else if (curve.isValue()) { + build_table_linear_to_gamma(&fDstGammaTableStorage[i * kDstGammaTableSize], + kDstGammaTableSize, curve.fValue); + fDstGammaTables[i] = &fDstGammaTableStorage[i * kDstGammaTableSize]; + } else if (curve.isTable()) { + build_table_linear_to_gamma(&fDstGammaTableStorage[i * kDstGammaTableSize], + kDstGammaTableSize, curve.fTable.get(), + curve.fTableSize); + fDstGammaTables[i] = &fDstGammaTableStorage[i * kDstGammaTableSize]; + } else { + SkASSERT(curve.isParametric()); + build_table_linear_to_gamma(&fDstGammaTableStorage[i * kDstGammaTableSize], + kDstGammaTableSize, curve.fG, curve.fA, curve.fB, + curve.fC, curve.fD, curve.fE, curve.fF); + fDstGammaTables[i] = &fDstGammaTableStorage[i * kDstGammaTableSize]; + } + } + } + } +} + +template <> +void SkFastXform +::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const +{ + SkOpts::color_xform_RGB1_to_srgb(dst, src, len, fSrcGammaTables, fSrcToDst); +} + +template <> +void SkFastXform +::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const +{ + SkOpts::color_xform_RGB1_to_2dot2(dst, src, len, fSrcGammaTables, fSrcToDst); +} + +template <> +void SkFastXform +::xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const +{ + SkOpts::color_xform_RGB1_to_table(dst, src, len, fSrcGammaTables, fSrcToDst, fDstGammaTables); +} + +/////////////////////////////////////////////////////////////////////////////////////////////////// + SkDefaultXform::SkDefaultXform(const sk_sp& srcSpace, const SkMatrix44& srcToDst, const sk_sp& dstSpace) : fColorLUT(sk_ref_sp((SkColorLookUpTable*) as_CSB(srcSpace)->colorLUT())) diff --git a/src/core/SkColorSpaceXform.h b/src/core/SkColorSpaceXform.h index 723dea4335..e09a7252ba 100644 --- a/src/core/SkColorSpaceXform.h +++ b/src/core/SkColorSpaceXform.h @@ -34,16 +34,27 @@ public: virtual ~SkColorSpaceXform() {} }; -template +template class SkFastXform : public SkColorSpaceXform { public: void xform_RGB1_8888(uint32_t* dst, const uint32_t* src, uint32_t len) const override; private: - SkFastXform(const SkMatrix44& srcToDst); + SkFastXform(const sk_sp& srcSpace, const SkMatrix44& srcToDst, + const sk_sp& dstSpace); - float fSrcToDst[12]; + static constexpr int kDstGammaTableSize = 1024; + + // May contain pointers into storage or pointers into precomputed tables. + const float* fSrcGammaTables[3]; + float fSrcGammaTableStorage[3 * 256]; + + float fSrcToDst[12]; + + // May contain pointers into storage or pointers into precomputed tables. + const uint8_t* fDstGammaTables[3]; + uint8_t fDstGammaTableStorage[3 * kDstGammaTableSize]; friend class SkColorSpaceXform; }; diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 3c13dcfb0d..9e03632a15 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -77,14 +77,12 @@ namespace SkOpts { decltype(srcover_srgb_srgb) srcover_srgb_srgb = sk_default::srcover_srgb_srgb; - decltype(color_xform_RGB1_srgb_to_2dot2) color_xform_RGB1_srgb_to_2dot2 = - sk_default::color_xform_RGB1_srgb_to_2dot2; - decltype(color_xform_RGB1_2dot2_to_2dot2) color_xform_RGB1_2dot2_to_2dot2 = - sk_default::color_xform_RGB1_2dot2_to_2dot2; - decltype(color_xform_RGB1_srgb_to_srgb) color_xform_RGB1_srgb_to_srgb = - sk_default::color_xform_RGB1_srgb_to_srgb; - decltype(color_xform_RGB1_2dot2_to_srgb) color_xform_RGB1_2dot2_to_srgb = - sk_default::color_xform_RGB1_2dot2_to_srgb; + decltype(color_xform_RGB1_to_2dot2) color_xform_RGB1_to_2dot2 = + sk_default::color_xform_RGB1_to_2dot2; + decltype(color_xform_RGB1_to_srgb) color_xform_RGB1_to_srgb = + sk_default::color_xform_RGB1_to_srgb; + decltype(color_xform_RGB1_to_table) color_xform_RGB1_to_table = + sk_default::color_xform_RGB1_to_table; // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. void Init_ssse3(); diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 186fa73d4c..f551bf453c 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -69,15 +69,18 @@ namespace SkOpts { // If nsrc < ndst, we loop over src to create a pattern. extern void (*srcover_srgb_srgb)(uint32_t* dst, const uint32_t* src, int ndst, int nsrc); - // Color xform RGB1 pixels. Does not change byte ordering. - extern void (*color_xform_RGB1_srgb_to_2dot2) (uint32_t* dst, const uint32_t* src, int len, - const float srcToDstMatrix[16]); - extern void (*color_xform_RGB1_2dot2_to_2dot2)(uint32_t* dst, const uint32_t* src, int len, - const float srcToDstMatrix[16]); - extern void (*color_xform_RGB1_srgb_to_srgb) (uint32_t* dst, const uint32_t* src, int len, - const float srcToDstMatrix[16]); - extern void (*color_xform_RGB1_2dot2_to_srgb)(uint32_t* dst, const uint32_t* src, int len, - const float srcToDstMatrix[16]); + // Color xform RGB1 pixels into SkPMColor order. + extern void (*color_xform_RGB1_to_2dot2) (uint32_t* dst, const uint32_t* src, int len, + const float* const srcTables[3], + const float srcToDstMatrix[12]); + extern void (*color_xform_RGB1_to_srgb)(uint32_t* dst, const uint32_t* src, int len, + const float* const srcTables[3], + const float srcToDstMatrix[12]); + extern void (*color_xform_RGB1_to_table)(uint32_t* dst, const uint32_t* src, int len, + const float* const srcTables[3], + const float srcToDstMatrix[12], + const uint8_t* const dstTables[3]); + } #endif//SkOpts_DEFINED diff --git a/src/opts/SkColorXform_opts.h b/src/opts/SkColorXform_opts.h index 9904b3ef93..30bf164cbe 100644 --- a/src/opts/SkColorXform_opts.h +++ b/src/opts/SkColorXform_opts.h @@ -12,8 +12,6 @@ #include "SkColorPriv.h" #include "SkSRGB.h" -extern const float sk_linear_from_2dot2[256]; - namespace SK_OPTS_NS { static Sk4f linear_to_2dot2(const Sk4f& x) { @@ -32,28 +30,35 @@ static Sk4f clamp_0_to_255(const Sk4f& x) { return Sk4f::Min(Sk4f::Max(x, 0.0f), 255.0f); } -template +enum DstGamma { + kSRGB_DstGamma, + k2Dot2_DstGamma, + kTable_DstGamma, +}; + +template static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, - const float matrix[16]) { + const float* const srcTables[3], const float matrix[16], + const uint8_t* const dstTables[3]) { Sk4f rXgXbX = Sk4f::Load(matrix + 0), rYgYbY = Sk4f::Load(matrix + 4), rZgZbZ = Sk4f::Load(matrix + 8); if (len >= 4) { Sk4f reds, greens, blues; - auto load_next_4 = [&reds, &greens, &blues, &src, &len] { - reds = Sk4f{linear_from_curve[(src[0] >> 0) & 0xFF], - linear_from_curve[(src[1] >> 0) & 0xFF], - linear_from_curve[(src[2] >> 0) & 0xFF], - linear_from_curve[(src[3] >> 0) & 0xFF]}; - greens = Sk4f{linear_from_curve[(src[0] >> 8) & 0xFF], - linear_from_curve[(src[1] >> 8) & 0xFF], - linear_from_curve[(src[2] >> 8) & 0xFF], - linear_from_curve[(src[3] >> 8) & 0xFF]}; - blues = Sk4f{linear_from_curve[(src[0] >> 16) & 0xFF], - linear_from_curve[(src[1] >> 16) & 0xFF], - linear_from_curve[(src[2] >> 16) & 0xFF], - linear_from_curve[(src[3] >> 16) & 0xFF]}; + auto load_next_4 = [&reds, &greens, &blues, &src, &len, &srcTables] { + reds = Sk4f{srcTables[0][(src[0] >> 0) & 0xFF], + srcTables[0][(src[1] >> 0) & 0xFF], + srcTables[0][(src[2] >> 0) & 0xFF], + srcTables[0][(src[3] >> 0) & 0xFF]}; + greens = Sk4f{srcTables[1][(src[0] >> 8) & 0xFF], + srcTables[1][(src[1] >> 8) & 0xFF], + srcTables[1][(src[2] >> 8) & 0xFF], + srcTables[1][(src[3] >> 8) & 0xFF]}; + blues = Sk4f{srcTables[2][(src[0] >> 16) & 0xFF], + srcTables[2][(src[1] >> 16) & 0xFF], + srcTables[2][(src[2] >> 16) & 0xFF], + srcTables[2][(src[3] >> 16) & 0xFF]}; src += 4; len -= 4; }; @@ -66,20 +71,51 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, dstBlues = rXgXbX[2]*reds + rYgYbY[2]*greens + rZgZbZ[2]*blues; }; - auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst] { - dstReds = linear_to_curve(dstReds); - dstGreens = linear_to_curve(dstGreens); - dstBlues = linear_to_curve(dstBlues); + auto store_4 = [&dstReds, &dstGreens, &dstBlues, &dst, &dstTables] { + if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { + Sk4f (*linear_to_curve)(const Sk4f&) = + (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2; - dstReds = clamp_0_to_255(dstReds); - dstGreens = clamp_0_to_255(dstGreens); - dstBlues = clamp_0_to_255(dstBlues); + dstReds = linear_to_curve(dstReds); + dstGreens = linear_to_curve(dstGreens); + dstBlues = linear_to_curve(dstBlues); + + dstReds = clamp_0_to_255(dstReds); + dstGreens = clamp_0_to_255(dstGreens); + dstBlues = clamp_0_to_255(dstBlues); + + auto rgba = (SkNx_cast(dstReds) ) + | (SkNx_cast(dstGreens) << 8) + | (SkNx_cast(dstBlues) << 16) + | (Sk4i{ 0xFF << 24}); + rgba.store(dst); + } else { + Sk4f scaledReds = Sk4f::Min(Sk4f::Max(1023.0f * dstReds, 0.0f), 1023.0f); + Sk4f scaledGreens = Sk4f::Min(Sk4f::Max(1023.0f * dstGreens, 0.0f), 1023.0f); + Sk4f scaledBlues = Sk4f::Min(Sk4f::Max(1023.0f * dstBlues, 0.0f), 1023.0f); + + Sk4i indicesReds = SkNx_cast(scaledReds + 0.5f); + Sk4i indicesGreens = SkNx_cast(scaledGreens + 0.5f); + Sk4i indicesBlues = SkNx_cast(scaledBlues + 0.5f); + + dst[0] = dstTables[0][indicesReds [0]] + | dstTables[1][indicesGreens[0]] << 8 + | dstTables[2][indicesBlues [0]] << 16 + | 0xFF << 24; + dst[1] = dstTables[0][indicesReds [1]] + | dstTables[1][indicesGreens[1]] << 8 + | dstTables[2][indicesBlues [1]] << 16 + | 0xFF << 24; + dst[2] = dstTables[0][indicesReds [2]] + | dstTables[1][indicesGreens[2]] << 8 + | dstTables[2][indicesBlues [2]] << 16 + | 0xFF << 24; + dst[3] = dstTables[0][indicesReds [3]] + | dstTables[1][indicesGreens[3]] << 8 + | dstTables[2][indicesBlues [3]] << 16 + | 0xFF << 24; + } - auto rgba = (Sk4i{(int)0xFF000000} ) - | (SkNx_cast(dstReds) ) - | (SkNx_cast(dstGreens) << 8) - | (SkNx_cast(dstBlues) << 16); - rgba.store(dst); dst += 4; }; @@ -97,24 +133,35 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, while (len > 0) { // Splat r,g,b across a register each. - auto r = Sk4f{linear_from_curve[(*src >> 0) & 0xFF]}, - g = Sk4f{linear_from_curve[(*src >> 8) & 0xFF]}, - b = Sk4f{linear_from_curve[(*src >> 16) & 0xFF]}; + auto r = Sk4f{srcTables[0][(*src >> 0) & 0xFF]}, + g = Sk4f{srcTables[1][(*src >> 8) & 0xFF]}, + b = Sk4f{srcTables[2][(*src >> 16) & 0xFF]}; // Apply transformation matrix to dst gamut. auto dstPixel = rXgXbX*r + rYgYbY*g + rZgZbZ*b; - // Convert to dst gamma. - dstPixel = linear_to_curve(dstPixel); + if (kSRGB_DstGamma == kDstGamma || k2Dot2_DstGamma == kDstGamma) { + Sk4f (*linear_to_curve)(const Sk4f&) = + (kSRGB_DstGamma == kDstGamma) ? sk_linear_to_srgb : linear_to_2dot2; - // Clamp floats to byte range. - dstPixel = clamp_0_to_255(dstPixel); + dstPixel = linear_to_curve(dstPixel); - // Convert to bytes and store to memory. - uint32_t rgba; - SkNx_cast(dstPixel).store(&rgba); - rgba |= 0xFF000000; - *dst = rgba; + dstPixel = clamp_0_to_255(dstPixel); + + uint32_t rgba; + SkNx_cast(dstPixel).store(&rgba); + rgba |= 0xFF000000; + *dst = rgba; + } else { + Sk4f scaledPixel = Sk4f::Min(Sk4f::Max(1023.0f * dstPixel, 0.0f), 1023.0f); + + Sk4i indices = SkNx_cast(scaledPixel + 0.5f); + + *dst = dstTables[0][indices[0]] + | dstTables[1][indices[1]] << 8 + | dstTables[2][indices[2]] << 16 + | 0xFF << 24; + } dst += 1; src += 1; @@ -122,24 +169,20 @@ static void color_xform_RGB1(uint32_t* dst, const uint32_t* src, int len, } } -static void color_xform_RGB1_srgb_to_2dot2(uint32_t* dst, const uint32_t* src, int len, - const float matrix[16]) { - color_xform_RGB1(dst, src, len, matrix); +static void color_xform_RGB1_to_2dot2(uint32_t* dst, const uint32_t* src, int len, + const float* const srcTables[3], const float matrix[16]) { + color_xform_RGB1(dst, src, len, srcTables, matrix, nullptr); } -static void color_xform_RGB1_2dot2_to_2dot2(uint32_t* dst, const uint32_t* src, int len, - const float matrix[16]) { - color_xform_RGB1(dst, src, len, matrix); +static void color_xform_RGB1_to_srgb(uint32_t* dst, const uint32_t* src, int len, + const float* const srcTables[3], const float matrix[16]) { + color_xform_RGB1(dst, src, len, srcTables, matrix, nullptr); } -static void color_xform_RGB1_srgb_to_srgb(uint32_t* dst, const uint32_t* src, int len, - const float matrix[16]) { - color_xform_RGB1(dst, src, len, matrix); -} - -static void color_xform_RGB1_2dot2_to_srgb(uint32_t* dst, const uint32_t* src, int len, - const float matrix[16]) { - color_xform_RGB1(dst, src, len, matrix); +static void color_xform_RGB1_to_table(uint32_t* dst, const uint32_t* src, int len, + const float* const srcTables[3], const float matrix[16], + const uint8_t* const dstTables[3]) { + color_xform_RGB1(dst, src, len, srcTables, matrix, dstTables); } } // namespace SK_OPTS_NS diff --git a/src/opts/SkOpts_sse41.cpp b/src/opts/SkOpts_sse41.cpp index 4489242de7..408602e535 100644 --- a/src/opts/SkOpts_sse41.cpp +++ b/src/opts/SkOpts_sse41.cpp @@ -21,9 +21,8 @@ namespace SkOpts { srcover_srgb_srgb = sk_sse41::srcover_srgb_srgb; blit_row_s32a_opaque = sk_sse41::blit_row_s32a_opaque; - color_xform_RGB1_srgb_to_2dot2 = sk_sse41::color_xform_RGB1_srgb_to_2dot2; - color_xform_RGB1_2dot2_to_2dot2 = sk_sse41::color_xform_RGB1_2dot2_to_2dot2; - color_xform_RGB1_srgb_to_srgb = sk_sse41::color_xform_RGB1_srgb_to_srgb; - color_xform_RGB1_2dot2_to_srgb = sk_sse41::color_xform_RGB1_2dot2_to_srgb; + color_xform_RGB1_to_2dot2 = sk_sse41::color_xform_RGB1_to_2dot2; + color_xform_RGB1_to_srgb = sk_sse41::color_xform_RGB1_to_srgb; + color_xform_RGB1_to_table = sk_sse41::color_xform_RGB1_to_table; } }