Add RasterPipeline implementation for SkColorSpaceXform

This is initially turned on for Linux debug builds,
which allows us to start testing.

Chrome for Android is a really good candidate for
this (will appreciate the code size savings), but
I'd first like to run some tests to understand the
performance/size tradeoffs a little better.

BUG:660416

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD

Change-Id: I0fb2512216dfc0bda2e5388f9865318eec22291e
Reviewed-on: https://skia-review.googlesource.com/5348
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Mike Klein <mtklein@chromium.org>
This commit is contained in:
Matt Sarett 2016-12-01 12:51:14 -05:00 committed by Skia Commit-Bot
parent 6e1fca43b4
commit dd19ac7d10
4 changed files with 297 additions and 4 deletions

View File

@ -15,8 +15,15 @@
#include "SkColorSpaceXformPriv.h" #include "SkColorSpaceXformPriv.h"
#include "SkHalf.h" #include "SkHalf.h"
#include "SkOpts.h" #include "SkOpts.h"
#include "SkRasterPipeline.h"
#include "SkSRGB.h" #include "SkSRGB.h"
// Compile-time switch for the SkRasterPipeline-based color space xform. It is true only when
// both SK_DEBUG and SK_BUILD_FOR_UNIX are defined, and false in every other configuration.
#if defined(SK_DEBUG) && defined(SK_BUILD_FOR_UNIX)
static constexpr bool kUseRasterPipeline = true;
#else
static constexpr bool kUseRasterPipeline = false;
#endif
static constexpr float sk_linear_from_2dot2[256] = { static constexpr float sk_linear_from_2dot2[256] = {
0.000000000000000000f, 0.000005077051900662f, 0.000023328004666099f, 0.000056921765712193f, 0.000000000000000000f, 0.000005077051900662f, 0.000023328004666099f, 0.000056921765712193f,
0.000107187362341244f, 0.000175123977503027f, 0.000261543754548491f, 0.000367136269815943f, 0.000107187362341244f, 0.000175123977503027f, 0.000261543754548491f, 0.000367136269815943f,
@ -350,6 +357,27 @@ std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform::New(SkColorSpace* srcSpace
} }
} }
if (kUseRasterPipeline) {
SrcGamma srcGamma = srcSpaceXYZ->gammaIsLinear() ? kLinear_SrcGamma : kTable_SrcGamma;
DstGamma dstGamma;
switch (dstSpaceXYZ->gammaNamed()) {
case kSRGB_SkGammaNamed:
dstGamma = kSRGB_DstGamma;
break;
case k2Dot2Curve_SkGammaNamed:
dstGamma = k2Dot2_DstGamma;
break;
case kLinear_SkGammaNamed:
dstGamma = kLinear_DstGamma;
break;
default:
dstGamma = kTable_DstGamma;
break;
}
return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_Pipeline(
srcSpaceXYZ, srcToDst, dstSpaceXYZ, csm, srcGamma, dstGamma));
}
switch (csm) { switch (csm) {
case kNone_ColorSpaceMatch: case kNone_ColorSpaceMatch:
switch (dstSpaceXYZ->gammaNamed()) { switch (dstSpaceXYZ->gammaNamed()) {
@ -1268,8 +1296,172 @@ bool SkColorSpaceXform::apply(ColorFormat dstColorFormat, void* dst, ColorFormat
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
// Captures everything onApply() needs later: the color space match, the gamma kinds, a
// flattened copy of the srcToDst matrix, and the source/destination gamma tables.
SkColorSpaceXform_Pipeline::SkColorSpaceXform_Pipeline(SkColorSpace_XYZ* srcSpace,
                                                       const SkMatrix44& srcToDst,
                                                       SkColorSpace_XYZ* dstSpace,
                                                       ColorSpaceMatch csm,
                                                       SrcGamma srcGamma,
                                                       DstGamma dstGamma)
    : fCSM(csm)
    , fSrcGamma(srcGamma)
    , fDstGamma(dstGamma)
{
    // Flatten rows 0-2 of srcToDst one column after another, giving 12 floats laid out as
    // [col0 | col1 | col2 | col3].
    int flatIndex = 0;
    for (int col = 0; col < 4; ++col) {
        for (int row = 0; row < 3; ++row) {
            fSrcToDst[flatIndex++] = srcToDst.get(row, col);
        }
    }

    // Source side: 256 float entries per table, built in the to-linear direction.
    const int srcTableCount = num_tables(srcSpace);
    const size_t srcEntryCount = srcTableCount * 256;
    const bool oneSrcTableForAll = (srcTableCount <= 1);
    fSrcStorage.reset(srcEntryCount);
    build_gamma_tables(fSrcGammaTables, fSrcStorage.get(), 256, srcSpace, kToLinear,
                       oneSrcTableForAll);

    // Destination side: the destination color space fills in its own tables and storage.
    const int dstTableCount = num_tables(dstSpace);
    dstSpace->toDstGammaTables(fDstGammaTables, &fDstStorage, dstTableCount);
}
// Converts len pixels from src to dst by assembling and running a raster pipeline:
// load (optionally through the source gamma tables), optional 3x4 matrix, clamp for 8-bit
// destinations, optional premultiply, destination gamma, store.
//
// Returns false when the request is not handled here: F16/F32 sources, or F16/F32
// destinations combined with a non-linear destination gamma. Returns true otherwise.
bool SkColorSpaceXform_Pipeline::onApply(ColorFormat dstColorFormat, void* dst,
                                         ColorFormat srcColorFormat, const void* src, int len,
                                         SkAlphaType alphaType) const {
    const bool srcIsRGBA = (kRGBA_8888_ColorFormat == srcColorFormat);
    const bool srcIsBGRA = (kBGRA_8888_ColorFormat == srcColorFormat);
    const bool dstIsRGBA = (kRGBA_8888_ColorFormat == dstColorFormat);
    const bool dstIsBGRA = (kBGRA_8888_ColorFormat == dstColorFormat);

    // Identical color spaces and no premultiplication requested: the pixels only need to be
    // copied, or copied with the red and blue channels exchanged.
    if (kFull_ColorSpaceMatch == fCSM && kPremul_SkAlphaType != alphaType) {
        if ((dstIsRGBA && srcIsRGBA) || (dstIsBGRA && srcIsBGRA)) {
            memcpy(dst, src, len * sizeof(uint32_t));
            return true;
        }
        if ((dstIsRGBA && srcIsBGRA) || (dstIsBGRA && srcIsRGBA)) {
            SkOpts::RGBA_to_BGRA((uint32_t*) dst, src, len);
            return true;
        }
    }

    // Float sources are not handled by this implementation.
    if (kRGBA_F16_ColorFormat == srcColorFormat || kRGBA_F32_ColorFormat == srcColorFormat) {
        return false;
    }

    SkRasterPipeline pipeline;

    // --- Load ---------------------------------------------------------------------------
    // The context must stay alive until pipeline.run() below, so it lives at function scope.
    LoadTablesContext loadTables;
    if (kLinear_SrcGamma == fSrcGamma) {
        pipeline.append(SkRasterPipeline::load_8888, &src);
        if (srcIsBGRA) {
            pipeline.append(SkRasterPipeline::swap_rb);
        }
    } else {
        // For a non-RGBA source the first byte is blue, so the red/blue tables trade places
        // and the channels are swapped back right after the load.
        const int firstTable = srcIsRGBA ? 0 : 2;
        loadTables.fSrc = (const uint32_t*) src;
        loadTables.fR   = fSrcGammaTables[firstTable];
        loadTables.fG   = fSrcGammaTables[1];
        loadTables.fB   = fSrcGammaTables[2 - firstTable];
        pipeline.append(SkRasterPipeline::load_tables, &loadTables);
        if (!srcIsRGBA) {
            pipeline.append(SkRasterPipeline::swap_rb);
        }
    }

    // --- Gamut transform ----------------------------------------------------------------
    if (kNone_ColorSpaceMatch == fCSM) {
        pipeline.append(SkRasterPipeline::matrix_3x4, fSrcToDst);
    }

    // --- Clamp (8-bit destinations only) and premultiply ---------------------------------
    if (dstIsRGBA || dstIsBGRA) {
        pipeline.append(SkRasterPipeline::clamp_0);
        pipeline.append(SkRasterPipeline::clamp_1);
    }
    if (kPremul_SkAlphaType == alphaType) {
        pipeline.append(SkRasterPipeline::premul);
    }

    // --- Destination gamma ----------------------------------------------------------------
    // Linear needs no stage; table gamma is applied by the store_tables stage below.
    if (kSRGB_DstGamma == fDstGamma) {
        pipeline.append(SkRasterPipeline::to_srgb);
    } else if (k2Dot2_DstGamma == fDstGamma) {
        pipeline.append(SkRasterPipeline::to_2dot2);
    }

    // --- Store ----------------------------------------------------------------------------
    // As with loadTables, this context must outlive pipeline.run().
    StoreTablesContext storeTables;
    switch (dstColorFormat) {
        case kRGBA_8888_ColorFormat:
        case kBGRA_8888_ColorFormat: {
            if (dstIsBGRA) {
                pipeline.append(SkRasterPipeline::swap_rb);
            }
            if (kTable_DstGamma == fDstGamma) {
                // After the swap for BGRA, the "red" lane carries blue, so it needs the
                // blue table (and vice versa).
                const int firstTable = dstIsBGRA ? 2 : 0;
                storeTables.fDst   = (uint32_t*) dst;
                storeTables.fR     = fDstGammaTables[firstTable];
                storeTables.fG     = fDstGammaTables[1];
                storeTables.fB     = fDstGammaTables[2 - firstTable];
                storeTables.fCount = SkColorSpaceXform_Base::kDstGammaTableSize;
                pipeline.append(SkRasterPipeline::store_tables, &storeTables);
            } else {
                pipeline.append(SkRasterPipeline::store_8888, &dst);
            }
            break;
        }
        case kRGBA_F16_ColorFormat:
        case kRGBA_F32_ColorFormat:
            // Float destinations are only produced with linear gamma.
            if (kLinear_DstGamma != fDstGamma) {
                return false;
            }
            if (kRGBA_F16_ColorFormat == dstColorFormat) {
                pipeline.append(SkRasterPipeline::store_f16, &dst);
            } else {
                pipeline.append(SkRasterPipeline::store_f32, &dst);
            }
            break;
    }

    pipeline.run(0, 0, len);
    return true;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// For testing. Builds an xform from `space` to itself that still goes through the full
// table-gamma / no-color-space-match path instead of any identity shortcut.
//
// When kUseRasterPipeline is set the raster pipeline implementation is returned; otherwise
// the templated SkColorSpaceXform_XYZ implementation is used.
std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space) {
    if (kUseRasterPipeline) {
        return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_Pipeline(
                space, SkMatrix::I(), space, kNone_ColorSpaceMatch, kTable_SrcGamma,
                kTable_DstGamma));
    } else {
        return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_XYZ
                <kTable_SrcGamma, kTable_DstGamma, kNone_ColorSpaceMatch>
                (space, SkMatrix::I(), space));
    }
}

View File

@ -71,6 +71,47 @@ private:
friend std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space); friend std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
}; };
// Context handed to the load_tables raster pipeline stage.
struct LoadTablesContext {
// Packed 32-bit source pixels; the stage reads starting at fSrc + x.
const uint32_t* fSrc;
// Float lookup tables. The stage indexes fR with bits 0-7 of each pixel, fG with bits 8-15
// and fB with bits 16-23.
const float* fR;
const float* fG;
const float* fB;
};
// Context handed to the store_tables raster pipeline stage.
struct StoreTablesContext {
// Packed 32-bit destination pixels; the stage writes starting at fDst + x.
uint32_t* fDst;
// Byte lookup tables. The looked-up values are packed into bits 0-7 (fR), 8-15 (fG) and
// 16-23 (fB) of each stored pixel.
const uint8_t* fR;
const uint8_t* fG;
const uint8_t* fB;
// Table size used by the stage: each channel is scaled by (fCount - 1) and rounded to get
// the table index.
int fCount;
};
// SkColorSpaceXform implementation whose onApply() assembles an SkRasterPipeline and runs it
// over the pixels. Instances are created by SkColorSpaceXform and SlowIdentityXform (both are
// friends; the constructor is private).
class SkColorSpaceXform_Pipeline : public SkColorSpaceXform {
protected:
// Transforms `count` pixels from src to dst. Returns false for requests this implementation
// does not handle (F16/F32 sources, or F16/F32 destinations with a non-linear destination
// gamma) and true otherwise.
virtual bool onApply(ColorFormat dstFormat, void* dst, ColorFormat srcFormat, const void* src,
int count, SkAlphaType alphaType) const;
private:
SkColorSpaceXform_Pipeline(SkColorSpace_XYZ* srcSpace, const SkMatrix44& srcToDst,
SkColorSpace_XYZ* dstSpace, ColorSpaceMatch csm, SrcGamma srcGamma,
DstGamma dstGamma);
// Contain pointers into storage or pointers into precomputed tables.
const float* fSrcGammaTables[3];
SkAutoTMalloc<float> fSrcStorage;
const uint8_t* fDstGammaTables[3];
sk_sp<SkData> fDstStorage;
// Rows 0-2 of srcToDst, flattened column after column (12 floats); passed to the
// matrix_3x4 stage.
float fSrcToDst[12];
// Color space match and gamma kinds chosen at construction; onApply() uses them to decide
// which stages to append.
ColorSpaceMatch fCSM;
SrcGamma fSrcGamma;
DstGamma fDstGamma;
friend class SkColorSpaceXform;
friend std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
};
// For testing. Bypasses opts for when src and dst color spaces are equal. // For testing. Bypasses opts for when src and dst color spaces are equal.
std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space); std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);

View File

@ -61,10 +61,12 @@
M(unpremul) M(premul) \ M(unpremul) M(premul) \
M(set_rgb) \ M(set_rgb) \
M(from_srgb) M(from_srgb_d) M(to_srgb) \ M(from_srgb) M(from_srgb_d) M(to_srgb) \
M(to_2dot2) \
M(constant_color) M(store_f32) \ M(constant_color) M(store_f32) \
M(load_565) M(load_565_d) M(store_565) \ M(load_565) M(load_565_d) M(store_565) \
M(load_f16) M(load_f16_d) M(store_f16) \ M(load_f16) M(load_f16_d) M(store_f16) \
M(load_8888) M(load_8888_d) M(store_8888) \ M(load_8888) M(load_8888_d) M(store_8888) \
M(load_tables) M(store_tables) \
M(scale_u8) M(scale_1_float) \ M(scale_u8) M(scale_1_float) \
M(lerp_u8) M(lerp_565) M(lerp_1_float) \ M(lerp_u8) M(lerp_565) M(lerp_1_float) \
M(dstatop) M(dstin) M(dstout) M(dstover) \ M(dstatop) M(dstin) M(dstout) M(dstover) \

View File

@ -185,13 +185,21 @@ SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
return tail ? _mm256_maskload_epi32((const int*)src, mask(tail)) return tail ? _mm256_maskload_epi32((const int*)src, mask(tail))
: SkNu::Load(src); : SkNu::Load(src);
} }
// Loads one vector of floats from src. A non-zero tail selects a masked load using
// mask(tail); a zero tail loads the whole vector.
SI SkNf load(size_t tail, const float* src) {
    if (tail) {
        return _mm256_maskload_ps(src, mask(tail));
    }
    return SkNf::Load(src);
}
// Masked gathers: each enabled lane reads src[offset[lane]] (scale 4 = sizeof one element).
// Lanes disabled by mask(tail) are not read from memory and receive the 0 passed as the first
// argument of the intrinsic.

// int32_t table.
SI SkNi gather(size_t tail, const int32_t* src, const SkNi& offset) {
    auto m = mask(tail);
    return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
}

// uint32_t table; same integer gather, result reinterpreted as unsigned lanes.
SI SkNu gather(size_t tail, const uint32_t* src, const SkNi& offset) {
    auto m = mask(tail);
    return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
}

// float table; the float gather takes its mask as __m256, hence the bit cast of mask(tail).
SI SkNf gather(size_t tail, const float* src, const SkNi& offset) {
    auto m = _mm256_castsi256_ps(mask(tail));
    return _mm256_mask_i32gather_ps(SkNf(0).fVec, (const float*)src, offset.fVec, m, 4);
}
static const char* bug = "I don't think MSAN understands maskstore."; static const char* bug = "I don't think MSAN understands maskstore.";
@ -210,6 +218,13 @@ SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
} }
v.store(dst); v.store(dst);
} }
// Stores a vector of floats to dst. A zero tail stores the whole vector; a non-zero tail
// does a masked store using mask(tail) and then marks dst[0, tail) as initialized for MSAN.
SI void store(size_t tail, const SkNf& v, float* dst) {
    if (!tail) {
        v.store(dst);
        return;
    }
    _mm256_maskstore_ps(dst, mask(tail), v.fVec);
    sk_msan_mark_initialized(dst, dst+tail, bug);
}
#endif #endif
SI void from_8888(const SkNu& _8888, SkNf* r, SkNf* g, SkNf* b, SkNf* a) { SI void from_8888(const SkNu& _8888, SkNf* r, SkNf* g, SkNf* b, SkNf* a) {
@ -326,6 +341,22 @@ STAGE(to_srgb) {
b = sk_linear_to_srgb_needs_round(b); b = sk_linear_to_srgb_needs_round(b);
} }
// Encodes linear r, g, b with an approximate 2.2 gamma curve. Alpha is left untouched.
STAGE(to_2dot2) {
    auto to_2dot2 = [](const SkNf& x) {
        // x^(29/64) is a very good approximation of the true value, x^(1/2.2).
        auto x2  = x.rsqrt(),                            // x^(-1/2)
             x32 = x2.rsqrt().rsqrt().rsqrt().rsqrt(),   // x^(-1/32)
             x64 = x32.rsqrt();                          // x^(+1/64)

        // 29 = 32 - 2 - 1
        //
        // Watch out for NaN: for x == 0 the chain above yields x2 = inf, x32 = inf and
        // x64 = 0, so the product below is 0 * inf * inf = NaN rather than 0. Max() with 0 as
        // the second operand turns that NaN into 0, and leaves every valid (non-negative)
        // result unchanged.
        return SkNf::Max(x2.invert() * x32 * x64.invert(), 0.0f);
    };

    r = to_2dot2(r);
    g = to_2dot2(g);
    b = to_2dot2(b);
}
// The default shader produces a constant color (from the SkPaint). // The default shader produces a constant color (from the SkPaint).
STAGE(constant_color) { STAGE(constant_color) {
auto color = (const SkPM4f*)ctx; auto color = (const SkPM4f*)ctx;
@ -516,6 +547,33 @@ STAGE(store_8888) {
| SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr); | SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr);
} }
// Loads packed 32-bit pixels and maps the three low bytes through the float tables of the
// LoadTablesContext; the top byte becomes alpha, scaled by 1/255.
STAGE(load_tables) {
    auto tables = (const LoadTablesContext*)ctx;
    SkNu pixels = load(tail, tables->fSrc + x);

    // Reinterprets the unsigned lanes as signed lanes (same bits) for use as gather offsets.
    auto as_offsets = [](const SkNu& v) { return SkNi::Load(&v); };

    const SkNi rIndex = as_offsets((pixels >>  0) & 0xff);
    const SkNi gIndex = as_offsets((pixels >>  8) & 0xff);
    const SkNi bIndex = as_offsets((pixels >> 16) & 0xff);

    r = gather(tail, tables->fR, rIndex);
    g = gather(tail, tables->fG, gIndex);
    b = gather(tail, tables->fB, bIndex);
    a = (1/255.0f) * SkNx_cast<float>(as_offsets(pixels >> 24));
}
// Maps r, g, b through the byte tables of the StoreTablesContext and stores the results,
// together with alpha scaled to 0-255, as packed 32-bit pixels.
STAGE(store_tables) {
    auto tables = (const StoreTablesContext*)ctx;

    // Channel values are scaled to [0, fCount - 1] and rounded to pick a table entry.
    const float maxIndex = tables->fCount - 1;
    SkNi rIndex = SkNx_cast<int>(maxIndex * r + 0.5f);
    SkNi gIndex = SkNx_cast<int>(maxIndex * g + 0.5f);
    SkNi bIndex = SkNx_cast<int>(maxIndex * b + 0.5f);

    SkNi rByte = SkNx_cast<int>(gather(tail, tables->fR, rIndex));
    SkNi gByte = SkNx_cast<int>(gather(tail, tables->fG, gIndex));
    SkNi bByte = SkNx_cast<int>(gather(tail, tables->fB, bIndex));
    SkNi aByte = SkNx_cast<int>(255.0f * a + 0.5f);

    auto out = tables->fDst + x;
    store(tail, ( rByte <<  0
                | gByte <<  8
                | bByte << 16
                | aByte << 24), (int*)out);
}
SI SkNf inv(const SkNf& x) { return 1.0f - x; } SI SkNf inv(const SkNf& x) { return 1.0f - x; }
RGBA_XFERMODE(clear) { return 0.0f; } RGBA_XFERMODE(clear) { return 0.0f; }