Add RasterPipeline implementation for SkColorSpaceXform
This is initially turned on for Linux debug builds, which allows us to start testing.

Chrome for Android is a really good candidate for this (will appreciate the code size savings), but I'd first like to run some tests to understand the performance/size tradeoffs a little better.

BUG:660416
CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD
Change-Id: I0fb2512216dfc0bda2e5388f9865318eec22291e
Reviewed-on: https://skia-review.googlesource.com/5348
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
6e1fca43b4
commit
dd19ac7d10
@ -15,8 +15,15 @@
|
||||
#include "SkColorSpaceXformPriv.h"
|
||||
#include "SkHalf.h"
|
||||
#include "SkOpts.h"
|
||||
#include "SkRasterPipeline.h"
|
||||
#include "SkSRGB.h"
|
||||
|
||||
// Compile-time switch: when true, SkColorSpaceXform::New() and SlowIdentityXform()
// construct SkColorSpaceXform_Pipeline objects instead of the templated
// SkColorSpaceXform_XYZ implementations.  Enabled only when both SK_DEBUG and
// SK_BUILD_FOR_UNIX are defined.
#if defined(SK_DEBUG) && defined(SK_BUILD_FOR_UNIX)
    static constexpr bool kUseRasterPipeline = true;
#else
    static constexpr bool kUseRasterPipeline = false;
#endif
|
||||
|
||||
static constexpr float sk_linear_from_2dot2[256] = {
|
||||
0.000000000000000000f, 0.000005077051900662f, 0.000023328004666099f, 0.000056921765712193f,
|
||||
0.000107187362341244f, 0.000175123977503027f, 0.000261543754548491f, 0.000367136269815943f,
|
||||
@ -350,6 +357,27 @@ std::unique_ptr<SkColorSpaceXform> SkColorSpaceXform::New(SkColorSpace* srcSpace
|
||||
}
|
||||
}
|
||||
|
||||
if (kUseRasterPipeline) {
|
||||
SrcGamma srcGamma = srcSpaceXYZ->gammaIsLinear() ? kLinear_SrcGamma : kTable_SrcGamma;
|
||||
DstGamma dstGamma;
|
||||
switch (dstSpaceXYZ->gammaNamed()) {
|
||||
case kSRGB_SkGammaNamed:
|
||||
dstGamma = kSRGB_DstGamma;
|
||||
break;
|
||||
case k2Dot2Curve_SkGammaNamed:
|
||||
dstGamma = k2Dot2_DstGamma;
|
||||
break;
|
||||
case kLinear_SkGammaNamed:
|
||||
dstGamma = kLinear_DstGamma;
|
||||
break;
|
||||
default:
|
||||
dstGamma = kTable_DstGamma;
|
||||
break;
|
||||
}
|
||||
return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_Pipeline(
|
||||
srcSpaceXYZ, srcToDst, dstSpaceXYZ, csm, srcGamma, dstGamma));
|
||||
}
|
||||
|
||||
switch (csm) {
|
||||
case kNone_ColorSpaceMatch:
|
||||
switch (dstSpaceXYZ->gammaNamed()) {
|
||||
@ -1268,8 +1296,172 @@ bool SkColorSpaceXform::apply(ColorFormat dstColorFormat, void* dst, ColorFormat
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Captures everything onApply() needs to assemble a raster pipeline later:
//   - the color space match kind and the src/dst gamma kinds (stored as given),
//   - the upper three rows of srcToDst, flattened into fSrcToDst,
//   - source gamma tables built with build_gamma_tables(..., kToLinear, ...),
//   - destination gamma tables obtained from dstSpace->toDstGammaTables().
SkColorSpaceXform_Pipeline::SkColorSpaceXform_Pipeline(SkColorSpace_XYZ* srcSpace,
                                                       const SkMatrix44& srcToDst,
                                                       SkColorSpace_XYZ* dstSpace,
                                                       ColorSpaceMatch csm,
                                                       SrcGamma srcGamma,
                                                       DstGamma dstGamma)
    : fCSM(csm)
    , fSrcGamma(srcGamma)
    , fDstGamma(dstGamma)
{
    // Flatten rows 0..2 of each of the four matrix columns, one column after
    // another, so that fSrcToDst[3*col + row] == srcToDst.get(row, col).
    for (int col = 0; col < 4; col++) {
        for (int row = 0; row < 3; row++) {
            fSrcToDst[3 * col + row] = srcToDst.get(row, col);
        }
    }

    // Source tables: 256 float entries per table.
    const int numSrcTables = num_tables(srcSpace);
    const size_t srcEntries = numSrcTables * 256;
    const bool srcGammasAreMatching = (1 >= numSrcTables);
    fSrcStorage.reset(srcEntries);
    build_gamma_tables(fSrcGammaTables, fSrcStorage.get(), 256, srcSpace, kToLinear,
                       srcGammasAreMatching);

    // Destination tables are provided by the color space itself; fDstStorage
    // receives the backing data.
    const int numDstTables = num_tables(dstSpace);
    dstSpace->toDstGammaTables(fDstGammaTables, &fDstStorage, numDstTables);
}
|
||||
|
||||
// Transforms `len` pixels from `src` to `dst` by assembling and running an
// SkRasterPipeline.
//
// Returns false (without writing to dst) when:
//   - the source format is kRGBA_F16 or kRGBA_F32, or
//   - the destination format is kRGBA_F16 / kRGBA_F32 and fDstGamma is not
//     kLinear_DstGamma.
// Returns true otherwise.
//
// Stage order: load (optionally through source gamma tables) -> optional
// matrix_3x4 -> clamp_0/clamp_1 for 8888 destinations -> optional premul ->
// optional to_srgb / to_2dot2 -> store (optionally through destination tables).
bool SkColorSpaceXform_Pipeline::onApply(ColorFormat dstColorFormat, void* dst,
                                         ColorFormat srcColorFormat, const void* src, int len,
                                         SkAlphaType alphaType) const {
    const bool srcIsRGBA = (kRGBA_8888_ColorFormat == srcColorFormat);
    const bool srcIsBGRA = (kBGRA_8888_ColorFormat == srcColorFormat);
    const bool dstIsRGBA = (kRGBA_8888_ColorFormat == dstColorFormat);
    const bool dstIsBGRA = (kBGRA_8888_ColorFormat == dstColorFormat);

    // Fast paths: identical color spaces and no premultiplication requested,
    // so 8888 -> 8888 is either a straight copy or a red/blue swap.
    if (kFull_ColorSpaceMatch == fCSM && kPremul_SkAlphaType != alphaType) {
        if ((dstIsRGBA && srcIsRGBA) || (dstIsBGRA && srcIsBGRA)) {
            memcpy(dst, src, len * sizeof(uint32_t));
            return true;
        }

        if ((dstIsRGBA && srcIsBGRA) || (dstIsBGRA && srcIsRGBA)) {
            SkOpts::RGBA_to_BGRA(static_cast<uint32_t*>(dst), src, len);
            return true;
        }
    }

    // Floating point sources are not handled by this implementation.
    if (kRGBA_F16_ColorFormat == srcColorFormat || kRGBA_F32_ColorFormat == srcColorFormat) {
        return false;
    }

    SkRasterPipeline pipeline;

    // The contexts below, and the addresses of `src` / `dst`, are handed to the
    // pipeline by pointer; they are locals of this function and pipeline.run()
    // is called before it returns.
    LoadTablesContext loadTables;
    if (kLinear_SrcGamma == fSrcGamma) {
        pipeline.append(SkRasterPipeline::load_8888, &src);
        if (srcIsBGRA) {
            pipeline.append(SkRasterPipeline::swap_rb);
        }
    } else {
        // For non-RGBA input the first byte of each pixel is blue, so the
        // tables for the first and third channel are exchanged and the
        // channels are swapped after the lookup.
        const bool swapSrc = !srcIsRGBA;
        loadTables.fSrc = static_cast<const uint32_t*>(src);
        loadTables.fR = fSrcGammaTables[swapSrc ? 2 : 0];
        loadTables.fG = fSrcGammaTables[1];
        loadTables.fB = fSrcGammaTables[swapSrc ? 0 : 2];
        pipeline.append(SkRasterPipeline::load_tables, &loadTables);
        if (swapSrc) {
            pipeline.append(SkRasterPipeline::swap_rb);
        }
    }

    if (kNone_ColorSpaceMatch == fCSM) {
        pipeline.append(SkRasterPipeline::matrix_3x4, fSrcToDst);
    }

    if (dstIsRGBA || dstIsBGRA) {
        pipeline.append(SkRasterPipeline::clamp_0);
        pipeline.append(SkRasterPipeline::clamp_1);
    }

    if (kPremul_SkAlphaType == alphaType) {
        pipeline.append(SkRasterPipeline::premul);
    }

    // Parametric destination gammas get their own stage; table and linear
    // gammas add nothing here (tables are applied by store_tables below).
    switch (fDstGamma) {
        case kSRGB_DstGamma:
            pipeline.append(SkRasterPipeline::to_srgb);
            break;
        case k2Dot2_DstGamma:
            pipeline.append(SkRasterPipeline::to_2dot2);
            break;
        default:
            break;
    }

    StoreTablesContext storeTables;
    switch (dstColorFormat) {
        case kRGBA_8888_ColorFormat:
        case kBGRA_8888_ColorFormat:
            if (dstIsBGRA) {
                pipeline.append(SkRasterPipeline::swap_rb);
            }
            if (kTable_DstGamma == fDstGamma) {
                // After the swap for BGRA the first channel holds blue, so the
                // tables for the first and third channel are exchanged too.
                storeTables.fDst = static_cast<uint32_t*>(dst);
                storeTables.fR = fDstGammaTables[dstIsBGRA ? 2 : 0];
                storeTables.fG = fDstGammaTables[1];
                storeTables.fB = fDstGammaTables[dstIsBGRA ? 0 : 2];
                storeTables.fCount = SkColorSpaceXform_Base::kDstGammaTableSize;
                pipeline.append(SkRasterPipeline::store_tables, &storeTables);
            } else {
                pipeline.append(SkRasterPipeline::store_8888, &dst);
            }
            break;
        case kRGBA_F16_ColorFormat:
            if (kLinear_DstGamma != fDstGamma) {
                return false;
            }
            pipeline.append(SkRasterPipeline::store_f16, &dst);
            break;
        case kRGBA_F32_ColorFormat:
            if (kLinear_DstGamma != fDstGamma) {
                return false;
            }
            pipeline.append(SkRasterPipeline::store_f32, &dst);
            break;
    }

    pipeline.run(0, 0, len);
    return true;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Builds a transform from `space` to itself that is nevertheless constructed
// with kNone_ColorSpaceMatch, table source gamma and table destination gamma,
// and an identity matrix.  Which implementation is returned depends on
// kUseRasterPipeline.
std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space) {
    // `new` is used directly: this function is declared a friend of the
    // pipeline class, whose constructor is private.
    if (kUseRasterPipeline) {
        return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_Pipeline(
                space, SkMatrix::I(), space, kNone_ColorSpaceMatch, kTable_SrcGamma,
                kTable_DstGamma));
    }

    return std::unique_ptr<SkColorSpaceXform>(new SkColorSpaceXform_XYZ
            <kTable_SrcGamma, kTable_DstGamma, kNone_ColorSpaceMatch>
            (space, SkMatrix::I(), space));
}
|
||||
|
@ -71,6 +71,47 @@ private:
|
||||
friend std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
|
||||
};
|
||||
|
||||
// Context handed to the load_tables raster pipeline stage.
struct LoadTablesContext {
    // Packed 32-bit source pixels; the stage reads starting at fSrc + x.
    const uint32_t* fSrc;
    // Float lookup tables, indexed by bits 0-7 (fR), 8-15 (fG) and 16-23 (fB)
    // of each source pixel.
    const float* fR;
    const float* fG;
    const float* fB;
};
|
||||
|
||||
// Context handed to the store_tables raster pipeline stage.
struct StoreTablesContext {
    // Packed 32-bit destination pixels; the stage writes starting at fDst + x.
    uint32_t* fDst;
    // Byte lookup tables whose results are packed into bits 0-7 (fR),
    // 8-15 (fG) and 16-23 (fB) of each destination pixel.
    const uint8_t* fR;
    const uint8_t* fG;
    const uint8_t* fB;
    // The stage scales each channel by (fCount - 1) to form a table index.
    int fCount;
};
|
||||
|
||||
class SkColorSpaceXform_Pipeline : public SkColorSpaceXform {
|
||||
protected:
|
||||
virtual bool onApply(ColorFormat dstFormat, void* dst, ColorFormat srcFormat, const void* src,
|
||||
int count, SkAlphaType alphaType) const;
|
||||
|
||||
private:
|
||||
SkColorSpaceXform_Pipeline(SkColorSpace_XYZ* srcSpace, const SkMatrix44& srcToDst,
|
||||
SkColorSpace_XYZ* dstSpace, ColorSpaceMatch csm, SrcGamma srcGamma,
|
||||
DstGamma dstGamma);
|
||||
|
||||
// Contain pointers into storage or pointers into precomputed tables.
|
||||
const float* fSrcGammaTables[3];
|
||||
SkAutoTMalloc<float> fSrcStorage;
|
||||
const uint8_t* fDstGammaTables[3];
|
||||
sk_sp<SkData> fDstStorage;
|
||||
|
||||
float fSrcToDst[12];
|
||||
|
||||
ColorSpaceMatch fCSM;
|
||||
SrcGamma fSrcGamma;
|
||||
DstGamma fDstGamma;
|
||||
|
||||
friend class SkColorSpaceXform;
|
||||
friend std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
|
||||
};
|
||||
|
||||
// For testing. Bypasses opts for when src and dst color spaces are equal.
|
||||
std::unique_ptr<SkColorSpaceXform> SlowIdentityXform(SkColorSpace_XYZ* space);
|
||||
|
||||
|
@ -61,10 +61,12 @@
|
||||
M(unpremul) M(premul) \
|
||||
M(set_rgb) \
|
||||
M(from_srgb) M(from_srgb_d) M(to_srgb) \
|
||||
M(to_2dot2) \
|
||||
M(constant_color) M(store_f32) \
|
||||
M(load_565) M(load_565_d) M(store_565) \
|
||||
M(load_f16) M(load_f16_d) M(store_f16) \
|
||||
M(load_8888) M(load_8888_d) M(store_8888) \
|
||||
M(load_tables) M(store_tables) \
|
||||
M(scale_u8) M(scale_1_float) \
|
||||
M(lerp_u8) M(lerp_565) M(lerp_1_float) \
|
||||
M(dstatop) M(dstin) M(dstout) M(dstover) \
|
||||
|
@ -185,13 +185,21 @@ SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
|
||||
return tail ? _mm256_maskload_epi32((const int*)src, mask(tail))
|
||||
: SkNu::Load(src);
|
||||
}
|
||||
// Loads floats from src.  A non-zero tail uses a masked AVX load with
// mask(tail); a zero tail loads a full SkNf.
SI SkNf load(size_t tail, const float* src) {
    if (tail) {
        return _mm256_maskload_ps(src, mask(tail));
    }
    return SkNf::Load(src);
}
|
||||
// Gathers 32-bit signed integers src[offset[i]] (element scale 4 bytes) using
// a masked AVX2 gather.  Lanes not selected by mask(tail) take the value 0
// from the SkNi(0) pass-through operand.
SI SkNi gather(size_t tail, const int32_t* src, const SkNi& offset) {
    auto m = mask(tail);
    return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
}
|
||||
// Gathers 32-bit unsigned integers src[offset[i]] (element scale 4 bytes)
// using a masked AVX2 gather.  Lanes not selected by mask(tail) take the value
// 0 from the SkNi(0) pass-through operand.
SI SkNu gather(size_t tail, const uint32_t* src, const SkNi& offset) {
    auto m = mask(tail);
    return _mm256_mask_i32gather_epi32(SkNi(0).fVec, (const int*)src, offset.fVec, m, 4);
}
|
||||
// Gathers floats src[offset[i]] (element scale 4 bytes) using a masked AVX2
// gather.  The integer mask from mask(tail) is reinterpreted as a float vector,
// which is the mask type the float gather intrinsic takes.  Lanes not selected
// take the value 0 from the SkNf(0) pass-through operand.
SI SkNf gather(size_t tail, const float* src, const SkNi& offset) {
    const auto laneMask = _mm256_castsi256_ps(mask(tail));
    return _mm256_mask_i32gather_ps(SkNf(0).fVec, src, offset.fVec, laneMask, 4);
}
|
||||
|
||||
// Reason string passed to sk_msan_mark_initialized() after a masked store.
static const char* bug = "I don't think MSAN understands maskstore.";
|
||||
@ -210,6 +218,13 @@ SI void store(size_t tail, const SkNx<N,T>& v, T* dst) {
|
||||
}
|
||||
v.store(dst);
|
||||
}
|
||||
// Stores v to dst.  A non-zero tail uses a masked AVX store with mask(tail)
// and then marks dst[0..tail) as initialized for MSAN; a zero tail stores the
// full vector.
SI void store(size_t tail, const SkNf& v, float* dst) {
    if (!tail) {
        v.store(dst);
        return;
    }
    _mm256_maskstore_ps(dst, mask(tail), v.fVec);
    return sk_msan_mark_initialized(dst, dst+tail, bug);
}
|
||||
#endif
|
||||
|
||||
SI void from_8888(const SkNu& _8888, SkNf* r, SkNf* g, SkNf* b, SkNf* a) {
|
||||
@ -326,6 +341,22 @@ STAGE(to_srgb) {
|
||||
b = sk_linear_to_srgb_needs_round(b);
|
||||
}
|
||||
|
||||
// Applies an approximate 2.2 gamma encoding to r, g and b.  This body does not
// assign to a.
STAGE(to_2dot2) {
    // x^(29/64) is a very good approximation of the true value, x^(1/2.2).
    // It is assembled purely from reciprocal square roots and reciprocals.
    // NOTE(review): with exact arithmetic x == 0 would give 0 * inf here;
    // confirm how SkNf::rsqrt()/invert() behave at zero.
    auto encode = [](const SkNf& x) {
        auto invSqrt = x.rsqrt();                                  // x^(-1/2)
        auto inv32   = invSqrt.rsqrt().rsqrt().rsqrt().rsqrt();    // x^(-1/32)
        auto root64  = inv32.rsqrt();                              // x^(+1/64)

        // 29/64 = 32/64 - 2/64 - 1/64
        return invSqrt.invert() * inv32 * root64.invert();
    };

    r = encode(r);
    g = encode(g);
    b = encode(b);
}
|
||||
|
||||
// The default shader produces a constant color (from the SkPaint).
|
||||
STAGE(constant_color) {
|
||||
auto color = (const SkPM4f*)ctx;
|
||||
@ -516,6 +547,33 @@ STAGE(store_8888) {
|
||||
| SkNx_cast<int>(255.0f * a + 0.5f) << 24 ), (int*)ptr);
|
||||
}
|
||||
|
||||
// Loads packed 32-bit pixels from a LoadTablesContext and converts them to
// float channels: r, g and b are looked up in the context's float tables using
// bits 0-7, 8-15 and 16-23 of each pixel; a is bits 24-31 scaled by 1/255.
STAGE(load_tables) {
    auto tables = (const LoadTablesContext*)ctx;
    auto pixels = tables->fSrc + x;

    SkNu rgba = load(tail, pixels);

    // Reinterprets the unsigned lanes as signed ints for use as gather offsets.
    auto as_int = [](const SkNu& v) { return SkNi::Load(&v); };

    r = gather(tail, tables->fR, as_int((rgba >>  0) & 0xff));
    g = gather(tail, tables->fG, as_int((rgba >>  8) & 0xff));
    b = gather(tail, tables->fB, as_int((rgba >> 16) & 0xff));
    a = (1/255.0f) * SkNx_cast<float>(as_int(rgba >> 24));
}
|
||||
|
||||
// Converts r, g and b to table indices (value * (fCount - 1), rounded by
// adding 0.5 before the int cast), looks each index up in the context's
// tables, and stores the results packed with alpha (255 * a + 0.5) into
// 32-bit pixels at fDst + x.  No clamping of the indices happens in this stage.
STAGE(store_tables) {
    auto tables = (const StoreTablesContext*)ctx;
    auto pixels = tables->fDst + x;

    float maxIndex = tables->fCount - 1;
    SkNi rIndex = SkNx_cast<int>(maxIndex * r + 0.5f);
    SkNi gIndex = SkNx_cast<int>(maxIndex * g + 0.5f);
    SkNi bIndex = SkNx_cast<int>(maxIndex * b + 0.5f);

    store(tail, ( SkNx_cast<int>(gather(tail, tables->fR, rIndex)) <<  0
                | SkNx_cast<int>(gather(tail, tables->fG, gIndex)) <<  8
                | SkNx_cast<int>(gather(tail, tables->fB, bIndex)) << 16
                | SkNx_cast<int>(255.0f * a + 0.5f)                << 24), (int*)pixels);
}
|
||||
|
||||
// Returns 1.0f - x.
SI SkNf inv(const SkNf& x) { return 1.0f - x; }
|
||||
|
||||
// "clear" transfer mode: the body supplied to RGBA_XFERMODE returns 0.0f.
RGBA_XFERMODE(clear) { return 0.0f; }
|
||||
|
Loading…
Reference in New Issue
Block a user