Replace interp() with clut_{3,4}D stages.
I tried to follow exactly the same strategy as a start. (Though I did fix the off-by-one dimensions.)

It does rather look like we only need 3D and 4D now that I've looked at the call sites.

Looks like about a 20% speedup.

Change-Id: I8b1af64750ad1750716ee1ab0767e64591c7206a
Reviewed-on: https://skia-review.googlesource.com/32842
Commit-Queue: Mike Klein <mtklein@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
This commit is contained in:
parent
f4874bc5c1
commit
c2f876bb8d
@ -18,59 +18,3 @@ SkColorLookUpTable::SkColorLookUpTable(uint8_t inputChannels, const uint8_t limi
|
||||
SkASSERT(fLimits[i] > 1);
|
||||
}
|
||||
}
|
||||
|
||||
// Our general strategy is to recursively interpolate each dimension,
|
||||
// accumulating the index to sample at, and our current pixel stride to help accumulate the index.
|
||||
template <int dim>
|
||||
static Sk4f interp_dimension(const float* table, const uint8_t* limits,
|
||||
const float* src, int index, int stride) {
|
||||
// We'd logically like to sample this dimension at x.
|
||||
int limit = limits[dim];
|
||||
float x = src[dim] * (limit - 1);
|
||||
|
||||
// We can't index an array by a float (darn) so we have to snap to nearby integers lo and hi.
|
||||
int lo = (int)(x ),
|
||||
hi = (int)(x + 0.9999f);
|
||||
|
||||
// Recursively sample at lo and hi.
|
||||
Sk4f L = interp_dimension<dim-1>(table,limits,src, stride*lo + index, stride*limit),
|
||||
H = interp_dimension<dim-1>(table,limits,src, stride*hi + index, stride*limit);
|
||||
|
||||
// Linearly interpolate those colors based on their distance to x.
|
||||
float t = (x - lo);
|
||||
return (1 - t)*L + t*H;
|
||||
}
|
||||
|
||||
// Bottom out our recursion at 0 dimensions, i.e. just return the color at index.
|
||||
template <>
|
||||
Sk4f interp_dimension<-1>(const float* table, const uint8_t* limits,
|
||||
const float* src, int index, int stride) {
|
||||
return {
|
||||
table[3*index+0],
|
||||
table[3*index+1],
|
||||
table[3*index+2],
|
||||
0.0f,
|
||||
};
|
||||
}
|
||||
|
||||
template <int dim>
|
||||
static Sk4f interp_dimension(const float* table, const uint8_t* limits, const float* src) {
|
||||
// Start our accumulated index and stride off at their identity values, 0 and 1.
|
||||
return interp_dimension<dim>(table, limits, src, 0,1);
|
||||
}
|
||||
|
||||
void SkColorLookUpTable::interp(float* dst, const float* src) const {
|
||||
Sk4f rgb;
|
||||
switch (fInputChannels-1) {
|
||||
case 0: rgb = interp_dimension<0>(this->table(), fLimits, src); break;
|
||||
case 1: rgb = interp_dimension<1>(this->table(), fLimits, src); break;
|
||||
case 2: rgb = interp_dimension<2>(this->table(), fLimits, src); break;
|
||||
case 3: rgb = interp_dimension<3>(this->table(), fLimits, src); break;
|
||||
default: SkDEBUGFAIL("oops"); return;
|
||||
}
|
||||
|
||||
rgb = Sk4f::Max(0, Sk4f::Min(rgb, 1));
|
||||
dst[0] = rgb[0];
|
||||
dst[1] = rgb[1];
|
||||
dst[2] = rgb[2];
|
||||
}
|
||||
|
@ -21,11 +21,6 @@ public:
|
||||
|
||||
SkColorLookUpTable(uint8_t inputChannels, const uint8_t limits[]);
|
||||
|
||||
// This always does the appropriate multilinear interpolation.
|
||||
// We used to do tetrahedral for 3D tables, but found that was slower!
|
||||
// src must point to fInputChannels values, one per channel.
|
||||
void interp(float dst[3], const float src[]) const;
|
||||
|
||||
int inputChannels() const { return fInputChannels; }
|
||||
int outputChannels() const { return kOutputChannels; }
|
||||
|
||||
@ -41,11 +36,11 @@ public:
|
||||
void* operator new(size_t, void* p) { return p; }
|
||||
void operator delete(void* p) { sk_free(p); }
|
||||
|
||||
private:
|
||||
const float* table() const {
|
||||
return SkTAddOffset<const float>(this, sizeof(SkColorLookUpTable));
|
||||
}
|
||||
|
||||
private:
|
||||
uint8_t fInputChannels;
|
||||
uint8_t fLimits[kMaxColorChannels];
|
||||
};
|
||||
|
@ -189,27 +189,24 @@ SkColorSpaceXform_A2B::SkColorSpaceXform_A2B(SkColorSpace_A2B* srcSpace,
|
||||
case SkColorSpace_A2B::Element::Type::kCLUT: {
|
||||
SkCSXformPrintf("CLUT (%d -> %d) stage added\n", e.colorLUT().inputChannels(),
|
||||
e.colorLUT().outputChannels());
|
||||
struct CallbackCtx : SkJumper_CallbackCtx {
|
||||
sk_sp<const SkColorLookUpTable> clut;
|
||||
// clut->interp() can't always safely alias its arguments,
|
||||
// so we allocate a second buffer to hold our results.
|
||||
float results[4*SkJumper_kMaxStride];
|
||||
};
|
||||
auto cb = fAlloc.make<CallbackCtx>();
|
||||
cb->clut = sk_ref_sp(&e.colorLUT());
|
||||
cb->read_from = cb->results;
|
||||
cb->fn = [](SkJumper_CallbackCtx* ctx, int active_pixels) {
|
||||
auto c = (CallbackCtx*)ctx;
|
||||
for (int i = 0; i < active_pixels; i++) {
|
||||
// Look up red, green, and blue for this pixel using 3-4 values from rgba.
|
||||
c->clut->interp(c->results+4*i, c->rgba+4*i);
|
||||
|
||||
// If we used 3 inputs (rgb) preserve the fourth as alpha.
|
||||
// If we used 4 inputs (cmyk) force alpha to 1.
|
||||
c->results[4*i+3] = (3 == c->clut->inputChannels()) ? c->rgba[4*i+3] : 1.0f;
|
||||
}
|
||||
struct Ctx : SkJumper_ColorLookupTableCtx {
|
||||
sk_sp<const SkColorLookUpTable> clut;
|
||||
};
|
||||
fElementsPipeline.append(SkRasterPipeline::callback, cb);
|
||||
auto ctx = fAlloc.make<Ctx>();
|
||||
ctx->clut = sk_ref_sp(&e.colorLUT());
|
||||
ctx->table = ctx->clut->table();
|
||||
for (int i = 0; i < ctx->clut->inputChannels(); i++) {
|
||||
ctx->limits[i] = ctx->clut->gridPoints(i);
|
||||
}
|
||||
|
||||
switch (e.colorLUT().inputChannels()) {
|
||||
case 3: fElementsPipeline.append(SkRasterPipeline::clut_3D, ctx); break;
|
||||
case 4: fElementsPipeline.append(SkRasterPipeline::clut_4D, ctx); break;
|
||||
default: SkDEBUGFAIL("need to handle 1 or 2 channel color lookup tables.");
|
||||
}
|
||||
fElementsPipeline.append(SkRasterPipeline::clamp_0);
|
||||
fElementsPipeline.append(SkRasterPipeline::clamp_1);
|
||||
break;
|
||||
}
|
||||
case SkColorSpace_A2B::Element::Type::kMatrix:
|
||||
|
@ -91,7 +91,8 @@ struct SkJumper_Engine;
|
||||
M(xy_to_2pt_conical_linear) \
|
||||
M(mask_2pt_conical_degenerates) M(apply_vector_mask) \
|
||||
M(byte_tables) M(byte_tables_rgb) \
|
||||
M(rgb_to_hsl) M(hsl_to_rgb)
|
||||
M(rgb_to_hsl) M(hsl_to_rgb) \
|
||||
M(clut_3D) M(clut_4D)
|
||||
|
||||
class SkRasterPipeline {
|
||||
public:
|
||||
|
@ -121,4 +121,9 @@ struct SkJumper_UniformColorCtx {
|
||||
uint32_t rgba;
|
||||
};
|
||||
|
||||
// Context handed to the clut_3D and clut_4D stages.
struct SkJumper_ColorLookupTableCtx {
|
||||
// Color table data.  The stages gather three consecutive floats per table entry.
const float* table;
|
||||
// Number of grid points along each input dimension; the stage handling dimension
// `dim` reads limits[dim-1].  Only the first inputChannels() entries are filled in
// by the code that builds this context.
int limits[4];
|
||||
};
|
||||
|
||||
#endif//SkJumper_DEFINED
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1495,3 +1495,55 @@ STAGE(callback) {
|
||||
c->fn(c, tail ? tail : kStride);
|
||||
load4(c->read_from,0, &r,&g,&b,&a);
|
||||
}
|
||||
|
||||
// Our general strategy is to recursively interpolate each dimension,
|
||||
// accumulating the index to sample at, and our current pixel stride to help accumulate the index.
|
||||
template <int dim>
|
||||
SI void color_lookup_table(const SkJumper_ColorLookupTableCtx* ctx,
|
||||
F& r, F& g, F& b, F a, U32 index, U32 stride) {
|
||||
// We'd logically like to sample this dimension at x.
|
||||
int limit = ctx->limits[dim-1];
|
||||
F src;
|
||||
switch(dim) {
|
||||
case 1: src = r; break;
|
||||
case 2: src = g; break;
|
||||
case 3: src = b; break;
|
||||
case 4: src = a; break;
|
||||
}
|
||||
F x = src * (limit - 1);
|
||||
|
||||
// We can't index an array by a float (darn) so we have to snap to nearby integers lo and hi.
|
||||
U32 lo = trunc_(x ),
|
||||
hi = trunc_(x + 0.9999f);
|
||||
|
||||
// Recursively sample at lo and hi.
|
||||
F lr = r, lg = g, lb = b,
|
||||
hr = r, hg = g, hb = b;
|
||||
color_lookup_table<dim-1>(ctx, lr,lg,lb,a, stride*lo + index, stride*limit);
|
||||
color_lookup_table<dim-1>(ctx, hr,hg,hb,a, stride*hi + index, stride*limit);
|
||||
|
||||
// Linearly interpolate those colors based on their distance to x.
|
||||
F t = x - cast(lo);
|
||||
r = lerp(lr, hr, t);
|
||||
g = lerp(lg, hg, t);
|
||||
b = lerp(lb, hb, t);
|
||||
}
|
||||
|
||||
// Bottom out our recursion at 0 dimensions, i.e. just return the colors at index.
|
||||
template<>
|
||||
inline void color_lookup_table<0>(const SkJumper_ColorLookupTableCtx* ctx,
|
||||
F& r, F& g, F& b, F a, U32 index, U32 stride) {
|
||||
r = gather(ctx->table, 3*index+0);
|
||||
g = gather(ctx->table, 3*index+1);
|
||||
b = gather(ctx->table, 3*index+2);
|
||||
}
|
||||
|
||||
// Pipeline stage: replaces r,g,b with the color interpolated from a 3-input lookup table,
// using the incoming r,g,b as the three table coordinates.  The recursion starts with an
// accumulated index of 0 and a stride of 1.
STAGE(clut_3D) {
|
||||
color_lookup_table<3>(ctx, r,g,b,a, 0,1);
|
||||
// This 3D color lookup table leaves alpha alone.
|
||||
}
|
||||
// Pipeline stage: replaces r,g,b with the color interpolated from a 4-input lookup table,
// using the incoming r,g,b,a as the four table coordinates, then overwrites a with 1.0f.
// The recursion starts with an accumulated index of 0 and a stride of 1.
STAGE(clut_4D) {
|
||||
color_lookup_table<4>(ctx, r,g,b,a, 0,1);
|
||||
// "a" was really CMYK's K, so we just set alpha opaque.
|
||||
a = 1.0f;
|
||||
}
|
||||
|
@ -10,14 +10,16 @@
|
||||
#include "SkBitmap.h"
|
||||
#include "SkCanvas.h"
|
||||
#include "SkCodec.h"
|
||||
#include "SkColorSpacePriv.h"
|
||||
#include "SkColorSpace_A2B.h"
|
||||
#include "SkColorSpace_XYZ.h"
|
||||
#include "SkColorSpacePriv.h"
|
||||
#include "SkCommandLineFlags.h"
|
||||
#include "SkICCPriv.h"
|
||||
#include "SkImageEncoder.h"
|
||||
#include "SkMatrix44.h"
|
||||
#include "SkOSFile.h"
|
||||
#include "SkRasterPipeline.h"
|
||||
#include "../src/jumper/SkJumper.h"
|
||||
|
||||
#include "sk_tool_utils.h"
|
||||
|
||||
@ -264,6 +266,27 @@ static int cut_size(const SkColorLookUpTable& clut, int dimOrder[4]) {
|
||||
return cutWidth < cutHeight ? cutWidth : cutHeight;
|
||||
}
|
||||
|
||||
static void clut_interp(const SkColorLookUpTable& clut, float out[3], const float in[4]) {
|
||||
// This is kind of a toy implementation.
|
||||
// You generally wouldn't want to do this 1 pixel at a time.
|
||||
|
||||
SkJumper_ColorLookupTableCtx ctx;
|
||||
ctx.table = clut.table();
|
||||
for (int i = 0; i < clut.inputChannels(); i++) {
|
||||
ctx.limits[i] = clut.gridPoints(i);
|
||||
}
|
||||
|
||||
SkSTArenaAlloc<256> alloc;
|
||||
SkRasterPipeline p(&alloc);
|
||||
p.append_constant_color(&alloc, in);
|
||||
p.append(clut.inputChannels() == 3 ? SkRasterPipeline::clut_3D
|
||||
: SkRasterPipeline::clut_4D, &ctx);
|
||||
p.append(SkRasterPipeline::clamp_0);
|
||||
p.append(SkRasterPipeline::clamp_1);
|
||||
p.append(SkRasterPipeline::store_f32, &out);
|
||||
p.run(0,0, 1,1);
|
||||
}
|
||||
|
||||
static void draw_clut(SkCanvas* canvas, const SkColorLookUpTable& clut, int dimOrder[4]) {
|
||||
dump_clut(clut);
|
||||
|
||||
@ -291,7 +314,7 @@ static void draw_clut(SkCanvas* canvas, const SkColorLookUpTable& clut, int dimO
|
||||
const float w = row / (rows - 1.0f);
|
||||
const float input[4] = {x, y, z, w};
|
||||
float output[3];
|
||||
clut.interp(output, input);
|
||||
clut_interp(clut, output, input);
|
||||
paint.setColor(SkColorSetRGB(255*output[0], 255*output[1], 255*output[2]));
|
||||
canvas->drawRect(SkRect::MakeLTRB(ox + cutSize * x, oy + cutSize * y,
|
||||
ox + cutSize * (x + xStep),
|
||||
|
Loading…
Reference in New Issue
Block a user