linear -> sRGB: use fast approximate sqrt()
Since we're already approximating the sRGB gamma curve with a sqrt(), we might as well approximate it with a faster approximate sqrt(). On Intel, this .rsqrt().invert() version is 2-3x faster than .sqrt() (~3x faster on older machines, ~2x faster on newer machines). This should provide ~11 bits of precision, suspiciously exactly enough. Running dm --config srgb shows diffs, but none are perceptible. BUG=skia: GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2046063002 Review-Url: https://codereview.chromium.org/2046063002
This commit is contained in:
parent
12dfaaa53c
commit
3db2028126
@ -41,7 +41,7 @@ static inline Sk4f srgb_to_linear(const Sk4f& s4) {
|
||||
}
|
||||
|
||||
static inline Sk4f linear_to_srgb(const Sk4f& l4) {
|
||||
return set_alpha(l4.sqrt(), get_alpha(l4));
|
||||
return set_alpha(l4.rsqrt().invert(), get_alpha(l4));
|
||||
}
|
||||
|
||||
static inline float srgb_to_linear(float x) {
|
||||
|
@ -68,10 +68,10 @@ static Sk4x4f load_4_srgb(const void* ptr) {
|
||||
// Store an Sk4x4f back to 4 interlaced 8888 sRGB pixels.
|
||||
static void store_4_srgb(void* ptr, const Sk4x4f& p) {
|
||||
// Convert back to sRGB and [0,255], again approximating sRGB as gamma == 2.
|
||||
auto r = p.r.sqrt() * 255.0f + 0.5f,
|
||||
g = p.g.sqrt() * 255.0f + 0.5f,
|
||||
b = p.b.sqrt() * 255.0f + 0.5f,
|
||||
a = p.a * 255.0f + 0.5f;
|
||||
auto r = p.r.rsqrt().invert() * 255.0f + 0.5f,
|
||||
g = p.g.rsqrt().invert() * 255.0f + 0.5f,
|
||||
b = p.b.rsqrt().invert() * 255.0f + 0.5f,
|
||||
a = p.a * 255.0f + 0.5f;
|
||||
Sk4x4f{r,g,b,a}.transpose((uint8_t*)ptr);
|
||||
}
|
||||
|
||||
|
@ -53,7 +53,12 @@ void ramp<DstType::S32, ApplyPremul::False>(const Sk4f& c, const Sk4f& dc, SkPMC
|
||||
Sk4x4f c4x = Sk4x4f::Transpose(c, c + dc, c + dc * 2, c + dc * 3);
|
||||
|
||||
while (n >= 4) {
|
||||
const Sk4x4f cx4s32 = { c4x.r.sqrt(), c4x.g.sqrt(), c4x.b.sqrt(), c4x.a };
|
||||
const Sk4x4f cx4s32 = {
|
||||
c4x.r.rsqrt().invert(),
|
||||
c4x.g.rsqrt().invert(),
|
||||
c4x.b.rsqrt().invert(),
|
||||
c4x.a
|
||||
};
|
||||
cx4s32.transpose((uint8_t*)dst);
|
||||
|
||||
c4x.r += dc4x.r;
|
||||
|
Loading…
Reference in New Issue
Block a user