Tricky float -> byte conversion in store_8888.
In IEEE 754 single precision, for each byte BB, the float 0x470000BB equals 32768.0f + BB*(1/256.0f). So to turn a [0,1] float into a byte, we can: (1) multiply by (255/256.0f) to get into the [0, 255/256.0f] range, (2) add 32768.0f to get into the [32768.0f, 32768.0f + 255/256.0f] range, and (3) look at the low byte. The first two steps are, of course, a single FMA. Using this trick here makes store_8888 measurably faster. Instead of an FMA then a float->int trunc, we do an FMA then a bitwise AND. Overall the math goes from 4 FMA + 4 trunc + 3 shift to 4 FMA + 3 AND + 3 shift (we can skip the shift for red and the AND for alpha). As you might guess, AND is cheaper than trunc, so this is a net win. I should be able to follow up with the same trick in reverse in from_8888(). CQ_INCLUDE_TRYBOTS=skia.primary:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD Change-Id: I42c8f4a6ea0b6c22160517cf5f9c048f01c9a330 Reviewed-on: https://skia-review.googlesource.com/5540 Reviewed-by: Matt Sarett <msarett@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
c2881e9b40
commit
3e05671ace
@ -547,11 +547,18 @@ STAGE(load_8888_d) {
|
||||
from_8888(load(tail, ptr), &dr, &dg, &db, &da);
|
||||
}
|
||||
// Stage that packs the r, g, b and a lanes into 32-bit values (r at bit 0, g at bit 8,
// b at bit 16, a at bit 24) and stores them through the pointer held in ctx, offset by x.
// NOTE(review): this listing appears to be a rendered diff, so two store() calls to the same
// ptr are visible below; presumably the SkNf_round one is the removed version and the
// byte() one is its replacement — confirm against the actual file.
STAGE(store_8888) {
|
||||
// byte(x, ix): turns each float lane of x into an 8-bit value placed at byte position ix
// (0..3) of an integer lane. Presumably expects x already in [0,1] — TODO confirm the
// clamping happens upstream; values outside that range would not land in the low byte.
auto byte = [](const SkNf& x, int ix) {
|
||||
// Here's a neat trick: 0x47000000 == 32768.0f, and 0x470000ff == 32768.0f + (255/256.0f).
|
||||
// Scale by 255/256 and bias by 32768.0f (presumably SkNf_fma(f, m, a) computes f*m + a), so
// that, per the bit patterns noted above, the low byte of each lane's raw bits is the result.
auto v = SkNf_fma(255/256.0f, x, 32768.0f);
|
||||
// Each SkNi::Load(&v) below reads v's storage back as integer lanes, i.e. the raw float bits.
switch (ix) {
|
||||
// R sits in the low byte already: mask only, no shift needed.
case 0: return SkNi::Load(&v) & 0xff; // R
|
||||
// A goes to the top byte: assuming 32-bit lanes, the shift by 24 discards the upper
// three bytes, so no mask is needed.
case 3: return SkNi::Load(&v) << 24; // A
|
||||
}
|
||||
// Middle channels need both the mask and a shift of 8*ix bits.
return (SkNi::Load(&v) & 0xff) << (8*ix); // G (ix 1) or B (ix 2)
|
||||
};
|
||||
|
||||
// ctx is read as a pointer to a uint32_t* destination; ptr is that destination advanced by x.
auto ptr = *(uint32_t**)ctx + x;
|
||||
// Store built from SkNf_round(255.0f, channel) for each channel, shifted into its byte position.
store(tail, ( SkNf_round(255.0f, r) << 0
|
||||
| SkNf_round(255.0f, g) << 8
|
||||
| SkNf_round(255.0f, b) << 16
|
||||
| SkNf_round(255.0f, a) << 24 ), (int*)ptr);
|
||||
// Store built from the byte() helper: OR the four positioned channel bytes together.
store(tail, byte(r,0)|byte(g,1)|byte(b,2)|byte(a,3), (int*)ptr);
|
||||
}
|
||||
|
||||
STAGE(load_tables) {
|
||||
|
Loading…
Reference in New Issue
Block a user