Refactor FourByteInterps. Add 64-bit Fast version. Add tests.
Effect on benches (only _fast_ should be affected, and only on 64-bit): Desktop (64-bit) four_byte_interp_slow_255 NONRENDERING c 7.80 7.84 -0.04 -0.5% four_byte_interp_slow_256 NONRENDERING c 7.38 7.36 +0.02 +0.3% four_byte_interp_fast_256 NONRENDERING c 4.86 4.38 +0.48 +9.9% four_byte_interp_fast_255 NONRENDERING c 5.80 5.16 +0.64 +11.0% N5 (32-bit) four_byte_interp_slow_256 NONRENDERING c 22.22 22.66 -0.44 -2.0% four_byte_interp_fast_255 NONRENDERING c 22.22 22.22 +0.00 +0.0% four_byte_interp_fast_256 NONRENDERING c 18.81 18.81 +0.00 +0.0% four_byte_interp_slow_255 NONRENDERING c 22.42 22.42 +0.00 +0.0% BUG= R=reed@google.com Author: mtklein@google.com Review URL: https://codereview.chromium.org/100923003 git-svn-id: http://skia.googlecode.com/svn/trunk@12468 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
3361471a35
commit
55ca8244cc
@ -50,6 +50,7 @@
|
||||
'../tests/ClipStackTest.cpp',
|
||||
'../tests/ClipperTest.cpp',
|
||||
'../tests/ColorFilterTest.cpp',
|
||||
'../tests/ColorPrivTest.cpp',
|
||||
'../tests/ColorTest.cpp',
|
||||
'../tests/DataRefTest.cpp',
|
||||
'../tests/DeferredCanvasTest.cpp',
|
||||
|
@ -269,34 +269,82 @@ static inline SkPMColor SkFourByteInterp(SkPMColor src, SkPMColor dst,
|
||||
}
|
||||
|
||||
/**
|
||||
* 32b optimized version; currently appears to be 10% faster even on 64b
|
||||
* architectures than an equivalent 64b version and 30% faster than
|
||||
* SkFourByteInterp(). Third parameter controls blending of the first two:
|
||||
* (src, dst, 0) returns dst
|
||||
* (src, dst, 256) returns src
|
||||
* ** Does not match the results of SkFourByteInterp256() because we use
|
||||
* a more accurate scale computation!
|
||||
* TODO: migrate Skia function to use an accurate 255->256 alpha
|
||||
* conversion.
|
||||
* 0xAARRGGBB -> 0x00AA00GG, 0x00RR00BB
|
||||
*/
|
||||
static inline SkPMColor SkFastFourByteInterp256(SkPMColor src,
|
||||
SkPMColor dst,
|
||||
unsigned scale) {
|
||||
SkASSERT(scale <= 256);
|
||||
|
||||
// Reorders ARGB to AG-RB in order to reduce the number of operations.
|
||||
const uint32_t mask = 0xFF00FF;
|
||||
uint32_t src_rb = src & mask;
|
||||
uint32_t src_ag = (src >> 8) & mask;
|
||||
uint32_t dst_rb = dst & mask;
|
||||
uint32_t dst_ag = (dst >> 8) & mask;
|
||||
|
||||
uint32_t ret_rb = src_rb * scale + (256 - scale) * dst_rb;
|
||||
uint32_t ret_ag = src_ag * scale + (256 - scale) * dst_ag;
|
||||
|
||||
return (ret_ag & ~mask) | ((ret_rb & ~mask) >> 8);
|
||||
static inline void SkSplay(SkPMColor color, uint32_t* ag, uint32_t* rb) {
    // Keep the low byte of each 16-bit half; the byte above it is left zero.
    const uint32_t kLowByteOfEachHalf = 0x00FF00FF;

    // Bytes 3 and 1 of color (A and G in 0xAARRGGBB), moved down into the low byte of each half.
    const uint32_t upperPair = (color >> 8) & kLowByteOfEachHalf;
    // Bytes 2 and 0 of color (R and B in 0xAARRGGBB), already in position.
    const uint32_t lowerPair = color & kLowByteOfEachHalf;

    *ag = upperPair;
    *rb = lowerPair;
}
|
||||
|
||||
/**
|
||||
* 0xAARRGGBB -> 0x00AA00GG00RR00BB
|
||||
* (note, ARGB -> AGRB)
|
||||
*/
|
||||
static inline uint64_t SkSplay(SkPMColor color) {
    const uint32_t mask = 0x00FF00FF;

    // Each byte of color ends up in the low byte of its own 16-bit lane.
    const uint64_t ag = (color >> 8) & mask;  // 0x0000000000AA00GG
    const uint64_t rb = color & mask;         // 0x0000000000RR00BB

    // A/G occupy the upper 32 bits, R/B the lower 32 bits: 0x00AA00GG00RR00BB.
    return (ag << 32) | rb;
}
|
||||
|
||||
/**
|
||||
* 0xAAxxGGxx, 0xRRxxBBxx-> 0xAARRGGBB
|
||||
*/
|
||||
static inline SkPMColor SkUnsplay(uint32_t ag, uint32_t rb) {
    // Only the high byte of each 16-bit half carries a result; the low bytes are discarded.
    const uint32_t kHighByteOfEachHalf = 0xFF00FF00;

    const uint32_t agInPlace = ag & kHighByteOfEachHalf;         // 0xAA00GG00
    const uint32_t rbInPlace = (rb & kHighByteOfEachHalf) >> 8;  // 0x00RR00BB

    return agInPlace | rbInPlace;                                // 0xAARRGGBB
}
|
||||
|
||||
/**
|
||||
* 0xAAxxGGxxRRxxBBxx -> 0xAARRGGBB
|
||||
* (note, AGRB -> ARGB)
|
||||
*/
|
||||
static inline SkPMColor SkUnsplay(uint64_t agrb) {
    // Selects the high byte of each 16-bit lane within a 32-bit half.
    const uint32_t mask = 0xFF00FF00;

    // Lower 32 bits hold R and B; shift them into the odd byte positions.
    const uint64_t rb = (agrb & mask) >> 8;   // 0x00RR00BB
    // Upper 32 bits hold A and G; bring them down to the low word.
    const uint64_t ag = (agrb >> 32) & mask;  // 0xAA00GG00

    return rb | ag;                           // 0xAARRGGBB
}
|
||||
|
||||
// Blends src over dst with weight scale/256, using two 32-bit registers that each carry
// two 8-bit channels in 16-bit lanes.
static inline SkPMColor SkFastFourByteInterp256_32(SkPMColor src, SkPMColor dst, unsigned scale) {
    SkASSERT(scale <= 256);

    uint32_t srcAG, srcRB;
    SkSplay(src, &srcAG, &srcRB);

    uint32_t dstAG, dstRB;
    SkSplay(dst, &dstAG, &dstRB);

    // With scale <= 256, each lane's weighted sum is at most 255 * 256, which fits in its
    // 16 bits, so the two channels in a register never carry into each other.
    const uint32_t blendedAG = srcAG * scale + (256 - scale) * dstAG;
    const uint32_t blendedRB = srcRB * scale + (256 - scale) * dstRB;

    // SkUnsplay keeps the high byte of each lane, which divides each sum by 256.
    return SkUnsplay(blendedAG, blendedRB);
}
|
||||
|
||||
// Blends src over dst with weight scale/256, carrying all four 8-bit channels in the
// 16-bit lanes of a single 64-bit register.
static inline SkPMColor SkFastFourByteInterp256_64(SkPMColor src, SkPMColor dst, unsigned scale) {
    SkASSERT(scale <= 256);

    const uint64_t srcLanes = SkSplay(src);
    const uint64_t dstLanes = SkSplay(dst);

    // With scale <= 256, each lane's weighted sum is at most 255 * 256, so it stays inside
    // its own 16 bits and the four blends do not interfere.
    const uint64_t blended = srcLanes * scale + (256 - scale) * dstLanes;

    return SkUnsplay(blended);
}
|
||||
|
||||
// TODO(mtklein): Replace slow versions with fast versions, using scale + (scale>>7) everywhere.
|
||||
|
||||
/**
|
||||
* Same as SkFourByteInterp256, but faster.
|
||||
*/
|
||||
static inline SkPMColor SkFastFourByteInterp256(SkPMColor src, SkPMColor dst, unsigned scale) {
    // Choose the implementation by pointer width. The 64-bit variant was measured about 10%
    // faster than the 32-bit one on a 64-bit machine, but ~40% slower on a 32-bit machine.
    if (sizeof(void*) != 4) {
        return SkFastFourByteInterp256_64(src, dst, scale);
    }
    return SkFastFourByteInterp256_32(src, dst, scale);
}
|
||||
|
||||
/**
|
||||
* Nearly the same as SkFourByteInterp, but faster and a touch more accurate, due to better
|
||||
* srcWeight scaling to [0, 256].
|
||||
*/
|
||||
static inline SkPMColor SkFastFourByteInterp(SkPMColor src,
|
||||
SkPMColor dst,
|
||||
U8CPU srcWeight) {
|
||||
|
44
tests/ColorPrivTest.cpp
Normal file
44
tests/ColorPrivTest.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include "Test.h"
|
||||
#include "TestClassDef.h"
|
||||
|
||||
#include "SkColorPriv.h"
|
||||
|
||||
#define ASSERT(expr) REPORTER_ASSERT(r, expr)
|
||||
|
||||
DEF_TEST(Splay, r) {
    const SkPMColor color = 0xA1B2C3D4;

    // Two-register form: each output word holds two channels in the low byte of each 16-bit half.
    uint32_t ag, rb;
    SkSplay(color, &ag, &rb);
    ASSERT(ag == 0x00A100C3);
    ASSERT(rb == 0x00B200D4);
    // SkUnsplay reads the high byte of each half, so shift up by 8 to round-trip.
    ASSERT(SkUnsplay(ag << 8, rb << 8) == color);

    // One-register form: all four channels in a single 64-bit word.
    const uint64_t agrb = SkSplay(color);
    // The ULL suffix makes the 64-bit constant well-formed even where long is 32 bits
    // and the compiler predates C++11.
    ASSERT(agrb == 0x00A100C300B200D4ULL);
    ASSERT(SkUnsplay(agrb<<8) == color);
}
|
||||
|
||||
DEF_TEST(FourByteInterp, r) {
    const SkPMColor src = 0xAB998877;
    const SkPMColor dst = 0x66334455;

    // The [0, 256]-scale variants must agree exactly at every scale.
    for (unsigned scale = 0; scale <= 256; ++scale) {
        ASSERT(SkFourByteInterp256(src, dst, scale) == SkFastFourByteInterp256(src, dst, scale));
    }

    // The [0, 255]-weight variants map the weight onto [0, 256] differently, so they are only
    // required to agree to within one step per channel, with the slow result never the lower one.
    for (unsigned scale = 0; scale < 256; ++scale) {
        const SkPMColor reference = SkFourByteInterp(src, dst, scale);
        const SkPMColor candidate = SkFastFourByteInterp(src, dst, scale);

        const int deltaB = SkGetPackedB32(reference) - SkGetPackedB32(candidate);
        const int deltaG = SkGetPackedG32(reference) - SkGetPackedG32(candidate);
        const int deltaR = SkGetPackedR32(reference) - SkGetPackedR32(candidate);
        const int deltaA = SkGetPackedA32(reference) - SkGetPackedA32(candidate);

        ASSERT(deltaA == 0 || deltaA == 1);
        ASSERT(deltaR == 0 || deltaR == 1);
        ASSERT(deltaG == 0 || deltaG == 1);
        ASSERT(deltaB == 0 || deltaB == 1);
    }
}
|
Loading…
Reference in New Issue
Block a user