SkPMFloat::trunc()
Add and test trunc(), which is what get() used to be before rounding. Using trunc() is a ~40% speedup on our linear gradient bench. #neon #floats BUG=skia:3592 #n5 #n9 CQ_INCLUDE_TRYBOTS=client.skia.android:Test-Android-Nexus5-Adreno330-Arm7-Debug-Trybot;client.skia.android:Test-Android-Nexus9-TegraK1-Arm64-Release-Trybot Review URL: https://codereview.chromium.org/1032243002
This commit is contained in:
parent
d968a6f29e
commit
3d4c4a5a9f
2
HASHTAGS
2
HASHTAGS
@ -14,6 +14,8 @@ notry,NOTRY=true
|
||||
nocommit,COMMIT=false
|
||||
|
||||
floats,BUG=skia:3592
|
||||
neon,#n5,#n9
|
||||
n5,CQ_INCLUDE_TRYBOTS=client.skia.android:Test-Android-Nexus5-Adreno330-Arm7-Debug-Trybot
|
||||
n7,CQ_INCLUDE_TRYBOTS=client.skia.android:Test-Android-Nexus7-Tegra3-Arm7-Debug-Trybot
|
||||
n9,CQ_INCLUDE_TRYBOTS=client.skia.android:Test-Android-Nexus9-TegraK1-Arm64-Release-Trybot
|
||||
|
||||
|
@ -112,15 +112,15 @@ struct PMFloatGradientBench : public Benchmark {
|
||||
dcdx4(dcdx+dcdx+dcdx+dcdx);
|
||||
|
||||
for (int n = 0; n < loops; n++) {
|
||||
Sk4f a = c0 + dc*fx, // TODO: add 0.5f, here call trunc() instead of get().
|
||||
Sk4f a = c0 + dc*fx + Sk4f(0.5f), // The +0.5f lets us call trunc() instead of get().
|
||||
b = a + dcdx,
|
||||
c = b + dcdx,
|
||||
d = c + dcdx;
|
||||
for (size_t i = 0; i < SK_ARRAY_COUNT(fDevice); i += 4) {
|
||||
fDevice[i+0] = SkPMFloat(a).get();
|
||||
fDevice[i+1] = SkPMFloat(b).get();
|
||||
fDevice[i+2] = SkPMFloat(c).get();
|
||||
fDevice[i+3] = SkPMFloat(d).get();
|
||||
fDevice[i+0] = SkPMFloat(a).trunc();
|
||||
fDevice[i+1] = SkPMFloat(b).trunc();
|
||||
fDevice[i+2] = SkPMFloat(c).trunc();
|
||||
fDevice[i+3] = SkPMFloat(d).trunc();
|
||||
a += dcdx4;
|
||||
b += dcdx4;
|
||||
c += dcdx4;
|
||||
|
@ -57,6 +57,10 @@ public:
|
||||
SkPMColor get() const; // May SkASSERT(this->isValid()). Some implementations may clamp.
|
||||
SkPMColor clamped() const; // Will clamp all values to [0, 255]. Then may assert isValid().
|
||||
|
||||
// Like get(), but truncates instead of rounding.
|
||||
// The domain of this function is (-1.0f, 256.0f). Values in (-1.0f, 0.0f] truncate to zero.
|
||||
SkPMColor trunc() const;
|
||||
|
||||
// 4-at-a-time versions of get() and clamped(). Like From4PMColors(), no alignment assumed.
|
||||
static void To4PMColors(
|
||||
const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, const SkPMFloat&, SkPMColor[4]);
|
||||
|
@ -41,6 +41,16 @@ inline SkPMColor SkPMFloat::clamped() const {
|
||||
return c;
|
||||
}
|
||||
|
||||
// Converts the float channels held in fColors to a packed SkPMColor by
// truncating each channel (no 0.5f is added first, unlike clamped()).
inline SkPMColor SkPMFloat::trunc() const {
|
||||
// Basically, same as clamped(), but no rounding.
|
||||
// _mm_cvttps_epi32 is the truncating float->int32 conversion; the two
// _mm_packus_epi16 steps then narrow each 32-bit lane down to a single byte
// with unsigned saturation.
__m128i fix8_32 = _mm_cvttps_epi32(fColors),
|
||||
fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),
|
||||
fix8 = _mm_packus_epi16(fix8_16, fix8_16);
|
||||
// The low 32 bits of fix8 now hold the four packed channel bytes.
SkPMColor c = _mm_cvtsi128_si32(fix8);
|
||||
SkPMColorAssert(c);
|
||||
return c;
|
||||
}
|
||||
|
||||
inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],
|
||||
SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {
|
||||
// Haven't beaten this yet.
|
||||
|
@ -27,17 +27,20 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) {
|
||||
SkASSERT(this->isValid());
|
||||
}
|
||||
|
||||
inline SkPMColor SkPMFloat::get() const {
|
||||
SkASSERT(this->isValid());
|
||||
inline SkPMColor SkPMFloat::trunc() const {
|
||||
const int _ = 255; // _ means to zero that byte.
|
||||
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
|
||||
__m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)),
|
||||
__m128i fix8_32 = _mm_cvttps_epi32(fColors),
|
||||
fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _,_,_,_, 12,8,4,0));
|
||||
SkPMColor c = _mm_cvtsi128_si32(fix8);
|
||||
SkPMColorAssert(c);
|
||||
return c;
|
||||
}
|
||||
|
||||
// Returns this color as a packed SkPMColor with each channel rounded:
// 0.5f is added to every channel and the result is truncated via trunc().
inline SkPMColor SkPMFloat::get() const {
|
||||
// Asserts isValid() before converting.
SkASSERT(this->isValid());
|
||||
return SkPMFloat(Sk4f(0.5f) + *this).trunc();
|
||||
}
|
||||
|
||||
inline SkPMColor SkPMFloat::clamped() const {
|
||||
// We don't use _mm_cvtps_epi32, because we want precise control over how 0.5 rounds (up).
|
||||
__m128i fix8_32 = _mm_cvttps_epi32(_mm_add_ps(_mm_set1_ps(0.5f), fColors)),
|
||||
|
@ -26,10 +26,8 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) {
|
||||
SkASSERT(this->isValid());
|
||||
}
|
||||
|
||||
inline SkPMColor SkPMFloat::get() const {
|
||||
SkASSERT(this->isValid());
|
||||
float32x4_t add_half = vaddq_f32(fColors, vdupq_n_f32(0.5f));
|
||||
uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually
|
||||
inline SkPMColor SkPMFloat::trunc() const {
|
||||
uint32x4_t fix8_32 = vcvtq_u32_f32(fColors); // vcvtq_u32_f32 truncates
|
||||
uint16x4_t fix8_16 = vmovn_u32(fix8_32);
|
||||
uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
|
||||
SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);
|
||||
@ -37,6 +35,11 @@ inline SkPMColor SkPMFloat::get() const {
|
||||
return c;
|
||||
}
|
||||
|
||||
// Returns this color as a packed SkPMColor with each channel rounded:
// 0.5f is added to every channel and the result is truncated via trunc().
inline SkPMColor SkPMFloat::get() const {
|
||||
// Asserts isValid() before converting.
SkASSERT(this->isValid());
|
||||
return SkPMFloat(Sk4f(0.5f) + *this).trunc();
|
||||
}
|
||||
|
||||
inline SkPMColor SkPMFloat::clamped() const {
|
||||
float32x4_t add_half = vaddq_f32(fColors, vdupq_n_f32(0.5f));
|
||||
uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually
|
||||
|
@ -18,6 +18,10 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) {
|
||||
SkASSERT(this->isValid());
|
||||
}
|
||||
|
||||
// Packs the a/r/g/b channels into an SkPMColor via SkPackARGB32 without
// adding 0.5f to any channel first (i.e. no rounding step).
// NOTE(review): presumably the float channel values are implicitly converted
// to integers (truncating) at the call -- confirm against SkPackARGB32's
// parameter types.
inline SkPMColor SkPMFloat::trunc() const {
|
||||
return SkPackARGB32(this->a(), this->r(), this->g(), this->b());
|
||||
}
|
||||
|
||||
inline SkPMColor SkPMFloat::get() const {
|
||||
SkASSERT(this->isValid());
|
||||
return SkPackARGB32(this->a()+0.5f, this->r()+0.5f, this->g()+0.5f, this->b()+0.5f);
|
||||
|
@ -22,6 +22,9 @@ DEF_TEST(SkPMFloat, r) {
|
||||
pmf = SkPMFloat(254.5f, 203.5f, 153.1f, 50.8f);
|
||||
REPORTER_ASSERT(r, c == pmf.get());
|
||||
|
||||
pmf = SkPMFloat(255.9f, 204.01f, 153.0f, -0.9f);
|
||||
REPORTER_ASSERT(r, SkPreMultiplyColor(0xFFCC9900) == pmf.trunc());
|
||||
|
||||
// Test clamping.
|
||||
SkPMFloat clamped(SkPMFloat(510.0f, 153.0f, 1.0f, -0.2f).clamped());
|
||||
REPORTER_ASSERT(r, SkScalarNearlyEqual(255.0f, clamped.a()));
|
||||
|
Loading…
Reference in New Issue
Block a user