Convert color data to skvx::float4 from Sk4f

Change-Id: I511f6105537b24953de1533ad7b73d1186afd4fc
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/541060
Commit-Queue: Michael Ludwig <michaelludwig@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Michael Ludwig 2022-05-17 16:26:03 -04:00 committed by SkCQ
parent 7f99451720
commit 9b59fe655c
21 changed files with 154 additions and 220 deletions
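For readers following the conversion, the pattern is mechanical throughout: Sk4f becomes skvx::float4, Sk4b becomes skvx::byte4, Sk4i becomes skvx::int4 (Sk4h maps to skvx::half4 or skvx::Vec<4, uint16_t> depending on use), SkNx_cast/SkNx_shuffle become skvx::cast/skvx::shuffle, and member reductions like (v < 0).anyTrue() become the free functions any()/all(). A minimal before/after sketch of a typical call site, assuming the Skia source tree and the skvx API as it appears in this diff (the helper name is hypothetical, not part of the change):

#include "include/private/SkVx.h"
#include <cstdint>

// Clamp four [0,1] floats and pack them into an 8888 pixel; the old Sk4f
// spellings are shown in comments above each line.
static uint32_t pack_to_8888(const float rgba[4]) {
    // Before: Sk4f c = Sk4f::Load(rgba);
    skvx::float4 c = skvx::float4::Load(rgba);
    // Before: c = Sk4f::Max(0, Sk4f::Min(c, 1));
    c = skvx::pin(c, skvx::float4(0.0f), skvx::float4(1.0f));
    uint32_t px;
    // Before: SkNx_cast<uint8_t>(Sk4f_round(c * 255.0f)).store(&px);
    skvx::cast<uint8_t>(skvx::lrint(c * 255.0f)).store(&px);
    return px;
}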

View File

@ -232,11 +232,7 @@ private:
};
SkASSERT(sizeof(V) == vertexStride);
uint64_t color;
Sk4h halfColor = SkFloatToHalf_finite_ftz(Sk4f::Load(&fColor4f));
color = (uint64_t)halfColor[0] << 48 |
(uint64_t)halfColor[1] << 32 |
(uint64_t)halfColor[2] << 16 |
(uint64_t)halfColor[3] << 0;
SkFloatToHalf_finite_ftz(skvx::float4::Load(&fColor4f)).store(&color);
V* v = (V*)verts;
for (int i = 0; i < kVertexCount; i += 2) {
v[i + 0].fPos.set(dx * i, 0.0f);

View File

@ -1256,9 +1256,9 @@ struct Task {
bool unclamped = false;
for (int y = 0; y < pm.height() && !unclamped; ++y)
for (int x = 0; x < pm.width() && !unclamped; ++x) {
Sk4f rgba = SkHalfToFloat_finite_ftz(*pm.addr64(x, y));
skvx::float4 rgba = SkHalfToFloat_finite_ftz(*pm.addr64(x, y));
float a = rgba[3];
if (a > 1.0f || (rgba < 0.0f).anyTrue() || (rgba > a).anyTrue()) {
if (a > 1.0f || any(rgba < 0.0f) || any(rgba > a)) {
SkDebugf("[%s] F16Norm pixel [%d, %d] unclamped: (%g, %g, %g, %g)\n",
name.c_str(), x, y, rgba[0], rgba[1], rgba[2], rgba[3]);
unclamped = true;
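The F16Norm check above is representative of the reduction changes: Sk4f's member predicates ((rgba < 0.0f).anyTrue()) become the skvx free functions any()/all() applied to a lane-wise comparison. A hedged sketch of the same predicate as a standalone helper (the function name is hypothetical; assumes the Skia tree for SkVx.h):

#include "include/private/SkVx.h"

// True when a premultiplied, normalized-F16 pixel is properly clamped:
// alpha in [0, 1] and every channel in [0, alpha].
static bool f16norm_pixel_is_clamped(const skvx::float4& rgba) {
    float a = rgba[3];
    return !(a > 1.0f || any(rgba < 0.0f) || any(rgba > a));
}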

View File

@ -28,8 +28,8 @@ generated_cc_atom(
hdrs = ["SkColorData.h"],
visibility = ["//:__subpackages__"],
deps = [
":SkNx_hdr",
":SkTo_hdr",
":SkVx_hdr",
"//include/core:SkColorPriv_hdr",
"//include/core:SkColor_hdr",
],
@ -93,7 +93,7 @@ generated_cc_atom(
hdrs = ["SkHalf.h"],
visibility = ["//:__subpackages__"],
deps = [
":SkNx_hdr",
":SkVx_hdr",
"//include/core:SkTypes_hdr",
],
)

View File

@ -10,8 +10,8 @@
#include "include/core/SkColor.h"
#include "include/core/SkColorPriv.h"
#include "include/private/SkNx.h"
#include "include/private/SkTo.h"
#include "include/private/SkVx.h"
////////////////////////////////////////////////////////////////////////////////////////////
// Convert a 16bit pixel to a 32bit pixel
@ -395,11 +395,11 @@ static inline SkPMColor SkPixel4444ToPixel32(U16CPU c) {
return d | (d << 4);
}
static inline Sk4f swizzle_rb(const Sk4f& x) {
return SkNx_shuffle<2, 1, 0, 3>(x);
static inline skvx::float4 swizzle_rb(const skvx::float4& x) {
return skvx::shuffle<2, 1, 0, 3>(x);
}
static inline Sk4f swizzle_rb_if_bgra(const Sk4f& x) {
static inline skvx::float4 swizzle_rb_if_bgra(const skvx::float4& x) {
#ifdef SK_PMCOLOR_IS_BGRA
return swizzle_rb(x);
#else
@ -407,24 +407,13 @@ static inline Sk4f swizzle_rb_if_bgra(const Sk4f& x) {
#endif
}
static inline Sk4f Sk4f_fromL32(uint32_t px) {
return SkNx_cast<float>(Sk4b::Load(&px)) * (1 / 255.0f);
static inline skvx::float4 Sk4f_fromL32(uint32_t px) {
return skvx::cast<float>(skvx::byte4::Load(&px)) * (1 / 255.0f);
}
static inline uint32_t Sk4f_toL32(const Sk4f& px) {
Sk4f v = px;
#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
// SkNx_cast<uint8_t, int32_t>() pins, and we don't anticipate giant floats
#elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON)
// SkNx_cast<uint8_t, int32_t>() pins, and so does Sk4f_round().
#else
// No guarantee of a pin.
v = Sk4f::Max(0, Sk4f::Min(v, 1));
#endif
static inline uint32_t Sk4f_toL32(const skvx::float4& px) {
uint32_t l32;
SkNx_cast<uint8_t>(Sk4f_round(v * 255.0f)).store(&l32);
skvx::cast<uint8_t>(pin(lrint(px * 255.f), skvx::int4(0), skvx::int4(255))).store(&l32);
return l32;
}
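With this rewrite, Sk4f_toL32 clamps uniformly via pin() + lrint() instead of the per-backend #if ladder that previously documented when pinning happened; the helpers keep their Sk4f_* names even though they now take and return skvx::float4. A usage sketch under that assumption (helper name hypothetical):

#include "include/private/SkColorData.h"
#include <cstdint>

// Scale the alpha of an 8888 pixel in [0,1] float space and repack;
// Sk4f_toL32 takes care of clamping and rounding.
static uint32_t scale_alpha_l32(uint32_t px, float alphaScale) {
    skvx::float4 c = Sk4f_fromL32(px);                   // bytes -> [0,1] floats
    c = c * skvx::float4(1.0f, 1.0f, 1.0f, alphaScale);
    return Sk4f_toL32(c);                                // clamp, round, repack
}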

View File

@ -9,7 +9,7 @@
#define SkHalf_DEFINED
#include "include/core/SkTypes.h"
#include "include/private/SkNx.h"
#include "include/private/SkVx.h"
// 16-bit floating point value
// format is 1 bit sign, 5 bits exponent, 10 bits mantissa
@ -28,58 +28,11 @@ SkHalf SkFloatToHalf(float f);
// Convert between half and single precision floating point,
// assuming inputs and outputs are both finite, and may
// flush values which would be denormal half floats to zero.
static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t);
static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&);
// ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //
// Like the serial versions in SkHalf.cpp, these are based on
// https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
// GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.
static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t rgba) {
Sk4h hs = Sk4h::Load(&rgba);
#if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
float32x4_t fs;
asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...)
: [fs] "=w" (fs) // =w: write-only NEON register
: [hs] "w" (hs.fVec)); // w: read-only NEON register
return fs;
#else
Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit.
sign = bits & 0x00008000, // Save the sign bit for later...
positive = bits ^ sign, // ...but strip it off for now.
is_norm = 0x03ff < positive; // Exponent > 0?
// For normal half floats, extend the mantissa by 13 zero bits,
// then adjust the exponent from 15 bias to 127 bias.
Sk4i norm = (positive << 13) + ((127 - 15) << 23);
Sk4i merged = (sign << 16) | (norm & is_norm);
return Sk4f::Load(&merged);
#endif
static inline skvx::float4 SkHalfToFloat_finite_ftz(uint64_t rgba) {
return skvx::from_half(skvx::half4::Load(&rgba));
}
static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) {
#if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
float32x4_t vec = fs.fVec;
asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec)
: [vec] "+w" (vec)); // +w: read-write NEON register
return vreinterpret_u16_f32(vget_low_f32(vec));
#else
Sk4i bits = Sk4i::Load(&fs),
sign = bits & 0x80000000, // Save the sign bit for later...
positive = bits ^ sign, // ...but strip it off for now.
will_be_norm = 0x387fdfff < positive; // greater than largest denorm half?
// For normal half floats, adjust the exponent from 127 bias to 15 bias,
// then drop the bottom 13 mantissa bits.
Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;
Sk4i merged = (sign >> 16) | (will_be_norm & norm);
return SkNx_cast<uint16_t>(merged);
#endif
static inline skvx::half4 SkFloatToHalf_finite_ftz(const skvx::float4& c) {
return skvx::to_half(c);
}
#endif
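SkHalfToFloat_finite_ftz/SkFloatToHalf_finite_ftz are now thin wrappers over skvx::from_half/skvx::to_half, which pick hardware conversions (F16C, AArch64) when available and otherwise fall back to the bit manipulation shown in the SkVx.h hunk further down. A usage sketch, assuming the Skia tree (helper name hypothetical):

#include "include/private/SkHalf.h"
#include <cstdint>

// Premultiply an RGBA color and pack it as four 16-bit halfs in one uint64_t,
// the same shape as SkPMColor4f_toFP16 elsewhere in this change.
static uint64_t premul_to_f16(const float rgba[4]) {
    skvx::float4 c = skvx::float4::Load(rgba);
    c = c * skvx::float4(c[3], c[3], c[3], 1.0f);   // scale RGB by alpha
    uint64_t packed;
    SkFloatToHalf_finite_ftz(c).store(&packed);     // skvx::half4 -> 8 bytes
    return packed;
}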

View File

@ -463,28 +463,28 @@ SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec
// Specializations inline here so they can generalize what types the apply to.
// (This header is used in C++14 contexts, so we have to kind of fake constexpr if.)
#if SKVX_USE_SIMD && defined(__AVX2__)
if /*constexpr*/ (N*sizeof(T) == 32) {
if constexpr (N*sizeof(T) == 32) {
return unchecked_bit_pun<Vec<N,T>>(_mm256_blendv_epi8(unchecked_bit_pun<__m256i>(e),
unchecked_bit_pun<__m256i>(t),
unchecked_bit_pun<__m256i>(cond)));
}
#endif
#if SKVX_USE_SIMD && defined(__SSE4_1__)
if /*constexpr*/ (N*sizeof(T) == 16) {
if constexpr (N*sizeof(T) == 16) {
return unchecked_bit_pun<Vec<N,T>>(_mm_blendv_epi8(unchecked_bit_pun<__m128i>(e),
unchecked_bit_pun<__m128i>(t),
unchecked_bit_pun<__m128i>(cond)));
}
#endif
#if SKVX_USE_SIMD && defined(__ARM_NEON)
if /*constexpr*/ (N*sizeof(T) == 16) {
if constexpr (N*sizeof(T) == 16) {
return unchecked_bit_pun<Vec<N,T>>(vbslq_u8(unchecked_bit_pun<uint8x16_t>(cond),
unchecked_bit_pun<uint8x16_t>(t),
unchecked_bit_pun<uint8x16_t>(e)));
}
#endif
// Recurse for large vectors to try to hit the specializations above.
if /*constexpr*/ (N*sizeof(T) > 16) {
if constexpr (N*sizeof(T) > 16) {
return join(if_then_else(cond.lo, t.lo, e.lo),
if_then_else(cond.hi, t.hi, e.hi));
}
@ -506,19 +506,19 @@ SINT bool any(const Vec<N,T>& x) {
SIT bool all(const Vec<1,T>& x) { return x.val != 0; }
SINT bool all(const Vec<N,T>& x) {
#if SKVX_USE_SIMD && defined(__AVX2__)
if /*constexpr*/ (N*sizeof(T) == 32) {
if constexpr (N*sizeof(T) == 32) {
return _mm256_testc_si256(unchecked_bit_pun<__m256i>(x),
_mm256_set1_epi32(-1));
}
#endif
#if SKVX_USE_SIMD && defined(__SSE4_1__)
if /*constexpr*/ (N*sizeof(T) == 16) {
if constexpr (N*sizeof(T) == 16) {
return _mm_testc_si128(unchecked_bit_pun<__m128i>(x),
_mm_set1_epi32(-1));
}
#endif
#if SKVX_USE_SIMD && defined(__wasm_simd128__)
if /*constexpr*/ (N == 4 && sizeof(T) == 4) {
if constexpr (N == 4 && sizeof(T) == 4) {
return wasm_i32x4_all_true(unchecked_bit_pun<VExt<4,int>>(x));
}
#endif
@ -622,12 +622,12 @@ SI Vec<1,int> lrint(const Vec<1,float>& x) {
}
SIN Vec<N,int> lrint(const Vec<N,float>& x) {
#if SKVX_USE_SIMD && defined(__AVX__)
if /*constexpr*/ (N == 8) {
if constexpr (N == 8) {
return unchecked_bit_pun<Vec<N,int>>(_mm256_cvtps_epi32(unchecked_bit_pun<__m256>(x)));
}
#endif
#if SKVX_USE_SIMD && defined(__SSE__)
if /*constexpr*/ (N == 4) {
if constexpr (N == 4) {
return unchecked_bit_pun<Vec<N,int>>(_mm_cvtps_epi32(unchecked_bit_pun<__m128>(x)));
}
#endif
@ -637,8 +637,7 @@ SIN Vec<N,int> lrint(const Vec<N,float>& x) {
SIN Vec<N,float> fract(const Vec<N,float>& x) { return x - floor(x); }
// The default logic for to_half/from_half is borrowed from skcms,
// and assumes inputs are finite and treat/flush denorm half floats as/to zero.
// Assumes inputs are finite and treat/flush denorm half floats as/to zero.
// Key constants to watch for:
// - a float is 32-bit, 1-8-23 sign-exponent-mantissa, with 127 exponent bias;
// - a half is 16-bit, 1-5-10 sign-exponent-mantissa, with 15 exponent bias.
@ -646,17 +645,17 @@ SIN Vec<N,uint16_t> to_half_finite_ftz(const Vec<N,float>& x) {
Vec<N,uint32_t> sem = bit_pun<Vec<N,uint32_t>>(x),
s = sem & 0x8000'0000,
em = sem ^ s,
is_denorm = em < 0x3880'0000;
return cast<uint16_t>(if_then_else(is_denorm, Vec<N,uint32_t>(0)
, (s>>16) + (em>>13) - ((127-15)<<10)));
is_norm = em > 0x387f'd000, // halfway between largest f16 denorm and smallest norm
norm = (em>>13) - ((127-15)<<10);
return cast<uint16_t>((s>>16) | (is_norm & norm));
}
SIN Vec<N,float> from_half_finite_ftz(const Vec<N,uint16_t>& x) {
Vec<N,uint32_t> wide = cast<uint32_t>(x),
s = wide & 0x8000,
em = wide ^ s;
auto is_denorm = bit_pun<Vec<N,int32_t>>(em < 0x0400);
return if_then_else(is_denorm, Vec<N,float>(0)
, bit_pun<Vec<N,float>>( (s<<16) + (em<<13) + ((127-15)<<23) ));
em = wide ^ s,
is_norm = em > 0x3ff,
norm = (em<<13) + ((127-15)<<23);
return bit_pun<Vec<N,float>>((s<<16) | (is_norm & norm));
}
// Like if_then_else(), these N=1 base cases won't actually be used unless explicitly called.
@ -665,18 +664,18 @@ SI Vec<1,float> from_half(const Vec<1,uint16_t>& x) { return from_half_finite_f
SIN Vec<N,uint16_t> to_half(const Vec<N,float>& x) {
#if SKVX_USE_SIMD && defined(__F16C__)
if /*constexpr*/ (N == 8) {
if constexpr (N == 8) {
return unchecked_bit_pun<Vec<N,uint16_t>>(_mm256_cvtps_ph(unchecked_bit_pun<__m256>(x),
_MM_FROUND_CUR_DIRECTION));
}
#endif
#if SKVX_USE_SIMD && defined(__aarch64__)
if /*constexpr*/ (N == 4) {
if constexpr (N == 4) {
return unchecked_bit_pun<Vec<N,uint16_t>>(vcvt_f16_f32(unchecked_bit_pun<float32x4_t>(x)));
}
#endif
if /*constexpr*/ (N > 4) {
if constexpr (N > 4) {
return join(to_half(x.lo),
to_half(x.hi));
}
@ -685,16 +684,16 @@ SIN Vec<N,uint16_t> to_half(const Vec<N,float>& x) {
SIN Vec<N,float> from_half(const Vec<N,uint16_t>& x) {
#if SKVX_USE_SIMD && defined(__F16C__)
if /*constexpr*/ (N == 8) {
if constexpr (N == 8) {
return unchecked_bit_pun<Vec<N,float>>(_mm256_cvtph_ps(unchecked_bit_pun<__m128i>(x)));
}
#endif
#if SKVX_USE_SIMD && defined(__aarch64__)
if /*constexpr*/ (N == 4) {
if constexpr (N == 4) {
return unchecked_bit_pun<Vec<N,float>>(vcvt_f32_f16(unchecked_bit_pun<float16x4_t>(x)));
}
#endif
if /*constexpr*/ (N > 4) {
if constexpr (N > 4) {
return join(from_half(x.lo),
from_half(x.hi));
}
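The reworked to_half_finite_ftz/from_half_finite_ftz keep the same recipe the deleted SkHalf.h fallback used, just phrased with an is_norm mask instead of if_then_else: keep the sign, shift the mantissa by 13 bits, rebias the exponent between 127 and 15, and flush anything that would be a denormal half to zero. A scalar sketch of the float-to-half direction, runnable with only the standard library (the 0x387fd000 threshold is copied from the hunk above, where it is described as the boundary between the largest half denorm and the smallest norm; the function name is hypothetical):

#include <cstdint>
#include <cstring>

// Scalar version of the vectorized to_half_finite_ftz above. Assumes a finite
// input whose magnitude fits in a half; truncates rather than rounds the mantissa.
static uint16_t to_half_finite_ftz_scalar(float f) {
    uint32_t sem;
    std::memcpy(&sem, &f, sizeof(sem));                  // 1-8-23 sign/exponent/mantissa
    uint32_t s       = sem & 0x80000000u,                // sign bit
             em      = sem ^ s,                          // exponent + mantissa
             is_norm = em > 0x387fd000u ? 0xFFFFFFFFu : 0u,
             norm    = (em >> 13) - ((127 - 15) << 10);  // drop 13 bits, rebias exponent
    return (uint16_t)((s >> 16) | (is_norm & norm));
}
// to_half_finite_ftz_scalar(1.0f) == 0x3C00, to_half_finite_ftz_scalar(-2.0f) == 0xC000,
// and anything smaller than the smallest normal half flushes to +/-0.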

View File

@ -8,6 +8,7 @@
#include "include/core/SkCanvas.h"
#include "include/core/SkPaint.h"
#include "include/core/SkPath.h"
#include "include/private/SkNx.h"
#include "include/utils/SkRandom.h"
#include "samplecode/Sample.h"
#include "src/core/SkPathPriv.h"

View File

@ -681,6 +681,7 @@ generated_cc_atom(
deps = [
":SkBlendModePriv_hdr",
":SkRasterPipeline_hdr",
"//include/private:SkVx_hdr",
],
)
@ -2993,7 +2994,6 @@ generated_cc_atom(
"//include/private:SkColorData_hdr",
"//include/private:SkHalf_hdr",
"//include/private:SkImageInfoPriv_hdr",
"//include/private:SkNx_hdr",
"//include/private:SkTo_hdr",
"//include/private:SkVx_hdr",
],
@ -3613,10 +3613,10 @@ generated_cc_atom(
"//include/private:SkColorData_hdr",
"//include/private:SkHalf_hdr",
"//include/private:SkImageInfoPriv_hdr",
"//include/private:SkNx_hdr",
"//include/private:SkTPin_hdr",
"//include/private:SkTemplates_hdr",
"//include/private:SkTo_hdr",
"//include/private:SkVx_hdr",
"//src/image:SkReadPixelsRec_hdr",
"//src/shaders:SkImageShader_hdr",
],

View File

@ -6,6 +6,8 @@
*/
#include "src/core/SkBlendModePriv.h"
#include "include/private/SkVx.h"
#include "src/core/SkRasterPipeline.h"
bool SkBlendMode_ShouldPreScaleCoverage(SkBlendMode mode, bool rgb_coverage) {
@ -129,8 +131,9 @@ SkPMColor4f SkBlendMode_Apply(SkBlendMode mode, const SkPMColor4f& src, const Sk
case SkBlendMode::kSrc: return src;
case SkBlendMode::kDst: return dst;
case SkBlendMode::kSrcOver: {
Sk4f r = Sk4f::Load(src.vec()) + Sk4f::Load(dst.vec()) * Sk4f(1 - src.fA);
return { r[0], r[1], r[2], r[3] };
SkPMColor4f r;
(skvx::float4::Load(src.vec()) + skvx::float4::Load(dst.vec()) * (1-src.fA)).store(&r);
return r;
}
default:
break;
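The kSrcOver case is the usual premultiplied source-over equation, r = src + dst * (1 - srcAlpha), now evaluated across all four lanes at once and stored straight into the returned SkPMColor4f. A scalar sketch of the formula for reference (names are illustrative, not Skia API):

// Premultiplied src-over, one channel at a time.
struct Color4 { float r, g, b, a; };

static Color4 src_over(const Color4& src, const Color4& dst) {
    float inv = 1.0f - src.a;
    return { src.r + dst.r * inv,
             src.g + dst.g * inv,
             src.b + dst.b * inv,
             src.a + dst.a * inv };
}
// Example: src = {0.5, 0, 0, 0.5}, dst = {0, 0, 1, 1} gives {0.5, 0, 0.5, 1}.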

View File

@ -115,12 +115,12 @@ SkColor4f SkColor4f::FromColor(SkColor bgra) {
template <>
SkColor SkColor4f::toSkColor() const {
return Sk4f_toL32(swizzle_rb(Sk4f::Load(this->vec())));
return Sk4f_toL32(swizzle_rb(skvx::float4::Load(this->vec())));
}
template <>
uint32_t SkColor4f::toBytes_RGBA() const {
return Sk4f_toL32(Sk4f::Load(this->vec()));
return Sk4f_toL32(skvx::float4::Load(this->vec()));
}
template <>
@ -139,7 +139,7 @@ SkPMColor4f SkPMColor4f::FromPMColor(SkPMColor c) {
template <>
uint32_t SkPMColor4f::toBytes_RGBA() const {
return Sk4f_toL32(Sk4f::Load(this->vec()));
return Sk4f_toL32(skvx::float4::Load(this->vec()));
}
template <>

View File

@ -10,7 +10,6 @@
#include "include/private/SkColorData.h"
#include "include/private/SkHalf.h"
#include "include/private/SkImageInfoPriv.h"
#include "include/private/SkNx.h"
#include "include/private/SkTo.h"
#include "include/private/SkVx.h"
#include "src/core/SkMathPriv.h"
@ -27,12 +26,12 @@
struct ColorTypeFilter_8888 {
typedef uint32_t Type;
static Sk4h Expand(uint32_t x) {
return SkNx_cast<uint16_t>(Sk4b::Load(&x));
static skvx::Vec<4, uint16_t> Expand(uint32_t x) {
return skvx::cast<uint16_t>(skvx::byte4::Load(&x));
}
static uint32_t Compact(const Sk4h& x) {
static uint32_t Compact(const skvx::Vec<4, uint16_t>& x) {
uint32_t r;
SkNx_cast<uint8_t>(x).store(&r);
skvx::cast<uint8_t>(x).store(&r);
return r;
}
};
@ -69,11 +68,11 @@ struct ColorTypeFilter_8 {
struct ColorTypeFilter_Alpha_F16 {
typedef uint16_t Type;
static Sk4f Expand(uint16_t x) {
static skvx::float4 Expand(uint16_t x) {
return SkHalfToFloat_finite_ftz((uint64_t) x); // expand out to four lanes
}
static uint16_t Compact(const Sk4f& x) {
static uint16_t Compact(const skvx::float4& x) {
uint64_t r;
SkFloatToHalf_finite_ftz(x).store(&r);
return r & 0xFFFF; // but ignore the extra 3 here
@ -82,10 +81,10 @@ struct ColorTypeFilter_Alpha_F16 {
struct ColorTypeFilter_RGBA_F16 {
typedef uint64_t Type; // SkHalf x4
static Sk4f Expand(uint64_t x) {
static skvx::float4 Expand(uint64_t x) {
return SkHalfToFloat_finite_ftz(x);
}
static uint64_t Compact(const Sk4f& x) {
static uint64_t Compact(const skvx::float4& x) {
uint64_t r;
SkFloatToHalf_finite_ftz(x).store(&r);
return r;
@ -114,10 +113,10 @@ struct ColorTypeFilter_1616 {
struct ColorTypeFilter_F16F16 {
typedef uint32_t Type;
static Sk4f Expand(uint32_t x) {
static skvx::float4 Expand(uint32_t x) {
return SkHalfToFloat_finite_ftz((uint64_t) x); // expand out to four lanes
}
static uint32_t Compact(const Sk4f& x) {
static uint32_t Compact(const skvx::float4& x) {
uint64_t r;
SkFloatToHalf_finite_ftz(x).store(&r);
return (uint32_t) (r & 0xFFFFFFFF); // but ignore the extra 2 here
@ -170,7 +169,7 @@ template <typename T> T shift_right(const T& x, int bits) {
return x >> bits;
}
Sk4f shift_right(const Sk4f& x, int bits) {
skvx::float4 shift_right(const skvx::float4& x, int bits) {
return x * (1.0f / (1 << bits));
}
@ -178,7 +177,7 @@ template <typename T> T shift_left(const T& x, int bits) {
return x << bits;
}
Sk4f shift_left(const Sk4f& x, int bits) {
skvx::float4 shift_left(const skvx::float4& x, int bits) {
return x * (1 << bits);
}
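The ColorTypeFilter Expand/Compact pairs above exist so the mipmap downsampler can average neighboring pixels in wider lanes without overflow: 8888 expands to 16-bit lanes, the F16 formats expand to floats, and shift_right becomes a multiply in the float case. A hedged sketch of how the 8888 pair is used for a 2x2 box average (helper name hypothetical; assumes the Skia tree for SkVx.h):

#include "include/private/SkVx.h"
#include <cstdint>

// Average four 8888 pixels channel-by-channel: widen to 16 bits, sum,
// divide by four with a shift, narrow back to bytes.
static uint32_t average_2x2(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
    auto expand = [](uint32_t px) { return skvx::cast<uint16_t>(skvx::byte4::Load(&px)); };
    skvx::Vec<4, uint16_t> sum = expand(a) + expand(b) + expand(c) + expand(d);
    uint32_t out;
    skvx::cast<uint8_t>(sum >> 2).store(&out);
    return out;
}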

View File

@ -15,10 +15,10 @@
#include "include/private/SkColorData.h"
#include "include/private/SkHalf.h"
#include "include/private/SkImageInfoPriv.h"
#include "include/private/SkNx.h"
#include "include/private/SkTPin.h"
#include "include/private/SkTemplates.h"
#include "include/private/SkTo.h"
#include "include/private/SkVx.h"
#include "src/core/SkConvertPixels.h"
#include "src/core/SkDraw.h"
#include "src/core/SkMask.h"
@ -419,29 +419,25 @@ SkColor SkPixmap::getColor(int x, int y) const {
case kRGBA_F16_SkColorType: {
const uint64_t* addr =
(const uint64_t*)fPixels + y * (fRowBytes >> 3) + x;
Sk4f p4 = SkHalfToFloat_finite_ftz(*addr);
skvx::float4 p4 = SkHalfToFloat_finite_ftz(*addr);
if (p4[3] && needsUnpremul) {
float inva = 1 / p4[3];
p4 = p4 * Sk4f(inva, inva, inva, 1);
p4 = p4 * skvx::float4(inva, inva, inva, 1);
}
SkColor c;
SkNx_cast<uint8_t>(p4 * Sk4f(255) + Sk4f(0.5f)).store(&c);
// p4 is RGBA, but we want BGRA, so we need to swap next
return SkSwizzle_RB(c);
return Sk4f_toL32(swizzle_rb(p4));
}
case kRGBA_F32_SkColorType: {
const float* rgba =
(const float*)fPixels + 4*y*(fRowBytes >> 4) + 4*x;
Sk4f p4 = Sk4f::Load(rgba);
skvx::float4 p4 = skvx::float4::Load(rgba);
// From here on, just like F16:
if (p4[3] && needsUnpremul) {
float inva = 1 / p4[3];
p4 = p4 * Sk4f(inva, inva, inva, 1);
p4 = p4 * skvx::float4(inva, inva, inva, 1);
}
SkColor c;
SkNx_cast<uint8_t>(p4 * Sk4f(255) + Sk4f(0.5f)).store(&c);
// p4 is RGBA, but we want BGRA, so we need to swap next
return SkSwizzle_RB(c);
return Sk4f_toL32(swizzle_rb(p4));
}
case kUnknown_SkColorType:
break;
@ -639,4 +635,3 @@ bool SkPixmapPriv::Orient(const SkPixmap& dst, const SkPixmap& src, SkEncodedOri
SkImageInfo SkPixmapPriv::SwapWidthHeight(const SkImageInfo& info) {
return info.makeWH(info.height(), info.width());
}
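Both floating-point paths of SkPixmap::getColor now share the same tail: optionally unpremultiply (scale RGB by 1/alpha, leave alpha), then swap R and B and let Sk4f_toL32 do the clamping, rounding, and byte packing that the removed SkNx_cast/SkSwizzle_RB sequence did by hand. A sketch of that tail as a standalone helper (name hypothetical; assumes the Skia tree):

#include "include/core/SkColor.h"
#include "include/private/SkColorData.h"

// Convert an unpacked RGBA color to an SkColor (BGRA byte order), the shared
// tail of the F16/F32 getColor() cases above.
static SkColor rgba_float4_to_skcolor(skvx::float4 p4, bool needsUnpremul) {
    if (p4[3] && needsUnpremul) {
        float inva = 1 / p4[3];
        p4 = p4 * skvx::float4(inva, inva, inva, 1);   // unpremultiply RGB, keep A
    }
    return Sk4f_toL32(swizzle_rb(p4));                 // swap R/B, clamp, round, pack
}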

View File

@ -81,7 +81,7 @@ static inline bool SkPMColor4fFitsInBytes(const SkPMColor4f& color) {
static inline uint64_t SkPMColor4f_toFP16(const SkPMColor4f& color) {
uint64_t halfColor;
SkFloatToHalf_finite_ftz(Sk4f::Load(color.vec())).store(&halfColor);
SkFloatToHalf_finite_ftz(skvx::float4::Load(color.vec())).store(&halfColor);
return halfColor;
}

View File

@ -129,16 +129,13 @@ void GrGradientBitmapCache::fillGradient(const SkPMColor4f* colors, const SkScal
SkHalf* pixelsF16 = reinterpret_cast<SkHalf*>(bitmap->getPixels());
uint32_t* pixels32 = reinterpret_cast<uint32_t*>(bitmap->getPixels());
typedef std::function<void(const Sk4f&, int)> pixelWriteFn_t;
typedef std::function<void(const skvx::float4&, int)> pixelWriteFn_t;
pixelWriteFn_t writeF16Pixel = [&](const Sk4f& x, int index) {
Sk4h c = SkFloatToHalf_finite_ftz(x);
pixelsF16[4*index+0] = c[0];
pixelsF16[4*index+1] = c[1];
pixelsF16[4*index+2] = c[2];
pixelsF16[4*index+3] = c[3];
pixelWriteFn_t writeF16Pixel = [&](const skvx::float4& x, int index) {
skvx::half4 c = SkFloatToHalf_finite_ftz(x);
c.store(pixelsF16 + (4 * index));
};
pixelWriteFn_t write8888Pixel = [&](const Sk4f& c, int index) {
pixelWriteFn_t write8888Pixel = [&](const skvx::float4& c, int index) {
pixels32[index] = Sk4f_toL32(c);
};
@ -154,11 +151,11 @@ void GrGradientBitmapCache::fillGradient(const SkPMColor4f* colors, const SkScal
SkIntToScalar(fResolution - 1));
if (nextIndex > prevIndex) {
Sk4f c0 = Sk4f::Load(colors[i - 1].vec()),
c1 = Sk4f::Load(colors[i ].vec());
auto c0 = skvx::float4::Load(colors[i - 1].vec()),
c1 = skvx::float4::Load(colors[i ].vec());
Sk4f step = Sk4f(1.0f / static_cast<float>(nextIndex - prevIndex));
Sk4f delta = (c1 - c0) * step;
auto step = skvx::float4(1.0f / static_cast<float>(nextIndex - prevIndex));
auto delta = (c1 - c0) * step;
for (int curIndex = prevIndex; curIndex <= nextIndex; ++curIndex) {
writePixel(c0, curIndex);
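GrGradientBitmapCache::fillGradient walks each pair of adjacent color stops and writes a linear ramp between them: one divide to get the per-texel step, then an add per texel. The increment that follows writePixel() falls outside the hunk, so the c0 += delta step below is an assumption based on the visible setup; sketched here per channel with plain scalars (name hypothetical):

// Fill dst[prevIndex..nextIndex] with a linear ramp from c0 to c1, one channel.
static void fill_ramp(float dst[], float c0, float c1, int prevIndex, int nextIndex) {
    float step  = 1.0f / static_cast<float>(nextIndex - prevIndex);
    float delta = (c1 - c0) * step;
    for (int curIndex = prevIndex; curIndex <= nextIndex; ++curIndex) {
        dst[curIndex] = c0;     // writePixel(c0, curIndex) in the real code
        c0 += delta;
    }
}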

View File

@ -12,8 +12,8 @@ generated_cc_atom(
":Sk4fGradientPriv_hdr",
":SkGradientShaderPriv_hdr",
"//include/core:SkColor_hdr",
"//include/private:SkNx_hdr",
"//include/private:SkTArray_hdr",
"//include/private:SkVx_hdr",
"//src/core:SkMatrixPriv_hdr",
"//src/shaders:SkShaderBase_hdr",
],
@ -38,7 +38,7 @@ generated_cc_atom(
"//include/core:SkImageInfo_hdr",
"//include/private:SkColorData_hdr",
"//include/private:SkHalf_hdr",
"//include/private:SkNx_hdr",
"//include/private:SkVx_hdr",
"//src/core:SkOpts_hdr",
],
)

View File

@ -11,16 +11,15 @@
namespace {
Sk4f pack_color(const SkColor4f& c4f, bool premul, const Sk4f& component_scale) {
Sk4f pm4f = premul
? Sk4f::Load(c4f.premul().vec())
: Sk4f::Load(c4f.vec());
skvx::float4 pack_color(const SkColor4f& c4f, bool premul, const skvx::float4& component_scale) {
auto pm4f = premul ? skvx::float4::Load(c4f.premul().vec())
: skvx::float4::Load(c4f.vec());
if (premul) {
// If the stops are premul, we clamp them to gamut now.
// If the stops are unpremul, the colors will eventually go through Sk4f_toL32(),
// which ends up clamping to gamut then.
pm4f = Sk4f::Max(0, Sk4f::Min(pm4f, pm4f[3]));
pm4f = max(0, min(pm4f, pm4f[3]));
}
return pm4f * component_scale;
@ -97,7 +96,7 @@ private:
void addMirrorIntervals(const SkGradientShaderBase& shader,
const SkColor4f* colors,
const Sk4f& componentScale,
const skvx::float4& componentScale,
bool premulColors, bool reverse,
Sk4fGradientIntervalBuffer::BufferType* buffer) {
const IntervalIterator iter(shader, reverse);
@ -117,10 +116,10 @@ void addMirrorIntervals(const SkGradientShaderBase& shader,
} // anonymous namespace
Sk4fGradientInterval::Sk4fGradientInterval(const Sk4f& c0, SkScalar t0,
const Sk4f& c1, SkScalar t1)
: fT0(t0)
, fT1(t1) {
Sk4fGradientInterval::Sk4fGradientInterval(const skvx::float4& c0, SkScalar t0,
const skvx::float4& c1, SkScalar t1)
: fT0(t0)
, fT1(t1) {
SkASSERT(t0 != t1);
// Either p0 or p1 can be (-)inf for synthetic clamp edge intervals.
SkASSERT(SkScalarIsFinite(t0) || SkScalarIsFinite(t1));
@ -128,10 +127,10 @@ Sk4fGradientInterval::Sk4fGradientInterval(const Sk4f& c0, SkScalar t0,
const auto dt = t1 - t0;
// Clamp edge intervals are always zero-ramp.
SkASSERT(SkScalarIsFinite(dt) || (c0 == c1).allTrue());
SkASSERT(SkScalarIsFinite(t0) || (c0 == c1).allTrue());
const Sk4f dc = SkScalarIsFinite(dt) ? (c1 - c0) / dt : 0;
const Sk4f bias = c0 - (SkScalarIsFinite(t0) ? t0 * dc : 0);
SkASSERT(SkScalarIsFinite(dt) || all(c0 == c1));
SkASSERT(SkScalarIsFinite(t0) || all(c0 == c1));
const auto dc = SkScalarIsFinite(dt) ? (c1 - c0) / dt : 0;
const auto bias = c0 - (SkScalarIsFinite(t0) ? t0 * dc : 0);
bias.store(fCb.vec());
dc.store(fCg.vec());
@ -187,9 +186,8 @@ void Sk4fGradientIntervalBuffer::init(const SkGradientShaderBase& shader, SkColo
fIntervals.reset();
const Sk4f componentScale = premulColors
? Sk4f(alpha)
: Sk4f(1.0f, 1.0f, 1.0f, alpha);
const skvx::float4 componentScale = premulColors ? skvx::float4(alpha)
: skvx::float4(1.0f, 1.0f, 1.0f, alpha);
const int first_index = reverse ? count - 1 : 0;
const int last_index = count - 1 - first_index;
const SkScalar first_pos = reverse ? SK_Scalar1 : 0;
@ -200,7 +198,7 @@ void Sk4fGradientIntervalBuffer::init(const SkGradientShaderBase& shader, SkColo
if (tileMode == SkTileMode::kClamp) {
// synthetic edge interval: -/+inf .. P0
const Sk4f clamp_color = pack_color(xformedColors.fColors[first_index],
const auto clamp_color = pack_color(xformedColors.fColors[first_index],
premulColors, componentScale);
const SkScalar clamp_pos = reverse ? SK_ScalarInfinity : SK_ScalarNegativeInfinity;
fIntervals.emplace_back(clamp_color, clamp_pos,
@ -222,7 +220,7 @@ void Sk4fGradientIntervalBuffer::init(const SkGradientShaderBase& shader, SkColo
if (tileMode == SkTileMode::kClamp) {
// synthetic edge interval: Pn .. +/-inf
const Sk4f clamp_color = pack_color(xformedColors.fColors[last_index],
const auto clamp_color = pack_color(xformedColors.fColors[last_index],
premulColors, componentScale);
const SkScalar clamp_pos = reverse ? SK_ScalarNegativeInfinity : SK_ScalarInfinity;
fIntervals.emplace_back(clamp_color, last_pos,
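Each Sk4fGradientInterval stores its colors in slope-intercept form so evaluation is a single multiply-add per lane: fCg holds the gradient dc = (c1 - c0) / (t1 - t0) and fCb holds the bias c0 - t0 * dc, with the clamp-edge intervals (infinite t) falling back to a zero gradient. A one-channel scalar sketch of that setup with a worked check (names hypothetical):

// Parameterize a gradient interval as color(t) = bias + t * dc.
struct Interval1 { float bias, dc; };

static Interval1 make_interval(float c0, float t0, float c1, float t1) {
    float dc   = (c1 - c0) / (t1 - t0);
    float bias = c0 - t0 * dc;
    return { bias, dc };
}
// make_interval(0.f, 0.25f, 1.f, 0.75f) gives dc = 2, bias = -0.5,
// so color(0.25) == 0 and color(0.75) == 1, as expected.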

View File

@ -9,16 +9,16 @@
#define Sk4fGradientBase_DEFINED
#include "include/core/SkColor.h"
#include "include/private/SkNx.h"
#include "include/private/SkTArray.h"
#include "include/private/SkVx.h"
#include "src/core/SkMatrixPriv.h"
#include "src/shaders/SkShaderBase.h"
#include "src/shaders/gradients/Sk4fGradientPriv.h"
#include "src/shaders/gradients/SkGradientShaderPriv.h"
struct Sk4fGradientInterval {
Sk4fGradientInterval(const Sk4f& c0, SkScalar t0,
const Sk4f& c1, SkScalar t1);
Sk4fGradientInterval(const skvx::float4& c0, SkScalar t0,
const skvx::float4& c1, SkScalar t1);
bool contains(SkScalar t) const {
// True if t is in [p0,p1]. Note: this helper assumes a
@ -73,7 +73,8 @@ private:
using INHERITED = Context;
void addMirrorIntervals(const SkGradientShaderBase&,
const Sk4f& componentScale, bool reverse);
const skvx::float4& componentScale,
bool reverse);
};
#endif // Sk4fGradientBase_DEFINED

View File

@ -12,7 +12,7 @@
#include "include/core/SkImageInfo.h"
#include "include/private/SkColorData.h"
#include "include/private/SkHalf.h"
#include "include/private/SkNx.h"
#include "include/private/SkVx.h"
#include "src/core/SkOpts.h"
// Templates shared by various 4f gradient flavors.
@ -26,29 +26,29 @@ struct PremulTraits;
template <>
struct PremulTraits<ApplyPremul::False> {
static Sk4f apply(const Sk4f& c) { return c; }
static skvx::float4 apply(const skvx::float4& c) { return c; }
};
template <>
struct PremulTraits<ApplyPremul::True> {
static Sk4f apply(const Sk4f& c) {
static skvx::float4 apply(const skvx::float4& c) {
const float alpha = c[3];
// FIXME: portable swizzle?
return c * Sk4f(alpha, alpha, alpha, 1);
return c * skvx::float4(alpha, alpha, alpha, 1);
}
};
// Struct encapsulating various dest-dependent ops:
//
// - load() Load a SkPMColor4f value into Sk4f. Normally called once per interval
// - load() Load a SkPMColor4f value into skvx::float4. Normally called once per interval
// advance. Also applies a scale and swizzle suitable for DstType.
//
// - store() Store one Sk4f to dest. Optionally handles premul, color space
// - store() Store one skvx::float4 to dest. Optionally handles premul, color space
// conversion, etc.
//
// - store(count) Store the Sk4f value repeatedly to dest, count times.
// - store(count) Store the skvx::float4 value repeatedly to dest, count times.
//
// - store4x() Store 4 Sk4f values to dest (opportunistic optimization).
// - store4x() Store 4 skvx::float4 values to dest (opportunistic optimization).
//
template <ApplyPremul premul>
@ -56,36 +56,39 @@ struct DstTraits {
using PM = PremulTraits<premul>;
// For L32, prescaling by 255 saves a per-pixel multiplication when premul is not needed.
static Sk4f load(const SkPMColor4f& c) {
Sk4f c4f = swizzle_rb_if_bgra(Sk4f::Load(c.vec()));
static skvx::float4 load(const SkPMColor4f& c) {
skvx::float4 c4f = swizzle_rb_if_bgra(skvx::float4::Load(c.vec()));
return premul == ApplyPremul::False
? c4f * Sk4f(255)
? c4f * skvx::float4(255)
: c4f;
}
static void store(const Sk4f& c, SkPMColor* dst, const Sk4f& bias) {
static void store(const skvx::float4& c, SkPMColor* dst, const skvx::float4& bias) {
if (premul == ApplyPremul::False) {
// c is pre-scaled by 255 and pre-biased, just store.
SkNx_cast<uint8_t>(c).store(dst);
skvx::cast<uint8_t>(c).store(dst);
} else {
*dst = Sk4f_toL32(PM::apply(c) + bias);
}
}
static void store(const Sk4f& c, SkPMColor* dst, int n) {
static void store(const skvx::float4& c, SkPMColor* dst, int n) {
SkPMColor pmc;
store(c, &pmc, Sk4f(0));
store(c, &pmc, skvx::float4(0));
sk_memset32(dst, pmc, n);
}
static void store4x(const Sk4f& c0, const Sk4f& c1,
const Sk4f& c2, const Sk4f& c3,
static void store4x(const skvx::float4& c0, const skvx::float4& c1,
const skvx::float4& c2, const skvx::float4& c3,
SkPMColor* dst,
const Sk4f& bias0,
const Sk4f& bias1) {
const skvx::float4& bias0,
const skvx::float4& bias1) {
if (premul == ApplyPremul::False) {
// colors are pre-scaled and pre-biased.
Sk4f_ToBytes((uint8_t*)dst, c0, c1, c2, c3);
skvx::cast<uint8_t>(c0).store(dst + 0);
skvx::cast<uint8_t>(c1).store(dst + 1);
skvx::cast<uint8_t>(c2).store(dst + 2);
skvx::cast<uint8_t>(c3).store(dst + 3);
} else {
store(c0, dst + 0, bias0);
store(c1, dst + 1, bias1);
@ -94,7 +97,7 @@ struct DstTraits {
}
}
static Sk4f pre_lerp_bias(const Sk4f& bias) {
static skvx::float4 pre_lerp_bias(const skvx::float4& bias) {
// We can apply the bias before interpolation when the colors are premultiplied.
return premul == ApplyPremul::False ? bias : 0;
}

View File

@ -15,14 +15,14 @@
namespace {
template<ApplyPremul premul>
void ramp(const Sk4f& c, const Sk4f& dc, SkPMColor dst[], int n,
const Sk4f& bias0, const Sk4f& bias1) {
void ramp(const skvx::float4& c, const skvx::float4& dc, SkPMColor dst[], int n,
const skvx::float4& bias0, const skvx::float4& bias1) {
SkASSERT(n > 0);
const Sk4f dc2 = dc + dc,
const auto dc2 = dc + dc,
dc4 = dc2 + dc2;
Sk4f c0 = c + DstTraits<premul>::pre_lerp_bias(bias0),
auto c0 = c + DstTraits<premul>::pre_lerp_bias(bias0),
c1 = c + dc + DstTraits<premul>::pre_lerp_bias(bias1),
c2 = c0 + dc2,
c3 = c1 + dc2;
@ -222,8 +222,8 @@ LinearGradient4fContext::shadeSpanInternal(int x, int y, SkPMColor dst[], int co
fx,
dx,
SkScalarNearlyZero(dx * count));
Sk4f bias4f0(bias0),
bias4f1(bias1);
skvx::float4 bias4f0(bias0),
bias4f1(bias1);
while (count > 0) {
// What we really want here is SkTPin(advance, 1, count)
@ -300,8 +300,8 @@ public:
}
bool currentRampIsZero() const { return fZeroRamp; }
const Sk4f& currentColor() const { return fCc; }
const Sk4f& currentColorGrad() const { return fDcDx; }
const skvx::float4& currentColor() const { return fCc; }
const skvx::float4& currentColorGrad() const { return fDcDx; }
void advance(SkScalar advX) {
SkASSERT(advX > 0);
@ -312,7 +312,7 @@ public:
}
SkASSERT(advX < fAdvX);
fCc = fCc + fDcDx * Sk4f(advX);
fCc = fCc + fDcDx * advX;
fAdvX -= advX;
}
@ -320,17 +320,17 @@ private:
void compute_interval_props(SkScalar t) {
SkASSERT(in_range(t, fInterval->fT0, fInterval->fT1));
const Sk4f dc = DstTraits<premul>::load(fInterval->fCg);
fCc = DstTraits<premul>::load(fInterval->fCb) + dc * Sk4f(t);
const auto dc = DstTraits<premul>::load(fInterval->fCg);
fCc = DstTraits<premul>::load(fInterval->fCb) + dc * t;
fDcDx = dc * fDx;
fZeroRamp = fIsVertical || (dc == 0).allTrue();
fZeroRamp = fIsVertical || all(dc == 0);
}
void init_average_props() {
fAdvX = SK_ScalarInfinity;
fZeroRamp = true;
fDcDx = 0;
fCc = Sk4f(0);
fCc = 0;
// TODO: precompute the average at interval setup time?
for (const auto* i = fFirstInterval; i <= fLastInterval; ++i) {
@ -376,10 +376,10 @@ private:
}
// Current interval properties.
Sk4f fDcDx; // dst color gradient (dc/dx)
Sk4f fCc; // current color, interpolated in dst
SkScalar fAdvX; // remaining interval advance in dst
bool fZeroRamp; // current interval color grad is 0
skvx::float4 fDcDx; // dst color gradient (dc/dx)
skvx::float4 fCc; // current color, interpolated in dst
SkScalar fAdvX; // remaining interval advance in dst
bool fZeroRamp; // current interval color grad is 0
const Sk4fGradientInterval* fFirstInterval;
const Sk4fGradientInterval* fLastInterval;
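The ramp() template above emits four pixels per loop iteration: it keeps four running colors spaced dc apart (c0..c3, built from dc2 = dc + dc and dc4 = dc2 + dc2), so the inner loop is pure adds. The loop body itself is outside the hunk, so the dc4 advance below is an assumption based on the visible setup; a scalar, one-channel sketch of the unrolling scheme (the real code also folds in the pre-lerp bias and works on skvx::float4; name hypothetical):

// Write n samples of a linear ramp starting at c with per-sample step dc,
// four at a time.
static void ramp4(float dst[], float c, float dc, int n) {
    float dc2 = dc + dc,
          dc4 = dc2 + dc2;
    float c0 = c, c1 = c + dc, c2 = c0 + dc2, c3 = c1 + dc2;
    int i = 0;
    for (; i + 4 <= n; i += 4) {
        dst[i + 0] = c0; dst[i + 1] = c1; dst[i + 2] = c2; dst[i + 3] = c3;
        c0 += dc4; c1 += dc4; c2 += dc4; c3 += dc4;
    }
    for (; i < n; ++i) {          // remaining 0-3 samples
        dst[i] = c0;
        c0 += dc;
    }
}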

View File

@ -339,8 +339,8 @@ bool SkGradientShaderBase::onAppendStages(const SkStageRec& rec) const {
// See F and B below.
auto ctx = alloc->make<SkRasterPipeline_EvenlySpaced2StopGradientCtx>();
(Sk4f::Load(c_r.vec()) - Sk4f::Load(c_l.vec())).store(ctx->f);
( Sk4f::Load(c_l.vec())).store(ctx->b);
(skvx::float4::Load(c_r.vec()) - skvx::float4::Load(c_l.vec())).store(ctx->f);
( skvx::float4::Load(c_l.vec())).store(ctx->b);
ctx->interpolatedInPremul = premulGrad;
p->append(SkRasterPipeline::evenly_spaced_2_stop_gradient, ctx);
@ -705,11 +705,11 @@ static SkColor4f average_gradient_color(const SkColor4f colors[], const SkScalar
// the integral between the two endpoints is 0.5 * (ci + cj) * (pj - pi), which provides that
// intervals average color. The overall average color is thus the sum of each piece. The thing
// to keep in mind is that the provided gradient definition may implicitly use p=0 and p=1.
Sk4f blend(0.0f);
skvx::float4 blend(0.0f);
for (int i = 0; i < colorCount - 1; ++i) {
// Calculate the average color for the interval between pos(i) and pos(i+1)
Sk4f c0 = Sk4f::Load(&colors[i]);
Sk4f c1 = Sk4f::Load(&colors[i + 1]);
auto c0 = skvx::float4::Load(&colors[i]);
auto c1 = skvx::float4::Load(&colors[i + 1]);
// when pos == null, there are colorCount uniformly distributed stops, going from 0 to 1,
// so pos[i + 1] - pos[i] = 1/(colorCount-1)
@ -726,7 +726,7 @@ static SkColor4f average_gradient_color(const SkColor4f colors[], const SkScalar
if (p0 > 0.0f) {
// The first color is fixed between p = 0 to pos[0], so 0.5*(ci + cj)*(pj - pi)
// becomes 0.5*(c + c)*(pj - 0) = c * pj
Sk4f c = Sk4f::Load(&colors[0]);
auto c = skvx::float4::Load(&colors[0]);
blend += p0 * c;
}
}
@ -734,7 +734,7 @@ static SkColor4f average_gradient_color(const SkColor4f colors[], const SkScalar
if (p1 < 1.f) {
// The last color is fixed between pos[n-1] to p = 1, so 0.5*(ci + cj)*(pj - pi)
// becomes 0.5*(c + c)*(1 - pi) = c * (1 - pi)
Sk4f c = Sk4f::Load(&colors[colorCount - 1]);
auto c = skvx::float4::Load(&colors[colorCount - 1]);
blend += (1.f - p1) * c;
}
}
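average_gradient_color treats the gradient as piecewise linear in position: each interval contributes the trapezoid 0.5 * (c[i] + c[i+1]) * (p[i+1] - p[i]), and stops that do not reach p = 0 or p = 1 contribute flat, constant-color pieces; the weights sum to 1, so no final division is needed. A single-channel scalar sketch with a worked check (the real code runs the same sums on skvx::float4 and also handles the pos == nullptr uniform-spacing case; name hypothetical):

// Average of a piecewise-linear gradient channel over p in [0, 1].
// Assumes p[] is sorted ascending within [0, 1].
static float average_channel(const float c[], const float p[], int count) {
    float blend = 0.0f;
    for (int i = 0; i < count - 1; ++i) {
        blend += 0.5f * (c[i] + c[i + 1]) * (p[i + 1] - p[i]);  // trapezoid per interval
    }
    blend += c[0] * p[0];                            // flat piece before the first stop
    blend += c[count - 1] * (1.0f - p[count - 1]);   // flat piece after the last stop
    return blend;
}
// Two stops c = {0, 1} at p = {0.5, 1}: 0*0.5 + 0.5*(0+1)*0.5 + 1*0 = 0.25.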

View File

@ -64,7 +64,7 @@ DEF_TEST(SkFloatToHalf_finite_ftz, r) {
alternate = std::signbit(f) ? 0x8000 : 0x0000;
}
uint16_t actual = SkFloatToHalf_finite_ftz(Sk4f{f})[0];
uint16_t actual = SkFloatToHalf_finite_ftz(skvx::float4{f})[0];
// _finite_ftz() may truncate instead of rounding, so it may be one too small.
REPORTER_ASSERT(r, actual == expected || actual == expected - 1 ||
actual == alternate || actual == alternate - 1);