float xfermodes (burn, dodge, softlight) in Sk8f, possibly using AVX.

Xfermode_ColorDodge_aa	10.3ms -> 7.85ms	0.76x
 Xfermode_SoftLight_aa	13.8ms -> 10.2ms	0.74x
 Xfermode_ColorBurn_aa	10.7ms -> 7.82ms	0.73x
    Xfermode_SoftLight	33.6ms -> 23.2ms	0.69x
   Xfermode_ColorDodge	  25ms -> 16.5ms	0.66x
    Xfermode_ColorBurn	26.1ms -> 16.6ms	0.63x

Ought to be no pixel diffs:
https://gold.skia.org/search2?issue=1432903002&unt=true&query=source_type%3Dgm&master=false

Incidental stuff:

I made the SkNx(T) constructors implicit to make writing math expressions simpler.
This allows us to write expressions like
  Sk4f v;
  ...
  v = v*4;
rather than
  Sk4f v;
  ...
  v = v * Sk4f(4);

As written it only works when the constant is on the right-hand side,
so expressions like `(Sk4f(1) - da)` have to stay for now.  I plan on
following up with a CL that lets those become `(1 - da)` too.

BUG=skia:4117
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

Review URL: https://codereview.chromium.org/1432903002
This commit is contained in:
mtklein 2015-11-11 11:39:09 -08:00 committed by Commit bot
parent 9be5ff6f98
commit 084db25d47
8 changed files with 196 additions and 82 deletions

View File

@ -57,7 +57,7 @@
'<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp',
],
'avx_sources': [
'<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp',
'<(skia_src_path)/opts/SkOpts_avx.cpp',
],
'avx2_sources': [
'<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp',

View File

@ -31,7 +31,7 @@ class SkNi {
public:
SkNi() {}
SkNi(const SkNi<N/2, T>& lo, const SkNi<N/2, T>& hi) : fLo(lo), fHi(hi) {}
explicit SkNi(T val) : fLo(val), fHi(val) {}
SkNi(T val) : fLo(val), fHi(val) {}
static SkNi Load(const T vals[N]) {
return SkNi(SkNi<N/2,T>::Load(vals), SkNi<N/2,T>::Load(vals+N/2));
}
@ -85,7 +85,7 @@ template <int N>
class SkNf {
public:
SkNf() {}
explicit SkNf(float val) : fLo(val), fHi(val) {}
SkNf(float val) : fLo(val), fHi(val) {}
static SkNf Load(const float vals[N]) {
return SkNf(SkNf<N/2>::Load(vals), SkNf<N/2>::Load(vals+N/2));
}
@ -167,7 +167,7 @@ template <typename T>
class SkNi<1,T> {
public:
SkNi() {}
explicit SkNi(T val) : fVal(val) {}
SkNi(T val) : fVal(val) {}
static SkNi Load(const T vals[1]) { return SkNi(vals[0]); }
void store(T vals[1]) const { vals[0] = fVal; }
@ -205,7 +205,7 @@ template <>
class SkNf<1> {
public:
SkNf() {}
explicit SkNf(float val) : fVal(val) {}
SkNf(float val) : fVal(val) {}
static SkNf Load(const float vals[1]) { return SkNf(vals[0]); }
static SkNf FromBytes(const uint8_t bytes[1]) { return SkNf((float)bytes[0]); }
@ -274,7 +274,9 @@ inline SkNx SkNx_dup(const SkNx& src) { return SkNx_shuffle<Ix>(src); }
// Include platform specific specializations if available.
#ifndef SKNX_NO_SIMD
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX
#include "../opts/SkNx_avx.h"
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
#include "../opts/SkNx_sse.h"
#elif defined(SK_ARM_HAS_NEON)
#include "../opts/SkNx_neon.h"
@ -285,9 +287,10 @@ inline SkNx SkNx_dup(const SkNx& src) { return SkNx_shuffle<Ix>(src); }
typedef SkNf<2> Sk2f;
typedef SkNf<2> Sk2s;
typedef SkNf<4> Sk4f;
typedef SkNf<4> Sk4s;
typedef SkNf<8> Sk8f;
typedef SkNf<8> Sk8s;
typedef SkNi<8, uint16_t> Sk8h;
typedef SkNi<16, uint16_t> Sk16h;

View File

@ -84,7 +84,7 @@ namespace SkOpts {
void Init_ssse3();
void Init_sse41();
void Init_sse42() { SkDEBUGCODE( SkDebugf("sse 4.2 detected\n"); ) }
void Init_avx() { SkDEBUGCODE( SkDebugf("avx detected\n"); ) }
void Init_avx();
void Init_avx2() { SkDEBUGCODE( SkDebugf("avx2 detected\n"); ) }
void Init_neon();
//TODO: _dsp2, _armv7, _armv8, _x86, _x86_64, _sse42, ... ?

90
src/opts/SkNx_avx.h Normal file
View File

@@ -0,0 +1,90 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkNx_avx_DEFINED
#define SkNx_avx_DEFINED
// This file may assume <= AVX, but must check SK_CPU_SSE_LEVEL for anything more recent.
// All the SSE specializations are still good ideas. We'll just add Sk8f.
#include "SkNx_sse.h"
namespace { // See SkNx.h
// 8 floats in a single AVX ymm register.  Specializes the generic SkNf<8>
// declared in SkNx.h (which would otherwise pair two SkNf<4> halves).
template <>
class SkNf<8> {
public:
// Wrap a raw AVX register.  Implicit, so intrinsic results convert freely.
SkNf(const __m256& vec) : fVec(vec) {}
SkNf() {}  // Intentionally uninitialized.
SkNf(float val) : fVec(_mm256_set1_ps(val)) {}  // Broadcast val to all 8 lanes.
static SkNf Load(const float vals[8]) { return _mm256_loadu_ps(vals); }  // Unaligned load.
// Widen 8 bytes to 8 floats: zero-extend 8-bit -> 16-bit -> 32-bit ints
// in xmm registers, join the two 4-int halves into a ymm, then convert.
static SkNf FromBytes(const uint8_t bytes[8]) {
__m128i fix8 = _mm_loadl_epi64((const __m128i*)bytes),
fix16 = _mm_unpacklo_epi8 (fix8 , _mm_setzero_si128()),
lo32 = _mm_unpacklo_epi16(fix16, _mm_setzero_si128()),
hi32 = _mm_unpackhi_epi16(fix16, _mm_setzero_si128());
__m256i fix32 = _mm256_insertf128_si256(_mm256_castsi128_si256(lo32), hi32, 1);
return _mm256_cvtepi32_ps(fix32);
}
SkNf(float a, float b, float c, float d,
float e, float f, float g, float h) : fVec(_mm256_setr_ps(a,b,c,d,e,f,g,h)) {}
void store(float vals[8]) const { _mm256_storeu_ps(vals, fVec); }  // Unaligned store.
// Narrow 8 floats back to 8 bytes: truncate (not round) to 32-bit ints, then
// saturate down 32 -> 16 -> 8 bits via packus.  Callers wanting round-to-nearest
// add 0.5 before calling (as the xfermode code does).
void toBytes(uint8_t bytes[8]) const {
__m256i fix32 = _mm256_cvttps_epi32(fVec);
__m128i lo32 = _mm256_extractf128_si256(fix32, 0),
hi32 = _mm256_extractf128_si256(fix32, 1),
fix16 = _mm_packus_epi32(lo32, hi32),
fix8 = _mm_packus_epi16(fix16, fix16);
_mm_storel_epi64((__m128i*)bytes, fix8);
}
SkNf operator + (const SkNf& o) const { return _mm256_add_ps(fVec, o.fVec); }
SkNf operator - (const SkNf& o) const { return _mm256_sub_ps(fVec, o.fVec); }
SkNf operator * (const SkNf& o) const { return _mm256_mul_ps(fVec, o.fVec); }
SkNf operator / (const SkNf& o) const { return _mm256_div_ps(fVec, o.fVec); }
// Lane-wise comparisons: each lane becomes all-1 bits (true) or all-0 bits
// (false), reinterpreted as a float vector for use with thenElse()/movemask.
SkNf operator == (const SkNf& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_EQ_OQ); }
SkNf operator != (const SkNf& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_NEQ_OQ); }
SkNf operator < (const SkNf& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LT_OQ); }
SkNf operator > (const SkNf& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GT_OQ); }
SkNf operator <= (const SkNf& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_LE_OQ); }
SkNf operator >= (const SkNf& o) const { return _mm256_cmp_ps(fVec, o.fVec, _CMP_GE_OQ); }
static SkNf Min(const SkNf& l, const SkNf& r) { return _mm256_min_ps(l.fVec, r.fVec); }
static SkNf Max(const SkNf& l, const SkNf& r) { return _mm256_max_ps(l.fVec, r.fVec); }
SkNf sqrt() const { return _mm256_sqrt_ps (fVec); }
// rsqrt0/1/2 presumably form a precision ladder in other SkNf backends;
// here all three return the raw hardware estimate with no refinement steps.
SkNf rsqrt0() const { return _mm256_rsqrt_ps(fVec); }
SkNf rsqrt1() const { return this->rsqrt0(); }
SkNf rsqrt2() const { return this->rsqrt1(); }
SkNf invert() const { return SkNf(1) / *this; }  // Exact reciprocal via divide.
SkNf approxInvert() const { return _mm256_rcp_ps(fVec); }  // Fast hardware reciprocal estimate.
// Extract lane k by spilling through memory (union pun) rather than extract
// intrinsics.
template <int k> float kth() const {
SkASSERT(0 <= k && k < 8);
union { __m256 v; float fs[8]; } pun = {fVec};
return pun.fs[k&7];  // &7 keeps the index in range even when SkASSERT compiles away.
}
// movemask gathers each lane's sign bit; comparison results set all lane bits,
// so these test whether all / any lanes compared true.
bool allTrue() const { return 0xff == _mm256_movemask_ps(fVec); }
bool anyTrue() const { return 0x00 != _mm256_movemask_ps(fVec); }
// Lane-wise select: where *this (a comparison result) is true take t, else e.
SkNf thenElse(const SkNf& t, const SkNf& e) const {
return _mm256_blendv_ps(e.fVec, t.fVec, fVec);
}
__m256 fVec;
};
} // namespace
#endif//SkNx_avx_DEFINED

View File

@ -38,7 +38,7 @@ public:
SkNf(float32x2_t vec) : fVec(vec) {}
SkNf() {}
explicit SkNf(float val) : fVec(vdup_n_f32(val)) {}
SkNf(float val) : fVec(vdup_n_f32(val)) {}
static SkNf Load(const float vals[2]) { return vld1_f32(vals); }
SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; }
@ -119,7 +119,7 @@ public:
SkNi(const int32x4_t& vec) : fVec(vec) {}
SkNi() {}
explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {}
SkNi(int val) : fVec(vdupq_n_s32(val)) {}
static SkNi Load(const int vals[4]) { return vld1q_s32(vals); }
SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }
@ -146,7 +146,7 @@ public:
SkNf(float32x4_t vec) : fVec(vec) {}
SkNf() {}
explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {}
SkNf(float val) : fVec(vdupq_n_f32(val)) {}
static SkNf Load(const float vals[4]) { return vld1q_f32(vals); }
static SkNf FromBytes(const uint8_t vals[4]) {
uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals);
@ -246,7 +246,7 @@ public:
SkNi(const uint16x8_t& vec) : fVec(vec) {}
SkNi() {}
explicit SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {}
SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {}
static SkNi Load(const uint16_t vals[8]) { return vld1q_u16(vals); }
SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
@ -283,7 +283,7 @@ public:
SkNi(const uint8x16_t& vec) : fVec(vec) {}
SkNi() {}
explicit SkNi(uint8_t val) : fVec(vdupq_n_u8(val)) {}
SkNi(uint8_t val) : fVec(vdupq_n_u8(val)) {}
static SkNi Load(const uint8_t vals[16]) { return vld1q_u8(vals); }
SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d,

View File

@ -19,7 +19,7 @@ public:
SkNf(const __m128& vec) : fVec(vec) {}
SkNf() {}
explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {}
SkNf(float val) : fVec(_mm_set1_ps(val)) {}
static SkNf Load(const float vals[2]) {
return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals));
}
@ -68,7 +68,7 @@ public:
SkNi(const __m128i& vec) : fVec(vec) {}
SkNi() {}
explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {}
SkNi(int val) : fVec(_mm_set1_epi32(val)) {}
static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); }
SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {}
@ -106,7 +106,7 @@ public:
SkNf(const __m128& vec) : fVec(vec) {}
SkNf() {}
explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); }
static SkNf FromBytes(const uint8_t bytes[4]) {
@ -178,7 +178,7 @@ public:
SkNi(const __m128i& vec) : fVec(vec) {}
SkNi() {}
explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
static SkNi Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); }
SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {}
@ -205,7 +205,7 @@ public:
SkNi(const __m128i& vec) : fVec(vec) {}
SkNi() {}
explicit SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
static SkNi Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); }
SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {}
@ -247,7 +247,7 @@ public:
SkNi(const __m128i& vec) : fVec(vec) {}
SkNi() {}
explicit SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
static SkNi Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }
SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
uint8_t e, uint8_t f, uint8_t g, uint8_t h,

17
src/opts/SkOpts_avx.cpp Normal file
View File

@@ -0,0 +1,17 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkOpts.h"
#define SK_OPTS_NS sk_avx
#include "SkXfermode_opts.h"
namespace SkOpts {
// Runtime CPU-dispatch hook, called once AVX support is detected (declared in
// the SkOpts init code).  Swaps in function pointers built with AVX codegen —
// currently just the float-xfermode factory from SkXfermode_opts.h, compiled
// here under the sk_avx namespace via SK_OPTS_NS.
void Init_avx() {
create_xfermode = sk_avx::create_xfermode;
}
}

View File

@ -109,71 +109,76 @@ XFERMODE(Lighten) {
}
#undef XFERMODE
// Some xfermodes use math like divide or sqrt that's best done in floats 1 pixel at a time.
#define XFERMODE(Name) static Sk4f SK_VECTORCALL Name(Sk4f d, Sk4f s)
// Some xfermodes use math like divide or sqrt that's best done in floats.
// We write it generically, then call it 1 or 2 pixels at a time (T == Sk4f or Sk8f).
#define XFERMODE(Name) struct Name { template <typename T> T operator()(const T&, const T&); }; \
template <typename T> T Name::operator()(const T& d, const T& s)
static_assert(SK_A32_SHIFT == 24, "");
static inline Sk4f a_rgb(const Sk4f& a, const Sk4f& rgb) {
static_assert(SK_A32_SHIFT == 24, "");
return a * Sk4f(0,0,0,1) + rgb * Sk4f(1,1,1,0);
}
static inline Sk4f alphas(const Sk4f& f) {
return SkNx_dup<SK_A32_SHIFT/8>(f);
// Two-pixel (Sk8f) variant: keep the alpha channel of each RGBA group from 'a'
// and the color channels from 'rgb'.  Alpha is lane 3 of each group of four
// (the static_assert above pins SK_A32_SHIFT == 24), so mask-and-add selects
// lanes {3,7} from 'a' and the rest from 'rgb'.
static inline Sk8f a_rgb(const Sk8f& a, const Sk8f& rgb) {
// TODO: SkNx_blend<0,0,0,1,0,0,0,1>(a, rgb) to let us use _mm256_blend_ps?
return a * Sk8f(0,0,0,1,0,0,0,1) + rgb * Sk8f(1,1,1,0,1,1,1,0);
}
// Broadcast each pixel's alpha (lane 3 of each RGBA group of four; the
// static_assert above pins SK_A32_SHIFT == 24) across all four of its lanes —
// one pixel for Sk4f, two pixels for Sk8f.
static inline Sk4f alphas(const Sk4f& f) { return SkNx_shuffle<3,3,3,3> (f); }
static inline Sk8f alphas(const Sk8f& f) { return SkNx_shuffle<3,3,3,3,7,7,7,7>(f); }
XFERMODE(ColorDodge) {
auto sa = alphas(s),
da = alphas(d),
isa = Sk4f(1)-sa,
ida = Sk4f(1)-da;
isa = T(1)-sa,
ida = T(1)-da;
auto srcover = s + d*isa,
dstover = d + s*ida,
otherwise = sa * Sk4f::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa;
otherwise = sa * T::Min(da, (d*sa)*(sa-s).approxInvert()) + s*ida + d*isa;
// Order matters here, preferring d==0 over s==sa.
auto colors = (d == Sk4f(0)).thenElse(dstover,
(s == sa).thenElse(srcover,
otherwise));
auto colors = (d == 0).thenElse(dstover,
(s == sa).thenElse(srcover,
otherwise));
return a_rgb(srcover, colors);
}
XFERMODE(ColorBurn) {
auto sa = alphas(s),
da = alphas(d),
isa = Sk4f(1)-sa,
ida = Sk4f(1)-da;
isa = T(1)-sa,
ida = T(1)-da;
auto srcover = s + d*isa,
dstover = d + s*ida,
otherwise = sa*(da-Sk4f::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa;
otherwise = sa*(da-T::Min(da, (da-d)*sa*s.approxInvert())) + s*ida + d*isa;
// Order matters here, preferring d==da over s==0.
auto colors = (d == da).thenElse(dstover,
(s == Sk4f(0)).thenElse(srcover,
otherwise));
auto colors = (d == da).thenElse(dstover,
(s == 0).thenElse(srcover,
otherwise));
return a_rgb(srcover, colors);
}
XFERMODE(SoftLight) {
auto sa = alphas(s),
da = alphas(d),
isa = Sk4f(1)-sa,
ida = Sk4f(1)-da;
isa = T(1)-sa,
ida = T(1)-da;
// Some common terms.
auto m = (da > Sk4f(0)).thenElse(d / da, Sk4f(0)),
s2 = Sk4f(2)*s,
m4 = Sk4f(4)*m;
auto m = (da > 0).thenElse(d / da, 0),
s2 = s*2,
m4 = m*4;
// The logic forks three ways:
// 1. dark src?
// 2. light src, dark dst?
// 3. light src, light dst?
auto darkSrc = d*(sa + (s2 - sa)*(Sk4f(1) - m)), // Used in case 1.
darkDst = (m4*m4 + m4)*(m - Sk4f(1)) + Sk4f(7)*m, // Used in case 2.
liteDst = m.sqrt() - m, // Used in case 3.
liteSrc = d*sa + da*(s2-sa)*(Sk4f(4)*d <= da).thenElse(darkDst, liteDst); // Case 2 or 3?
auto darkSrc = d*(sa + (s2 - sa)*(T(1) - m)), // Used in case 1.
darkDst = (m4*m4 + m4)*(m - 1) + m*7, // Used in case 2.
liteDst = m.sqrt() - m, // Used in case 3.
liteSrc = d*sa + da*(s2-sa)*(d*4 <= da).thenElse(darkDst, liteDst); // Case 2 or 3?
auto alpha = s + d*isa;
auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3?
auto colors = s*ida + d*isa + (s2 <= sa).thenElse(darkSrc, liteSrc); // Case 1 or 2/3?
return a_rgb(alpha, colors);
}
@ -240,53 +245,52 @@ private:
typedef SkProcCoeffXfermode INHERITED;
};
class Sk4fXfermode : public SkProcCoeffXfermode {
template <typename BlendFn>
class FloatXfermode : public SkProcCoeffXfermode {
public:
typedef Sk4f (SK_VECTORCALL *ProcF)(Sk4f, Sk4f);
Sk4fXfermode(const ProcCoeff& rec, SkXfermode::Mode mode, ProcF procf)
: INHERITED(rec, mode)
, fProcF(procf) {}
FloatXfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
: INHERITED(rec, mode) {}
void xfer32(SkPMColor dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
for (int i = 0; i < n; i++) {
dst[i] = aa ? this->xfer32(dst[i], src[i], aa[i])
: this->xfer32(dst[i], src[i]);
BlendFn blend;
while (n >= 2) {
auto d = Sk8f::FromBytes((const uint8_t*)dst) * (1.0f/255),
s = Sk8f::FromBytes((const uint8_t*)src) * (1.0f/255),
b = blend(d, s);
if (aa) {
auto a255 = Sk8f(aa[0],aa[0],aa[0],aa[0], aa[1],aa[1],aa[1],aa[1]);
(b*a255 + d*(Sk8f(255)-a255) + 0.5).toBytes((uint8_t*)dst);
aa += 2;
} else {
(b * 255 + 0.5).toBytes((uint8_t*)dst);
}
dst += 2;
src += 2;
n -= 2;
}
if (n) {
auto d = Sk4f::FromBytes((const uint8_t*)dst) * (1.0f/255),
s = Sk4f::FromBytes((const uint8_t*)src) * (1.0f/255),
b = blend(d, s);
if (aa) {
auto a255 = Sk4f(aa[0],aa[0],aa[0],aa[0]);
(b*a255 + d*(Sk4f(255)-a255) + 0.5).toBytes((uint8_t*)dst);
aa++;
} else {
(b * 255 + 0.5).toBytes((uint8_t*)dst);
}
}
}
void xfer16(uint16_t dst[], const SkPMColor src[], int n, const SkAlpha aa[]) const override {
for (int i = 0; i < n; i++) {
SkPMColor dst32 = SkPixel16ToPixel32(dst[i]);
dst32 = aa ? this->xfer32(dst32, src[i], aa[i])
: this->xfer32(dst32, src[i]);
dst[i] = SkPixel32ToPixel16(dst32);
SkPMColor dst32 = SkPixel16ToPixel32(dst[i]); // Convert dst up to 8888.
this->xfer32(&dst32, src+i, 1, aa ? aa+i : nullptr); // Blend 1 pixel.
dst[i] = SkPixel32ToPixel16(dst32); // Repack dst to 565 and store.
}
}
private:
static Sk4f Load(SkPMColor c) {
return Sk4f::FromBytes((uint8_t*)&c) * Sk4f(1.0f/255);
}
static SkPMColor Round(const Sk4f& f) {
SkPMColor c;
(f * Sk4f(255) + Sk4f(0.5f)).toBytes((uint8_t*)&c);
return c;
}
inline SkPMColor xfer32(SkPMColor dst, SkPMColor src) const {
return Round(fProcF(Load(dst), Load(src)));
}
inline SkPMColor xfer32(SkPMColor dst, SkPMColor src, SkAlpha aa) const {
Sk4f s(Load(src)),
d(Load(dst)),
b(fProcF(d,s));
// We do aa in full float precision before going back down to bytes, because we can!
Sk4f a = Sk4f(aa) * Sk4f(1.0f/255);
b = b*a + d*(Sk4f(1)-a);
return Round(b);
}
ProcF fProcF;
typedef SkProcCoeffXfermode INHERITED;
};
@ -323,7 +327,7 @@ static SkXfermode* create_xfermode(const ProcCoeff& rec, SkXfermode::Mode mode)
#undef CASE
#define CASE(Mode) \
case SkXfermode::k##Mode##_Mode: return new Sk4fXfermode(rec, mode, &Mode)
case SkXfermode::k##Mode##_Mode: return new FloatXfermode<Mode>(rec, mode)
CASE(ColorDodge);
CASE(ColorBurn);
CASE(SoftLight);