2015-03-20 13:33:02 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2015 Google Inc.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef SkNx_DEFINED
|
|
|
|
#define SkNx_DEFINED
|
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
|
|
|
#define SKNX_NO_SIMDx // Remove the x to disable SIMD for all SkNx types.
|
|
|
|
|
|
|
|
|
|
|
|
#include "SkScalar.h"
|
|
|
|
#include "SkTypes.h"
|
|
|
|
#include <math.h>
|
|
|
|
#define REQUIRE(x) static_assert(x, #x)
|
|
|
|
|
2015-08-12 18:56:43 +00:00
|
|
|
// This file may be included multiple times by .cpp files with different flags, leading
|
|
|
|
// to different definitions. Usually that doesn't matter because it's all inlined, but
|
|
|
|
// in Debug modes the compilers may not inline everything. So wrap everything in an
|
|
|
|
// anonymous namespace to give each includer their own silo of this code (or the linker
|
|
|
|
// will probably pick one randomly for us, which is rarely correct).
|
|
|
|
namespace {
|
|
|
|
|
2015-04-14 18:49:14 +00:00
|
|
|
// The default implementations just fall back on a pair of size N/2.
|
|
|
|
|
2015-04-14 21:02:52 +00:00
|
|
|
template <int N, typename T>
|
2015-11-20 21:53:19 +00:00
|
|
|
class SkNx {
|
2015-04-14 21:02:52 +00:00
|
|
|
public:
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx() {}
|
|
|
|
SkNx(const SkNx<N/2, T>& lo, const SkNx<N/2, T>& hi) : fLo(lo), fHi(hi) {}
|
|
|
|
SkNx(T val) : fLo(val), fHi(val) {}
|
|
|
|
static SkNx Load(const T vals[N]) {
|
|
|
|
return SkNx(SkNx<N/2,T>::Load(vals), SkNx<N/2,T>::Load(vals+N/2));
|
2015-04-14 21:02:52 +00:00
|
|
|
}
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx(T a, T b) : fLo(a), fHi(b) { REQUIRE(N==2); }
|
|
|
|
SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4); }
|
|
|
|
SkNx(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { REQUIRE(N==8); }
|
|
|
|
SkNx(T a, T b, T c, T d, T e, T f, T g, T h,
|
2015-05-12 13:11:21 +00:00
|
|
|
T i, T j, T k, T l, T m, T n, T o, T p)
|
|
|
|
: fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
|
|
|
void store(T vals[N]) const {
|
|
|
|
fLo.store(vals);
|
|
|
|
fHi.store(vals+N/2);
|
|
|
|
}
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx saturatedAdd(const SkNx& o) const {
|
|
|
|
return SkNx(fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi));
|
2015-05-12 22:48:09 +00:00
|
|
|
}
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi); }
|
|
|
|
SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi); }
|
|
|
|
SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator << (int bits) const { return SkNx(fLo << bits, fHi << bits); }
|
|
|
|
SkNx operator >> (int bits) const { return SkNx(fLo >> bits, fHi >> bits); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
static SkNx Min(const SkNx& a, const SkNx& b) {
|
|
|
|
return SkNx(SkNx<N/2, T>::Min(a.fLo, b.fLo), SkNx<N/2, T>::Min(a.fHi, b.fHi));
|
2015-05-15 00:53:04 +00:00
|
|
|
}
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator < (const SkNx& o) const { return SkNx(fLo < o.fLo, fHi < o.fHi); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
|
|
|
template <int k> T kth() const {
|
|
|
|
SkASSERT(0 <= k && k < N);
|
|
|
|
return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>();
|
|
|
|
}
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
|
|
|
|
bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx thenElse(const SkNx& t, const SkNx& e) const {
|
|
|
|
return SkNx(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi));
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
}
|
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
protected:
|
2015-04-14 21:02:52 +00:00
|
|
|
REQUIRE(0 == (N & (N-1)));
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx<N/2, T> fLo, fHi;
|
2015-04-14 21:02:52 +00:00
|
|
|
};
|
|
|
|
|
2015-11-09 16:33:53 +00:00
|
|
|
template <int N>
|
2015-11-20 21:53:19 +00:00
|
|
|
class SkNx<N,float> {
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
public:
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx() {}
|
|
|
|
SkNx(float val) : fLo(val), fHi(val) {}
|
|
|
|
static SkNx Load(const float vals[N]) {
|
|
|
|
return SkNx(SkNx<N/2, float>::Load(vals), SkNx<N/2, float>::Load(vals+N/2));
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
}
|
2015-08-31 21:39:59 +00:00
|
|
|
// FromBytes() and toBytes() specializations may assume their argument is N-byte aligned.
|
|
|
|
// E.g. Sk4f::FromBytes() may assume it's reading from a 4-byte-aligned pointer.
|
|
|
|
// Converts [0,255] bytes to [0.0, 255.0] floats.
|
2015-11-20 21:53:19 +00:00
|
|
|
static SkNx FromBytes(const uint8_t bytes[N]) {
|
|
|
|
return SkNx(SkNx<N/2, float>::FromBytes(bytes), SkNx<N/2, float>::FromBytes(bytes+N/2));
|
2015-08-31 21:39:59 +00:00
|
|
|
}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx(float a, float b) : fLo(a), fHi(b) { REQUIRE(N==2); }
|
|
|
|
SkNx(float a, float b, float c, float d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4); }
|
|
|
|
SkNx(float a, float b, float c, float d, float e, float f, float g, float h)
|
2015-11-09 16:33:53 +00:00
|
|
|
: fLo(a,b,c,d)
|
|
|
|
, fHi(e,f,g,h) { REQUIRE(N==8); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-09 16:33:53 +00:00
|
|
|
void store(float vals[N]) const {
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
fLo.store(vals);
|
|
|
|
fHi.store(vals+N/2);
|
|
|
|
}
|
2015-08-31 21:39:59 +00:00
|
|
|
// Please see note on FromBytes().
|
2015-09-01 13:29:45 +00:00
|
|
|
// Clamps to [0.0,255.0] floats and truncates to [0,255] bytes.
|
2015-08-31 21:39:59 +00:00
|
|
|
void toBytes(uint8_t bytes[N]) const {
|
|
|
|
fLo.toBytes(bytes);
|
|
|
|
fHi.toBytes(bytes+N/2);
|
|
|
|
}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-12-01 15:10:21 +00:00
|
|
|
// Some implementations can do this faster.
|
|
|
|
static void ToBytes(uint8_t bytes[4*N],
|
|
|
|
const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
|
|
|
|
a.toBytes(bytes+0*N);
|
|
|
|
b.toBytes(bytes+1*N);
|
|
|
|
c.toBytes(bytes+2*N);
|
|
|
|
d.toBytes(bytes+3*N);
|
|
|
|
}
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi); }
|
|
|
|
SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi); }
|
|
|
|
SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi); }
|
|
|
|
SkNx operator / (const SkNx& o) const { return SkNx(fLo / o.fLo, fHi / o.fHi); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator == (const SkNx& o) const { return SkNx(fLo == o.fLo, fHi == o.fHi); }
|
|
|
|
SkNx operator != (const SkNx& o) const { return SkNx(fLo != o.fLo, fHi != o.fHi); }
|
|
|
|
SkNx operator < (const SkNx& o) const { return SkNx(fLo < o.fLo, fHi < o.fHi); }
|
|
|
|
SkNx operator > (const SkNx& o) const { return SkNx(fLo > o.fLo, fHi > o.fHi); }
|
|
|
|
SkNx operator <= (const SkNx& o) const { return SkNx(fLo <= o.fLo, fHi <= o.fHi); }
|
|
|
|
SkNx operator >= (const SkNx& o) const { return SkNx(fLo >= o.fLo, fHi >= o.fHi); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
static SkNx Min(const SkNx& l, const SkNx& r) {
|
|
|
|
return SkNx(SkNx<N/2, float>::Min(l.fLo, r.fLo), SkNx<N/2, float>::Min(l.fHi, r.fHi));
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
}
|
2015-11-20 21:53:19 +00:00
|
|
|
static SkNx Max(const SkNx& l, const SkNx& r) {
|
|
|
|
return SkNx(SkNx<N/2, float>::Max(l.fLo, r.fLo), SkNx<N/2, float>::Max(l.fHi, r.fHi));
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
}
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx sqrt() const { return SkNx(fLo. sqrt(), fHi. sqrt()); }
|
2015-04-27 21:22:32 +00:00
|
|
|
|
|
|
|
// Generally, increasing precision, increasing cost.
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx rsqrt0() const { return SkNx(fLo.rsqrt0(), fHi.rsqrt0()); }
|
|
|
|
SkNx rsqrt1() const { return SkNx(fLo.rsqrt1(), fHi.rsqrt1()); }
|
|
|
|
SkNx rsqrt2() const { return SkNx(fLo.rsqrt2(), fHi.rsqrt2()); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx invert() const { return SkNx(fLo. invert(), fHi. invert()); }
|
|
|
|
SkNx approxInvert() const { return SkNx(fLo.approxInvert(), fHi.approxInvert()); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-09 16:33:53 +00:00
|
|
|
template <int k> float kth() const {
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
SkASSERT(0 <= k && k < N);
|
2015-04-03 13:16:13 +00:00
|
|
|
return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>();
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
}
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
|
|
|
|
bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx thenElse(const SkNx& t, const SkNx& e) const {
|
|
|
|
return SkNx(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi));
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
}
|
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
protected:
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
REQUIRE(0 == (N & (N-1)));
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx(const SkNx<N/2, float>& lo, const SkNx<N/2, float>& hi) : fLo(lo), fHi(hi) {}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx<N/2, float> fLo, fHi;
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2015-04-14 18:49:14 +00:00
|
|
|
// Bottom out the default implementations with scalars when nothing's been specialized.
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-04-14 21:02:52 +00:00
|
|
|
template <typename T>
|
2015-11-20 21:53:19 +00:00
|
|
|
class SkNx<1,T> {
|
2015-04-14 21:02:52 +00:00
|
|
|
public:
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx() {}
|
|
|
|
SkNx(T val) : fVal(val) {}
|
|
|
|
static SkNx Load(const T vals[1]) { return SkNx(vals[0]); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
|
|
|
void store(T vals[1]) const { vals[0] = fVal; }
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx saturatedAdd(const SkNx& o) const {
|
2015-05-12 22:48:09 +00:00
|
|
|
SkASSERT((T)(~0) > 0); // TODO: support signed T
|
|
|
|
T sum = fVal + o.fVal;
|
2015-11-20 21:53:19 +00:00
|
|
|
return SkNx(sum < fVal ? (T)(~0) : sum);
|
2015-05-12 22:48:09 +00:00
|
|
|
}
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); }
|
|
|
|
SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); }
|
|
|
|
SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator << (int bits) const { return SkNx(fVal << bits); }
|
|
|
|
SkNx operator >> (int bits) const { return SkNx(fVal >> bits); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
static SkNx Min(const SkNx& a, const SkNx& b) { return SkNx(SkTMin(a.fVal, b.fVal)); }
|
|
|
|
SkNx operator <(const SkNx& o) const { return SkNx(fVal < o.fVal); }
|
2015-05-15 00:53:04 +00:00
|
|
|
|
2015-04-14 21:02:52 +00:00
|
|
|
template <int k> T kth() const {
|
|
|
|
SkASSERT(0 == k);
|
|
|
|
return fVal;
|
|
|
|
}
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
bool allTrue() const { return fVal; }
|
|
|
|
bool anyTrue() const { return fVal; }
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal ? t : e; }
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
protected:
|
2015-04-14 21:02:52 +00:00
|
|
|
T fVal;
|
|
|
|
};
|
|
|
|
|
2015-11-09 16:33:53 +00:00
|
|
|
template <>
|
2015-11-20 21:53:19 +00:00
|
|
|
class SkNx<1,float> {
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
public:
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx() {}
|
|
|
|
SkNx(float val) : fVal(val) {}
|
|
|
|
static SkNx Load(const float vals[1]) { return SkNx(vals[0]); }
|
|
|
|
static SkNx FromBytes(const uint8_t bytes[1]) { return SkNx((float)bytes[0]); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-09 16:33:53 +00:00
|
|
|
void store(float vals[1]) const { vals[0] = fVal; }
|
|
|
|
void toBytes(uint8_t bytes[1]) const { bytes[0] = (uint8_t)(SkTMin(fVal, 255.0f)); }
|
2015-04-27 19:08:01 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); }
|
|
|
|
SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); }
|
|
|
|
SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); }
|
|
|
|
SkNx operator / (const SkNx& o) const { return SkNx(fVal / o.fVal); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx operator == (const SkNx& o) const { return SkNx(fVal == o.fVal); }
|
|
|
|
SkNx operator != (const SkNx& o) const { return SkNx(fVal != o.fVal); }
|
|
|
|
SkNx operator < (const SkNx& o) const { return SkNx(fVal < o.fVal); }
|
|
|
|
SkNx operator > (const SkNx& o) const { return SkNx(fVal > o.fVal); }
|
|
|
|
SkNx operator <= (const SkNx& o) const { return SkNx(fVal <= o.fVal); }
|
|
|
|
SkNx operator >= (const SkNx& o) const { return SkNx(fVal >= o.fVal); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
static SkNx Min(const SkNx& l, const SkNx& r) { return SkNx(SkTMin(l.fVal, r.fVal)); }
|
|
|
|
static SkNx Max(const SkNx& l, const SkNx& r) { return SkNx(SkTMax(l.fVal, r.fVal)); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx sqrt() const { return SkNx(sqrtf(fVal)); }
|
|
|
|
SkNx rsqrt0() const { return SkNx(1.0f / sqrtf(fVal)); }
|
|
|
|
SkNx rsqrt1() const { return this->rsqrt0(); }
|
|
|
|
SkNx rsqrt2() const { return this->rsqrt1(); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx invert() const { return SkNx(1.0f / fVal); }
|
|
|
|
SkNx approxInvert() const { return this->invert(); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-09 16:33:53 +00:00
|
|
|
template <int k> float kth() const {
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
SkASSERT(k == 0);
|
|
|
|
return fVal;
|
|
|
|
}
|
|
|
|
|
2015-11-09 16:33:53 +00:00
|
|
|
bool allTrue() const { return this->pun() != 0; }
|
|
|
|
bool anyTrue() const { return this->pun() != 0; }
|
2015-11-20 21:53:19 +00:00
|
|
|
SkNx thenElse(const SkNx& t, const SkNx& e) const { return this->pun() ? t : e; }
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
protected:
|
2015-11-09 16:33:53 +00:00
|
|
|
uint32_t pun() const {
|
|
|
|
union { float f; uint32_t i; } pun = { fVal };
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
return pun.i;
|
|
|
|
}
|
|
|
|
|
2015-11-09 16:33:53 +00:00
|
|
|
float fVal;
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
};
|
|
|
|
|
2015-09-10 21:16:07 +00:00
|
|
|
// This default implementation can be specialized by ../opts/SkNx_foo.h
|
|
|
|
// if there's a better platform-specific shuffle strategy.
|
2015-11-20 21:53:19 +00:00
|
|
|
template <typename Nx, int... Ix>
|
|
|
|
inline Nx SkNx_shuffle_impl(const Nx& src) { return Nx( src.template kth<Ix>()... ); }
|
2015-09-10 21:16:07 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
// This generic shuffle can be called with 1 or N indices:
|
2015-09-10 21:16:07 +00:00
|
|
|
// Sk4f f(a,b,c,d);
|
|
|
|
// SkNx_shuffle<3>(f); // ~~~> Sk4f(d,d,d,d)
|
|
|
|
// SkNx_shuffle<2,1,0,3>(f); // ~~~> Sk4f(c,b,a,d)
|
2015-11-20 21:53:19 +00:00
|
|
|
template <int... Ix, typename Nx>
|
|
|
|
inline Nx SkNx_shuffle(const Nx& src) { return SkNx_shuffle_impl<Nx, Ix...>(src); }
|
2015-09-10 21:16:07 +00:00
|
|
|
|
|
|
|
// A reminder alias that shuffles can be used to duplicate a single index across a vector.
|
2015-11-20 21:53:19 +00:00
|
|
|
template <int Ix, typename Nx>
|
|
|
|
inline Nx SkNx_dup(const Nx& src) { return SkNx_shuffle<Ix>(src); }
|
|
|
|
|
|
|
|
// This is a poor-man's std::make_index_sequence from C++14.
|
|
|
|
// I'd implement it fully, but it hurts my head.
|
|
|
|
template <int...> struct SkIntSequence {};
|
|
|
|
template <int N> struct MakeSkIntSequence;
|
|
|
|
template <> struct MakeSkIntSequence< 1> : SkIntSequence<0 >{};
|
|
|
|
template <> struct MakeSkIntSequence< 2> : SkIntSequence<0,1 >{};
|
|
|
|
template <> struct MakeSkIntSequence< 4> : SkIntSequence<0,1,2,3 >{};
|
|
|
|
template <> struct MakeSkIntSequence< 8> : SkIntSequence<0,1,2,3,4,5,6,7 >{};
|
|
|
|
template <> struct MakeSkIntSequence<16> : SkIntSequence<0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15>{};
|
|
|
|
|
|
|
|
// This is the default/fallback implementation for SkNx_cast. Best to specialize SkNx_cast!
|
|
|
|
template <typename D, typename S, int N, int... Ix>
|
|
|
|
SkNx<N,D> SkNx_cast_fallback(const SkNx<N,S>& src, SkIntSequence<Ix...>) {
|
|
|
|
return SkNx<N,D>( (D)src.template kth<Ix>()... );
|
|
|
|
}
|
|
|
|
|
|
|
|
// This is a generic cast between two SkNx with the same number of elements N. E.g.
|
|
|
|
// Sk4b bs = ...; // Load 4 bytes.
|
|
|
|
// Sk4f fs = SkNx_cast<float>(bs); // (This will replace SkNf::FromBytes() one day.)
|
|
|
|
// Sk4i is = SkNx_cast<int>(fs); // Cast each float to int.
|
|
|
|
// This can be specialized in ../opts/SkNx_foo.h if there's a better platform-specific cast.
|
|
|
|
template <typename D, typename S, int N>
|
|
|
|
SkNx<N,D> SkNx_cast(const SkNx<N,S>& src) {
|
|
|
|
return SkNx_cast_fallback<D,S,N>(src, MakeSkIntSequence<N>());
|
|
|
|
}
|
2015-09-10 21:16:07 +00:00
|
|
|
|
2015-08-12 18:56:43 +00:00
|
|
|
} // namespace
|
|
|
|
|
2015-09-10 21:16:07 +00:00
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
// Include platform specific specializations if available.
|
|
|
|
#ifndef SKNX_NO_SIMD
|
2015-11-11 19:39:09 +00:00
|
|
|
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX
|
|
|
|
#include "../opts/SkNx_avx.h"
|
|
|
|
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
#include "../opts/SkNx_sse.h"
|
|
|
|
#elif defined(SK_ARM_HAS_NEON)
|
|
|
|
#include "../opts/SkNx_neon.h"
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#undef REQUIRE
|
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
typedef SkNx<2, float> Sk2f;
|
|
|
|
typedef SkNx<2, float> Sk2s;
|
|
|
|
typedef SkNx<4, float> Sk4f;
|
|
|
|
typedef SkNx<4, float> Sk4s;
|
|
|
|
typedef SkNx<8, float> Sk8f;
|
|
|
|
typedef SkNx<8, float> Sk8s;
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-11-20 21:53:19 +00:00
|
|
|
typedef SkNx<8, uint16_t> Sk8h;
|
|
|
|
typedef SkNx<16, uint16_t> Sk16h;
|
|
|
|
typedef SkNx<16, uint8_t> Sk16b;
|
|
|
|
|
|
|
|
typedef SkNx<4, int> Sk4i;
|
2015-04-27 19:08:01 +00:00
|
|
|
|
2015-03-20 13:33:02 +00:00
|
|
|
#endif//SkNx_DEFINED
|