2015-03-20 13:33:02 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2015 Google Inc.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef SkNx_DEFINED
|
|
|
|
#define SkNx_DEFINED
|
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
|
|
|
#define SKNX_NO_SIMDx // Remove the x to disable SIMD for all SkNx types.
|
|
|
|
|
|
|
|
|
|
|
|
#include "SkScalar.h"
|
|
|
|
#include "SkTypes.h"
|
|
|
|
#include <math.h>
|
|
|
|
#define REQUIRE(x) static_assert(x, #x)
|
|
|
|
|
2015-08-12 18:56:43 +00:00
|
|
|
// This file may be included multiple times by .cpp files with different flags, leading
|
|
|
|
// to different definitions. Usually that doesn't matter because it's all inlined, but
|
|
|
|
// in Debug modes the compilers may not inline everything. So wrap everything in an
|
|
|
|
// anonymous namespace to give each includer their own silo of this code (or the linker
|
|
|
|
// will probably pick one randomly for us, which is rarely correct).
|
|
|
|
namespace {
|
|
|
|
|
2015-04-14 18:49:14 +00:00
|
|
|
// The default implementations just fall back on a pair of size N/2.
|
|
|
|
|
2015-04-14 21:02:52 +00:00
|
|
|
template <int N, typename T>
|
|
|
|
class SkNi {
|
|
|
|
public:
|
|
|
|
SkNi() {}
|
2015-04-27 19:08:01 +00:00
|
|
|
SkNi(const SkNi<N/2, T>& lo, const SkNi<N/2, T>& hi) : fLo(lo), fHi(hi) {}
|
2015-04-14 21:02:52 +00:00
|
|
|
explicit SkNi(T val) : fLo(val), fHi(val) {}
|
|
|
|
static SkNi Load(const T vals[N]) {
|
|
|
|
return SkNi(SkNi<N/2,T>::Load(vals), SkNi<N/2,T>::Load(vals+N/2));
|
|
|
|
}
|
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
SkNi(T a, T b) : fLo(a), fHi(b) { REQUIRE(N==2); }
|
|
|
|
SkNi(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4); }
|
|
|
|
SkNi(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { REQUIRE(N==8); }
|
|
|
|
SkNi(T a, T b, T c, T d, T e, T f, T g, T h,
|
|
|
|
T i, T j, T k, T l, T m, T n, T o, T p)
|
|
|
|
: fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
|
|
|
void store(T vals[N]) const {
|
|
|
|
fLo.store(vals);
|
|
|
|
fHi.store(vals+N/2);
|
|
|
|
}
|
|
|
|
|
2015-05-12 22:48:09 +00:00
|
|
|
SkNi saturatedAdd(const SkNi& o) const {
|
|
|
|
return SkNi(fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi));
|
|
|
|
}
|
|
|
|
|
2015-04-14 21:02:52 +00:00
|
|
|
SkNi operator + (const SkNi& o) const { return SkNi(fLo + o.fLo, fHi + o.fHi); }
|
|
|
|
SkNi operator - (const SkNi& o) const { return SkNi(fLo - o.fLo, fHi - o.fHi); }
|
|
|
|
SkNi operator * (const SkNi& o) const { return SkNi(fLo * o.fLo, fHi * o.fHi); }
|
|
|
|
|
|
|
|
SkNi operator << (int bits) const { return SkNi(fLo << bits, fHi << bits); }
|
|
|
|
SkNi operator >> (int bits) const { return SkNi(fLo >> bits, fHi >> bits); }
|
|
|
|
|
2015-05-15 00:53:04 +00:00
|
|
|
static SkNi Min(const SkNi& a, const SkNi& b) {
|
|
|
|
return SkNi(SkNi<N/2, T>::Min(a.fLo, b.fLo), SkNi<N/2, T>::Min(a.fHi, b.fHi));
|
|
|
|
}
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
SkNi operator < (const SkNi& o) const { return SkNi(fLo < o.fLo, fHi < o.fHi); }
|
2015-04-14 21:02:52 +00:00
|
|
|
|
|
|
|
template <int k> T kth() const {
|
|
|
|
SkASSERT(0 <= k && k < N);
|
|
|
|
return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>();
|
|
|
|
}
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
|
|
|
|
bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
|
|
|
|
SkNi thenElse(const SkNi& t, const SkNi& e) const {
|
|
|
|
return SkNi(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi));
|
|
|
|
}
|
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
protected:
|
2015-04-14 21:02:52 +00:00
|
|
|
REQUIRE(0 == (N & (N-1)));
|
|
|
|
|
|
|
|
SkNi<N/2, T> fLo, fHi;
|
|
|
|
};
|
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
template <int N, typename T>
|
|
|
|
class SkNf {
|
2015-04-27 19:08:01 +00:00
|
|
|
static int32_t MyNi(float);
|
|
|
|
static int64_t MyNi(double);
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
typedef decltype(MyNi(T())) I;
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
public:
|
|
|
|
SkNf() {}
|
2015-04-14 18:49:14 +00:00
|
|
|
explicit SkNf(T val) : fLo(val), fHi(val) {}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
static SkNf Load(const T vals[N]) {
|
|
|
|
return SkNf(SkNf<N/2,T>::Load(vals), SkNf<N/2,T>::Load(vals+N/2));
|
|
|
|
}
|
2015-08-31 21:39:59 +00:00
|
|
|
// FromBytes() and toBytes() specializations may assume their argument is N-byte aligned.
|
|
|
|
// E.g. Sk4f::FromBytes() may assume it's reading from a 4-byte-aligned pointer.
|
|
|
|
// Converts [0,255] bytes to [0.0, 255.0] floats.
|
|
|
|
static SkNf FromBytes(const uint8_t bytes[N]) {
|
|
|
|
return SkNf(SkNf<N/2,T>::FromBytes(bytes), SkNf<N/2,T>::FromBytes(bytes+N/2));
|
|
|
|
}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
|
|
|
SkNf(T a, T b) : fLo(a), fHi(b) { REQUIRE(N==2); }
|
|
|
|
SkNf(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4); }
|
|
|
|
SkNf(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) { REQUIRE(N==8); }
|
|
|
|
|
|
|
|
void store(T vals[N]) const {
|
|
|
|
fLo.store(vals);
|
|
|
|
fHi.store(vals+N/2);
|
|
|
|
}
|
2015-08-31 21:39:59 +00:00
|
|
|
// Please see note on FromBytes().
|
2015-09-01 13:29:45 +00:00
|
|
|
// Clamps to [0.0,255.0] floats and truncates to [0,255] bytes.
|
2015-08-31 21:39:59 +00:00
|
|
|
void toBytes(uint8_t bytes[N]) const {
|
|
|
|
fLo.toBytes(bytes);
|
|
|
|
fHi.toBytes(bytes+N/2);
|
|
|
|
}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
SkNi<N,I> castTrunc() const { return SkNi<N,I>(fLo.castTrunc(), fHi.castTrunc()); }
|
2015-04-27 19:08:01 +00:00
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
SkNf operator + (const SkNf& o) const { return SkNf(fLo + o.fLo, fHi + o.fHi); }
|
|
|
|
SkNf operator - (const SkNf& o) const { return SkNf(fLo - o.fLo, fHi - o.fHi); }
|
|
|
|
SkNf operator * (const SkNf& o) const { return SkNf(fLo * o.fLo, fHi * o.fHi); }
|
|
|
|
SkNf operator / (const SkNf& o) const { return SkNf(fLo / o.fLo, fHi / o.fHi); }
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
SkNf operator == (const SkNf& o) const { return SkNf(fLo == o.fLo, fHi == o.fHi); }
|
|
|
|
SkNf operator != (const SkNf& o) const { return SkNf(fLo != o.fLo, fHi != o.fHi); }
|
|
|
|
SkNf operator < (const SkNf& o) const { return SkNf(fLo < o.fLo, fHi < o.fHi); }
|
|
|
|
SkNf operator > (const SkNf& o) const { return SkNf(fLo > o.fLo, fHi > o.fHi); }
|
|
|
|
SkNf operator <= (const SkNf& o) const { return SkNf(fLo <= o.fLo, fHi <= o.fHi); }
|
|
|
|
SkNf operator >= (const SkNf& o) const { return SkNf(fLo >= o.fLo, fHi >= o.fHi); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
|
|
|
static SkNf Min(const SkNf& l, const SkNf& r) {
|
|
|
|
return SkNf(SkNf<N/2,T>::Min(l.fLo, r.fLo), SkNf<N/2,T>::Min(l.fHi, r.fHi));
|
|
|
|
}
|
|
|
|
static SkNf Max(const SkNf& l, const SkNf& r) {
|
|
|
|
return SkNf(SkNf<N/2,T>::Max(l.fLo, r.fLo), SkNf<N/2,T>::Max(l.fHi, r.fHi));
|
|
|
|
}
|
|
|
|
|
|
|
|
SkNf sqrt() const { return SkNf(fLo. sqrt(), fHi. sqrt()); }
|
2015-04-27 21:22:32 +00:00
|
|
|
|
|
|
|
// Generally, increasing precision, increasing cost.
|
|
|
|
SkNf rsqrt0() const { return SkNf(fLo.rsqrt0(), fHi.rsqrt0()); }
|
|
|
|
SkNf rsqrt1() const { return SkNf(fLo.rsqrt1(), fHi.rsqrt1()); }
|
|
|
|
SkNf rsqrt2() const { return SkNf(fLo.rsqrt2(), fHi.rsqrt2()); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
|
|
|
SkNf invert() const { return SkNf(fLo. invert(), fHi. invert()); }
|
|
|
|
SkNf approxInvert() const { return SkNf(fLo.approxInvert(), fHi.approxInvert()); }
|
|
|
|
|
2015-04-03 13:16:13 +00:00
|
|
|
template <int k> T kth() const {
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
SkASSERT(0 <= k && k < N);
|
2015-04-03 13:16:13 +00:00
|
|
|
return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>();
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
}
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
|
|
|
|
bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
|
|
|
|
SkNf thenElse(const SkNf& t, const SkNf& e) const {
|
2015-07-14 12:23:50 +00:00
|
|
|
return SkNf(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi));
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
}
|
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
protected:
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
REQUIRE(0 == (N & (N-1)));
|
|
|
|
SkNf(const SkNf<N/2, T>& lo, const SkNf<N/2, T>& hi) : fLo(lo), fHi(hi) {}
|
|
|
|
|
|
|
|
SkNf<N/2, T> fLo, fHi;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2015-04-14 18:49:14 +00:00
|
|
|
// Bottom out the default implementations with scalars when nothing's been specialized.
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2015-04-14 21:02:52 +00:00
|
|
|
template <typename T>
|
|
|
|
class SkNi<1,T> {
|
|
|
|
public:
|
|
|
|
SkNi() {}
|
|
|
|
explicit SkNi(T val) : fVal(val) {}
|
|
|
|
static SkNi Load(const T vals[1]) { return SkNi(vals[0]); }
|
|
|
|
|
|
|
|
void store(T vals[1]) const { vals[0] = fVal; }
|
|
|
|
|
2015-05-12 22:48:09 +00:00
|
|
|
SkNi saturatedAdd(const SkNi& o) const {
|
|
|
|
SkASSERT((T)(~0) > 0); // TODO: support signed T
|
|
|
|
T sum = fVal + o.fVal;
|
2015-06-15 17:58:42 +00:00
|
|
|
return SkNi(sum < fVal ? (T)(~0) : sum);
|
2015-05-12 22:48:09 +00:00
|
|
|
}
|
|
|
|
|
2015-04-14 21:02:52 +00:00
|
|
|
SkNi operator + (const SkNi& o) const { return SkNi(fVal + o.fVal); }
|
|
|
|
SkNi operator - (const SkNi& o) const { return SkNi(fVal - o.fVal); }
|
|
|
|
SkNi operator * (const SkNi& o) const { return SkNi(fVal * o.fVal); }
|
|
|
|
|
|
|
|
SkNi operator << (int bits) const { return SkNi(fVal << bits); }
|
|
|
|
SkNi operator >> (int bits) const { return SkNi(fVal >> bits); }
|
|
|
|
|
2015-05-15 00:53:04 +00:00
|
|
|
static SkNi Min(const SkNi& a, const SkNi& b) { return SkNi(SkTMin(a.fVal, b.fVal)); }
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
SkNi operator <(const SkNi& o) const { return SkNi(fVal < o.fVal); }
|
2015-05-15 00:53:04 +00:00
|
|
|
|
2015-04-14 21:02:52 +00:00
|
|
|
template <int k> T kth() const {
|
|
|
|
SkASSERT(0 == k);
|
|
|
|
return fVal;
|
|
|
|
}
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
bool allTrue() const { return fVal; }
|
|
|
|
bool anyTrue() const { return fVal; }
|
|
|
|
SkNi thenElse(const SkNi& t, const SkNi& e) const { return fVal ? t : e; }
|
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
protected:
|
2015-04-14 21:02:52 +00:00
|
|
|
T fVal;
|
|
|
|
};
|
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
template <typename T>
|
|
|
|
class SkNf<1,T> {
|
2015-04-27 19:08:01 +00:00
|
|
|
static int32_t MyNi(float);
|
|
|
|
static int64_t MyNi(double);
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
typedef decltype(MyNi(T())) I;
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
public:
|
|
|
|
SkNf() {}
|
2015-04-14 18:49:14 +00:00
|
|
|
explicit SkNf(T val) : fVal(val) {}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
static SkNf Load(const T vals[1]) { return SkNf(vals[0]); }
|
2015-08-31 21:39:59 +00:00
|
|
|
static SkNf FromBytes(const uint8_t bytes[1]) { return SkNf((T)bytes[0]); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
|
|
|
void store(T vals[1]) const { vals[0] = fVal; }
|
2015-09-01 13:29:45 +00:00
|
|
|
void toBytes(uint8_t bytes[1]) const { bytes[0] = (uint8_t)(SkTMin(fVal, (T)255.0)); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
SkNi<1,I> castTrunc() const { return SkNi<1,I>(fVal); }
|
2015-04-27 19:08:01 +00:00
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
SkNf operator + (const SkNf& o) const { return SkNf(fVal + o.fVal); }
|
|
|
|
SkNf operator - (const SkNf& o) const { return SkNf(fVal - o.fVal); }
|
|
|
|
SkNf operator * (const SkNf& o) const { return SkNf(fVal * o.fVal); }
|
|
|
|
SkNf operator / (const SkNf& o) const { return SkNf(fVal / o.fVal); }
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
SkNf operator == (const SkNf& o) const { return SkNf(fVal == o.fVal); }
|
|
|
|
SkNf operator != (const SkNf& o) const { return SkNf(fVal != o.fVal); }
|
|
|
|
SkNf operator < (const SkNf& o) const { return SkNf(fVal < o.fVal); }
|
|
|
|
SkNf operator > (const SkNf& o) const { return SkNf(fVal > o.fVal); }
|
|
|
|
SkNf operator <= (const SkNf& o) const { return SkNf(fVal <= o.fVal); }
|
|
|
|
SkNf operator >= (const SkNf& o) const { return SkNf(fVal >= o.fVal); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
|
|
|
static SkNf Min(const SkNf& l, const SkNf& r) { return SkNf(SkTMin(l.fVal, r.fVal)); }
|
|
|
|
static SkNf Max(const SkNf& l, const SkNf& r) { return SkNf(SkTMax(l.fVal, r.fVal)); }
|
|
|
|
|
2015-04-14 18:49:14 +00:00
|
|
|
SkNf sqrt() const { return SkNf(Sqrt(fVal)); }
|
2015-04-27 21:22:32 +00:00
|
|
|
SkNf rsqrt0() const { return SkNf((T)1 / Sqrt(fVal)); }
|
|
|
|
SkNf rsqrt1() const { return this->rsqrt0(); }
|
|
|
|
SkNf rsqrt2() const { return this->rsqrt1(); }
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
|
|
|
SkNf invert() const { return SkNf((T)1 / fVal); }
|
|
|
|
SkNf approxInvert() const { return this->invert(); }
|
|
|
|
|
2015-04-03 13:16:13 +00:00
|
|
|
template <int k> T kth() const {
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
SkASSERT(k == 0);
|
|
|
|
return fVal;
|
|
|
|
}
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
bool allTrue() const { return this->pun(); }
|
|
|
|
bool anyTrue() const { return this->pun(); }
|
|
|
|
SkNf thenElse(const SkNf& t, const SkNf& e) const { return this->pun() ? t : e; }
|
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
protected:
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
// We do double sqrts natively, or via floats for any other type.
|
|
|
|
template <typename U>
|
|
|
|
static U Sqrt(U val) { return (U) ::sqrtf((float)val); }
|
|
|
|
static double Sqrt(double val) { return ::sqrt ( val); }
|
|
|
|
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
I pun() const {
|
|
|
|
union { T f; I i; } pun = { fVal };
|
|
|
|
return pun.i;
|
|
|
|
}
|
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
T fVal;
|
|
|
|
};
|
|
|
|
|
2015-09-10 21:16:07 +00:00
|
|
|
// This default implementation can be specialized by ../opts/SkNx_foo.h
|
|
|
|
// if there's a better platform-specific shuffle strategy.
|
|
|
|
template <typename SkNx, int... Ix>
|
|
|
|
inline SkNx SkNx_shuffle_impl(const SkNx& src) { return SkNx( src.template kth<Ix>()... ); }
|
|
|
|
|
|
|
|
// This generic shuffle can be called on either SkNi or SkNf with 1 or N indices:
|
|
|
|
// Sk4f f(a,b,c,d);
|
|
|
|
// SkNx_shuffle<3>(f); // ~~~> Sk4f(d,d,d,d)
|
|
|
|
// SkNx_shuffle<2,1,0,3>(f); // ~~~> Sk4f(c,b,a,d)
|
|
|
|
template <int... Ix, typename SkNx>
|
|
|
|
inline SkNx SkNx_shuffle(const SkNx& src) { return SkNx_shuffle_impl<SkNx, Ix...>(src); }
|
|
|
|
|
|
|
|
// A reminder alias that shuffles can be used to duplicate a single index across a vector.
|
|
|
|
template <int Ix, typename SkNx>
|
|
|
|
inline SkNx SkNx_dup(const SkNx& src) { return SkNx_shuffle<Ix>(src); }
|
|
|
|
|
2015-08-12 18:56:43 +00:00
|
|
|
} // namespace
|
|
|
|
|
2015-09-10 21:16:07 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
// Include platform specific specializations if available.
|
|
|
|
#ifndef SKNX_NO_SIMD
|
|
|
|
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
|
|
|
|
#include "../opts/SkNx_sse.h"
|
|
|
|
#elif defined(SK_ARM_HAS_NEON)
|
|
|
|
#include "../opts/SkNx_neon.h"
|
|
|
|
#endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#undef REQUIRE
|
|
|
|
|
|
|
|
typedef SkNf<2, float> Sk2f;
|
|
|
|
typedef SkNf<2, double> Sk2d;
|
|
|
|
typedef SkNf<2, SkScalar> Sk2s;
|
|
|
|
|
|
|
|
typedef SkNf<4, float> Sk4f;
|
|
|
|
typedef SkNf<4, double> Sk4d;
|
|
|
|
typedef SkNf<4, SkScalar> Sk4s;
|
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
typedef SkNi<4, uint16_t> Sk4h;
|
|
|
|
typedef SkNi<8, uint16_t> Sk8h;
|
|
|
|
typedef SkNi<16, uint16_t> Sk16h;
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2015-05-12 13:11:21 +00:00
|
|
|
typedef SkNi<16, uint8_t> Sk16b;
|
|
|
|
|
|
|
|
typedef SkNi<4, int32_t> Sk4i;
|
|
|
|
typedef SkNi<4, uint32_t> Sk4u;
|
2015-04-27 19:08:01 +00:00
|
|
|
|
2015-03-20 13:33:02 +00:00
|
|
|
#endif//SkNx_DEFINED
|