2015-03-20 13:33:02 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2015 Google Inc.
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef SkNx_DEFINED
|
|
|
|
#define SkNx_DEFINED
|
|
|
|
|
2016-11-22 13:57:45 +00:00
|
|
|
#include "SkSafe_math.h"
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f is SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g. to SkNf<8, SkScalar> == Sk8s, and to grow down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedefs Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
#include "SkScalar.h"
|
|
|
|
#include "SkTypes.h"
|
2018-06-13 13:59:02 +00:00
|
|
|
|
|
|
|
#include <algorithm>
|
2016-03-21 17:04:46 +00:00
|
|
|
#include <limits>
|
|
|
|
#include <type_traits>
|
2015-08-12 18:56:43 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Every single SkNx method wants to be fully inlined. (We know better than MSVC).
|
|
|
|
#define AI SK_ALWAYS_INLINE
|
2016-10-14 21:09:03 +00:00
|
|
|
|
2018-12-12 13:47:54 +00:00
|
|
|
namespace { // NOLINT(google-build-namespaces)
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance of getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
|
2016-03-21 17:04:46 +00:00
|
|
|
// The default SkNx<N,T> just proxies down to a pair of SkNx<N/2, T>.
|
|
|
|
template <int N, typename T>
|
|
|
|
struct SkNx {
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance about getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
typedef SkNx<N/2, T> Half;
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2016-03-21 17:04:46 +00:00
|
|
|
Half fLo, fHi;
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx() = default;
|
|
|
|
AI SkNx(const Half& lo, const Half& hi) : fLo(lo), fHi(hi) {}
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx(T v) : fLo(v), fHi(v) {}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx(T a, T b) : fLo(a) , fHi(b) { static_assert(N==2, ""); }
|
|
|
|
AI SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) { static_assert(N==4, ""); }
|
|
|
|
AI SkNx(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) {
|
2016-03-21 17:04:46 +00:00
|
|
|
static_assert(N==8, "");
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
}
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx(T a, T b, T c, T d, T e, T f, T g, T h,
|
|
|
|
T i, T j, T k, T l, T m, T n, T o, T p)
|
|
|
|
: fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) {
|
2016-03-21 17:04:46 +00:00
|
|
|
static_assert(N==16, "");
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
}
|
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI T operator[](int k) const {
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
SkASSERT(0 <= k && k < N);
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance about getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
return k < N/2 ? fLo[k] : fHi[k-N/2];
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
}
|
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static SkNx Load(const void* vptr) {
|
2016-03-21 17:04:46 +00:00
|
|
|
auto ptr = (const char*)vptr;
|
|
|
|
return { Half::Load(ptr), Half::Load(ptr + N/2*sizeof(T)) };
|
|
|
|
}
|
2016-10-19 13:21:11 +00:00
|
|
|
AI void store(void* vptr) const {
|
2016-03-21 17:04:46 +00:00
|
|
|
auto ptr = (char*)vptr;
|
|
|
|
fLo.store(ptr);
|
|
|
|
fHi.store(ptr + N/2*sizeof(T));
|
|
|
|
}
|
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static void Load4(const void* vptr, SkNx* a, SkNx* b, SkNx* c, SkNx* d) {
|
2016-10-06 15:09:27 +00:00
|
|
|
auto ptr = (const char*)vptr;
|
|
|
|
Half al, bl, cl, dl,
|
|
|
|
ah, bh, ch, dh;
|
|
|
|
Half::Load4(ptr , &al, &bl, &cl, &dl);
|
|
|
|
Half::Load4(ptr + 4*N/2*sizeof(T), &ah, &bh, &ch, &dh);
|
|
|
|
*a = SkNx{al, ah};
|
|
|
|
*b = SkNx{bl, bh};
|
|
|
|
*c = SkNx{cl, ch};
|
|
|
|
*d = SkNx{dl, dh};
|
|
|
|
}
|
2017-01-19 17:04:32 +00:00
|
|
|
AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) {
|
|
|
|
auto ptr = (const char*)vptr;
|
|
|
|
Half al, bl, cl,
|
|
|
|
ah, bh, ch;
|
|
|
|
Half::Load3(ptr , &al, &bl, &cl);
|
|
|
|
Half::Load3(ptr + 3*N/2*sizeof(T), &ah, &bh, &ch);
|
|
|
|
*a = SkNx{al, ah};
|
|
|
|
*b = SkNx{bl, bh};
|
|
|
|
*c = SkNx{cl, ch};
|
|
|
|
}
|
2017-11-30 17:07:20 +00:00
|
|
|
AI static void Load2(const void* vptr, SkNx* a, SkNx* b) {
|
|
|
|
auto ptr = (const char*)vptr;
|
|
|
|
Half al, bl,
|
|
|
|
ah, bh;
|
|
|
|
Half::Load2(ptr , &al, &bl);
|
|
|
|
Half::Load2(ptr + 2*N/2*sizeof(T), &ah, &bh);
|
|
|
|
*a = SkNx{al, ah};
|
|
|
|
*b = SkNx{bl, bh};
|
|
|
|
}
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
|
2016-10-06 15:09:27 +00:00
|
|
|
auto ptr = (char*)vptr;
|
|
|
|
Half::Store4(ptr, a.fLo, b.fLo, c.fLo, d.fLo);
|
|
|
|
Half::Store4(ptr + 4*N/2*sizeof(T), a.fHi, b.fHi, c.fHi, d.fHi);
|
|
|
|
}
|
2017-12-01 20:23:05 +00:00
|
|
|
AI static void Store3(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c) {
|
|
|
|
auto ptr = (char*)vptr;
|
|
|
|
Half::Store3(ptr, a.fLo, b.fLo, c.fLo);
|
|
|
|
Half::Store3(ptr + 3*N/2*sizeof(T), a.fHi, b.fHi, c.fHi);
|
|
|
|
}
|
2018-04-09 05:58:43 +00:00
|
|
|
AI static void Store2(void* vptr, const SkNx& a, const SkNx& b) {
|
|
|
|
auto ptr = (char*)vptr;
|
|
|
|
Half::Store2(ptr, a.fLo, b.fLo);
|
|
|
|
Half::Store2(ptr + 2*N/2*sizeof(T), a.fHi, b.fHi);
|
|
|
|
}
|
2016-10-06 15:09:27 +00:00
|
|
|
|
2018-06-11 15:56:57 +00:00
|
|
|
AI T min() const { return SkTMin(fLo.min(), fHi.min()); }
|
|
|
|
AI T max() const { return SkTMax(fLo.max(), fHi.max()); }
|
2016-10-19 13:21:11 +00:00
|
|
|
AI bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); }
|
|
|
|
AI bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); }
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx abs() const { return { fLo. abs(), fHi. abs() }; }
|
|
|
|
AI SkNx sqrt() const { return { fLo. sqrt(), fHi. sqrt() }; }
|
|
|
|
AI SkNx rsqrt() const { return { fLo. rsqrt(), fHi. rsqrt() }; }
|
|
|
|
AI SkNx floor() const { return { fLo. floor(), fHi. floor() }; }
|
|
|
|
AI SkNx invert() const { return { fLo.invert(), fHi.invert() }; }
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx operator!() const { return { !fLo, !fHi }; }
|
|
|
|
AI SkNx operator-() const { return { -fLo, -fHi }; }
|
|
|
|
AI SkNx operator~() const { return { ~fLo, ~fHi }; }
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx operator<<(int bits) const { return { fLo << bits, fHi << bits }; }
|
|
|
|
AI SkNx operator>>(int bits) const { return { fLo >> bits, fHi >> bits }; }
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx operator+(const SkNx& y) const { return { fLo + y.fLo, fHi + y.fHi }; }
|
|
|
|
AI SkNx operator-(const SkNx& y) const { return { fLo - y.fLo, fHi - y.fHi }; }
|
|
|
|
AI SkNx operator*(const SkNx& y) const { return { fLo * y.fLo, fHi * y.fHi }; }
|
|
|
|
AI SkNx operator/(const SkNx& y) const { return { fLo / y.fLo, fHi / y.fHi }; }
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx operator&(const SkNx& y) const { return { fLo & y.fLo, fHi & y.fHi }; }
|
|
|
|
AI SkNx operator|(const SkNx& y) const { return { fLo | y.fLo, fHi | y.fHi }; }
|
|
|
|
AI SkNx operator^(const SkNx& y) const { return { fLo ^ y.fLo, fHi ^ y.fHi }; }
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx operator==(const SkNx& y) const { return { fLo == y.fLo, fHi == y.fHi }; }
|
|
|
|
AI SkNx operator!=(const SkNx& y) const { return { fLo != y.fLo, fHi != y.fHi }; }
|
|
|
|
AI SkNx operator<=(const SkNx& y) const { return { fLo <= y.fLo, fHi <= y.fHi }; }
|
|
|
|
AI SkNx operator>=(const SkNx& y) const { return { fLo >= y.fLo, fHi >= y.fHi }; }
|
|
|
|
AI SkNx operator< (const SkNx& y) const { return { fLo < y.fLo, fHi < y.fHi }; }
|
|
|
|
AI SkNx operator> (const SkNx& y) const { return { fLo > y.fLo, fHi > y.fHi }; }
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx saturatedAdd(const SkNx& y) const {
|
2016-03-21 17:04:46 +00:00
|
|
|
return { fLo.saturatedAdd(y.fLo), fHi.saturatedAdd(y.fHi) };
|
|
|
|
}
|
2017-10-10 21:14:18 +00:00
|
|
|
|
|
|
|
AI SkNx mulHi(const SkNx& m) const {
|
|
|
|
return { fLo.mulHi(m.fLo), fHi.mulHi(m.fHi) };
|
|
|
|
}
|
2016-10-19 13:21:11 +00:00
|
|
|
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
|
2016-03-21 17:04:46 +00:00
|
|
|
return { fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi) };
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
}
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static SkNx Min(const SkNx& x, const SkNx& y) {
|
2016-03-21 17:04:46 +00:00
|
|
|
return { Half::Min(x.fLo, y.fLo), Half::Min(x.fHi, y.fHi) };
|
|
|
|
}
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static SkNx Max(const SkNx& x, const SkNx& y) {
|
2016-03-21 17:04:46 +00:00
|
|
|
return { Half::Max(x.fLo, y.fLo), Half::Max(x.fHi, y.fHi) };
|
|
|
|
}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g to SkNf<8, SkScalar> == Sk8s, and to grown down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
};
|
|
|
|
|
2016-03-21 17:04:46 +00:00
|
|
|
// The N -> N/2 recursion bottoms out at N == 1, a scalar value.
|
2015-04-14 21:02:52 +00:00
|
|
|
template <typename T>
|
2016-03-21 17:04:46 +00:00
|
|
|
struct SkNx<1,T> {
|
|
|
|
T fVal;
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance of getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Defaulted default constructor; fVal is given no explicit initial value here.
AI SkNx() = default;
|
|
|
|
// Wraps a scalar as a one-lane vector.  Deliberately not explicit, so a plain T converts.
AI SkNx(T v)
    : fVal(v)
{}
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2016-07-12 16:12:30 +00:00
|
|
|
// Android complains against unused parameters, so we guard it
|
2016-10-19 13:21:11 +00:00
|
|
|
AI T operator[](int SkDEBUGCODE(k)) const {
|
2016-03-21 17:04:46 +00:00
|
|
|
SkASSERT(k == 0);
|
|
|
|
return fVal;
|
2016-01-31 16:02:47 +00:00
|
|
|
}
|
2015-04-14 21:02:52 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Builds a one-lane vector by copying sizeof(T) bytes from ptr.
AI static SkNx Load(const void* ptr) {
    SkNx loaded;
    memcpy(&loaded, ptr, sizeof(T));
    return loaded;
}
|
2016-10-19 13:21:11 +00:00
|
|
|
// Copies the lane's sizeof(T) bytes out to ptr.
AI void store(void* ptr) const {
    memcpy(ptr, &fVal, sizeof(T));
}
|
2015-05-12 22:48:09 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Reads four consecutive T values from vptr into *a, *b, *c and *d, in that order.
AI static void Load4(const void* vptr, SkNx* a, SkNx* b, SkNx* c, SkNx* d) {
    const char* src = (const char*)vptr;
    const auto stride = sizeof(T);
    *a = Load(src + 0*stride);
    *b = Load(src + 1*stride);
    *c = Load(src + 2*stride);
    *d = Load(src + 3*stride);
}
|
2017-01-19 17:04:32 +00:00
|
|
|
// Reads three consecutive T values from vptr into *a, *b and *c, in that order.
AI static void Load3(const void* vptr, SkNx* a, SkNx* b, SkNx* c) {
    const char* src = (const char*)vptr;
    const auto stride = sizeof(T);
    *a = Load(src + 0*stride);
    *b = Load(src + 1*stride);
    *c = Load(src + 2*stride);
}
|
2017-11-30 17:07:20 +00:00
|
|
|
// Reads two consecutive T values from vptr into *a and *b, in that order.
AI static void Load2(const void* vptr, SkNx* a, SkNx* b) {
    const char* src = (const char*)vptr;
    const auto stride = sizeof(T);
    *a = Load(src + 0*stride);
    *b = Load(src + 1*stride);
}
|
2016-10-19 13:21:11 +00:00
|
|
|
// Writes a, b, c and d to vptr as four consecutive T values, in that order.
AI static void Store4(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c, const SkNx& d) {
    char* dst = (char*)vptr;
    const auto stride = sizeof(T);
    a.store(dst + 0*stride);
    b.store(dst + 1*stride);
    c.store(dst + 2*stride);
    d.store(dst + 3*stride);
}
|
2017-12-01 20:23:05 +00:00
|
|
|
// Writes a, b and c to vptr as three consecutive T values, in that order.
AI static void Store3(void* vptr, const SkNx& a, const SkNx& b, const SkNx& c) {
    char* dst = (char*)vptr;
    const auto stride = sizeof(T);
    a.store(dst + 0*stride);
    b.store(dst + 1*stride);
    c.store(dst + 2*stride);
}
|
2018-04-09 05:58:43 +00:00
|
|
|
// Writes a and b to vptr as two consecutive T values, in that order.
AI static void Store2(void* vptr, const SkNx& a, const SkNx& b) {
    char* dst = (char*)vptr;
    const auto stride = sizeof(T);
    a.store(dst + 0*stride);
    b.store(dst + 1*stride);
}
|
2016-10-06 15:09:27 +00:00
|
|
|
|
2018-04-11 19:18:09 +00:00
|
|
|
// Smallest lane; with one lane that is simply the stored value.
AI T min() const {
    return fVal;
}
|
|
|
|
// Largest lane; with one lane that is simply the stored value.
AI T max() const {
    return fVal;
}
|
2016-10-19 13:21:11 +00:00
|
|
|
// True when the lane compares unequal to zero.
AI bool anyTrue() const {
    const bool laneIsSet = (fVal != 0);
    return laneIsSet;
}
|
|
|
|
// True when the lane compares unequal to zero (same test as anyTrue() for a single lane).
AI bool allTrue() const {
    const bool laneIsSet = (fVal != 0);
    return laneIsSet;
}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f is SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g. to SkNf<8, SkScalar> == Sk8s, and to grow down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedefs Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Absolute value, via whichever private Abs() overload matches T.
AI SkNx abs() const {
    return SkNx(Abs(fVal));
}
|
|
|
|
// Square root, via whichever private Sqrt() overload matches T.
AI SkNx sqrt() const {
    return SkNx(Sqrt(fVal));
}
|
|
|
|
// Reciprocal square root, computed as T(1) divided by sqrt().
AI SkNx rsqrt() const {
    return T(1) / this->sqrt();
}
|
|
|
|
// Floor, via whichever private Floor() overload matches T.
AI SkNx floor() const {
    return SkNx(Floor(fVal));
}
|
|
|
|
// Reciprocal, computed as T(1) divided by this vector.
AI SkNx invert() const {
    return T(1) / *this;
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Logical not of the lane; the bool result is converted back to T.
AI SkNx operator!() const {
    return SkNx(!fVal);
}
|
|
|
|
// Unary minus of the lane.
AI SkNx operator-() const {
    return SkNx(-fVal);
}
|
|
|
|
// Bitwise complement.  ToBits()/FromBits() let this work on float and double bit patterns too.
AI SkNx operator~() const {
    const auto bits = ToBits(fVal);
    return FromBits(~bits);
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Shifts the lane left by the given number of bits.
AI SkNx operator<<(int bits) const {
    return SkNx(fVal << bits);
}
|
|
|
|
// Shifts the lane right by the given number of bits.
AI SkNx operator>>(int bits) const {
    return SkNx(fVal >> bits);
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Sum of the two lanes.
AI SkNx operator+(const SkNx& y) const {
    return SkNx(fVal + y.fVal);
}
|
|
|
|
// Difference of the two lanes.
AI SkNx operator-(const SkNx& y) const {
    return SkNx(fVal - y.fVal);
}
|
|
|
|
// Product of the two lanes.
AI SkNx operator*(const SkNx& y) const {
    return SkNx(fVal * y.fVal);
}
|
|
|
|
// Quotient of the two lanes.
AI SkNx operator/(const SkNx& y) const {
    return SkNx(fVal / y.fVal);
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Bitwise AND of the two lanes' bit patterns (works for float/double via ToBits/FromBits).
AI SkNx operator&(const SkNx& y) const {
    const auto lhs = ToBits(fVal);
    const auto rhs = ToBits(y.fVal);
    return FromBits(lhs & rhs);
}
|
|
|
|
// Bitwise OR of the two lanes' bit patterns (works for float/double via ToBits/FromBits).
AI SkNx operator|(const SkNx& y) const {
    const auto lhs = ToBits(fVal);
    const auto rhs = ToBits(y.fVal);
    return FromBits(lhs | rhs);
}
|
|
|
|
// Bitwise XOR of the two lanes' bit patterns (works for float/double via ToBits/FromBits).
AI SkNx operator^(const SkNx& y) const {
    const auto lhs = ToBits(fVal);
    const auto rhs = ToBits(y.fVal);
    return FromBits(lhs ^ rhs);
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Equality test returning a mask: every bit of the lane is set when the values are equal and
// clear otherwise.  The mask comes from a 64-bit constant because FromBits() requires its
// argument to be at least sizeof(T) bytes; an int-sized mask is too narrow for double or
// 64-bit integer lanes.  For narrower T the copied bits are the same as before.
AI SkNx operator==(const SkNx& y) const {
    return FromBits(fVal == y.fVal ? ~uint64_t(0) : uint64_t(0));
}
|
|
|
|
// Inequality test returning a mask: every bit of the lane is set when the values differ and
// clear otherwise.  The mask comes from a 64-bit constant because FromBits() requires its
// argument to be at least sizeof(T) bytes; an int-sized mask is too narrow for double or
// 64-bit integer lanes.  For narrower T the copied bits are the same as before.
AI SkNx operator!=(const SkNx& y) const {
    return FromBits(fVal != y.fVal ? ~uint64_t(0) : uint64_t(0));
}
|
|
|
|
// Less-or-equal test returning a mask: every bit of the lane is set when fVal <= y.fVal and
// clear otherwise.  The mask comes from a 64-bit constant because FromBits() requires its
// argument to be at least sizeof(T) bytes; an int-sized mask is too narrow for double or
// 64-bit integer lanes.  For narrower T the copied bits are the same as before.
AI SkNx operator<=(const SkNx& y) const {
    return FromBits(fVal <= y.fVal ? ~uint64_t(0) : uint64_t(0));
}
|
|
|
|
// Greater-or-equal test returning a mask: every bit of the lane is set when fVal >= y.fVal
// and clear otherwise.  The mask comes from a 64-bit constant because FromBits() requires its
// argument to be at least sizeof(T) bytes; an int-sized mask is too narrow for double or
// 64-bit integer lanes.  For narrower T the copied bits are the same as before.
AI SkNx operator>=(const SkNx& y) const {
    return FromBits(fVal >= y.fVal ? ~uint64_t(0) : uint64_t(0));
}
|
|
|
|
// Less-than test returning a mask: every bit of the lane is set when fVal < y.fVal and clear
// otherwise.  The mask comes from a 64-bit constant because FromBits() requires its argument
// to be at least sizeof(T) bytes; an int-sized mask is too narrow for double or 64-bit
// integer lanes.  For narrower T the copied bits are the same as before.
AI SkNx operator< (const SkNx& y) const {
    return FromBits(fVal < y.fVal ? ~uint64_t(0) : uint64_t(0));
}
|
|
|
|
// Greater-than test returning a mask: every bit of the lane is set when fVal > y.fVal and
// clear otherwise.  The mask comes from a 64-bit constant because FromBits() requires its
// argument to be at least sizeof(T) bytes; an int-sized mask is too narrow for double or
// 64-bit integer lanes.  For narrower T the copied bits are the same as before.
AI SkNx operator> (const SkNx& y) const {
    return FromBits(fVal > y.fVal ? ~uint64_t(0) : uint64_t(0));
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Returns x when x is strictly less than y, otherwise y.
AI static SkNx Min(const SkNx& x, const SkNx& y) {
    if (x.fVal < y.fVal) {
        return x;
    }
    return y;
}
|
|
|
|
// Returns x when x is strictly greater than y, otherwise y.
AI static SkNx Max(const SkNx& x, const SkNx& y) {
    if (x.fVal > y.fVal) {
        return x;
    }
    return y;
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Adds y, clamping to the largest T instead of wrapping.  Only defined for unsigned T: a
// wrapped sum is detected by it coming out smaller than one of its operands.
AI SkNx saturatedAdd(const SkNx& y) const {
    static_assert(std::is_unsigned<T>::value, "");
    const T wrapped = fVal + y.fVal;
    if (wrapped < fVal) {
        return std::numeric_limits<T>::max();
    }
    return wrapped;
}
|
|
|
|
|
2017-10-10 21:14:18 +00:00
|
|
|
// Upper sizeof(T)*8 bits of the full-width product fVal * m.fVal.  Restricted to unsigned T of
// at most 32 bits so the product fits in the 64-bit intermediate.
AI SkNx mulHi(const SkNx& m) const {
    static_assert(std::is_unsigned<T>::value, "");
    static_assert(sizeof(T) <= 4, "");
    const uint64_t product = static_cast<uint64_t>(fVal) * m.fVal;
    return static_cast<T>(product >> (sizeof(T)*8));
}
|
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Select: yields t when this lane compares unequal to zero, otherwise e.
AI SkNx thenElse(const SkNx& t, const SkNx& e) const {
    if (fVal != 0) {
        return t;
    }
    return e;
}
|
Implement four more xfermodes with Sk4px.
HardLight, Overlay, Darken, and Lighten are all
~2x faster with SSE, ~25% faster with NEON.
This covers all previously-implemented NEON xfermodes.
3 previous SSE xfermodes remain. Those need division
and sqrt, so I'm planning on using SkPMFloat for them.
It'll help the readability and NEON speed if I move that
into [0,1] space first.
The main new concept here is c.thenElse(t,e), which behaves like
(c ? t : e) except, of course, both t and e are evaluated. This allows
us to emulate conditionals with vectors.
This also removes the concept of SkNb. Instead of a standalone bool
vector, each SkNi or SkNf will just return their own types for
comparisons. Turns out to be a lot more manageable this way.
BUG=skia:
Committed: https://skia.googlesource.com/skia/+/b9d4163bebab0f5639f9c5928bb5fc15f472dddc
CQ_EXTRA_TRYBOTS=client.skia.compile:Build-Ubuntu-GCC-Arm64-Debug-Android-Trybot
Review URL: https://codereview.chromium.org/1196713004
2015-06-24 22:18:39 +00:00
|
|
|
|
2016-03-21 17:04:46 +00:00
|
|
|
private:
|
|
|
|
// Helper functions to choose the right float/double methods. (In <cmath> madness lies...)
|
2017-07-12 17:36:05 +00:00
|
|
|
// int overload: negates negative inputs, passes everything else through.
AI static int Abs(int val) {
    if (val < 0) {
        return -val;
    }
    return val;
}
|
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// float overload: forwards to the single-precision C routine fabsf().
AI static float Abs(float val) {
    return ::fabsf(val);
}
|
|
|
|
// float overload: forwards to the single-precision C routine sqrtf().
AI static float Sqrt(float val) {
    return ::sqrtf(val);
}
|
|
|
|
// float overload: forwards to the single-precision C routine floorf().
AI static float Floor(float val) {
    return ::floorf(val);
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// double overload: forwards to the double-precision C routine fabs().
AI static double Abs(double val) {
    return ::fabs(val);
}
|
|
|
|
// double overload: forwards to the double-precision C routine sqrt().
AI static double Sqrt(double val) {
    return ::sqrt(val);
}
|
|
|
|
// double overload: forwards to the double-precision C routine floor().
AI static double Floor(double val) {
    return ::floor(val);
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
|
|
|
// Helper functions for working with floats/doubles as bit patterns.
|
2016-10-19 13:21:11 +00:00
|
|
|
template <typename U>
|
|
|
|
AI static U ToBits(U v) { return v; }
|
|
|
|
// float overload: copies the float's bytes into an int32_t with memcpy and returns that integer.
AI static int32_t ToBits(float v) {
    int32_t bits;
    memcpy(&bits, &v, sizeof(v));
    return bits;
}
|
|
|
|
// double overload: copies the double's bytes into an int64_t with memcpy and returns that integer.
AI static int64_t ToBits(double v) {
    int64_t bits;
    memcpy(&bits, &v, sizeof(v));
    return bits;
}
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
template <typename Bits>
|
|
|
|
AI static T FromBits(Bits bits) {
|
2016-03-21 17:04:46 +00:00
|
|
|
static_assert(std::is_pod<T >::value &&
|
|
|
|
std::is_pod<Bits>::value &&
|
|
|
|
sizeof(T) <= sizeof(Bits), "");
|
|
|
|
T val;
|
|
|
|
memcpy(&val, &bits, sizeof(T));
|
|
|
|
return val;
|
|
|
|
}
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g. to SkNf<8, SkScalar> == Sk8s, and to grow down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
};
|
|
|
|
|
2016-03-21 17:04:46 +00:00
|
|
|
// Allow scalars on the left or right of binary operators, and things like +=, &=, etc.
|
2016-10-19 13:21:11 +00:00
|
|
|
#define V template <int N, typename T> AI static SkNx<N,T>
|
2016-03-21 17:04:46 +00:00
|
|
|
V operator+ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) + y; }
|
|
|
|
V operator- (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) - y; }
|
|
|
|
V operator* (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) * y; }
|
|
|
|
V operator/ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) / y; }
|
|
|
|
V operator& (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) & y; }
|
|
|
|
V operator| (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) | y; }
|
|
|
|
V operator^ (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) ^ y; }
|
|
|
|
V operator==(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) == y; }
|
|
|
|
V operator!=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) != y; }
|
|
|
|
V operator<=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) <= y; }
|
|
|
|
V operator>=(T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) >= y; }
|
|
|
|
V operator< (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) < y; }
|
|
|
|
V operator> (T x, const SkNx<N,T>& y) { return SkNx<N,T>(x) > y; }
|
|
|
|
|
|
|
|
V operator+ (const SkNx<N,T>& x, T y) { return x + SkNx<N,T>(y); }
|
|
|
|
V operator- (const SkNx<N,T>& x, T y) { return x - SkNx<N,T>(y); }
|
|
|
|
V operator* (const SkNx<N,T>& x, T y) { return x * SkNx<N,T>(y); }
|
|
|
|
V operator/ (const SkNx<N,T>& x, T y) { return x / SkNx<N,T>(y); }
|
|
|
|
V operator& (const SkNx<N,T>& x, T y) { return x & SkNx<N,T>(y); }
|
|
|
|
V operator| (const SkNx<N,T>& x, T y) { return x | SkNx<N,T>(y); }
|
|
|
|
V operator^ (const SkNx<N,T>& x, T y) { return x ^ SkNx<N,T>(y); }
|
|
|
|
V operator==(const SkNx<N,T>& x, T y) { return x == SkNx<N,T>(y); }
|
|
|
|
V operator!=(const SkNx<N,T>& x, T y) { return x != SkNx<N,T>(y); }
|
|
|
|
V operator<=(const SkNx<N,T>& x, T y) { return x <= SkNx<N,T>(y); }
|
|
|
|
V operator>=(const SkNx<N,T>& x, T y) { return x >= SkNx<N,T>(y); }
|
|
|
|
V operator< (const SkNx<N,T>& x, T y) { return x < SkNx<N,T>(y); }
|
|
|
|
V operator> (const SkNx<N,T>& x, T y) { return x > SkNx<N,T>(y); }
|
|
|
|
|
|
|
|
V& operator<<=(SkNx<N,T>& x, int bits) { return (x = x << bits); }
|
|
|
|
V& operator>>=(SkNx<N,T>& x, int bits) { return (x = x >> bits); }
|
|
|
|
|
|
|
|
V& operator +=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x + y); }
|
|
|
|
V& operator -=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x - y); }
|
|
|
|
V& operator *=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x * y); }
|
|
|
|
V& operator /=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x / y); }
|
|
|
|
V& operator &=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x & y); }
|
|
|
|
V& operator |=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x | y); }
|
|
|
|
V& operator ^=(SkNx<N,T>& x, const SkNx<N,T>& y) { return (x = x ^ y); }
|
|
|
|
|
|
|
|
V& operator +=(SkNx<N,T>& x, T y) { return (x = x + SkNx<N,T>(y)); }
|
|
|
|
V& operator -=(SkNx<N,T>& x, T y) { return (x = x - SkNx<N,T>(y)); }
|
|
|
|
V& operator *=(SkNx<N,T>& x, T y) { return (x = x * SkNx<N,T>(y)); }
|
|
|
|
V& operator /=(SkNx<N,T>& x, T y) { return (x = x / SkNx<N,T>(y)); }
|
|
|
|
V& operator &=(SkNx<N,T>& x, T y) { return (x = x & SkNx<N,T>(y)); }
|
|
|
|
V& operator |=(SkNx<N,T>& x, T y) { return (x = x | SkNx<N,T>(y)); }
|
|
|
|
V& operator ^=(SkNx<N,T>& x, T y) { return (x = x ^ SkNx<N,T>(y)); }
|
|
|
|
#undef V
|
|
|
|
|
|
|
|
// SkNx<N,T> ~~> SkNx<N/2,T> + SkNx<N/2,T>
|
|
|
|
template <int N, typename T>
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static void SkNx_split(const SkNx<N,T>& v, SkNx<N/2,T>* lo, SkNx<N/2,T>* hi) {
|
2016-03-21 17:04:46 +00:00
|
|
|
*lo = v.fLo;
|
|
|
|
*hi = v.fHi;
|
2015-11-20 21:53:19 +00:00
|
|
|
}
|
2015-09-10 21:16:07 +00:00
|
|
|
|
2016-03-21 17:04:46 +00:00
|
|
|
// SkNx<N/2,T> + SkNx<N/2,T> ~~> SkNx<N,T>
|
|
|
|
template <int N, typename T>
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static SkNx<N*2,T> SkNx_join(const SkNx<N,T>& lo, const SkNx<N,T>& hi) {
|
2016-03-21 17:04:46 +00:00
|
|
|
return { lo, hi };
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance about getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
}
|
2015-08-12 18:56:43 +00:00
|
|
|
|
2016-03-21 17:04:46 +00:00
|
|
|
// A very generic shuffle. Can reorder, duplicate, contract, expand...
|
|
|
|
// Sk4f v = { R,G,B,A };
|
|
|
|
// SkNx_shuffle<2,1,0,3>(v) ~~> {B,G,R,A}
|
|
|
|
// SkNx_shuffle<2,1>(v) ~~> {B,G}
|
|
|
|
// SkNx_shuffle<2,1,2,1,2,1,2,1>(v) ~~> {B,G,B,G,B,G,B,G}
|
|
|
|
// SkNx_shuffle<3,3,3,3>(v) ~~> {A,A,A,A}
|
|
|
|
template <int... Ix, int N, typename T>
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static SkNx<sizeof...(Ix),T> SkNx_shuffle(const SkNx<N,T>& v) {
|
2016-03-21 17:04:46 +00:00
|
|
|
return { v[Ix]... };
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance about getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
}
|
2015-12-15 15:38:54 +00:00
|
|
|
|
2016-03-21 17:04:46 +00:00
|
|
|
// Cast from SkNx<N, Src> to SkNx<N, Dst>, as if you called static_cast<Dst>(Src).
|
|
|
|
template <typename Dst, typename Src, int N>
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static SkNx<N,Dst> SkNx_cast(const SkNx<N,Src>& v) {
|
2016-03-21 17:04:46 +00:00
|
|
|
return { SkNx_cast<Dst>(v.fLo), SkNx_cast<Dst>(v.fHi) };
|
|
|
|
}
|
|
|
|
template <typename Dst, typename Src>
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static SkNx<1,Dst> SkNx_cast(const SkNx<1,Src>& v) {
|
2016-03-21 17:04:46 +00:00
|
|
|
return static_cast<Dst>(v.fVal);
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance about getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
}
|
2015-12-15 15:38:54 +00:00
|
|
|
|
2016-10-12 13:52:55 +00:00
|
|
|
template <int N, typename T>
|
2016-10-19 13:21:11 +00:00
|
|
|
AI static SkNx<N,T> SkNx_fma(const SkNx<N,T>& f, const SkNx<N,T>& m, const SkNx<N,T>& a) {
|
2016-10-12 13:52:55 +00:00
|
|
|
return f*m+a;
|
|
|
|
}
|
|
|
|
|
2016-10-14 21:09:03 +00:00
|
|
|
} // namespace
|
|
|
|
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance of getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
// float vectors of 2, 4, 8 and 16 lanes.
using Sk2f  = SkNx< 2, float>;
using Sk4f  = SkNx< 4, float>;
using Sk8f  = SkNx< 8, float>;
using Sk16f = SkNx<16, float>;
|
|
|
|
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance of getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
// SkScalar vectors of 2, 4, 8 and 16 lanes.
using Sk2s  = SkNx< 2, SkScalar>;
using Sk4s  = SkNx< 4, SkScalar>;
using Sk8s  = SkNx< 8, SkScalar>;
using Sk16s = SkNx<16, SkScalar>;
|
Refactor Sk2x<T> + Sk4x<T> into SkNf<N,T> and SkNi<N,T>
The primary feature this delivers is SkNf and SkNd for arbitrary power-of-two N. Non-specialized types or types larger than 128 bits should now Just Work (and we can drop in a specialization to make them faster). Sk4s is now just a typedef for SkNf<4, SkScalar>; Sk4d is SkNf<4, double>, Sk2f SkNf<2, float>, etc.
This also makes implementing new specializations easier and more encapsulated. We're now using template specialization, which means the specialized versions don't have to leak out so much from SkNx_sse.h and SkNx_neon.h.
This design leaves us room to grow up, e.g. to SkNf<8, SkScalar> == Sk8s, and to grow down too, to things like SkNi<8, uint16_t> == Sk8h.
To simplify things, I've stripped away most APIs (swizzles, casts, reinterpret_casts) that no one's using yet. I will happily add them back if they seem useful.
You shouldn't feel bad about using any of the typedef Sk4s, Sk4f, Sk4d, Sk2s, Sk2f, Sk2d, Sk4i, etc. Here's how you should feel:
- Sk4f, Sk4s, Sk2d: feel awesome
- Sk2f, Sk2s, Sk4d: feel pretty good
No public API changes.
TBR=reed@google.com
BUG=skia:3592
Review URL: https://codereview.chromium.org/1048593002
2015-03-30 17:50:27 +00:00
|
|
|
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance of getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
// uint8_t vectors of 4 and 8 lanes.
using Sk4b = SkNx<4, uint8_t>;
using Sk8b = SkNx<8, uint8_t>;
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance of getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
// uint8_t vector of 16 lanes.
using Sk16b = SkNx<16, uint8_t>;
|
2016-03-21 17:04:46 +00:00
|
|
|
|
sknx refactoring
- trim unused specializations (Sk4i, Sk2d) and apis (SkNx_dup)
- expand apis a little
* v[0] == v.kth<0>()
* SkNx_shuffle can now convert to different-sized vectors, e.g. Sk2f <-> Sk4f
- remove anonymous namespace
I believe it's safe to remove the anonymous namespace right now.
We're worried about violating the One Definition Rule; the anonymous namespace protected us from that.
In Release builds, this is mostly moot, as everything tends to inline completely.
In Debug builds, violating the ODR is at worst an inconvenience, time spent trying to figure out why the bot is broken.
Now that we're building with SSE2/NEON everywhere, very few bots have even a chance of getting confused by two definitions of the same type or function. Where we do compile variants depending on, e.g., SSSE3, we do so in static inline functions. These are not subject to the ODR.
I plan to follow up with a tedious .kth<...>() -> [...] auto-replace.
BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1683543002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot
Review URL: https://codereview.chromium.org/1683543002
2016-02-09 18:35:27 +00:00
|
|
|
// uint16_t vectors of 4, 8 and 16 lanes.
using Sk4h  = SkNx< 4, uint16_t>;
using Sk8h  = SkNx< 8, uint16_t>;
using Sk16h = SkNx<16, uint16_t>;
|
2015-12-14 19:25:18 +00:00
|
|
|
|
2016-07-29 17:10:15 +00:00
|
|
|
// 32-bit integer vectors: signed with 4 and 8 lanes, unsigned with 4 lanes.
using Sk4i = SkNx<4, int32_t>;
using Sk8i = SkNx<8, int32_t>;
using Sk4u = SkNx<4, uint32_t>;
|
2016-02-16 22:33:08 +00:00
|
|
|
|
2015-12-14 19:25:18 +00:00
|
|
|
// Include platform specific specializations if available.
|
2016-02-08 23:50:22 +00:00
|
|
|
#if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
|
2018-10-29 14:46:05 +00:00
|
|
|
#include "SkNx_sse.h"
|
2015-12-14 19:25:18 +00:00
|
|
|
#elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON)
|
2018-10-29 14:46:05 +00:00
|
|
|
#include "SkNx_neon.h"
|
2016-07-12 21:55:45 +00:00
|
|
|
#else
|
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Portable Sk4f -> Sk4i rounding, compiled only when neither the SSE nor the NEON
// specialization header is included. Each lane is passed through lrintf() (which rounds
// according to the current floating-point rounding mode) and the result is narrowed to int.
AI static Sk4i Sk4f_round(const Sk4f& x) {
    const int r0 = (int) lrintf(x[0]);
    const int r1 = (int) lrintf(x[1]);
    const int r2 = (int) lrintf(x[2]);
    const int r3 = (int) lrintf(x[3]);
    return { r0, r1, r2, r3 };
}
|
|
|
|
|
2015-12-14 19:25:18 +00:00
|
|
|
#endif
|
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
// Packs four Sk4f vectors into 16 bytes at p, in the order a, b, c, d.
// The inputs are joined pairwise into one 16-lane float vector, which is converted to
// uint8_t lanes with SkNx_cast and written out with store().
AI static void Sk4f_ToBytes(uint8_t p[16],
                            const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) {
    const Sk8f  ab   = SkNx_join(a, b);
    const Sk8f  cd   = SkNx_join(c, d);
    const Sk16f abcd = SkNx_join(ab, cd);
    SkNx_cast<uint8_t>(abcd).store(p);
}
|
|
|
|
|
2016-10-19 13:21:11 +00:00
|
|
|
#undef AI
|
2016-03-21 17:04:46 +00:00
|
|
|
|
2015-03-20 13:33:02 +00:00
|
|
|
#endif//SkNx_DEFINED
|