Start to vectorize SkTileGrid.
This adds Sk4x.h to help. BUG=skia: Review URL: https://codereview.chromium.org/634543004
This commit is contained in:
parent
70171683e6
commit
90c7992bfc
@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "Benchmark.h"
|
||||
#include "Sk4x.h"
|
||||
#include "SkGeometry.h"
|
||||
#include "SkRandom.h"
|
||||
#include "SkRect.h"
|
||||
@ -44,6 +45,9 @@ public:
|
||||
GeoRectBench(const char suffix[]) : GeometryBench(suffix) {}
|
||||
|
||||
protected:
|
||||
// void* vptr;
|
||||
size_t align_fRects_to_16Bytes[sizeof(void*) == 8 ? 1 : 3];
|
||||
|
||||
SkRect fRects[2048];
|
||||
|
||||
virtual void onPreDraw() {
|
||||
@ -97,7 +101,7 @@ protected:
|
||||
class GeoRectBench_Intersects : public GeoRectBench {
|
||||
public:
|
||||
GeoRectBench_Intersects() : GeoRectBench("rect_Intersects") {}
|
||||
|
||||
|
||||
protected:
|
||||
virtual void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE {
|
||||
for (int outer = 0; outer < loops; ++outer) {
|
||||
@ -113,7 +117,7 @@ protected:
|
||||
class GeoRectBench_sort : public GeoRectBench {
|
||||
public:
|
||||
GeoRectBench_sort() : GeoRectBench("rect_sort") {}
|
||||
|
||||
|
||||
protected:
|
||||
virtual void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE {
|
||||
for (int outer = 0; outer < loops; ++outer) {
|
||||
@ -129,3 +133,59 @@ DEF_BENCH( return new GeoRectBench_intersect_rect; )
|
||||
DEF_BENCH( return new GeoRectBench_Intersects; )
|
||||
|
||||
DEF_BENCH( return new GeoRectBench_sort; )
|
||||
|
||||
class GeoRectBench_sort_4f : public GeoRectBench {
|
||||
public:
|
||||
GeoRectBench_sort_4f() : GeoRectBench("rect_sort_4f") { }
|
||||
|
||||
protected:
|
||||
static SkRect Sort(const SkRect& rect) {
|
||||
// To sort:
|
||||
// left, right = minmax(left, right)
|
||||
// top, bottom = minmax(top, bottom)
|
||||
Sk4f ltrb(&rect.fLeft),
|
||||
rblt = ltrb.zwxy(),
|
||||
ltlt = Sk4f::Min(ltrb, rblt), // Holds (2 copies of) new left and top.
|
||||
rbrb = Sk4f::Max(ltrb, rblt), // Holds (2 copies of) new right and bottom.
|
||||
sort = Sk4f::XYAB(ltlt, rbrb);
|
||||
|
||||
SkRect sorted;
|
||||
sort.store(&sorted.fLeft);
|
||||
return sorted;
|
||||
}
|
||||
|
||||
virtual void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE {
|
||||
for (int outer = 0; outer < loops; ++outer) {
|
||||
for (size_t i = 0; i < SK_ARRAY_COUNT(fRects); ++i) {
|
||||
fRects[i] = Sort(fRects[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
DEF_BENCH( return new GeoRectBench_sort_4f; )
|
||||
|
||||
class GeoRectBench_Intersects_4f : public GeoRectBench {
|
||||
public:
|
||||
GeoRectBench_Intersects_4f() : GeoRectBench("rect_Intersects_4f") {}
|
||||
|
||||
protected:
|
||||
static bool Intersects(const SkRect& a, const SkRect& b) {
|
||||
Sk4f r1(&a.fLeft),
|
||||
r2(&b.fLeft),
|
||||
lt = Sk4f::XYAB(r1, r2), // a.L a.T b.L b.T <
|
||||
rb = Sk4f::ZWCD(r2, r1); // b.R b.B a.R a.B ?
|
||||
return lt.lessThan(rb).allTrue();
|
||||
}
|
||||
|
||||
virtual void onDraw(const int loops, SkCanvas* canvas) SK_OVERRIDE {
|
||||
for (int outer = 0; outer < loops; ++outer) {
|
||||
int count = 0;
|
||||
for (size_t i = 0; i < SK_ARRAY_COUNT(fRects); ++i) {
|
||||
count += Intersects(fRects[0], fRects[i]);
|
||||
}
|
||||
this->virtualCallToFoilOptimizers(count);
|
||||
}
|
||||
}
|
||||
};
|
||||
DEF_BENCH( return new GeoRectBench_Intersects_4f; )
|
||||
|
||||
|
@ -46,6 +46,7 @@
|
||||
4275, # An exported class was derived from a class that was not exported
|
||||
4345, # This is an FYI about a behavior change from long ago. Chrome stifles it too.
|
||||
4355, # 'this' used in base member initializer list. Off by default in newer compilers.
|
||||
4800, # forcing value to bool 'true' or 'false'
|
||||
],
|
||||
'msvs_cygwin_shell': 0,
|
||||
'msvs_settings': {
|
||||
|
@ -300,6 +300,9 @@ static inline bool SkIsU16(long x) {
|
||||
#define SkAlign8(x) (((x) + 7) >> 3 << 3)
|
||||
#define SkIsAlign8(x) (0 == ((x) & 7))
|
||||
|
||||
#define SkAlign16(x) (((x) + 15) >> 4 << 4)
|
||||
#define SkIsAlign16(x) (0 == ((x) & 15))
|
||||
|
||||
#define SkAlignPtr(x) (sizeof(void*) == 8 ? SkAlign8(x) : SkAlign4(x))
|
||||
#define SkIsAlignPtr(x) (sizeof(void*) == 8 ? SkIsAlign8(x) : SkIsAlign4(x))
|
||||
|
||||
|
98
src/core/Sk4x.h
Normal file
98
src/core/Sk4x.h
Normal file
@ -0,0 +1,98 @@
|
||||
#ifndef Sk4x_DEFINED
|
||||
#define Sk4x_DEFINED
|
||||
|
||||
#include "SkTypes.h"
|
||||
|
||||
// First we'll let Clang or GCC try their best with whatever instructions are available.
|
||||
// Otherwise fall back on portable code. This really should be a last resort.
|
||||
|
||||
#define SK4X_PREAMBLE 1
|
||||
#if defined(__clang__)
|
||||
#include "Sk4x_clang.h"
|
||||
#elif defined(__GNUC__)
|
||||
#include "Sk4x_gcc.h"
|
||||
#else
|
||||
#include "Sk4x_portable.h"
|
||||
#endif
|
||||
#undef SK4X_PREAMBLE
|
||||
|
||||
template <typename T> class Sk4x;
|
||||
typedef Sk4x<int> Sk4i;
|
||||
typedef Sk4x<float> Sk4f;
|
||||
|
||||
// Sk4x<T> holds four lanes of T and exposes lane-wise operations on them.
// This class only declares the API; the storage (Vector, fVec) and every method body
// come from whichever backend header (_clang, _gcc or _portable) is selected below.
template <typename T> class Sk4x {
|
||||
public:
|
||||
Sk4x(); // Uninitialized; use Sk4x(0,0,0,0) for zero.
|
||||
// Sets the four lanes, in order, from the four arguments.
Sk4x(T, T, T, T);
|
||||
// Loads the four lanes from a four-element array.
explicit Sk4x(const T[4]);
|
||||
|
||||
Sk4x(const Sk4x&);
|
||||
Sk4x& operator=(const Sk4x&);
|
||||
|
||||
// Overwrites the four lanes, in order, with the four arguments.
void set(T, T, T, T);
|
||||
|
||||
// Writes the four lanes to a four-element array.
void store(T[4]) const;
|
||||
|
||||
// reinterpret() builds a Dst from this vector's storage without converting values;
// cast() builds a Dst by converting each lane's value to Dst's lane type.
template <typename Dst> Dst reinterpret() const;
|
||||
template <typename Dst> Dst cast() const;
|
||||
|
||||
// Combine the four lanes with bitwise AND (allTrue) or OR (anyTrue) and convert
// the result to bool.  Intended for the Sk4i results of the comparisons below.
bool allTrue() const;
|
||||
bool anyTrue() const;
|
||||
|
||||
// Lane-wise bitwise operations.
Sk4x bitNot() const;
|
||||
Sk4x bitAnd(const Sk4x&) const;
|
||||
Sk4x bitOr (const Sk4x&) const;
|
||||
|
||||
// Lane-wise comparisons of this against the argument; one result lane per input lane.
Sk4i equal(const Sk4x&) const;
|
||||
Sk4i notEqual(const Sk4x&) const;
|
||||
Sk4i lessThan(const Sk4x&) const;
|
||||
Sk4i greaterThan(const Sk4x&) const;
|
||||
Sk4i lessThanEqual(const Sk4x&) const;
|
||||
Sk4i greaterThanEqual(const Sk4x&) const;
|
||||
|
||||
// Lane-wise arithmetic, with this as the left operand.
Sk4x add(const Sk4x&) const;
|
||||
Sk4x subtract(const Sk4x&) const;
|
||||
Sk4x multiply(const Sk4x&) const;
|
||||
Sk4x divide(const Sk4x&) const;
|
||||
|
||||
// Lane-wise minimum and maximum of a and b.
static Sk4x Min(const Sk4x& a, const Sk4x& b);
|
||||
static Sk4x Max(const Sk4x& a, const Sk4x& b);
|
||||
|
||||
// Swizzles follow OpenCL xyzw convention.
|
||||
// zwxy() returns the lanes reordered as (z, w, x, y), i.e. with the two halves swapped.
Sk4x zwxy() const;
|
||||
|
||||
// When there's a second argument, it's abcd.
|
||||
// XYAB takes the first two lanes of each argument; ZWCD takes the last two of each.
static Sk4x XYAB(const Sk4x& xyzw, const Sk4x& abcd);
|
||||
static Sk4x ZWCD(const Sk4x& xyzw, const Sk4x& abcd);
|
||||
|
||||
private:
|
||||
// It's handy to have Sk4f and Sk4i be mutual friends.
|
||||
template <typename S> friend class Sk4x;
|
||||
|
||||
// With SK4X_PRIVATE defined, the backend header expands to this class's private
// members (the Vector typedef, fVec, and the Shuffle helper).
#define SK4X_PRIVATE 1
|
||||
#if defined(__clang__)
|
||||
#include "Sk4x_clang.h"
|
||||
#elif defined(__GNUC__)
|
||||
#include "Sk4x_gcc.h"
|
||||
#else
|
||||
#include "Sk4x_portable.h"
|
||||
#endif
|
||||
#undef SK4X_PRIVATE
|
||||
};
|
||||
|
||||
#if defined(__clang__)
|
||||
#include "Sk4x_clang.h"
|
||||
#elif defined(__GNUC__)
|
||||
#include "Sk4x_gcc.h"
|
||||
#else
|
||||
#include "Sk4x_portable.h"
|
||||
#endif
|
||||
|
||||
// TODO ideas for enterprising coders:
|
||||
// 1) Code generated for Max() isn't as good in Sk4x_gcc.h as it is in _clang. Why?
|
||||
// 2) Sk4x_sse.h would be good for Windows, and could possibly beat _clang / _gcc
|
||||
// (e.g. they can't generate _mm_movemask_ps for allTrue/anyTrue).
|
||||
// 3) Sk4x_neon.h might be a good idea if _clang / _gcc aren't good enough on ARM.
|
||||
|
||||
|
||||
#endif//Sk4x_DEFINED
|
125
src/core/Sk4x_clang.h
Normal file
125
src/core/Sk4x_clang.h
Normal file
@ -0,0 +1,125 @@
|
||||
// It is important _not_ to put header guards here.
|
||||
// This file will be intentionally included three times.
|
||||
|
||||
// Useful reading:
|
||||
// http://clang.llvm.org/docs/LanguageExtensions.html#vectors-and-extended-vectors
|
||||
|
||||
#if defined(SK4X_PREAMBLE)
|
||||
|
||||
#elif defined(SK4X_PRIVATE)
|
||||
typedef T Vector __attribute__((ext_vector_type(4)));
|
||||
|
||||
/*implicit*/ Sk4x(Vector vec) : fVec(vec) {}
|
||||
|
||||
template <int m, int a, int s, int k>
|
||||
static Sk4x Shuffle(const Sk4x&, const Sk4x&);
|
||||
|
||||
Vector fVec;
|
||||
|
||||
#else // defined(SK4X_PRIVATE)
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x() { }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(T a, T b, T c, T d) { this->set(a,b,c,d); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(const T vals[4]) { this->set(vals[0], vals[1], vals[2], vals[3]); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(const Sk4x<T>& other) { *this = other; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>& Sk4x<T>::operator=(const Sk4x<T>& other) { fVec = other.fVec; return *this; }
|
||||
|
||||
template <typename T>
|
||||
void Sk4x<T>::set(T a, T b, T c, T d) {
|
||||
Vector v = { a, b, c, d };
|
||||
fVec = v;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Sk4x<T>::store(T vals[4]) const {
|
||||
SkASSERT(SkIsAlign16((uintptr_t)vals));
|
||||
*reinterpret_cast<Vector*>(vals) = fVec;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename Dst> Dst Sk4x<T>::reinterpret() const {
|
||||
return Dst((typename Dst::Vector)fVec);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename Dst> Dst Sk4x<T>::cast() const {
|
||||
#if __has_builtin(__builtin_convertvector)
|
||||
return Dst(__builtin_convertvector(fVec, typename Dst::Vector));
|
||||
#else
|
||||
return Dst(fVec[0], fVec[1], fVec[2], fVec[3]);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Sk4x<T>::allTrue() const { return fVec[0] & fVec[1] & fVec[2] & fVec[3]; }
|
||||
template <typename T>
|
||||
bool Sk4x<T>::anyTrue() const { return fVec[0] | fVec[1] | fVec[2] | fVec[3]; }
|
||||
|
||||
template <typename T> Sk4x<T> Sk4x<T>::bitNot() const { return ~fVec; }
|
||||
|
||||
template <typename T> Sk4x<T> Sk4x<T>::bitAnd(const Sk4x& other) const { return fVec & other.fVec; }
|
||||
template <typename T> Sk4x<T> Sk4x<T>::bitOr (const Sk4x& other) const { return fVec | other.fVec; }
|
||||
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: equal(const Sk4x<T>& other) const { return fVec == other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: notEqual(const Sk4x<T>& other) const { return fVec != other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: lessThan(const Sk4x<T>& other) const { return fVec < other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: greaterThan(const Sk4x<T>& other) const { return fVec > other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: lessThanEqual(const Sk4x<T>& other) const { return fVec <= other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>::greaterThanEqual(const Sk4x<T>& other) const { return fVec >= other.fVec; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>:: add(const Sk4x<T>& other) const { return fVec + other.fVec; }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::subtract(const Sk4x<T>& other) const { return fVec - other.fVec; }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::multiply(const Sk4x<T>& other) const { return fVec * other.fVec; }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>:: divide(const Sk4x<T>& other) const { return fVec / other.fVec; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::Min(const Sk4x<T>& a, const Sk4x<T>& b) {
|
||||
Sk4i less(a.lessThan(b));
|
||||
Sk4i val = a.reinterpret<Sk4i>().bitAnd(less).bitOr(
|
||||
b.reinterpret<Sk4i>().bitAnd(less.bitNot()));
|
||||
return val.reinterpret<Sk4x>();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::Max(const Sk4x<T>& a, const Sk4x<T>& b) {
|
||||
Sk4i less(a.lessThan(b));
|
||||
Sk4i val = b.reinterpret<Sk4i>().bitAnd(less).bitOr(
|
||||
a.reinterpret<Sk4i>().bitAnd(less.bitNot()));
|
||||
return val.reinterpret<Sk4x>();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <int m, int a, int s, int k>
|
||||
Sk4x<T> Sk4x<T>::Shuffle(const Sk4x<T>& x, const Sk4x<T>& y) {
|
||||
return __builtin_shufflevector(x.fVec, y.fVec, m,a,s,k);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::zwxy() const { return fVec.zwxy; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::XYAB(const Sk4x& xyzw, const Sk4x& abcd) { return Shuffle<0,1,4,5>(xyzw, abcd); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::ZWCD(const Sk4x& xyzw, const Sk4x& abcd) { return Shuffle<2,3,6,7>(xyzw, abcd); }
|
||||
|
||||
#endif // defined(SK4X_PRIVATE)
|
135
src/core/Sk4x_gcc.h
Normal file
135
src/core/Sk4x_gcc.h
Normal file
@ -0,0 +1,135 @@
|
||||
// It is important _not_ to put header guards here.
|
||||
// This file will be intentionally included three times.
|
||||
|
||||
// Useful reading:
|
||||
// https://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html
|
||||
|
||||
#if defined(SK4X_PREAMBLE)
|
||||
|
||||
#elif defined(SK4X_PRIVATE)
|
||||
typedef T Vector __attribute__((vector_size(16)));
|
||||
|
||||
/*implicit*/ Sk4x(Vector vec) : fVec(vec) {}
|
||||
static inline Vector ShuffleImpl(Vector a, Vector b, int __attribute__((vector_size(16))) mask);
|
||||
template <int m, int a, int s, int k>
|
||||
static Sk4x Shuffle(const Sk4x&, const Sk4x&);
|
||||
|
||||
Vector fVec;
|
||||
|
||||
#else // defined(SK4X_PRIVATE)
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x() { }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(T a, T b, T c, T d) { this->set(a,b,c,d); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(const T vals[4]) {
|
||||
fVec = *reinterpret_cast<const Vector*>(vals); // Should compile to moveaps or moveups.
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(const Sk4x<T>& other) { *this = other; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>& Sk4x<T>::operator=(const Sk4x<T>& other) { fVec = other.fVec; return *this; }
|
||||
|
||||
template <typename T>
|
||||
void Sk4x<T>::set(T a, T b, T c, T d) {
|
||||
Vector v = { a, b, c, d };
|
||||
fVec = v;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Sk4x<T>::store(T vals[4]) const {
|
||||
SkASSERT(SkIsAlign16((uintptr_t)vals));
|
||||
*reinterpret_cast<Vector*>(vals) = fVec;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename Dst> Dst Sk4x<T>::reinterpret() const {
|
||||
return Dst((typename Dst::Vector)fVec);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename Dst> Dst Sk4x<T>::cast() const {
|
||||
return Dst(fVec[0], fVec[1], fVec[2], fVec[3]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Sk4x<T>::allTrue() const { return fVec[0] & fVec[1] & fVec[2] & fVec[3]; }
|
||||
template <typename T>
|
||||
bool Sk4x<T>::anyTrue() const { return fVec[0] | fVec[1] | fVec[2] | fVec[3]; }
|
||||
|
||||
// Lane-wise bitwise NOT.  Returns through Sk4x<T>'s own implicit Vector constructor, like
// bitAnd() and bitOr() below, instead of hard-coding Sk4i as the result type.
template <typename T> Sk4x<T> Sk4x<T>::bitNot() const { return ~fVec; }
|
||||
|
||||
template <typename T> Sk4x<T> Sk4x<T>::bitAnd(const Sk4x& other) const { return fVec & other.fVec; }
|
||||
template <typename T> Sk4x<T> Sk4x<T>::bitOr (const Sk4x& other) const { return fVec | other.fVec; }
|
||||
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: equal(const Sk4x<T>& other) const { return fVec == other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: notEqual(const Sk4x<T>& other) const { return fVec != other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: lessThan(const Sk4x<T>& other) const { return fVec < other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: greaterThan(const Sk4x<T>& other) const { return fVec > other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: lessThanEqual(const Sk4x<T>& other) const { return fVec <= other.fVec; }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>::greaterThanEqual(const Sk4x<T>& other) const { return fVec >= other.fVec; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>:: add(const Sk4x<T>& other) const { return fVec + other.fVec; }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::subtract(const Sk4x<T>& other) const { return fVec - other.fVec; }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::multiply(const Sk4x<T>& other) const { return fVec * other.fVec; }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>:: divide(const Sk4x<T>& other) const { return fVec / other.fVec; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::Min(const Sk4x<T>& a, const Sk4x<T>& b) {
|
||||
return a.fVec < b.fVec ? a.fVec : b.fVec; // This makes great SSE code (1 minps op)...
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::Max(const Sk4x<T>& a, const Sk4x<T>& b) {
|
||||
return a.fVec < b.fVec ? b.fVec : a.fVec; // ...but this doesn't look so good (7 ops?).
|
||||
}
|
||||
|
||||
// GCC 4.8 has a bug that leads it to segfault when presented with the obvious code for Shuffle:
|
||||
// Sk4i::Vector mask = { m,a,s,k };
|
||||
// return __builtin_shuffle(x.fVec, y.fVec, mask);
|
||||
//
|
||||
// This roundabout implementation via ShuffleImpl works around that bug,
|
||||
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57509
|
||||
|
||||
template <>
|
||||
inline Sk4i::Vector Sk4i::ShuffleImpl(Sk4i::Vector x, Sk4i::Vector y, Sk4i::Vector mask) {
|
||||
return __builtin_shuffle(x,y, mask);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline Sk4f::Vector Sk4f::ShuffleImpl(Sk4f::Vector x, Sk4f::Vector y, Sk4i::Vector mask) {
|
||||
return __builtin_shuffle(x,y, mask);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <int m, int a, int s, int k>
|
||||
Sk4x<T> Sk4x<T>::Shuffle(const Sk4x<T>& x, const Sk4x<T>& y) {
|
||||
Sk4i::Vector mask = { m,a,s,k };
|
||||
return ShuffleImpl(x.fVec, y.fVec, mask);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::zwxy() const { return Shuffle<2,3,0,1>(*this, *this); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::XYAB(const Sk4x& xyzw, const Sk4x& abcd) { return Shuffle<0,1,4,5>(xyzw, abcd); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::ZWCD(const Sk4x& xyzw, const Sk4x& abcd) { return Shuffle<2,3,6,7>(xyzw, abcd); }
|
||||
|
||||
#endif // defined(SK4X_PRIVATE)
|
134
src/core/Sk4x_portable.h
Normal file
134
src/core/Sk4x_portable.h
Normal file
@ -0,0 +1,134 @@
|
||||
// It is important _not_ to put header guards here.
|
||||
// This file will be intentionally included three times.
|
||||
|
||||
#if defined(SK4X_PREAMBLE)
|
||||
|
||||
#elif defined(SK4X_PRIVATE)
|
||||
typedef T Vector[4];
|
||||
|
||||
Vector fVec;
|
||||
|
||||
template <int m, int a, int s, int k>
|
||||
static Sk4x Shuffle(const Sk4x&, const Sk4x&);
|
||||
|
||||
#else // defined(SK4X_PRIVATE)
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x() { }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(T a, T b, T c, T d) { this->set(a,b,c,d); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(const T vals[4]) { this->set(vals[0], vals[1], vals[2], vals[3]); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>::Sk4x(const Sk4x<T>& other) { *this = other; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T>& Sk4x<T>::operator=(const Sk4x<T>& other) {
|
||||
this->set(other.fVec[0], other.fVec[1], other.fVec[2], other.fVec[3]);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Sk4x<T>::set(T a, T b, T c, T d) {
|
||||
fVec[0] = a;
|
||||
fVec[1] = b;
|
||||
fVec[2] = c;
|
||||
fVec[3] = d;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Sk4x<T>::store(T vals[4]) const {
|
||||
vals[0] = fVec[0];
|
||||
vals[1] = fVec[1];
|
||||
vals[2] = fVec[2];
|
||||
vals[3] = fVec[3];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename Dst> Dst Sk4x<T>::reinterpret() const {
|
||||
return Dst(reinterpret_cast<const typename Dst::Vector*>(fVec));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename Dst> Dst Sk4x<T>::cast() const {
|
||||
return Dst(fVec[0], fVec[1], fVec[2], fVec[3]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool Sk4x<T>::allTrue() const { return fVec[0] & fVec[1] & fVec[2] & fVec[3]; }
|
||||
template <typename T>
|
||||
bool Sk4x<T>::anyTrue() const { return fVec[0] | fVec[1] | fVec[2] | fVec[3]; }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::bitNot() const { return Sk4x(~fVec[0], ~fVec[1], ~fVec[2], ~fVec[3]); }
|
||||
|
||||
#define BINOP(op) fVec[0] op other.fVec[0], \
|
||||
fVec[1] op other.fVec[1], \
|
||||
fVec[2] op other.fVec[2], \
|
||||
fVec[3] op other.fVec[3]
|
||||
|
||||
template <typename T> Sk4x<T> Sk4x<T>::bitAnd(const Sk4x& other) const { return Sk4x(BINOP(&)); }
|
||||
template <typename T> Sk4x<T> Sk4x<T>::bitOr (const Sk4x& other) const { return Sk4x(BINOP(|)); }
|
||||
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: equal(const Sk4x<T>& other) const { return Sk4i(BINOP(==)); }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: notEqual(const Sk4x<T>& other) const { return Sk4i(BINOP(!=)); }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: lessThan(const Sk4x<T>& other) const { return Sk4i(BINOP( <)); }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: greaterThan(const Sk4x<T>& other) const { return Sk4i(BINOP( >)); }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>:: lessThanEqual(const Sk4x<T>& other) const { return Sk4i(BINOP(<=)); }
|
||||
template <typename T>
|
||||
Sk4i Sk4x<T>::greaterThanEqual(const Sk4x<T>& other) const { return Sk4i(BINOP(>=)); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>:: add(const Sk4x<T>& other) const { return Sk4x(BINOP(+)); }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::subtract(const Sk4x<T>& other) const { return Sk4x(BINOP(-)); }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::multiply(const Sk4x<T>& other) const { return Sk4x(BINOP(*)); }
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>:: divide(const Sk4x<T>& other) const { return Sk4x(BINOP(/)); }
|
||||
|
||||
#undef BINOP
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::Min(const Sk4x<T>& a, const Sk4x<T>& b) {
|
||||
return Sk4x(SkTMin(a.fVec[0], b.fVec[0]),
|
||||
SkTMin(a.fVec[1], b.fVec[1]),
|
||||
SkTMin(a.fVec[2], b.fVec[2]),
|
||||
SkTMin(a.fVec[3], b.fVec[3]));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::Max(const Sk4x<T>& a, const Sk4x<T>& b) {
|
||||
return Sk4x(SkTMax(a.fVec[0], b.fVec[0]),
|
||||
SkTMax(a.fVec[1], b.fVec[1]),
|
||||
SkTMax(a.fVec[2], b.fVec[2]),
|
||||
SkTMax(a.fVec[3], b.fVec[3]));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <int m, int a, int s, int k>
|
||||
Sk4x<T> Sk4x<T>::Shuffle(const Sk4x<T>& x, const Sk4x<T>& y) {
|
||||
return Sk4x(m < 4 ? x.fVec[m] : y.fVec[m-4],
|
||||
a < 4 ? x.fVec[a] : y.fVec[a-4],
|
||||
s < 4 ? x.fVec[s] : y.fVec[s-4],
|
||||
k < 4 ? x.fVec[k] : y.fVec[k-4]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::zwxy() const { return Shuffle<2,3,0,1>(*this, *this); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::XYAB(const Sk4x& xyzw, const Sk4x& abcd) { return Shuffle<0,1,4,5>(xyzw, abcd); }
|
||||
|
||||
template <typename T>
|
||||
Sk4x<T> Sk4x<T>::ZWCD(const Sk4x& xyzw, const Sk4x& abcd) { return Shuffle<2,3,6,7>(xyzw, abcd); }
|
||||
|
||||
#endif // defined(SK4X_PRIVATE)
|
@ -6,25 +6,34 @@
|
||||
*/
|
||||
|
||||
#include "SkTileGrid.h"
|
||||
#include "Sk4x.h"
|
||||
|
||||
SkTileGrid::SkTileGrid(int xTiles, int yTiles, const SkTileGridFactory::TileGridInfo& info)
|
||||
: fXTiles(xTiles)
|
||||
, fYTiles(yTiles)
|
||||
, fInvWidth( SkScalarInvert(info.fTileInterval.width()))
|
||||
, fInvHeight(SkScalarInvert(info.fTileInterval.height()))
|
||||
, fMarginWidth (info.fMargin.fWidth +1) // Margin is offset by 1 as a provision for AA and
|
||||
, fMarginHeight(info.fMargin.fHeight+1) // to cancel the outset applied by getClipDeviceBounds.
|
||||
, fOffset(SkPoint::Make(info.fOffset.fX, info.fOffset.fY))
|
||||
, fNumTiles(xTiles * yTiles)
|
||||
, fGridBounds(SkRect::MakeWH(xTiles * info.fTileInterval.width(),
|
||||
yTiles * info.fTileInterval.height()))
|
||||
, fTiles(SkNEW_ARRAY(SkTDArray<unsigned>, xTiles * yTiles)) {}
|
||||
, fMargin(-info.fMargin.fWidth - 1, // Outset margin by 1 as a provision for AA and to
|
||||
-info.fMargin.fHeight - 1, // cancel the outset applied by getClipDeviceBounds().
|
||||
+info.fMargin.fWidth + 1,
|
||||
+info.fMargin.fHeight + 1)
|
||||
, fOffset(info.fOffset.fX,
|
||||
info.fOffset.fY,
|
||||
info.fOffset.fX - SK_ScalarNearlyZero, // We scrunch user-provided bounds in a little
|
||||
info.fOffset.fY - SK_ScalarNearlyZero) // to make right and bottom edges exclusive.
|
||||
, fUserToGrid(SkScalarInvert(info.fTileInterval.width()),
|
||||
SkScalarInvert(info.fTileInterval.height()),
|
||||
SkScalarInvert(info.fTileInterval.width()),
|
||||
SkScalarInvert(info.fTileInterval.height()))
|
||||
, fGridHigh(fXTiles - 1, yTiles - 1, fXTiles - 1, yTiles - 1)
|
||||
, fTiles(SkNEW_ARRAY(SkTDArray<unsigned>, fNumTiles)) {}
|
||||
|
||||
SkTileGrid::~SkTileGrid() {
|
||||
SkDELETE_ARRAY(fTiles);
|
||||
}
|
||||
|
||||
void SkTileGrid::reserve(unsigned opCount) {
|
||||
if (fXTiles * fYTiles == 0) {
|
||||
if (fNumTiles == 0) {
|
||||
return; // A tileless tile grid is nonsensical, but happens in at least cc_unittests.
|
||||
}
|
||||
|
||||
@ -34,9 +43,9 @@ void SkTileGrid::reserve(unsigned opCount) {
|
||||
|
||||
// If we take those observations and further assume the ops are distributed evenly
|
||||
// across the picture, we get this guess for number of ops per tile:
|
||||
const int opsPerTileGuess = (2 * opCount) / (fXTiles * fYTiles);
|
||||
const int opsPerTileGuess = (2 * opCount) / fNumTiles;
|
||||
|
||||
for (SkTDArray<unsigned>* tile = fTiles; tile != fTiles + (fXTiles * fYTiles); tile++) {
|
||||
for (SkTDArray<unsigned>* tile = fTiles; tile != fTiles + fNumTiles; tile++) {
|
||||
tile->setReserve(opsPerTileGuess);
|
||||
}
|
||||
|
||||
@ -45,39 +54,51 @@ void SkTileGrid::reserve(unsigned opCount) {
|
||||
}
|
||||
|
||||
void SkTileGrid::flushDeferredInserts() {
|
||||
for (SkTDArray<unsigned>* tile = fTiles; tile != fTiles + (fXTiles * fYTiles); tile++) {
|
||||
for (SkTDArray<unsigned>* tile = fTiles; tile != fTiles + fNumTiles; tile++) {
|
||||
tile->shrinkToFit();
|
||||
}
|
||||
}
|
||||
|
||||
// Adjustments to user-provided bounds common to both insert() and search().
|
||||
// Call this after making insert- or search- specific adjustments.
|
||||
void SkTileGrid::commonAdjust(SkRect* rect) const {
|
||||
// Apply our offset.
|
||||
rect->offset(fOffset);
|
||||
// Convert user-space bounds to grid tiles they cover (LT+RB both inclusive).
|
||||
// Out of bounds queries are clamped to the single nearest tile.
|
||||
void SkTileGrid::userToGrid(const Sk4f& user, SkIRect* out) const {
|
||||
// Map from user coordinates to grid tile coordinates.
|
||||
Sk4f grid = user.multiply(fUserToGrid);
|
||||
|
||||
// Scrunch the bounds in just a little to make the right and bottom edges
|
||||
// exclusive. We want bounds of exactly one tile to hit exactly one tile.
|
||||
rect->fRight -= SK_ScalarNearlyZero;
|
||||
rect->fBottom -= SK_ScalarNearlyZero;
|
||||
// Now that we're in grid coordinates, clamp to the grid bounds.
|
||||
grid = Sk4f::Max(grid, Sk4f(0,0,0,0));
|
||||
grid = Sk4f::Min(grid, fGridHigh);
|
||||
|
||||
// Truncate to integers.
|
||||
grid.cast<Sk4i>().store(&out->fLeft);
|
||||
}
|
||||
|
||||
// Convert user-space bounds to grid tiles they cover (LT and RB both inclusive).
|
||||
void SkTileGrid::userToGrid(const SkRect& user, SkIRect* grid) const {
|
||||
grid->fLeft = SkPin32(user.left() * fInvWidth , 0, fXTiles - 1);
|
||||
grid->fTop = SkPin32(user.top() * fInvHeight, 0, fYTiles - 1);
|
||||
grid->fRight = SkPin32(user.right() * fInvWidth , 0, fXTiles - 1);
|
||||
grid->fBottom = SkPin32(user.bottom() * fInvHeight, 0, fYTiles - 1);
|
||||
// If the rect is inverted, sort it.
|
||||
static Sk4f sorted(const Sk4f& ltrb) {
    // Swapping the two halves pairs every edge with its opposite edge, so one Min and one
    // Max sort both axes at once:
    //     Min -> (left, top, left, top)       Max -> (right, bottom, right, bottom)
    const Sk4f rblt = ltrb.zwxy();
    const Sk4f lows = Sk4f::Min(ltrb, rblt);
    const Sk4f highs = Sk4f::Max(ltrb, rblt);
    // Keep the first two lanes of each to rebuild (left, top, right, bottom).
    return Sk4f::XYAB(lows, highs);
}
|
||||
|
||||
// Does this rect intersect the grid?
|
||||
bool SkTileGrid::intersectsGrid(const Sk4f& ltrb) const {
|
||||
SkRect bounds;
|
||||
ltrb.store(&bounds.fLeft);
|
||||
return SkRect::Intersects(bounds, fGridBounds);
|
||||
// TODO: If we can get it fast enough, write intersect using Sk4f.
|
||||
}
|
||||
|
||||
void SkTileGrid::insert(unsigned opIndex, const SkRect& originalBounds, bool) {
|
||||
SkRect bounds = originalBounds;
|
||||
bounds.outset(fMarginWidth, fMarginHeight);
|
||||
this->commonAdjust(&bounds);
|
||||
Sk4f bounds = Sk4f(&originalBounds.fLeft).add(fMargin).add(fOffset);
|
||||
SkASSERT(sorted(bounds).equal(bounds).allTrue());
|
||||
|
||||
// TODO(mtklein): can we assert this instead to save an intersection in Release mode,
|
||||
// or just allow out-of-bound insertions to insert anyway (clamped to nearest tile)?
|
||||
if (!SkRect::Intersects(bounds, fGridBounds)) {
|
||||
// TODO(mtklein): skip this check and just let out-of-bounds rects insert into nearest tile?
|
||||
if (!this->intersectsGrid(bounds)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -103,20 +124,11 @@ void SkTileGrid::insert(unsigned opIndex, const SkRect& originalBounds, bool) {
|
||||
static const int kStackAllocationTileCount = 1024;
|
||||
|
||||
void SkTileGrid::search(const SkRect& originalQuery, SkTDArray<unsigned>* results) const {
|
||||
// The inset counteracts the outset that applied in 'insert', which optimizes
|
||||
// for lookups of size 'tileInterval + 2 * margin' (aligned with the tile grid).
|
||||
SkRect query = originalQuery;
|
||||
query.inset(fMarginWidth, fMarginHeight);
|
||||
this->commonAdjust(&query);
|
||||
// The .subtract(fMargin) counteracts the .add(fMargin) applied in insert(),
|
||||
// which optimizes for lookups of size tileInterval + 2 * margin (aligned with the tile grid).
|
||||
// That .subtract(fMargin) may have inverted the rect, so we sort it.
|
||||
Sk4f query = sorted(Sk4f(&originalQuery.fLeft).subtract(fMargin).add(fOffset));
|
||||
|
||||
// The inset may have inverted the rectangle, so sort().
|
||||
// TODO(mtklein): It looks like we only end up with inverted bounds in unit tests
|
||||
// that make explicitly inverted queries, not from insetting. If we can drop support for
|
||||
// unsorted bounds (i.e. we don't see them outside unit tests), I think we can drop this.
|
||||
query.sort();
|
||||
|
||||
// No intersection check. We optimize for queries that are in bounds.
|
||||
// We're safe anyway: userToGrid() will clamp out-of-bounds queries to nearest tile.
|
||||
SkIRect grid;
|
||||
this->userToGrid(query, &grid);
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#ifndef SkTileGrid_DEFINED
|
||||
#define SkTileGrid_DEFINED
|
||||
|
||||
#include "Sk4x.h"
|
||||
#include "SkBBHFactory.h"
|
||||
#include "SkBBoxHierarchy.h"
|
||||
|
||||
@ -43,16 +44,16 @@ public:
|
||||
virtual void flushDeferredInserts() SK_OVERRIDE;
|
||||
|
||||
private:
|
||||
void commonAdjust(SkRect*) const;
|
||||
void userToGrid(const SkRect&, SkIRect* grid) const;
|
||||
void userToGrid(const Sk4f&, SkIRect*) const;
|
||||
bool intersectsGrid(const Sk4f&) const;
|
||||
|
||||
const int fXTiles, fYTiles;
|
||||
const SkScalar fInvWidth, fInvHeight;
|
||||
const SkScalar fMarginWidth, fMarginHeight;
|
||||
const SkPoint fOffset;
|
||||
const SkRect fGridBounds;
|
||||
const int fXTiles, // Number of tiles in a single row.
|
||||
fNumTiles; // Total number of tiles.
|
||||
|
||||
// (fXTiles * fYTiles) SkTDArrays, each listing ops overlapping that tile in order.
|
||||
const SkRect fGridBounds; // Only used for intersectsGrid(). Remove if that's removed.
|
||||
const Sk4f fMargin, fOffset, fUserToGrid, fGridHigh;
|
||||
|
||||
// fNumTiles SkTDArrays, each listing ops overlapping that tile in order.
|
||||
SkTDArray<unsigned>* fTiles;
|
||||
|
||||
typedef SkBBoxHierarchy INHERITED;
|
||||
|
Loading…
Reference in New Issue
Block a user