add constrained_add
This adds a check to make sure that the result of the last add in the lerp is in range. Also, smooth out the types. Change-Id: I853835e530f6b6790e16464db12964d68ab9ef8d Reviewed-on: https://skia-review.googlesource.com/c/skia/+/453718 Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com> Commit-Queue: Herb Derby <herb@google.com>
This commit is contained in:
parent
afa657d6ab
commit
83e99569bd
@ -23,11 +23,26 @@ static_assert(false, "This only works on clang.");
|
|||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
using Q15 = V<8, uint16_t>;
|
using Q15 = V<8, uint16_t>;
|
||||||
|
using I16 = V<8, int16_t>;
|
||||||
|
using U16 = V<8, uint16_t>;
|
||||||
|
|
||||||
|
|
||||||
|
static inline U16 constrained_add(I16 a, U16 b) {
|
||||||
|
for (size_t i = 0; i < 8; i++) {
|
||||||
|
// Ensure that a + b is on the interval [0, UINT16_MAX]
|
||||||
|
assert(-b[i] <= a[i] && a[i] <= UINT16_MAX - b[i]);
|
||||||
|
}
|
||||||
|
U16 answer = b + a;
|
||||||
|
return answer;
|
||||||
|
}
|
||||||
|
|
||||||
// A pure C version of the ssse3 intrinsic mm_mulhrs_epi16;
|
// A pure C version of the ssse3 intrinsic mm_mulhrs_epi16;
|
||||||
static inline Q15 simulate_ssse3_mm_mulhrs_epi16(Q15 a, Q15 b) {
|
static inline I16 simulate_ssse3_mm_mulhrs_epi16(I16 a, I16 b) {
|
||||||
Q15 result;
|
I16 result;
|
||||||
auto m = [](int16_t r, int16_t s) {
|
auto m = [](int16_t r, int16_t s) {
|
||||||
const int32_t rounding = 1 << 14;
|
const int32_t rounding = 1 << 14;
|
||||||
int32_t temp = (int32_t)r * (int32_t)s + rounding;
|
int32_t temp = (int32_t)r * (int32_t)s + rounding;
|
||||||
|
@ -65,37 +65,25 @@ static int16_t full_res_bilerp(
|
|||||||
return rounded >> 32;
|
return rounded >> 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Change of parameters on t from [0, 1) to [-1, 1). This cuts the number of differences in half.
|
|
||||||
static int16_t lerp(float t, int16_t a, int16_t b) {
|
|
||||||
const int logPixelScale = 7;
|
|
||||||
const uint16_t half = 1 << logPixelScale;
|
|
||||||
// t on [-1, 1).
|
|
||||||
Q15 qt (floor(t * 65536.0f - 32768.0f + 0.5f));
|
|
||||||
// need to pick logPixelScale to scale by addition 1/2.
|
|
||||||
Q15 qw ((b - a) << logPixelScale);
|
|
||||||
Q15 qm ((a + b) << logPixelScale);
|
|
||||||
Q15 answer = simulate_ssse3_mm_mulhrs_epi16(qt, qw) + qm;
|
|
||||||
// Extra shift to divide by 2.
|
|
||||||
return (answer[0] + half) >> (logPixelScale + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int16_t bilerp_1(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
|
static int16_t bilerp_1(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
|
||||||
const int logPixelScale = 7;
|
const int logPixelScale = 7;
|
||||||
const int16_t half = 1 << logPixelScale;
|
const int16_t half = 1 << logPixelScale;
|
||||||
Q15 qtx = floor(tx * 65536.0f - 32768.0f + 0.5f);
|
I16 qtx = floor(tx * 65536.0f - 32768.0f + 0.5f);
|
||||||
Q15 qw = (p10 - p00) << logPixelScale;
|
I16 qw = (p10 - p00) << logPixelScale;
|
||||||
Q15 qm = (p10 + p00) << logPixelScale;
|
U16 qm = (p10 + p00) << logPixelScale;
|
||||||
Q15 top = (simulate_ssse3_mm_mulhrs_epi16(qtx, qw) + qm + 1) >> 1;
|
I16 top = (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);
|
||||||
|
|
||||||
qw = (p11 - p01) << logPixelScale;
|
qw = (p11 - p01) << logPixelScale;
|
||||||
qm = (p11 + p01) << logPixelScale;
|
qm = (p11 + p01) << logPixelScale;
|
||||||
Q15 bottom = (simulate_ssse3_mm_mulhrs_epi16(qtx, qw) + qm + 1) >> 1;
|
I16 bottom =
|
||||||
|
(I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);
|
||||||
|
|
||||||
Q15 qty = floor(ty * 65536.0f - 32768.0f + 0.5f);
|
I16 qty = floor(ty * 65536.0f - 32768.0f + 0.5f);
|
||||||
|
|
||||||
qw = bottom - top;
|
qw = bottom - top;
|
||||||
qm = bottom + top;
|
qm = (U16)bottom + (U16)top;
|
||||||
Q15 scaledAnswer = simulate_ssse3_mm_mulhrs_epi16(qty, qw) + qm;
|
U16 scaledAnswer = constrained_add(simulate_ssse3_mm_mulhrs_epi16(qty, qw), qm);
|
||||||
|
|
||||||
return (scaledAnswer[0] + half) >> (logPixelScale + 1);
|
return (scaledAnswer[0] + half) >> (logPixelScale + 1);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user