add constrained_add

This adds a check to make sure that the result of the last
add in the lerp is in range. Also, smooth out the types.

Change-Id: I853835e530f6b6790e16464db12964d68ab9ef8d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/453718
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Commit-Queue: Herb Derby <herb@google.com>
Herb Derby 2021-09-28 17:24:00 -04:00 committed by SkCQ
parent afa657d6ab
commit 83e99569bd
2 changed files with 26 additions and 23 deletions
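
The range the new assert enforces is easiest to see in scalar form. Below is a minimal sketch, assuming plain int16_t/uint16_t values instead of the V<8, ...> lane types the patch uses; constrained_add_scalar and the sample numbers are hypothetical and only illustrate the [0, UINT16_MAX] precondition on the signed-plus-unsigned add.

#include <cassert>
#include <cstdint>

// Scalar version of the check: the signed term a must not drive the sum
// b + a below 0 or above UINT16_MAX, so the result fits in uint16_t
// without wrapping.
static inline uint16_t constrained_add_scalar(int16_t a, uint16_t b) {
    assert(-b <= a && a <= UINT16_MAX - b);
    return static_cast<uint16_t>(b + a);
}

int main() {
    // A mulhrs-style lane of -1200 against a midpoint sum of 50000 stays in
    // range; a lane of +16000 against 50000 would trip the assert (66000 > UINT16_MAX).
    return constrained_add_scalar(-1200, 50000) == 48800 ? 0 : 1;
}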

@@ -23,11 +23,26 @@ static_assert(false, "This only works on clang.");
 #include <arm_neon.h>
 #endif
 
+#include <cassert>
+#include <cstdint>
+
 using Q15 = V<8, uint16_t>;
+using I16 = V<8, int16_t>;
+using U16 = V<8, uint16_t>;
+
+static inline U16 constrained_add(I16 a, U16 b) {
+    for (size_t i = 0; i < 8; i++) {
+        // Ensure that a + b is on the interval [0, UINT16_MAX]
+        assert(-b[i] <= a[i] && a[i] <= UINT16_MAX - b[i]);
+    }
+    U16 answer = b + a;
+    return answer;
+}
 
 // A pure C version of the ssse3 intrinsic mm_mulhrs_epi16;
-static inline Q15 simulate_ssse3_mm_mulhrs_epi16(Q15 a, Q15 b) {
-    Q15 result;
+static inline I16 simulate_ssse3_mm_mulhrs_epi16(I16 a, I16 b) {
+    I16 result;
     auto m = [](int16_t r, int16_t s) {
         const int32_t rounding = 1 << 14;
         int32_t temp = (int32_t)r * (int32_t)s + rounding;

@@ -65,37 +65,25 @@ static int16_t full_res_bilerp(
     return rounded >> 32;
 }
 
-// Change of parameters on t from [0, 1) to [-1, 1). This cuts the number if differences in half.
-static int16_t lerp(float t, int16_t a, int16_t b) {
-    const int logPixelScale = 7;
-    const uint16_t half = 1 << logPixelScale;
-    // t on [-1, 1).
-    Q15 qt (floor(t * 65536.0f - 32768.0f + 0.5f));
-
-    // need to pick logPixelScale to scale by addition 1/2.
-    Q15 qw ((b - a) << logPixelScale);
-    Q15 qm ((a + b) << logPixelScale);
-    Q15 answer = simulate_ssse3_mm_mulhrs_epi16(qt, qw) + qm;
-
-    // Extra shift to divide by 2.
-    return (answer[0] + half) >> (logPixelScale + 1);
-}
-
 static int16_t bilerp_1(float tx, float ty, int16_t p00, int16_t p10, int16_t p01, int16_t p11) {
     const int logPixelScale = 7;
     const int16_t half = 1 << logPixelScale;
-    Q15 qtx = floor(tx * 65536.0f - 32768.0f + 0.5f);
-    Q15 qw = (p10 - p00) << logPixelScale;
-    Q15 qm = (p10 + p00) << logPixelScale;
-    Q15 top = (simulate_ssse3_mm_mulhrs_epi16(qtx, qw) + qm + 1) >> 1;
+    I16 qtx = floor(tx * 65536.0f - 32768.0f + 0.5f);
+    I16 qw = (p10 - p00) << logPixelScale;
+    U16 qm = (p10 + p00) << logPixelScale;
+    I16 top = (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);
 
     qw = (p11 - p01) << logPixelScale;
     qm = (p11 + p01) << logPixelScale;
-    Q15 bottom = (simulate_ssse3_mm_mulhrs_epi16(qtx, qw) + qm + 1) >> 1;
+    I16 bottom =
+            (I16)((U16)(constrained_add(simulate_ssse3_mm_mulhrs_epi16(qtx, qw), qm) + 1) >> 1);
 
-    Q15 qty = floor(ty * 65536.0f - 32768.0f + 0.5f);
+    I16 qty = floor(ty * 65536.0f - 32768.0f + 0.5f);
     qw = bottom - top;
-    qm = bottom + top;
-    Q15 scaledAnswer = simulate_ssse3_mm_mulhrs_epi16(qty, qw) + qm;
+    qm = (U16)bottom + (U16)top;
+    U16 scaledAnswer = constrained_add(simulate_ssse3_mm_mulhrs_epi16(qty, qw), qm);
 
     return (scaledAnswer[0] + half) >> (logPixelScale + 1);
 }
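
For reference, the identity the fixed-point arithmetic leans on (and the reason the final add sits near the top of the uint16 range) can be written out in floats. This is a minimal sketch, assuming scalar values; lerp_reference is a hypothetical name, not part of the patch.

// With t remapped from [0, 1) to [-1, 1) as tPrime = 2*t - 1, the
// mulhrs-style product plus the midpoint sum is exactly twice the lerp:
//   tPrime*(b - a) + (a + b) == 2*(a + t*(b - a))
// which is why the code adds 'half' and shifts by logPixelScale + 1 at the end.
static float lerp_reference(float t, float a, float b) {
    float tPrime = 2.0f * t - 1.0f;              // same remapping the qt/qtx values encode in Q15
    return (tPrime * (b - a) + (a + b)) * 0.5f;  // equals a + t*(b - a)
}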