skia2/experimental/lowp-basic/QMath.h

/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef QMath_DEFINED
#define QMath_DEFINED

// V<N, T> is a vector of N elements of type T, declared with the ext_vector_type attribute.
template <int N, typename T> using V = T __attribute__((ext_vector_type(N)));

// Fail the build with an explicit message when the compiler does not define __clang__.
#if !defined(__clang__)
static_assert(false, "This only works on clang.");
#endif

#if defined(__SSSE3__)
#include <immintrin.h>
#endif

#if defined(__ARM_NEON)
// From section 5.5.5 of the ARM C Language Extensions (ACLE)
    #include <arm_neon.h>
#endif

// Q15 is a vector of eight uint16_t lanes. The simulate_* helpers in this file convert each
// lane to int16_t before doing arithmetic on it.
using Q15 = V<8, uint16_t>;

// Scalar reference model of the SSSE3 intrinsic _mm_mulhrs_epi16.
//
// For each of the 8 lanes: the lanes of a and b are converted to int16_t, multiplied as 32-bit
// integers, 1 << 14 is added as the rounding term, and the sum is shifted right by 15. The
// shifted value is narrowed to int16_t and stored in the matching lane of the returned vector.
static inline Q15 simulate_ssse3_mm_mulhrs_epi16(Q15 a, Q15 b) {
    Q15 product;
    for (int lane = 0; lane < 8; ++lane) {
        const int16_t lhs = a[lane];
        const int16_t rhs = b[lane];
        // Widen before multiplying so the full 32-bit product (plus rounding) is kept.
        const int32_t rounded = static_cast<int32_t>(lhs) * static_cast<int32_t>(rhs) + (1 << 14);
        product[lane] = static_cast<int16_t>(rounded >> 15);
    }
    return product;
}

// Scalar reference model of the NEON intrinsic vqrdmulhq_s16.
//
// For each of the 8 lanes: the lanes of a and b are converted to int16_t, the doubled 64-bit
// product 2 * a * b is formed, 1 << 15 is added as the rounding term, and the sum is shifted
// right by 16. The shifted value is then saturated to [-32768, 32767] and stored in the
// matching lane of the returned vector.
static inline Q15 simulate_neon_vqrdmulhq_s16(Q15 a, Q15 b) {
    constexpr int     kLaneBits = 16;
    constexpr int64_t kRounding = 1 << (kLaneBits - 1);
    constexpr int64_t kHighest  =  (1LL << (kLaneBits - 1)) - 1;
    constexpr int64_t kLowest   = -(1LL << (kLaneBits - 1));

    Q15 out;
    for (int lane = 0; lane < 8; ++lane) {
        const int16_t lhs = a[lane];
        const int16_t rhs = b[lane];

        // Doubling multiply with rounding, keeping the bits above the low kLaneBits.
        const int64_t doubled = 2LL * static_cast<int64_t>(lhs) * static_cast<int64_t>(rhs);
        int64_t high = (doubled + kRounding) >> kLaneBits;

        // Saturate: -32768 * -32768 yields 32768, which does not fit in a signed 16-bit lane.
        if (high > kHighest) {
            high = kHighest;
        } else if (high < kLowest) {
            high = kLowest;
        }
        out[lane] = static_cast<uint16_t>(high);
    }
    return out;
}

#endif  // QMath_DEFINED
math and error rates for lerp Study that compares scaled integer lerp to float lerp. This shows an error rate of 0.38% with a max difference of 1. Study to compare scaled values that would result as the intermediate values of a bilerp. This shows an error rate of 24% with a max difference of 1. Change-Id: Id27b6baa9ce56efedb30f490a3f84ccf808315fa Reviewed-on: https://skia-review.googlesource.com/c/skia/+/447183 Reviewed-by: Brian Osman <brianosman@google.com> 2021-09-09 14:34:13 +00:00			`/*`
			`* Copyright 2021 Google LLC`
			`*`
			`* Use of this source code is governed by a BSD-style license that can be`
			`* found in the LICENSE file.`
			`*/`

			`#ifndef QMath_DEFINED`
			`#define QMath_DEFINED`

			`template <int N, typename T> using V = T __attribute__((ext_vector_type(N)));`

			`#if !defined(__clang__)`
			`static_assert(false, "This only works on clang.");`
			`#endif`

			`#if defined(__SSSE3__)`
			`#include <immintrin.h>`
			`#endif`

			`#if defined(__ARM_NEON)`
			`// From section 5.5.5 of the ARM C Language Extensions (ACLE)`
			`#include <arm_neon.h>`
			`#endif`

			`using Q15 = V<8, uint16_t>;`

			`// A pure C version of the ssse3 intrinsic mm_mulhrs_epi16;`
			`static inline Q15 simulate_ssse3_mm_mulhrs_epi16(Q15 a, Q15 b) {`
			`Q15 result;`
			`auto m = [](int16_t r, int16_t s) {`
			`const int32_t rounding = 1 << 14;`
			`int32_t temp = (int32_t)r * (int32_t)s + rounding;`
			`return (int16_t)(temp >> 15);`
			`};`
			`for (int i = 0; i < 8; i++) {`
			`result[i] = m(a[i], b[i]);`
			`}`
			`return result;`
			`}`

			`// A pure C version of the neon intrinsic vqrdmulhq_s16;`
			`static inline Q15 simulate_neon_vqrdmulhq_s16(Q15 a, Q15 b) {`
			`Q15 result;`
			`const int esize = 16;`
			`auto m = [](int16_t r, int16_t s) {`
			`const int64_t rounding = 1 << (esize - 1);`
			`int64_t product = 2LL * (int64_t)r * (int64_t)s + rounding;`
			`int64_t result = product >> esize;`

			`// Saturate the result`
			`if (int64_t limit = (1LL << (esize - 1)) - 1; result > limit) { result = limit; }`
			`if (int64_t limit = -(1LL << (esize - 1)) ; result < limit) { result = limit; }`
			`return result;`
			`};`
			`for (int i = 0; i < 8; i++) {`
			`result[i] = m(a[i], b[i]);`
			`}`
			`return result;`
			`}`

			`#endif // QMath_DEFINED`