/* * Copyright 2021 Google LLC * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ #ifndef QMath_DEFINED #define QMath_DEFINED template using V = T __attribute__((ext_vector_type(N))); #if !defined(__clang__) static_assert(false, "This only works on clang."); #endif #if defined(__SSSE3__) #include #endif #if defined(__ARM_NEON) // From section 5.5.5 of the ARM C Language Extensions (ACLE) #include #endif using Q15 = V<8, uint16_t>; // A pure C version of the ssse3 intrinsic mm_mulhrs_epi16; static inline Q15 simulate_ssse3_mm_mulhrs_epi16(Q15 a, Q15 b) { Q15 result; auto m = [](int16_t r, int16_t s) { const int32_t rounding = 1 << 14; int32_t temp = (int32_t)r * (int32_t)s + rounding; return (int16_t)(temp >> 15); }; for (int i = 0; i < 8; i++) { result[i] = m(a[i], b[i]); } return result; } // A pure C version of the neon intrinsic vqrdmulhq_s16; static inline Q15 simulate_neon_vqrdmulhq_s16(Q15 a, Q15 b) { Q15 result; const int esize = 16; auto m = [](int16_t r, int16_t s) { const int64_t rounding = 1 << (esize - 1); int64_t product = 2LL * (int64_t)r * (int64_t)s + rounding; int64_t result = product >> esize; // Saturate the result if (int64_t limit = (1LL << (esize - 1)) - 1; result > limit) { result = limit; } if (int64_t limit = -(1LL << (esize - 1)) ; result < limit) { result = limit; } return result; }; for (int i = 0; i < 8; i++) { result[i] = m(a[i], b[i]); } return result; } #endif // QMath_DEFINED