v8/test/cctest/wasm/test-run-wasm-relaxed-simd.cc
Ng Zhi An dd3234bcbb [wasm-relaxed-simd][ia32] Prototype relaxed min and max
Relaxed f32x4 and f64x2 min and max.

These instructions only guarantee results when the inputs are non nans,
and when the inputs are not 0s of opposite signs.

Drive-by rename of instruction codes to be Minps/Maxps/Minpd/Maxpd
since they map down exactly to a single instruction.

ia32 port of c3f346b7ac.

Bug: v8:12284
Change-Id: If64da551524ea8c304e1fa8f9cf4acbd54abfe5c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3251708
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77780}
2021-11-09 00:20:25 +00:00

419 lines
16 KiB
C++

// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <type_traits>
#include "src/base/overflowing-math.h"
#include "src/base/safe_conversions.h"
#include "src/common/globals.h"
#include "src/wasm/compilation-environment.h"
#include "test/cctest/cctest.h"
#include "test/cctest/wasm/wasm-run-utils.h"
#include "test/cctest/wasm/wasm-simd-utils.h"
#include "test/common/wasm/flag-utils.h"
#include "test/common/wasm/wasm-macro-gen.h"
namespace v8 {
namespace internal {
namespace wasm {
namespace test_run_wasm_relaxed_simd {
// Use this for experimental relaxed-simd opcodes.
//
// WASM_RELAXED_SIMD_TEST(Foo) { ...body... } expands to:
//   - a forward declaration of RunWasm_Foo_Impl(TestExecutionTier),
//   - TEST(RunWasm_Foo_turbofan), which returns early when
//     CpuFeatures::SupportsWasmSimd128() is false and otherwise calls the
//     implementation with TestExecutionTier::kTurbofan,
//   - TEST(RunWasm_Foo_interpreter), which calls the implementation with
//     TestExecutionTier::kInterpreter (without the CPU feature check),
//   - the head of the RunWasm_Foo_Impl definition; the braces that follow the
//     macro invocation become its body.
// Both TESTs enter EXPERIMENTAL_FLAG_SCOPE(relaxed_simd) before calling the
// implementation. Inside the body the tier under test is available as
// `execution_tier`.
#define WASM_RELAXED_SIMD_TEST(name) \
void RunWasm_##name##_Impl(TestExecutionTier execution_tier); \
TEST(RunWasm_##name##_turbofan) { \
if (!CpuFeatures::SupportsWasmSimd128()) return; \
EXPERIMENTAL_FLAG_SCOPE(relaxed_simd); \
RunWasm_##name##_Impl(TestExecutionTier::kTurbofan); \
} \
TEST(RunWasm_##name##_interpreter) { \
EXPERIMENTAL_FLAG_SCOPE(relaxed_simd); \
RunWasm_##name##_Impl(TestExecutionTier::kInterpreter); \
} \
void RunWasm_##name##_Impl(TestExecutionTier execution_tier)
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
V8_TARGET_ARCH_PPC64
// Only used for qfma and qfms tests below.
// FMOperation holds the params (a, b, c) for a Multiply-Add or
// Multiply-Subtract operation, and the expected result if the operation was
// fused, rounded only once for the entire operation, or unfused, rounded after
// multiply and again after add/subtract.
// The test tables brace-initialize this struct positionally, so the order of
// the members below is part of its contract.
template <typename T>
struct FMOperation {
// First operand: the addend in a + b * c, the minuend in a - b * c.
const T a;
// The two factors of the product b * c.
const T b;
const T c;
// Expected result when the whole operation is rounded only once.
const T fused_result;
// Expected result when the product is rounded before the add/subtract.
const T unfused_result;
};
// large_n<T> is a magnitude whose square cannot be represented in T, i.e. the
// product large_n * large_n overflows. The qfma/qfms tables use it to tell a
// fused operation (one rounding) apart from an unfused one (two roundings).
// The primary template is only a placeholder; the float and double
// specializations carry the meaningful values.
template <typename T>
constexpr T large_n = static_cast<T>(0);

template <>
constexpr float large_n<float> = 1e20f;

template <>
constexpr double large_n<double> = 1.0e200;
// Test vectors for Fused Multiply-Add, which performs a + b * c.
// Every row lists {a, b, c, fused_result, unfused_result}.
template <typename T>
static constexpr FMOperation<T> qfma_array[]{
    // Small exact values: one or two roundings give the same answer.
    {T(1), T(2), T(3), T(7), T(7)},
    // fused:   a + b * c = -inf + (positive overflow) = -inf
    // unfused: a + b * c = -inf + inf = NaN
    {-std::numeric_limits<T>::infinity(), large_n<T>, large_n<T>,
     -std::numeric_limits<T>::infinity(), std::numeric_limits<T>::quiet_NaN()},
    // fused:   a + b * c = inf + (negative overflow) = inf
    // unfused: a + b * c = inf + -inf = NaN
    {std::numeric_limits<T>::infinity(), -large_n<T>, large_n<T>,
     std::numeric_limits<T>::infinity(), std::numeric_limits<T>::quiet_NaN()},
    // NaN as the first operand: NaN is expected either way.
    {std::numeric_limits<T>::quiet_NaN(), T(2), T(3),
     std::numeric_limits<T>::quiet_NaN(), std::numeric_limits<T>::quiet_NaN()},
    // -NaN as the first operand: NaN is expected either way.
    {-std::numeric_limits<T>::quiet_NaN(), T(2), T(3),
     std::numeric_limits<T>::quiet_NaN(), std::numeric_limits<T>::quiet_NaN()}};
// Returns the qfma test vectors for element type T, i.e. qfma_array<T> wrapped
// by base::ArrayVector, so that tests can walk them with a range-based for.
template <typename T>
static constexpr base::Vector<const FMOperation<T>> qfma_vector() {
return base::ArrayVector(qfma_array<T>);
}
// Test vectors for Fused Multiply-Subtract, which performs a - b * c.
// Every row lists {a, b, c, fused_result, unfused_result}.
template <typename T>
static constexpr FMOperation<T> qfms_array[] = {
    // Small exact values: one or two roundings give the same answer.
    {T(1), T(2), T(3), T(-5), T(-5)},
    // fused:   a - b * c = inf - (positive overflow) = inf
    // unfused: a - b * c = inf - inf = NaN
    {std::numeric_limits<T>::infinity(), large_n<T>, large_n<T>,
     std::numeric_limits<T>::infinity(), std::numeric_limits<T>::quiet_NaN()},
    // fused:   a - b * c = -inf - (negative overflow) = -inf
    // unfused: a - b * c = -inf - -inf = NaN
    {-std::numeric_limits<T>::infinity(), -large_n<T>, large_n<T>,
     -std::numeric_limits<T>::infinity(), std::numeric_limits<T>::quiet_NaN()},
    // NaN as the first operand: NaN is expected either way.
    {std::numeric_limits<T>::quiet_NaN(), T(2), T(3),
     std::numeric_limits<T>::quiet_NaN(), std::numeric_limits<T>::quiet_NaN()},
    // -NaN as the first operand: NaN is expected either way.
    {-std::numeric_limits<T>::quiet_NaN(), T(2), T(3),
     std::numeric_limits<T>::quiet_NaN(), std::numeric_limits<T>::quiet_NaN()}};
// Returns the qfms test vectors for element type T, i.e. qfms_array<T> wrapped
// by base::ArrayVector, so that tests can walk them with a range-based for.
template <typename T>
static constexpr base::Vector<const FMOperation<T>> qfms_vector() {
return base::ArrayVector(qfms_array<T>);
}
// Tells the qfma/qfms tests which expectation to compare against.
// Fused results are only expected on TurboFan or Liftoff (which can fall back
// to TurboFan if FMA is not implemented); on x64 the CPU additionally has to
// support the fma3 feature.
bool ExpectFused(TestExecutionTier tier) {
  const bool is_compiling_tier = tier == TestExecutionTier::kTurbofan ||
                                 tier == TestExecutionTier::kLiftoff;
#ifdef V8_TARGET_ARCH_X64
  return CpuFeatures::IsSupported(FMA3) && is_compiling_tier;
#else
  return is_compiling_tier;
#endif
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
// V8_TARGET_ARCH_PPC64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
V8_TARGET_ARCH_PPC64
WASM_RELAXED_SIMD_TEST(F32x4Qfma) {
  WasmRunner<int32_t, float, float, float> r(execution_tier);
  // Global 0 receives the v128 result so that every lane can be inspected.
  float* result = r.builder().AddGlobal<float>(kWasmS128);
  // Function body: splat each of the three scalar parameters, apply f32x4
  // qfma to the splats and store the outcome in global 0.
  const byte param_a = 0;
  const byte param_b = 1;
  const byte param_c = 2;
  BUILD(r,
        WASM_GLOBAL_SET(0, WASM_SIMD_F32x4_QFMA(
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(param_a)),
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(param_b)),
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(param_c)))),
        WASM_ONE);

  // Whether one or two roundings are expected depends on the tier (and CPU).
  const bool fused = ExpectFused(execution_tier);
  for (const FMOperation<float>& op : qfma_vector<float>()) {
    r.Call(op.a, op.b, op.c);
    const float expected = fused ? op.fused_result : op.unfused_result;
    for (int lane = 0; lane < 4; ++lane) {
      const float actual = LANE(result, lane);
      CheckFloatResult(op.a, op.b, expected, actual, true /* exact */);
    }
  }
}
WASM_RELAXED_SIMD_TEST(F32x4Qfms) {
  WasmRunner<int32_t, float, float, float> r(execution_tier);
  // Global 0 receives the v128 result so that every lane can be inspected.
  float* result = r.builder().AddGlobal<float>(kWasmS128);
  // Function body: splat each of the three scalar parameters, apply f32x4
  // qfms to the splats and store the outcome in global 0.
  const byte param_a = 0;
  const byte param_b = 1;
  const byte param_c = 2;
  BUILD(r,
        WASM_GLOBAL_SET(0, WASM_SIMD_F32x4_QFMS(
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(param_a)),
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(param_b)),
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(param_c)))),
        WASM_ONE);

  // Whether one or two roundings are expected depends on the tier (and CPU).
  const bool fused = ExpectFused(execution_tier);
  for (const FMOperation<float>& op : qfms_vector<float>()) {
    r.Call(op.a, op.b, op.c);
    const float expected = fused ? op.fused_result : op.unfused_result;
    for (int lane = 0; lane < 4; ++lane) {
      const float actual = LANE(result, lane);
      CheckFloatResult(op.a, op.b, expected, actual, true /* exact */);
    }
  }
}
WASM_RELAXED_SIMD_TEST(F64x2Qfma) {
  WasmRunner<int32_t, double, double, double> r(execution_tier);
  // Global 0 receives the v128 result so that every lane can be inspected.
  double* result = r.builder().AddGlobal<double>(kWasmS128);
  // Function body: splat each of the three scalar parameters, apply f64x2
  // qfma to the splats and store the outcome in global 0.
  const byte param_a = 0;
  const byte param_b = 1;
  const byte param_c = 2;
  BUILD(r,
        WASM_GLOBAL_SET(0, WASM_SIMD_F64x2_QFMA(
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(param_a)),
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(param_b)),
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(param_c)))),
        WASM_ONE);

  // Whether one or two roundings are expected depends on the tier (and CPU).
  const bool fused = ExpectFused(execution_tier);
  for (const FMOperation<double>& op : qfma_vector<double>()) {
    r.Call(op.a, op.b, op.c);
    const double expected = fused ? op.fused_result : op.unfused_result;
    for (int lane = 0; lane < 2; ++lane) {
      const double actual = LANE(result, lane);
      CheckDoubleResult(op.a, op.b, expected, actual, true /* exact */);
    }
  }
}
WASM_RELAXED_SIMD_TEST(F64x2Qfms) {
  WasmRunner<int32_t, double, double, double> r(execution_tier);
  // Global 0 receives the v128 result so that every lane can be inspected.
  double* result = r.builder().AddGlobal<double>(kWasmS128);
  // Function body: splat each of the three scalar parameters, apply f64x2
  // qfms to the splats and store the outcome in global 0.
  const byte param_a = 0;
  const byte param_b = 1;
  const byte param_c = 2;
  BUILD(r,
        WASM_GLOBAL_SET(0, WASM_SIMD_F64x2_QFMS(
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(param_a)),
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(param_b)),
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(param_c)))),
        WASM_ONE);

  // Whether one or two roundings are expected depends on the tier (and CPU).
  const bool fused = ExpectFused(execution_tier);
  for (const FMOperation<double>& op : qfms_vector<double>()) {
    r.Call(op.a, op.b, op.c);
    const double expected = fused ? op.fused_result : op.unfused_result;
    for (int lane = 0; lane < 2; ++lane) {
      const double actual = LANE(result, lane);
      CheckDoubleResult(op.a, op.b, expected, actual, true /* exact */);
    }
  }
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
// V8_TARGET_ARCH_PPC64
// Runs the shared F32x4 unary-op harness on kExprF32x4RecipApprox, passing
// base::Recip as the third argument (presumably the scalar reference; confirm
// against RunF32x4UnOpTest). The opcode is an approximation, so a non-exact
// comparison is requested.
WASM_RELAXED_SIMD_TEST(F32x4RecipApprox) {
RunF32x4UnOpTest(execution_tier, kExprF32x4RecipApprox, base::Recip,
false /* !exact */);
}
// Runs the shared F32x4 unary-op harness on kExprF32x4RecipSqrtApprox, passing
// base::RecipSqrt as the third argument (presumably the scalar reference;
// confirm against RunF32x4UnOpTest). The opcode is an approximation, so a
// non-exact comparison is requested.
WASM_RELAXED_SIMD_TEST(F32x4RecipSqrtApprox) {
RunF32x4UnOpTest(execution_tier, kExprF32x4RecipSqrtApprox, base::RecipSqrt,
false /* !exact */);
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
namespace {
// Helper to convert an array of T into an array of uint8_t to be used a v128
// constants.
template <typename T, size_t N = kSimd128Size / sizeof(T)>
std::array<uint8_t, kSimd128Size> as_uint8(const T* src) {
std::array<uint8_t, kSimd128Size> arr;
for (size_t i = 0; i < N; i++) {
WriteLittleEndianValue<T>(bit_cast<T*>(&arr[0]) + i, src[i]);
}
return arr;
}
// Builds a function that applies `laneselect` to three v128 constants made
// from v1, v2 and s (in that operand order), stores the result in global 0,
// and then checks each of the kElems result lanes against `expected`.
template <typename T, int kElems>
void RelaxedLaneSelectTest(TestExecutionTier execution_tier, const T v1[kElems],
                           const T v2[kElems], const T s[kElems],
                           const T expected[kElems], WasmOpcode laneselect) {
  // Serialize the operands into the byte arrays WASM_SIMD_CONSTANT consumes.
  const auto first_bytes = as_uint8<T>(v1);
  const auto second_bytes = as_uint8<T>(v2);
  const auto select_bytes = as_uint8<T>(s);

  WasmRunner<int32_t> r(execution_tier);
  T* result = r.builder().AddGlobal<T>(kWasmS128);
  BUILD(r,
        WASM_GLOBAL_SET(
            0, WASM_SIMD_OPN(laneselect, WASM_SIMD_CONSTANT(first_bytes),
                             WASM_SIMD_CONSTANT(second_bytes),
                             WASM_SIMD_CONSTANT(select_bytes))),
        WASM_ONE);

  CHECK_EQ(1, r.Call());
  for (int lane = 0; lane < kElems; ++lane) {
    CHECK_EQ(expected[lane], LANE(result, lane));
  }
}
} // namespace
WASM_RELAXED_SIMD_TEST(I8x16RelaxedLaneSelect) {
  constexpr int kLanes = 16;
  // Lanes whose mask is all ones are expected to come from the first operand,
  // lanes whose mask is zero from the second one.
  constexpr uint8_t kFirst[kLanes] = {0, 1, 2,  3,  4,  5,  6,  7,
                                      8, 9, 10, 11, 12, 13, 14, 15};
  constexpr uint8_t kSecond[kLanes] = {16, 17, 18, 19, 20, 21, 22, 23,
                                       24, 25, 26, 27, 28, 29, 30, 31};
  constexpr uint8_t kMask[kLanes] = {0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF,
                                     0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF};
  constexpr uint8_t kExpected[kLanes] = {16, 1, 18, 3,  20, 5,  22, 7,
                                         24, 9, 26, 11, 28, 13, 30, 15};
  RelaxedLaneSelectTest<uint8_t, kLanes>(execution_tier, kFirst, kSecond,
                                         kMask, kExpected,
                                         kExprI8x16RelaxedLaneSelect);
}
WASM_RELAXED_SIMD_TEST(I16x8RelaxedLaneSelect) {
  constexpr int kLanes = 8;
  // Lanes whose mask is all ones are expected to come from the first operand,
  // lanes whose mask is zero from the second one.
  constexpr uint16_t kFirst[kLanes] = {0, 1, 2, 3, 4, 5, 6, 7};
  constexpr uint16_t kSecond[kLanes] = {8, 9, 10, 11, 12, 13, 14, 15};
  constexpr uint16_t kMask[kLanes] = {0, 0xFFFF, 0, 0xFFFF,
                                      0, 0xFFFF, 0, 0xFFFF};
  constexpr uint16_t kExpected[kLanes] = {8, 1, 10, 3, 12, 5, 14, 7};
  RelaxedLaneSelectTest<uint16_t, kLanes>(execution_tier, kFirst, kSecond,
                                          kMask, kExpected,
                                          kExprI16x8RelaxedLaneSelect);
}
WASM_RELAXED_SIMD_TEST(I32x4RelaxedLaneSelect) {
  constexpr int kLanes = 4;
  // Lanes whose mask is all ones are expected to come from the first operand,
  // lanes whose mask is zero from the second one.
  constexpr uint32_t kFirst[kLanes] = {0, 1, 2, 3};
  constexpr uint32_t kSecond[kLanes] = {4, 5, 6, 7};
  constexpr uint32_t kMask[kLanes] = {0, 0xFFFF'FFFF, 0, 0xFFFF'FFFF};
  constexpr uint32_t kExpected[kLanes] = {4, 1, 6, 3};
  RelaxedLaneSelectTest<uint32_t, kLanes>(execution_tier, kFirst, kSecond,
                                          kMask, kExpected,
                                          kExprI32x4RelaxedLaneSelect);
}
WASM_RELAXED_SIMD_TEST(I64x2RelaxedLaneSelect) {
  constexpr int kLanes = 2;
  // Lanes whose mask is all ones are expected to come from the first operand,
  // lanes whose mask is zero from the second one.
  constexpr uint64_t kFirst[kLanes] = {0, 1};
  constexpr uint64_t kSecond[kLanes] = {2, 3};
  constexpr uint64_t kMask[kLanes] = {0, 0xFFFF'FFFF'FFFF'FFFF};
  constexpr uint64_t kExpected[kLanes] = {2, 1};
  RelaxedLaneSelectTest<uint64_t, kLanes>(execution_tier, kFirst, kSecond,
                                          kMask, kExpected,
                                          kExprI64x2RelaxedLaneSelect);
}
// Runs the shared F32x4 binary-op harness on kExprF32x4RelaxedMin, passing
// Minimum as the third argument (presumably the scalar reference; confirm
// against RunF32x4BinOpTest).
WASM_RELAXED_SIMD_TEST(F32x4RelaxedMin) {
RunF32x4BinOpTest(execution_tier, kExprF32x4RelaxedMin, Minimum);
}
// Runs the shared F32x4 binary-op harness on kExprF32x4RelaxedMax, passing
// Maximum as the third argument (presumably the scalar reference; confirm
// against RunF32x4BinOpTest).
WASM_RELAXED_SIMD_TEST(F32x4RelaxedMax) {
RunF32x4BinOpTest(execution_tier, kExprF32x4RelaxedMax, Maximum);
}
// Runs the shared F64x2 binary-op harness on kExprF64x2RelaxedMin, passing
// Minimum as the third argument (presumably the scalar reference; confirm
// against RunF64x2BinOpTest).
WASM_RELAXED_SIMD_TEST(F64x2RelaxedMin) {
RunF64x2BinOpTest(execution_tier, kExprF64x2RelaxedMin, Minimum);
}
// Runs the shared F64x2 binary-op harness on kExprF64x2RelaxedMax, passing
// Maximum as the third argument (presumably the scalar reference; confirm
// against RunF64x2BinOpTest).
WASM_RELAXED_SIMD_TEST(F64x2RelaxedMax) {
RunF64x2BinOpTest(execution_tier, kExprF64x2RelaxedMax, Maximum);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
  // Output is only defined for indices in the range [0,15].
  WasmRunner<int32_t> r(execution_tier);
  constexpr int kLaneCount = kSimd128Size / sizeof(uint8_t);
  // Global 0 holds the result, global 1 the bytes to swizzle and global 2 the
  // lane indices.
  uint8_t* result = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* input = r.builder().AddGlobal<uint8_t>(kWasmS128);
  uint8_t* selectors = r.builder().AddGlobal<uint8_t>(kWasmS128);
  BUILD(r,
        WASM_GLOBAL_SET(
            0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
                               WASM_GLOBAL_GET(2))),
        WASM_ONE);

  // Both the input and the indices count down from the last lane to 0, so
  // picking input[indices[i]] is expected to produce i in lane i.
  for (int i = 0; i < kLaneCount; ++i) {
    const int reversed = kLaneCount - i - 1;
    LANE(input, i) = reversed;
    LANE(selectors, i) = reversed;
  }

  CHECK_EQ(1, r.Call());
  for (int i = 0; i < kLaneCount; ++i) {
    CHECK_EQ(LANE(result, i), i);
  }
}
namespace {
// For relaxed trunc instructions, don't test out of range values.
// A value is skipped when it is NaN, when it does not fit into IntType, or
// when PlatformCanRepresent rejects it.
// FloatType comes later so caller can rely on template argument deduction and
// just pass IntType.
template <typename IntType, typename FloatType>
std::enable_if_t<std::is_floating_point<FloatType>::value, bool>
ShouldSkipTestingConstant(FloatType x) {
  if (std::isnan(x)) return true;
  if (!base::IsValueInRangeForNumericType<IntType>(x)) return true;
  return !PlatformCanRepresent(x);
}
// Splats every usable FloatType test constant with `splat_op`, converts the
// vector with `trunc_op`, and compares the result lanes with the value obtained
// from base::checked_cast<IntType>. Constants rejected by
// ShouldSkipTestingConstant are not tested.
template <typename IntType, typename FloatType>
void IntRelaxedTruncFloatTest(TestExecutionTier execution_tier,
                              WasmOpcode trunc_op, WasmOpcode splat_op) {
  WasmRunner<int, FloatType> r(execution_tier);
  IntType* result = r.builder().template AddGlobal<IntType>(kWasmS128);
  // Only as many result lanes as the input vector has lanes are compared.
  constexpr int kInputLanes = kSimd128Size / sizeof(FloatType);

  // global[0] = trunc(splat(local[0])).
  BUILD(r,
        WASM_GLOBAL_SET(
            0, WASM_SIMD_UNOP(trunc_op,
                              WASM_SIMD_UNOP(splat_op, WASM_LOCAL_GET(0)))),
        WASM_ONE);

  for (FloatType input : compiler::ValueHelper::GetVector<FloatType>()) {
    if (ShouldSkipTestingConstant<IntType>(input)) continue;
    CHECK_EQ(1, r.Call(input));
    IntType expected = base::checked_cast<IntType>(input);
    for (int lane = 0; lane < kInputLanes; ++lane) {
      CHECK_EQ(expected, LANE(result, lane));
    }
  }
}
} // namespace
// Signed relaxed truncation of f64x2 to i32x4: inputs are splatted with
// kExprF64x2Splat and results are read as int32_t. The helper compares
// kSimd128Size / sizeof(double) result lanes.
WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF64x2SZero) {
IntRelaxedTruncFloatTest<int32_t, double>(
execution_tier, kExprI32x4RelaxedTruncF64x2SZero, kExprF64x2Splat);
}
// Unsigned relaxed truncation of f64x2 to i32x4: inputs are splatted with
// kExprF64x2Splat and results are read as uint32_t. The helper compares
// kSimd128Size / sizeof(double) result lanes.
WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF64x2UZero) {
IntRelaxedTruncFloatTest<uint32_t, double>(
execution_tier, kExprI32x4RelaxedTruncF64x2UZero, kExprF64x2Splat);
}
// Signed relaxed truncation of f32x4 to i32x4: inputs are splatted with
// kExprF32x4Splat and results are read as int32_t. The helper compares
// kSimd128Size / sizeof(float) result lanes.
WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF32x4S) {
IntRelaxedTruncFloatTest<int32_t, float>(
execution_tier, kExprI32x4RelaxedTruncF32x4S, kExprF32x4Splat);
}
// Unsigned relaxed truncation of f32x4 to i32x4: inputs are splatted with
// kExprF32x4Splat and results are read as uint32_t. The helper compares
// kSimd128Size / sizeof(float) result lanes.
WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF32x4U) {
IntRelaxedTruncFloatTest<uint32_t, float>(
execution_tier, kExprI32x4RelaxedTruncF32x4U, kExprF32x4Splat);
}
#endif // V8_TARGET_ARCH_X64
#undef WASM_RELAXED_SIMD_TEST
} // namespace test_run_wasm_relaxed_simd
} // namespace wasm
} // namespace internal
} // namespace v8