// Copyright 2021 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "src/base/overflowing-math.h" #include "src/wasm/compilation-environment.h" #include "test/cctest/cctest.h" #include "test/cctest/wasm/wasm-run-utils.h" #include "test/cctest/wasm/wasm-simd-utils.h" #include "test/common/wasm/flag-utils.h" #include "test/common/wasm/wasm-macro-gen.h" namespace v8 { namespace internal { namespace wasm { namespace test_run_wasm_relaxed_simd { // Use this for experimental relaxed-simd opcodes. #define WASM_RELAXED_SIMD_TEST(name) \ void RunWasm_##name##_Impl(TestExecutionTier execution_tier); \ TEST(RunWasm_##name##_turbofan) { \ if (!CpuFeatures::SupportsWasmSimd128()) return; \ EXPERIMENTAL_FLAG_SCOPE(relaxed_simd); \ RunWasm_##name##_Impl(TestExecutionTier::kTurbofan); \ } \ TEST(RunWasm_##name##_interpreter) { \ EXPERIMENTAL_FLAG_SCOPE(relaxed_simd); \ RunWasm_##name##_Impl(TestExecutionTier::kInterpreter); \ } \ void RunWasm_##name##_Impl(TestExecutionTier execution_tier) #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \ V8_TARGET_ARCH_PPC64 // Only used for qfma and qfms tests below. // FMOperation holds the params (a, b, c) for a Multiply-Add or // Multiply-Subtract operation, and the expected result if the operation was // fused, rounded only once for the entire operation, or unfused, rounded after // multiply and again after add/subtract. template struct FMOperation { const T a; const T b; const T c; const T fused_result; const T unfused_result; }; // large_n is large number that overflows T when multiplied by itself, this is a // useful constant to test fused/unfused behavior. template constexpr T large_n = T(0); template <> constexpr double large_n = 1e200; template <> constexpr float large_n = 1e20; // Fused Multiply-Add performs a + b * c. template static constexpr FMOperation qfma_array[] = { {1.0f, 2.0f, 3.0f, 7.0f, 7.0f}, // fused: a + b * c = -inf + (positive overflow) = -inf // unfused: a + b * c = -inf + inf = NaN {-std::numeric_limits::infinity(), large_n, large_n, -std::numeric_limits::infinity(), std::numeric_limits::quiet_NaN()}, // fused: a + b * c = inf + (negative overflow) = inf // unfused: a + b * c = inf + -inf = NaN {std::numeric_limits::infinity(), -large_n, large_n, std::numeric_limits::infinity(), std::numeric_limits::quiet_NaN()}, // NaN {std::numeric_limits::quiet_NaN(), 2.0f, 3.0f, std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()}, // -NaN {-std::numeric_limits::quiet_NaN(), 2.0f, 3.0f, std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()}}; template static constexpr base::Vector> qfma_vector() { return base::ArrayVector(qfma_array); } // Fused Multiply-Subtract performs a - b * c. template static constexpr FMOperation qfms_array[]{ {1.0f, 2.0f, 3.0f, -5.0f, -5.0f}, // fused: a - b * c = inf - (positive overflow) = inf // unfused: a - b * c = inf - inf = NaN {std::numeric_limits::infinity(), large_n, large_n, std::numeric_limits::infinity(), std::numeric_limits::quiet_NaN()}, // fused: a - b * c = -inf - (negative overflow) = -inf // unfused: a - b * c = -inf - -inf = NaN {-std::numeric_limits::infinity(), -large_n, large_n, -std::numeric_limits::infinity(), std::numeric_limits::quiet_NaN()}, // NaN {std::numeric_limits::quiet_NaN(), 2.0f, 3.0f, std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()}, // -NaN {-std::numeric_limits::quiet_NaN(), 2.0f, 3.0f, std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()}}; template static constexpr base::Vector> qfms_vector() { return base::ArrayVector(qfms_array); } // Fused results only when fma3 feature is enabled, and running on TurboFan or // Liftoff (which can fall back to TurboFan if FMA is not implemented). bool ExpectFused(TestExecutionTier tier) { #ifdef V8_TARGET_ARCH_X64 return CpuFeatures::IsSupported(FMA3) && (tier == TestExecutionTier::kTurbofan || tier == TestExecutionTier::kLiftoff); #else return (tier == TestExecutionTier::kTurbofan || tier == TestExecutionTier::kLiftoff); #endif } #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || // V8_TARGET_ARCH_PPC64 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \ V8_TARGET_ARCH_PPC64 WASM_RELAXED_SIMD_TEST(F32x4Qfma) { WasmRunner r(execution_tier); // Set up global to hold mask output. float* g = r.builder().AddGlobal(kWasmS128); // Build fn to splat test values, perform compare op, and write the result. byte value1 = 0, value2 = 1, value3 = 2; BUILD(r, WASM_GLOBAL_SET(0, WASM_SIMD_F32x4_QFMA( WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value1)), WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value2)), WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value3)))), WASM_ONE); for (FMOperation x : qfma_vector()) { r.Call(x.a, x.b, x.c); float expected = ExpectFused(execution_tier) ? x.fused_result : x.unfused_result; for (int i = 0; i < 4; i++) { float actual = LANE(g, i); CheckFloatResult(x.a, x.b, expected, actual, true /* exact */); } } } WASM_RELAXED_SIMD_TEST(F32x4Qfms) { WasmRunner r(execution_tier); // Set up global to hold mask output. float* g = r.builder().AddGlobal(kWasmS128); // Build fn to splat test values, perform compare op, and write the result. byte value1 = 0, value2 = 1, value3 = 2; BUILD(r, WASM_GLOBAL_SET(0, WASM_SIMD_F32x4_QFMS( WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value1)), WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value2)), WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value3)))), WASM_ONE); for (FMOperation x : qfms_vector()) { r.Call(x.a, x.b, x.c); float expected = ExpectFused(execution_tier) ? x.fused_result : x.unfused_result; for (int i = 0; i < 4; i++) { float actual = LANE(g, i); CheckFloatResult(x.a, x.b, expected, actual, true /* exact */); } } } WASM_RELAXED_SIMD_TEST(F64x2Qfma) { WasmRunner r(execution_tier); // Set up global to hold mask output. double* g = r.builder().AddGlobal(kWasmS128); // Build fn to splat test values, perform compare op, and write the result. byte value1 = 0, value2 = 1, value3 = 2; BUILD(r, WASM_GLOBAL_SET(0, WASM_SIMD_F64x2_QFMA( WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value1)), WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value2)), WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value3)))), WASM_ONE); for (FMOperation x : qfma_vector()) { r.Call(x.a, x.b, x.c); double expected = ExpectFused(execution_tier) ? x.fused_result : x.unfused_result; for (int i = 0; i < 2; i++) { double actual = LANE(g, i); CheckDoubleResult(x.a, x.b, expected, actual, true /* exact */); } } } WASM_RELAXED_SIMD_TEST(F64x2Qfms) { WasmRunner r(execution_tier); // Set up global to hold mask output. double* g = r.builder().AddGlobal(kWasmS128); // Build fn to splat test values, perform compare op, and write the result. byte value1 = 0, value2 = 1, value3 = 2; BUILD(r, WASM_GLOBAL_SET(0, WASM_SIMD_F64x2_QFMS( WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value1)), WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value2)), WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value3)))), WASM_ONE); for (FMOperation x : qfms_vector()) { r.Call(x.a, x.b, x.c); double expected = ExpectFused(execution_tier) ? x.fused_result : x.unfused_result; for (int i = 0; i < 2; i++) { double actual = LANE(g, i); CheckDoubleResult(x.a, x.b, expected, actual, true /* exact */); } } } #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || // V8_TARGET_ARCH_PPC64 WASM_RELAXED_SIMD_TEST(F32x4RecipApprox) { RunF32x4UnOpTest(execution_tier, kExprF32x4RecipApprox, base::Recip, false /* !exact */); } WASM_RELAXED_SIMD_TEST(F32x4RecipSqrtApprox) { RunF32x4UnOpTest(execution_tier, kExprF32x4RecipSqrtApprox, base::RecipSqrt, false /* !exact */); } #if V8_TARGET_ARCH_X64 WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) { // Output is only defined for indices in the range [0,15]. WasmRunner r(execution_tier); static const int kElems = kSimd128Size / sizeof(uint8_t); uint8_t* dst = r.builder().AddGlobal(kWasmS128); uint8_t* src = r.builder().AddGlobal(kWasmS128); uint8_t* indices = r.builder().AddGlobal(kWasmS128); BUILD(r, WASM_GLOBAL_SET( 0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1), WASM_GLOBAL_GET(2))), WASM_ONE); for (int i = 0; i < kElems; i++) { LANE(src, i) = kElems - i - 1; LANE(indices, i) = kElems - i - 1; } CHECK_EQ(1, r.Call()); for (int i = 0; i < kElems; i++) { CHECK_EQ(LANE(dst, i), i); } } #endif // V8_TARGET_ARCH_X64 #undef WASM_RELAXED_SIMD_TEST } // namespace test_run_wasm_relaxed_simd } // namespace wasm } // namespace internal } // namespace v8