[wasm-simd] Fix unsigned narrow instructions

These instructions should always treat their inputs as signed and
saturate to the unsigned min/max values.

E.g. given -1, the result should saturate to 0, not wrap to the unsigned max.
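
For reference, a minimal scalar sketch of the intended per-lane
behavior (the narrow_u16 helper here is hypothetical, not part of this
change):

  #include <cstdint>

  // Per-lane model of i16x8.narrow_i32x4_u: the input is read as
  // signed and the result saturates to the unsigned range [0, 0xFFFF].
  uint16_t narrow_u16(int32_t v) {
    if (v < 0) return 0;            // -1 saturates to 0, not 0xFFFF
    if (v > 0xFFFF) return 0xFFFF;  // values above the unsigned max clamp down
    return static_cast<uint16_t>(v);
  }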

The spec text,
https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#integer-to-integer-narrowing,
has been updated to describe this.

The changes here include codegen changes for ia32, x64, arm, and arm64,
along with changes to the arm simulator, assembler, and disassembler to
handle the case of treating the input as signed and narrowing to
unsigned. The vqmovn instruction can already handle this case (as
vqmovun), but our assembler wasn't allowing callers to specify it.
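
To illustrate the new assembler API (register choices are arbitrary;
the expected disassembly follows the updated tests below):

  __ vqmovn(NeonS16, NeonS16, d0, q0);  // vqmovn.s32  d0, q0 (signed -> signed)
  __ vqmovn(NeonU16, NeonU16, d0, q0);  // vqmovn.u32  d0, q0 (unsigned -> unsigned)
  __ vqmovn(NeonU16, NeonS16, d0, q0);  // vqmovun.s32 d0, q0 (signed -> unsigned, the new case)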

The interpreter and scalar lowering are also fixed with this change.

Bug: v8:9729
Change-Id: I6f72baa825f59037f7754485df6a2964af59fe31
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1879423
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65051}
Ng Zhi An 2019-10-28 11:05:16 -07:00 committed by Commit Bot
parent d30ec8566b
commit e927764216
13 changed files with 97 additions and 119 deletions


@@ -3690,17 +3690,19 @@ void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
0xA * B8 | m * B5 | B4 | vm);
}
void Assembler::vqmovn(NeonDataType dt, DwVfpRegister dst, QwNeonRegister src) {
void Assembler::vqmovn(NeonDataType dst_dt, NeonDataType src_dt,
DwVfpRegister dst, QwNeonRegister src) {
// Instruction details available in ARM DDI 0406C.b, A8.8.1004.
// vqmovn.<type><size> Dd, Qm. ARM vector narrowing move with saturation.
// vqmovun.<type><size> Dd, Qm. Same as above, but produces unsigned results.
DCHECK(IsEnabled(NEON));
DCHECK_IMPLIES(NeonU(src_dt), NeonU(dst_dt));
int vd, d;
dst.split_code(&vd, &d);
int vm, m;
src.split_code(&vm, &m);
int size = NeonSz(dt);
int u = NeonU(dt);
int op = u != 0 ? 3 : 2;
int size = NeonSz(dst_dt);
int op = NeonU(src_dt) ? 0b11 : NeonU(dst_dt) ? 0b01 : 0b10;
emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
0x2 * B8 | op * B6 | m * B5 | vm);
}
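
The nested ternary packs the variant into bits 7:6 of the encoding;
spelled out as an equivalent sketch (not the committed code):

int OpField(bool src_unsigned, bool dst_unsigned) {
  if (src_unsigned) return 0b11;  // vqmovn.u<size>: unsigned -> unsigned
  if (dst_unsigned) return 0b01;  // vqmovun.s<size>: signed -> unsigned
  return 0b10;                    // vqmovn.s<size>: signed -> signed
}

An unsigned source with a signed destination has no encoding, hence the
DCHECK_IMPLIES above.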


@@ -843,8 +843,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
const NeonMemOperand& dst);
// dt represents the narrower type
void vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src);
// dt represents the narrower type.
void vqmovn(NeonDataType dt, DwVfpRegister dst, QwNeonRegister src);
// dst_dt represents the narrower type, src_dt represents the src type.
void vqmovn(NeonDataType dst_dt, NeonDataType src_dt, DwVfpRegister dst,
QwNeonRegister src);
// Only unconditional core <-> scalar moves are currently supported.
void vmov(NeonDataType dt, DwVfpRegister dst, int index, Register src);


@@ -459,20 +459,20 @@ void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
DCHECK_EQ(LeaveCC, i.OutputSBit()); \
} while (0)
#define ASSEMBLE_NEON_NARROWING_OP(dt) \
#define ASSEMBLE_NEON_NARROWING_OP(dt, sdt) \
do { \
Simd128Register dst = i.OutputSimd128Register(), \
src0 = i.InputSimd128Register(0), \
src1 = i.InputSimd128Register(1); \
if (dst == src0 && dst == src1) { \
__ vqmovn(dt, dst.low(), src0); \
__ vqmovn(dt, sdt, dst.low(), src0); \
__ vmov(dst.high(), dst.low()); \
} else if (dst == src0) { \
__ vqmovn(dt, dst.low(), src0); \
__ vqmovn(dt, dst.high(), src1); \
__ vqmovn(dt, sdt, dst.low(), src0); \
__ vqmovn(dt, sdt, dst.high(), src1); \
} else { \
__ vqmovn(dt, dst.high(), src1); \
__ vqmovn(dt, dst.low(), src0); \
__ vqmovn(dt, sdt, dst.high(), src1); \
__ vqmovn(dt, sdt, dst.low(), src0); \
} \
} while (0)
@@ -2259,7 +2259,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI16x8SConvertI32x4:
ASSEMBLE_NEON_NARROWING_OP(NeonS16);
ASSEMBLE_NEON_NARROWING_OP(NeonS16, NeonS16);
break;
case kArmI16x8Add: {
__ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
@@ -2343,7 +2343,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI16x8UConvertI32x4:
ASSEMBLE_NEON_NARROWING_OP(NeonU16);
ASSEMBLE_NEON_NARROWING_OP(NeonU16, NeonS16);
break;
case kArmI16x8AddSaturateU: {
__ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
@@ -2415,7 +2415,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI8x16SConvertI16x8:
ASSEMBLE_NEON_NARROWING_OP(NeonS8);
ASSEMBLE_NEON_NARROWING_OP(NeonS8, NeonS8);
break;
case kArmI8x16Add: {
__ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
@@ -2485,7 +2485,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kArmI8x16UConvertI16x8:
ASSEMBLE_NEON_NARROWING_OP(NeonU8);
ASSEMBLE_NEON_NARROWING_OP(NeonU8, NeonS8);
break;
case kArmI8x16AddSaturateU: {
__ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),


@@ -2245,8 +2245,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mov(temp, src1.V4S());
src1 = temp;
}
__ Uqxtn(dst.V4H(), src0.V4S());
__ Uqxtn2(dst.V8H(), src1.V4S());
__ Sqxtun(dst.V4H(), src0.V4S());
__ Sqxtun2(dst.V8H(), src1.V4S());
break;
}
SIMD_BINOP_CASE(kArm64I16x8AddSaturateU, Uqadd, 8H);
@@ -2347,8 +2347,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mov(temp, src1.V8H());
src1 = temp;
}
__ Uqxtn(dst.V8B(), src0.V8H());
__ Uqxtn2(dst.V16B(), src1.V8H());
__ Sqxtun(dst.V8B(), src0.V8H());
__ Sqxtun2(dst.V16B(), src1.V8H());
break;
}
SIMD_BINOP_CASE(kArm64I8x16AddSaturateU, Uqadd, 16B);
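
Uqxtn saturates an unsigned source to an unsigned result, so a lane
holding -1 was read as 0xFFFFFFFF and wrongly saturated to the maximum;
Sqxtun reads the source as signed. A per-lane model of the two
instructions (a sketch, not V8 code):

uint16_t UqxtnLane(uint32_t v) {  // unsigned -> unsigned
  return v > 0xFFFF ? 0xFFFF : static_cast<uint16_t>(v);
}
uint16_t SqxtunLane(int32_t v) {  // signed -> unsigned, as now specified
  if (v < 0) return 0;
  return v > 0xFFFF ? 0xFFFF : static_cast<uint16_t>(v);
}
// For a lane of -1: UqxtnLane yields 0xFFFF (wrong), SqxtunLane yields 0.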


@@ -3063,25 +3063,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kSSEI16x8UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFFFFFF
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psrld(kScratchDoubleReg, 1);
__ pminud(dst, kScratchDoubleReg);
__ pminud(kScratchDoubleReg, i.InputOperand(1));
__ packusdw(dst, kScratchDoubleReg);
__ packusdw(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXI16x8UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFFFFFF
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
__ vpminud(dst, kScratchDoubleReg, i.InputSimd128Register(0));
__ vpminud(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
__ vpackusdw(dst, dst, kScratchDoubleReg);
__ vpackusdw(dst, dst, i.InputOperand(1));
break;
}
case kSSEI16x8AddSaturateU: {
@@ -3481,24 +3470,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFF
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
__ psrlw(kScratchDoubleReg, 1);
__ pminuw(dst, kScratchDoubleReg);
__ pminuw(kScratchDoubleReg, i.InputOperand(1));
__ packuswb(dst, kScratchDoubleReg);
__ packuswb(dst, i.InputOperand(1));
break;
}
case kAVXI8x16UConvertI16x8: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFF
__ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 1);
__ vpminuw(dst, kScratchDoubleReg, i.InputSimd128Register(0));
__ vpminuw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
__ vpackuswb(dst, dst, kScratchDoubleReg);
__ vpackuswb(dst, dst, i.InputOperand(1));
break;
}
case kSSEI8x16AddSaturateU: {
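
The deleted pcmpeqd/psrld/pminud sequences pre-clamped negative lanes
(to 0x7FFFFFFF or 0x7FFF) because the old semantics treated inputs as
unsigned. They are no longer needed: packusdw and packuswb themselves
read their inputs as signed and saturate to the unsigned range, which
is exactly the newly specified behavior (the identical x64 sequences
below are removed for the same reason). A standalone demonstration
with SSE4.1 intrinsics (a sketch, not V8 code):

#include <smmintrin.h>  // SSE4.1
#include <cstdint>

int main() {
  __m128i a = _mm_setr_epi32(-1, 5, 70000, 32768);
  __m128i b = _mm_setzero_si128();
  // packusdw: signed 32-bit lanes -> unsigned 16-bit lanes, saturating.
  __m128i packed = _mm_packus_epi32(a, b);
  uint16_t lanes[8];
  _mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), packed);
  // lanes[0..3] == {0, 5, 65535, 32768}: -1 clamps to 0, 70000 to 0xFFFF.
  return 0;
}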


@@ -3306,13 +3306,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I16x8UConvertI32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFFFFFF
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psrld(kScratchDoubleReg, 1);
__ pminud(dst, kScratchDoubleReg);
__ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
__ packusdw(dst, kScratchDoubleReg);
__ packusdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I16x8AddSaturateU: {
@@ -3524,13 +3518,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I8x16UConvertI16x8: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
// Change negative lanes to 0x7FFF
__ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
__ psrlw(kScratchDoubleReg, 1);
__ pminuw(dst, kScratchDoubleReg);
__ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
__ packuswb(dst, kScratchDoubleReg);
__ packuswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64I8x16ShrU: {


@@ -779,11 +779,9 @@ void SimdScalarLowering::LowerPack(Node* node, SimdType input_rep_type,
DCHECK_EQ(2, node->InputCount());
Node** rep_left = GetReplacementsWithType(node->InputAt(0), input_rep_type);
Node** rep_right = GetReplacementsWithType(node->InputAt(1), input_rep_type);
const Operator* less_op =
is_signed ? machine()->Int32LessThan() : machine()->Uint32LessThan();
const Operator* less_op = machine()->Int32LessThan();
Node* min = nullptr;
Node* max = nullptr;
int32_t shift_val = 0;
MachineRepresentation phi_rep;
if (output_rep_type == SimdType::kInt16x8) {
DCHECK(input_rep_type == SimdType::kInt32x4);
@@ -791,8 +789,8 @@ void SimdScalarLowering::LowerPack(Node* node, SimdType input_rep_type,
min = mcgraph_->Int32Constant(std::numeric_limits<int16_t>::min());
max = mcgraph_->Int32Constant(std::numeric_limits<int16_t>::max());
} else {
min = mcgraph_->Int32Constant(std::numeric_limits<uint16_t>::min());
max = mcgraph_->Uint32Constant(std::numeric_limits<uint16_t>::max());
max = mcgraph_->Int32Constant(std::numeric_limits<uint16_t>::max());
shift_val = kShift16;
}
phi_rep = MachineRepresentation::kWord16;
} else {
@@ -802,8 +800,8 @@ void SimdScalarLowering::LowerPack(Node* node, SimdType input_rep_type,
min = mcgraph_->Int32Constant(std::numeric_limits<int8_t>::min());
max = mcgraph_->Int32Constant(std::numeric_limits<int8_t>::max());
} else {
min = mcgraph_->Int32Constant(std::numeric_limits<uint8_t>::min());
max = mcgraph_->Uint32Constant(std::numeric_limits<uint8_t>::max());
max = mcgraph_->Int32Constant(std::numeric_limits<uint8_t>::max());
shift_val = kShift8;
}
phi_rep = MachineRepresentation::kWord8;
}
@@ -815,14 +813,10 @@ void SimdScalarLowering::LowerPack(Node* node, SimdType input_rep_type,
input = rep_left[i];
else
input = rep_right[i - num_lanes / 2];
if (is_signed) {
Diamond d_min(graph(), common(), graph()->NewNode(less_op, input, min));
input = d_min.Phi(phi_rep, min, input);
}
Diamond d_min(graph(), common(), graph()->NewNode(less_op, input, min));
input = d_min.Phi(phi_rep, min, input);
Diamond d_max(graph(), common(), graph()->NewNode(less_op, max, input));
rep_node[i] = d_max.Phi(phi_rep, max, input);
rep_node[i] =
is_signed ? rep_node[i] : FixUpperBits(rep_node[i], shift_val);
}
ReplaceNode(node, rep_node, num_lanes);
}
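
Per lane, the lowering now clamps with signed comparisons in every
case and, for the unsigned variants, fixes up the upper bits of the
word32 lane afterwards. Roughly, for i8x16.narrow_i16x8_u (a scalar
sketch with a hypothetical helper name; the real code builds Diamond
nodes):

int32_t NarrowU8Lane(int32_t input) {
  const int32_t kMin = 0;    // uint8_t min
  const int32_t kMax = 255;  // uint8_t max; both bounds fit in int32,
                             // so Int32LessThan works for both clamps
  if (input < kMin) input = kMin;  // d_min: -1 -> 0
  if (kMax < input) input = kMax;  // d_max: 300 -> 255
  // FixUpperBits(rep_node[i], kShift8) then normalizes the bits above
  // the 8-bit lane in the word32 representation.
  return input;
}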


@@ -2235,13 +2235,15 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
PrintDRegister(Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
instr->Bits(7, 6) != 0) {
// vqmovn.<type><size> Dd, Qm.
// vqmov{u}n.<type><size> Dd, Qm.
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
char type = instr->Bit(6) != 0 ? 'u' : 's';
int op = instr->Bits(7, 6);
const char* name = op == 0b01 ? "vqmovun" : "vqmovn";
char type = op == 0b11 ? 'u' : 's';
int size = 2 * kBitsPerByte * (1 << instr->Bits(19, 18));
out_buffer_pos_ +=
SNPrintF(out_buffer_ + out_buffer_pos_, "vqmovn.%c%i d%d, q%d",
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.%c%i d%d, q%d", name,
type, size, Vd, Vm);
} else {
int Vd, Vm;


@@ -3912,6 +3912,18 @@ void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) {
simulator->set_neon_register<U, kDoubleSize>(Vd, dst);
}
template <typename T, typename U>
void SaturatingUnsignedNarrow(Simulator* simulator, int Vd, int Vm) {
static const int kLanes = 16 / sizeof(T);
T src[kLanes];
U dst[kLanes];
simulator->get_neon_register(Vm, src);
for (int i = 0; i < kLanes; i++) {
dst[i] = Clamp<U>(src[i]);
}
simulator->set_neon_register<U, kDoubleSize>(Vd, dst);
}
template <typename T>
void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
static const int kLanes = 16 / sizeof(T);
@@ -5332,27 +5344,35 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int Vd = instr->VFPDRegValue(kDoublePrecision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
bool is_unsigned = instr->Bit(6) != 0;
bool dst_unsigned = instr->Bit(6) != 0;
bool src_unsigned = instr->Bits(7, 6) == 0b11;
DCHECK_IMPLIES(src_unsigned, dst_unsigned);
switch (size) {
case Neon8: {
if (is_unsigned) {
if (src_unsigned) {
SaturatingNarrow<uint16_t, uint8_t>(this, Vd, Vm);
} else if (dst_unsigned) {
SaturatingUnsignedNarrow<int16_t, uint8_t>(this, Vd, Vm);
} else {
SaturatingNarrow<int16_t, int8_t>(this, Vd, Vm);
}
break;
}
case Neon16: {
if (is_unsigned) {
if (src_unsigned) {
SaturatingNarrow<uint32_t, uint16_t>(this, Vd, Vm);
} else if (dst_unsigned) {
SaturatingUnsignedNarrow<int32_t, uint16_t>(this, Vd, Vm);
} else {
SaturatingNarrow<int32_t, int16_t>(this, Vd, Vm);
}
break;
}
case Neon32: {
if (is_unsigned) {
if (src_unsigned) {
SaturatingNarrow<uint64_t, uint32_t>(this, Vd, Vm);
} else if (dst_unsigned) {
SaturatingUnsignedNarrow<int64_t, uint32_t>(this, Vd, Vm);
} else {
SaturatingNarrow<int64_t, int32_t>(this, Vd, Vm);
}


@@ -2515,34 +2515,28 @@ class ThreadImpl {
CONVERT_CASE(I16x8UConvertI8x16Low, int16, i8x16, int8, 8, 0, uint8_t,
a)
#undef CONVERT_CASE
#define PACK_CASE(op, src_type, name, dst_type, count, ctype, dst_ctype, \
is_unsigned) \
case kExpr##op: { \
WasmValue v2 = Pop(); \
WasmValue v1 = Pop(); \
src_type s1 = v1.to_s128().to_##name(); \
src_type s2 = v2.to_s128().to_##name(); \
dst_type res; \
int64_t min = std::numeric_limits<ctype>::min(); \
int64_t max = std::numeric_limits<ctype>::max(); \
for (size_t i = 0; i < count; ++i) { \
int32_t v = i < count / 2 ? s1.val[LANE(i, s1)] \
: s2.val[LANE(i - count / 2, s2)]; \
int64_t a = is_unsigned ? static_cast<int64_t>(v & 0xFFFFFFFFu) : v; \
res.val[LANE(i, res)] = \
static_cast<dst_ctype>(std::max(min, std::min(max, a))); \
} \
Push(WasmValue(Simd128(res))); \
return true; \
#define PACK_CASE(op, src_type, name, dst_type, count, ctype, dst_ctype) \
case kExpr##op: { \
WasmValue v2 = Pop(); \
WasmValue v1 = Pop(); \
src_type s1 = v1.to_s128().to_##name(); \
src_type s2 = v2.to_s128().to_##name(); \
dst_type res; \
int64_t min = std::numeric_limits<ctype>::min(); \
int64_t max = std::numeric_limits<ctype>::max(); \
for (size_t i = 0; i < count; ++i) { \
int64_t v = i < count / 2 ? s1.val[LANE(i, s1)] \
: s2.val[LANE(i - count / 2, s2)]; \
res.val[LANE(i, res)] = \
static_cast<dst_ctype>(std::max(min, std::min(max, v))); \
} \
Push(WasmValue(Simd128(res))); \
return true; \
}
PACK_CASE(I16x8SConvertI32x4, int4, i32x4, int8, 8, int16_t, int16_t,
false)
PACK_CASE(I16x8UConvertI32x4, int4, i32x4, int8, 8, uint16_t, int16_t,
true)
PACK_CASE(I8x16SConvertI16x8, int8, i16x8, int16, 16, int8_t, int8_t,
false)
PACK_CASE(I8x16UConvertI16x8, int8, i16x8, int16, 16, uint8_t, int8_t,
true)
PACK_CASE(I16x8SConvertI32x4, int4, i32x4, int8, 8, int16_t, int16_t)
PACK_CASE(I16x8UConvertI32x4, int4, i32x4, int8, 8, uint16_t, int16_t)
PACK_CASE(I8x16SConvertI16x8, int8, i16x8, int16, 16, int8_t, int8_t)
PACK_CASE(I8x16UConvertI16x8, int8, i16x8, int16, 16, uint8_t, int8_t)
#undef PACK_CASE
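
Widening v to int64_t is what lets a single clamp serve both variants:
a signed int32 lane and the unsigned max (255 or 65535) are both
exactly representable as int64_t, so std::min/std::max compare them
correctly. A before/after sketch for a -1 input lane (standalone, not
interpreter code):

#include <algorithm>
#include <cstdint>

int main() {
  int32_t lane = -1;
  int64_t min = 0, max = 0xFFFF;  // uint16_t limits, as in PACK_CASE
  // Old unsigned path: reinterpreted the lane as unsigned first.
  int64_t old_v = static_cast<int64_t>(lane & 0xFFFFFFFFu);  // 4294967295
  int64_t old_res = std::max(min, std::min(max, old_v));     // 65535, wrong
  // New path: the lane stays signed and simply widens.
  int64_t new_v = lane;                                      // -1
  int64_t new_res = std::max(min, std::min(max, new_v));     // 0, correct
  (void)old_res;
  (void)new_res;
  return 0;
}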
case kExprS128Select: {
int4 bool_val = Pop().to_s128().to_i32x4();


@@ -1335,12 +1335,12 @@ TEST(15) {
__ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, vmovl_s32))));
__ vst1(Neon8, NeonListOperand(q3), NeonMemOperand(r4));
// Narrow what we widened.
__ vqmovn(NeonU16, d0, q2);
__ vqmovn(NeonU16, NeonU16, d0, q2);
__ vstr(d0, r0, offsetof(T, vqmovn_u16));
__ vmov(d1, d0);
__ vqmovn(NeonS8, d2, q0);
__ vqmovn(NeonS8, NeonS8, d2, q0);
__ vstr(d2, r0, offsetof(T, vqmovn_s8));
__ vqmovn(NeonS32, d4, q3);
__ vqmovn(NeonS32, NeonS32, d4, q3);
__ vstr(d4, r0, offsetof(T, vqmovn_s32));
// ARM core register to scalar.


@@ -997,9 +997,14 @@ TEST(Neon) {
COMPARE(vmovl(NeonS16, q4, d2), "f2908a12 vmovl.s16 q4, d2");
COMPARE(vmovl(NeonU32, q4, d2), "f3a08a12 vmovl.u32 q4, d2");
COMPARE(vqmovn(NeonU8, d16, q8), "f3f202e0 vqmovn.u16 d16, q8");
COMPARE(vqmovn(NeonS16, d16, q8), "f3f602a0 vqmovn.s32 d16, q8");
COMPARE(vqmovn(NeonU32, d2, q4), "f3ba22c8 vqmovn.u64 d2, q4");
COMPARE(vqmovn(NeonU8, NeonU8, d16, q8),
"f3f202e0 vqmovn.u16 d16, q8");
COMPARE(vqmovn(NeonS16, NeonS16, d16, q8),
"f3f602a0 vqmovn.s32 d16, q8");
COMPARE(vqmovn(NeonU32, NeonU32, d2, q4),
"f3ba22c8 vqmovn.u64 d2, q4");
COMPARE(vqmovn(NeonU32, NeonS32, d2, q4),
"f3ba2248 vqmovun.s64 d2, q4");
COMPARE(vmov(NeonS8, d0, 0, r0), "ee400b10 vmov.8 d0[0], r0");
COMPARE(vmov(NeonU8, d1, 1, r1), "ee411b30 vmov.8 d1[1], r1");


@@ -227,13 +227,6 @@ T Narrow(int64_t value) {
return Clamp<T>(value);
}
template <typename T>
T UnsignedNarrow(int64_t value) {
static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
using UnsignedT = typename std::make_unsigned<T>::type;
return static_cast<T>(Clamp<UnsignedT>(value & 0xFFFFFFFFu));
}
template <typename T>
T AddSaturate(T a, T b) {
return Clamp<T>(Widen(a) + Widen(b));
@@ -2034,7 +2027,7 @@ WASM_SIMD_TEST(I16x8ConvertI32x4) {
FOR_INT32_INPUTS(x) {
r.Call(x);
int16_t expected_signed = Narrow<int16_t>(x);
int16_t expected_unsigned = UnsignedNarrow<int16_t>(x);
int16_t expected_unsigned = Narrow<uint16_t>(x);
for (int i = 0; i < 8; i++) {
CHECK_EQ(expected_signed, ReadLittleEndianValue<int16_t>(&g0[i]));
CHECK_EQ(expected_unsigned, ReadLittleEndianValue<int16_t>(&g1[i]));
@@ -2277,7 +2270,7 @@ WASM_SIMD_TEST(I8x16ConvertI16x8) {
FOR_INT16_INPUTS(x) {
r.Call(x);
int8_t expected_signed = Narrow<int8_t>(x);
int8_t expected_unsigned = UnsignedNarrow<int8_t>(x);
int8_t expected_unsigned = Narrow<uint8_t>(x);
for (int i = 0; i < 16; i++) {
CHECK_EQ(expected_signed, ReadLittleEndianValue<int8_t>(&g0[i]));
CHECK_EQ(expected_unsigned, ReadLittleEndianValue<int8_t>(&g1[i]));