[wasm-simd] Fix f64x2 replace lane

There are a couple of bugs here:

1. The immediate used for vinsertps is wrong when lane == 1, the first
two bits specify which element of the source is copied, and it should
always be 00, 01 to copy the first 2 lanes of source.
2. For both cases, the second insertps call should be using dst as the
src, since dst was already updated by the first insertps call, it was
incorrectly using the old value of src. This was probably working
correctly because in many cases dst and src happened to be the same
register.
3. rep cannot be same as dst, because dst is overwritten, and rep should
stay the same

I also modified the F64x2ReplaceLane to test separately for replacing
lane 0 and lane 1.

Fixed bug 3. for arm and arm64.

Bug: v8:9728
Change-Id: Iec6e48bcfbc7d27908dd86d5f113a8b5dedd499b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1877055
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64620}
This commit is contained in:
Ng Zhi An 2019-10-23 14:32:34 -07:00 committed by Commit Bot
parent 3c6ecf716d
commit f3cbcdb24b
5 changed files with 18 additions and 13 deletions

View File

@ -133,7 +133,7 @@ void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
int32_t imm = OpParameter<int32_t>(node->op());
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(imm),
g.UseRegister(node->InputAt(1)));
g.UseUniqueRegister(node->InputAt(1)));
}
template <IrOpcode::Value kOpcode, int kImmMin, int kImmMax,

View File

@ -179,7 +179,7 @@ void VisitRRIR(InstructionSelector* selector, ArchOpcode opcode, Node* node) {
int32_t imm = OpParameter<int32_t>(node->op());
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(imm),
g.UseRegister(node->InputAt(1)));
g.UseUniqueRegister(node->InputAt(1)));
}
struct ExtendingLoadMatcher {

View File

@ -1881,14 +1881,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src = i.InputSimd128Register(0);
int8_t lane = i.InputInt8(1);
DoubleRegister rep = i.InputDoubleRegister(2);
DCHECK_NE(dst, rep);
DCHECK_LT(lane, 2);
if (lane == 0) {
__ vinsertps(dst, src, rep, 0b00000000);
__ vinsertps(dst, src, rep, 0b01010000);
__ vinsertps(dst, dst, rep, 0b01010000);
} else {
__ vinsertps(dst, src, rep, 0b10100000);
__ vinsertps(dst, src, rep, 0b11110000);
__ vinsertps(dst, src, rep, 0b00100000);
__ vinsertps(dst, dst, rep, 0b01110000);
}
break;
}

View File

@ -2211,7 +2211,7 @@ VISIT_SIMD_REPLACE_LANE(F32x4)
InstructionOperand operand0 = g.UseRegister(node->InputAt(0)); \
InstructionOperand operand1 = \
g.UseImmediate(OpParameter<int32_t>(node->op())); \
InstructionOperand operand2 = g.UseRegister(node->InputAt(1)); \
InstructionOperand operand2 = g.UseUniqueRegister(node->InputAt(1)); \
if (IsSupported(AVX)) { \
Emit(kAVX##Type##ReplaceLane, g.DefineAsRegister(node), operand0, \
operand1, operand2); \

View File

@ -1136,21 +1136,25 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
WasmRunner<int32_t> r(execution_tier, lower_simd);
// Set up a global to hold input/output vector.
double* g = r.builder().AddGlobal<double>(kWasmS128);
// Set up globals to hold input/output vector.
double* g0 = r.builder().AddGlobal<double>(kWasmS128);
double* g1 = r.builder().AddGlobal<double>(kWasmS128);
// Build function to replace each lane with its (FP) index.
byte temp1 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_SPLAT(WASM_F64(1e100))),
WASM_SET_LOCAL(temp1, WASM_SIMD_F64x2_REPLACE_LANE(
0, WASM_GET_LOCAL(temp1), WASM_F64(0.0f))),
// Replace lane 0.
WASM_SET_GLOBAL(0, WASM_SIMD_F64x2_REPLACE_LANE(
0, WASM_GET_LOCAL(temp1), WASM_F64(0.0f))),
// Replace lane 1.
WASM_SET_GLOBAL(1, WASM_SIMD_F64x2_REPLACE_LANE(
1, WASM_GET_LOCAL(temp1), WASM_F64(1.0f))),
WASM_ONE);
r.Call();
for (int i = 0; i < 2; i++) {
CHECK_EQ(static_cast<double>(i), ReadLittleEndianValue<double>(&g[i]));
}
CHECK_EQ(0., ReadLittleEndianValue<double>(&g0[0]));
CHECK_EQ(1e100, ReadLittleEndianValue<double>(&g0[1]));
CHECK_EQ(1e100, ReadLittleEndianValue<double>(&g1[0]));
CHECK_EQ(1., ReadLittleEndianValue<double>(&g1[1]));
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64