[ARM] Make SIMD 128-bit load/store more like existing load/store.

- Renames kArmSimd128Load and kArmSimd128Store to kArmVld1S128 and
  kArmVst1S128, respectively.
- Handles the unaligned Simd128 load/store cases via NEON vld1.8/vst1.8.
LOG=N
BUG=v8:6020

Review-Url: https://codereview.chromium.org/2769083003
Cr-Commit-Position: refs/heads/master@{#44117}
This commit is contained in:
bbudge 2017-03-24 10:40:49 -07:00 committed by Commit bot
parent 14e01da1cf
commit 6839e7ac08
4 changed files with 38 additions and 28 deletions

View File

@ -1444,7 +1444,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK_EQ(LeaveCC, i.OutputSBit()); DCHECK_EQ(LeaveCC, i.OutputSBit());
break; break;
case kArmVld1F64: { case kArmVld1F64: {
__ vld1(NeonSize::Neon8, NeonListOperand(i.OutputDoubleRegister()), __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
NeonMemOperand(i.InputRegister(0))); NeonMemOperand(i.InputRegister(0)));
break; break;
} }
@ -1453,6 +1453,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
NeonMemOperand(i.InputRegister(1))); NeonMemOperand(i.InputRegister(1)));
break; break;
} }
case kArmVld1S128: {
__ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
NeonMemOperand(i.InputRegister(0)));
break;
}
case kArmVst1S128: {
__ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
NeonMemOperand(i.InputRegister(1)));
break;
}
case kArmVldrF64: case kArmVldrF64:
__ vldr(i.OutputDoubleRegister(), i.InputOffset()); __ vldr(i.OutputDoubleRegister(), i.InputOffset());
DCHECK_EQ(LeaveCC, i.OutputSBit()); DCHECK_EQ(LeaveCC, i.OutputSBit());
@ -1992,18 +2002,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.OutputSimd128Register()); i.OutputSimd128Register());
break; break;
} }
case kArmSimd128Load: {
MemOperand src = i.InputOffset();
__ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
NeonMemOperand(src.rn(), src.rm()));
break;
}
case kArmSimd128Store: {
MemOperand src = i.InputOffset(1);
__ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
NeonMemOperand(src.rn(), src.rm()));
break;
}
case kArmSimd128And: { case kArmSimd128And: {
__ vand(i.OutputSimd128Register(), i.InputSimd128Register(0), __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1)); i.InputSimd128Register(1));

View File

@ -107,6 +107,8 @@ namespace compiler {
V(ArmVld1F64) \ V(ArmVld1F64) \
V(ArmVstrF64) \ V(ArmVstrF64) \
V(ArmVst1F64) \ V(ArmVst1F64) \
V(ArmVld1S128) \
V(ArmVst1S128) \
V(ArmFloat32Max) \ V(ArmFloat32Max) \
V(ArmFloat64Max) \ V(ArmFloat64Max) \
V(ArmFloat32Min) \ V(ArmFloat32Min) \
@ -213,8 +215,6 @@ namespace compiler {
V(ArmUint8x16LessThan) \ V(ArmUint8x16LessThan) \
V(ArmUint8x16LessThanOrEqual) \ V(ArmUint8x16LessThanOrEqual) \
V(ArmSimd128Zero) \ V(ArmSimd128Zero) \
V(ArmSimd128Load) \
V(ArmSimd128Store) \
V(ArmSimd128And) \ V(ArmSimd128And) \
V(ArmSimd128Or) \ V(ArmSimd128Or) \
V(ArmSimd128Xor) \ V(ArmSimd128Xor) \

View File

@ -199,8 +199,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmUint8x16LessThan: case kArmUint8x16LessThan:
case kArmUint8x16LessThanOrEqual: case kArmUint8x16LessThanOrEqual:
case kArmSimd128Zero: case kArmSimd128Zero:
case kArmSimd128Load:
case kArmSimd128Store:
case kArmSimd128And: case kArmSimd128And:
case kArmSimd128Or: case kArmSimd128Or:
case kArmSimd128Xor: case kArmSimd128Xor:
@ -217,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmVldrF32: case kArmVldrF32:
case kArmVldrF64: case kArmVldrF64:
case kArmVld1F64: case kArmVld1F64:
case kArmVld1S128:
case kArmLdrb: case kArmLdrb:
case kArmLdrsb: case kArmLdrsb:
case kArmLdrh: case kArmLdrh:
@ -227,6 +226,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmVstrF32: case kArmVstrF32:
case kArmVstrF64: case kArmVstrF64:
case kArmVst1F64: case kArmVst1F64:
case kArmVst1S128:
case kArmStrb: case kArmStrb:
case kArmStrh: case kArmStrh:
case kArmStr: case kArmStr:

View File

@ -427,7 +427,7 @@ void InstructionSelector::VisitLoad(Node* node) {
opcode = kArmLdr; opcode = kArmLdr;
break; break;
case MachineRepresentation::kSimd128: case MachineRepresentation::kSimd128:
opcode = kArmSimd128Load; opcode = kArmVld1S128;
break; break;
case MachineRepresentation::kWord64: // Fall through. case MachineRepresentation::kWord64: // Fall through.
case MachineRepresentation::kSimd1x4: // Fall through. case MachineRepresentation::kSimd1x4: // Fall through.
@ -517,7 +517,7 @@ void InstructionSelector::VisitStore(Node* node) {
opcode = kArmStr; opcode = kArmStr;
break; break;
case MachineRepresentation::kSimd128: case MachineRepresentation::kSimd128:
opcode = kArmSimd128Store; opcode = kArmVst1S128;
break; break;
case MachineRepresentation::kWord64: // Fall through. case MachineRepresentation::kWord64: // Fall through.
case MachineRepresentation::kSimd1x4: // Fall through. case MachineRepresentation::kSimd1x4: // Fall through.
@ -542,8 +542,8 @@ void InstructionSelector::VisitProtectedStore(Node* node) {
} }
void InstructionSelector::VisitUnalignedLoad(Node* node) { void InstructionSelector::VisitUnalignedLoad(Node* node) {
UnalignedLoadRepresentation load_rep = MachineRepresentation load_rep =
UnalignedLoadRepresentationOf(node->op()); UnalignedLoadRepresentationOf(node->op()).representation();
ArmOperandGenerator g(this); ArmOperandGenerator g(this);
Node* base = node->InputAt(0); Node* base = node->InputAt(0);
Node* index = node->InputAt(1); Node* index = node->InputAt(1);
@ -551,16 +551,18 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
InstructionCode opcode = kArmLdr; InstructionCode opcode = kArmLdr;
// Only floating point loads need to be specially handled; integer loads // Only floating point loads need to be specially handled; integer loads
// support unaligned access. We support unaligned FP loads by loading to // support unaligned access. We support unaligned FP loads by loading to
// integer registers first, then moving to the destination FP register. // integer registers first, then moving to the destination FP register. If
switch (load_rep.representation()) { // NEON is supported, we use the vld1.8 instruction.
switch (load_rep) {
case MachineRepresentation::kFloat32: { case MachineRepresentation::kFloat32: {
InstructionOperand temp = g.TempRegister(); InstructionOperand temp = g.TempRegister();
EmitLoad(this, opcode, &temp, base, index); EmitLoad(this, opcode, &temp, base, index);
Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp); Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
return; return;
} }
case MachineRepresentation::kFloat64: { case MachineRepresentation::kFloat64:
// Compute the address of the least-significant half of the FP value. case MachineRepresentation::kSimd128: {
// Compute the address of the least-significant byte of the FP value.
// We assume that the base node is unlikely to be an encodable immediate // We assume that the base node is unlikely to be an encodable immediate
// or the result of a shift operation, so only consider the addressing // or the result of a shift operation, so only consider the addressing
// mode that should be used for the index node. // mode that should be used for the index node.
@ -585,8 +587,12 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
if (CpuFeatures::IsSupported(NEON)) { if (CpuFeatures::IsSupported(NEON)) {
// With NEON we can load directly from the calculated address. // With NEON we can load directly from the calculated address.
Emit(kArmVld1F64, g.DefineAsRegister(node), addr); ArchOpcode op = load_rep == MachineRepresentation::kFloat64
? kArmVld1F64
: kArmVld1S128;
Emit(op, g.DefineAsRegister(node), addr);
} else { } else {
DCHECK_NE(MachineRepresentation::kSimd128, load_rep);
// Load both halves and move to an FP register. // Load both halves and move to an FP register.
InstructionOperand fp_lo = g.TempRegister(); InstructionOperand fp_lo = g.TempRegister();
InstructionOperand fp_hi = g.TempRegister(); InstructionOperand fp_hi = g.TempRegister();
@ -619,6 +625,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
// Only floating point stores need to be specially handled; integer stores // Only floating point stores need to be specially handled; integer stores
// support unaligned access. We support unaligned FP stores by moving the // support unaligned access. We support unaligned FP stores by moving the
// value to integer registers first, then storing to the destination address. // value to integer registers first, then storing to the destination address.
// If NEON is supported, we use the vst1.8 instruction.
switch (store_rep) { switch (store_rep) {
case MachineRepresentation::kFloat32: { case MachineRepresentation::kFloat32: {
inputs[input_count++] = g.TempRegister(); inputs[input_count++] = g.TempRegister();
@ -627,7 +634,8 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
EmitStore(this, kArmStr, input_count, inputs, index); EmitStore(this, kArmStr, input_count, inputs, index);
return; return;
} }
case MachineRepresentation::kFloat64: { case MachineRepresentation::kFloat64:
case MachineRepresentation::kSimd128: {
if (CpuFeatures::IsSupported(NEON)) { if (CpuFeatures::IsSupported(NEON)) {
InstructionOperand address = g.TempRegister(); InstructionOperand address = g.TempRegister();
{ {
@ -653,8 +661,12 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
inputs[input_count++] = g.UseRegister(value); inputs[input_count++] = g.UseRegister(value);
inputs[input_count++] = address; inputs[input_count++] = address;
Emit(kArmVst1F64, 0, nullptr, input_count, inputs); ArchOpcode op = store_rep == MachineRepresentation::kFloat64
? kArmVst1F64
: kArmVst1S128;
Emit(op, 0, nullptr, input_count, inputs);
} else { } else {
DCHECK_NE(MachineRepresentation::kSimd128, store_rep);
// Store a 64-bit floating point value using two 32-bit integer stores. // Store a 64-bit floating point value using two 32-bit integer stores.
// Computing the store address here would require three live temporary // Computing the store address here would require three live temporary
// registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after