[ARM] Make Simd 128 bit load/store more like existing load/store.
- Renames kArmSimd128Load, kArmSimd128Store to kArmVld1S128, kArmVst1S128.
- Handles the unaligned load/store cases.

LOG=N
BUG=v8:6020
Review-Url: https://codereview.chromium.org/2769083003
Cr-Commit-Position: refs/heads/master@{#44117}
commit 6839e7ac08 (parent 14e01da1cf)
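Background for the unaligned-access part of this change: ARM's vldr/vstr require word-aligned addresses, while NEON's vld1.8/vst1.8 (element size 8) accept any byte address when no alignment specifier is given. A minimal standalone sketch of that property using NEON intrinsics (illustrative only, not code from this commit):

    #include <arm_neon.h>
    #include <stdint.h>

    // vld1q_u8/vst1q_u8 compile down to vld1.8/vst1.8 on a D-register
    // pair; with no alignment specifier they tolerate any address, which
    // is what makes them usable for unaligned Simd128 loads and stores.
    uint8x16_t load_simd128(const uint8_t* p) { return vld1q_u8(p); }

    void store_simd128(uint8_t* p, uint8x16_t v) { vst1q_u8(p, v); }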
src/compiler/arm/code-generator-arm.cc:

@@ -1444,7 +1444,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       DCHECK_EQ(LeaveCC, i.OutputSBit());
       break;
     case kArmVld1F64: {
-      __ vld1(NeonSize::Neon8, NeonListOperand(i.OutputDoubleRegister()),
+      __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
               NeonMemOperand(i.InputRegister(0)));
       break;
     }
@@ -1453,6 +1453,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
               NeonMemOperand(i.InputRegister(1)));
       break;
     }
+    case kArmVld1S128: {
+      __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
+              NeonMemOperand(i.InputRegister(0)));
+      break;
+    }
+    case kArmVst1S128: {
+      __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
+              NeonMemOperand(i.InputRegister(1)));
+      break;
+    }
     case kArmVldrF64:
       __ vldr(i.OutputDoubleRegister(), i.InputOffset());
       DCHECK_EQ(LeaveCC, i.OutputSBit());
@@ -1992,18 +2002,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
               i.OutputSimd128Register());
       break;
     }
-    case kArmSimd128Load: {
-      MemOperand src = i.InputOffset();
-      __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
-              NeonMemOperand(src.rn(), src.rm()));
-      break;
-    }
-    case kArmSimd128Store: {
-      MemOperand src = i.InputOffset(1);
-      __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
-              NeonMemOperand(src.rn(), src.rm()));
-      break;
-    }
     case kArmSimd128And: {
       __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
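The two added cases mirror the existing kArmVld1F64/kArmVst1F64 handling: the address arrives as a plain register input, unlike the removed kArmSimd128Load/kArmSimd128Store cases below, which unpacked a MemOperand back into base and index registers. A rough sketch of what each new case asks the macro-assembler to emit (register names are illustrative; actual assignment is up to the register allocator):

    // i.OutputSimd128Register() names a Q register, i.e. a pair of D
    // registers, and NeonMemOperand(r) is a bare [r] address with no
    // alignment hint.
    __ vld1(Neon8, NeonListOperand(q0), NeonMemOperand(r0));
    // emits: vld1.8 {d0, d1}, [r0]
    __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r1));
    // emits: vst1.8 {d0, d1}, [r1]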
src/compiler/arm/instruction-codes-arm.h:

@@ -107,6 +107,8 @@ namespace compiler {
   V(ArmVld1F64)                    \
   V(ArmVstrF64)                    \
   V(ArmVst1F64)                    \
+  V(ArmVld1S128)                   \
+  V(ArmVst1S128)                   \
   V(ArmFloat32Max)                 \
   V(ArmFloat64Max)                 \
   V(ArmFloat32Min)                 \
@@ -213,8 +215,6 @@ namespace compiler {
   V(ArmUint8x16LessThan)           \
   V(ArmUint8x16LessThanOrEqual)    \
   V(ArmSimd128Zero)                \
-  V(ArmSimd128Load)                \
-  V(ArmSimd128Store)               \
   V(ArmSimd128And)                 \
   V(ArmSimd128Or)                  \
   V(ArmSimd128Xor)                 \
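These V(...) entries feed an X-macro, so adding and removing list entries adds and removes the corresponding opcode enumerators everywhere they are used. A condensed sketch of the expansion, following the pattern in V8's instruction-codes.h (abbreviated):

    // Each V(Name) becomes an enumerator kName; V(ArmVld1S128) and
    // V(ArmVst1S128) thus introduce kArmVld1S128/kArmVst1S128, replacing
    // the retired kArmSimd128Load/kArmSimd128Store.
    #define DECLARE_ARCH_OPCODE(Name) k##Name,
    enum ArchOpcode {
      ARCH_OPCODE_LIST(DECLARE_ARCH_OPCODE)
    };
    #undef DECLARE_ARCH_OPCODE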
src/compiler/arm/instruction-scheduler-arm.cc:

@@ -199,8 +199,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmUint8x16LessThan:
     case kArmUint8x16LessThanOrEqual:
     case kArmSimd128Zero:
-    case kArmSimd128Load:
-    case kArmSimd128Store:
     case kArmSimd128And:
     case kArmSimd128Or:
     case kArmSimd128Xor:
@@ -217,6 +215,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmVldrF32:
     case kArmVldrF64:
     case kArmVld1F64:
+    case kArmVld1S128:
     case kArmLdrb:
     case kArmLdrsb:
     case kArmLdrh:
@@ -227,6 +226,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmVstrF32:
     case kArmVstrF64:
     case kArmVst1F64:
+    case kArmVst1S128:
     case kArmStrb:
     case kArmStrh:
     case kArmStr:
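The scheduler hunks move the SIMD memory opcodes out of the side-effect-free group and into the load and store groups, so the instruction scheduler can no longer reorder them across other memory operations. An abbreviated sketch of the surrounding function (structure as in instruction-scheduler-arm.cc, heavily trimmed):

    int InstructionScheduler::GetTargetInstructionFlags(
        const Instruction* instr) const {
      switch (instr->arch_opcode()) {
        // Pure computations such as kArmSimd128And return kNoOpcodeFlags.
        case kArmVld1F64:
        case kArmVld1S128:
          return kIsLoadOperation;  // loads must not move across stores
        case kArmVst1F64:
        case kArmVst1S128:
          return kHasSideEffect;  // stores order against other memory ops
        default:
          return kNoOpcodeFlags;  // sketch; the real switch covers all opcodes
      }
    }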
src/compiler/arm/instruction-selector-arm.cc:

@@ -427,7 +427,7 @@ void InstructionSelector::VisitLoad(Node* node) {
       opcode = kArmLdr;
       break;
     case MachineRepresentation::kSimd128:
-      opcode = kArmSimd128Load;
+      opcode = kArmVld1S128;
       break;
     case MachineRepresentation::kWord64:   // Fall through.
     case MachineRepresentation::kSimd1x4:  // Fall through.
@@ -517,7 +517,7 @@ void InstructionSelector::VisitStore(Node* node) {
       opcode = kArmStr;
       break;
     case MachineRepresentation::kSimd128:
-      opcode = kArmSimd128Store;
+      opcode = kArmVst1S128;
       break;
     case MachineRepresentation::kWord64:   // Fall through.
     case MachineRepresentation::kSimd1x4:  // Fall through.
@@ -542,8 +542,8 @@ void InstructionSelector::VisitProtectedStore(Node* node) {
 }

 void InstructionSelector::VisitUnalignedLoad(Node* node) {
-  UnalignedLoadRepresentation load_rep =
-      UnalignedLoadRepresentationOf(node->op());
+  MachineRepresentation load_rep =
+      UnalignedLoadRepresentationOf(node->op()).representation();
   ArmOperandGenerator g(this);
   Node* base = node->InputAt(0);
   Node* index = node->InputAt(1);
@@ -551,16 +551,18 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
   InstructionCode opcode = kArmLdr;
   // Only floating point loads need to be specially handled; integer loads
   // support unaligned access. We support unaligned FP loads by loading to
-  // integer registers first, then moving to the destination FP register.
-  switch (load_rep.representation()) {
+  // integer registers first, then moving to the destination FP register. If
+  // NEON is supported, we use the vld1.8 instruction.
+  switch (load_rep) {
     case MachineRepresentation::kFloat32: {
       InstructionOperand temp = g.TempRegister();
       EmitLoad(this, opcode, &temp, base, index);
       Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
       return;
     }
-    case MachineRepresentation::kFloat64: {
-      // Compute the address of the least-significant half of the FP value.
+    case MachineRepresentation::kFloat64:
+    case MachineRepresentation::kSimd128: {
+      // Compute the address of the least-significant byte of the FP value.
       // We assume that the base node is unlikely to be an encodable immediate
       // or the result of a shift operation, so only consider the addressing
       // mode that should be used for the index node.
@@ -585,8 +587,12 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {

       if (CpuFeatures::IsSupported(NEON)) {
         // With NEON we can load directly from the calculated address.
-        Emit(kArmVld1F64, g.DefineAsRegister(node), addr);
+        ArchOpcode op = load_rep == MachineRepresentation::kFloat64
+                            ? kArmVld1F64
+                            : kArmVld1S128;
+        Emit(op, g.DefineAsRegister(node), addr);
       } else {
+        DCHECK_NE(MachineRepresentation::kSimd128, load_rep);
         // Load both halves and move to an FP register.
         InstructionOperand fp_lo = g.TempRegister();
         InstructionOperand fp_hi = g.TempRegister();
@@ -619,6 +625,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
   // Only floating point stores need to be specially handled; integer stores
   // support unaligned access. We support unaligned FP stores by moving the
   // value to integer registers first, then storing to the destination address.
+  // If NEON is supported, we use the vst1.8 instruction.
   switch (store_rep) {
     case MachineRepresentation::kFloat32: {
       inputs[input_count++] = g.TempRegister();
@@ -627,7 +634,8 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
       EmitStore(this, kArmStr, input_count, inputs, index);
       return;
     }
-    case MachineRepresentation::kFloat64: {
+    case MachineRepresentation::kFloat64:
+    case MachineRepresentation::kSimd128: {
       if (CpuFeatures::IsSupported(NEON)) {
         InstructionOperand address = g.TempRegister();
         {
@@ -653,8 +661,12 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {

         inputs[input_count++] = g.UseRegister(value);
         inputs[input_count++] = address;
-        Emit(kArmVst1F64, 0, nullptr, input_count, inputs);
+        ArchOpcode op = store_rep == MachineRepresentation::kFloat64
+                            ? kArmVst1F64
+                            : kArmVst1S128;
+        Emit(op, 0, nullptr, input_count, inputs);
       } else {
+        DCHECK_NE(MachineRepresentation::kSimd128, store_rep);
         // Store a 64-bit floating point value using two 32-bit integer stores.
         // Computing the store address here would require three live temporary
         // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
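Taken together, the selector's choice for unaligned accesses now reduces to roughly the following (a condensed sketch with a hypothetical helper name, not code from the commit; the real logic stays inlined in VisitUnalignedLoad/VisitUnalignedStore, and the non-NEON kFloat64 paths additionally split the access into two 32-bit halves moved through integer registers):

    // Hypothetical summary helper, for illustration only.
    ArchOpcode UnalignedOpcode(MachineRepresentation rep, bool is_load) {
      bool neon = CpuFeatures::IsSupported(NEON);
      switch (rep) {
        case MachineRepresentation::kFloat64:
          if (neon) return is_load ? kArmVld1F64 : kArmVst1F64;
          return is_load ? kArmLdr : kArmStr;  // two 32-bit halves + vmov
        case MachineRepresentation::kSimd128:
          DCHECK(neon);  // Simd128 code is only selected when NEON exists
          return is_load ? kArmVld1S128 : kArmVst1S128;
        default:
          return is_load ? kArmLdr : kArmStr;  // integers allow misalignment
      }
    }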