PPC/s390: [turbofan] Further optimize DataView accesses.

Port 5fecd146bf

Original Commit Message:

    This adds support for unaligned load/store access to the DataView
    backing store and uses byteswap operations to fix up the endianness
    when necessary. This changes the Word32ReverseBytes operator to be
    a required operator and adds the missing support on the Intel and
    ARM platforms (on 64-bit platforms the Word64ReverseBytes operator
    is also mandatory now).

    This further improves the performance on the dataviewperf.js test
    mentioned in the tracking bug by up to 40%, and at the same time
    reduces the code complexity in the EffectControlLinearizer.

R=bmeurer@chromium.org, joransiu@ca.ibm.com, michael_dawson@ca.ibm.com
BUG=
LOG=N

Change-Id: Ia9aad21713a2ad76ce3ef2b816fc20e9a27fe4c9
Reviewed-on: https://chromium-review.googlesource.com/1174936
Reviewed-by: John Barboza <jbarboza@ca.ibm.com>
Commit-Queue: Junliang Yan <jyan@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#55132}
This commit is contained in:
Junliang Yan 2018-08-14 14:34:58 -04:00 committed by Commit Bot
parent fba8c5ecc2
commit 14170c67b3
5 changed files with 44 additions and 5 deletions

View File

@ -2058,6 +2058,35 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
ATOMIC_BINOP_CASE(Xor, xor_)
#undef ATOMIC_BINOP_CASE
case kPPC_ByteRev32: {
// Byte-reverse the low 32 bits of input 0 and sign-extend the result into
// the output register. Below, the input word's bytes are named B3 B2 B1 B0
// (most significant first); PPC numbers bit 0 as the most significant bit.
Register input = i.InputRegister(0);
Register output = i.OutputRegister();
Register temp1 = r0;
// Rotate left by 8: temp1 = B2 B1 B0 B3. B1 and B3 are now in their final
// positions.
__ rotlwi(temp1, input, 8);
// Rotating the input left by 24 yields B0 B3 B2 B1; insert only bits 0-7
// of that (B0) into temp1: temp1 = B0 B1 B0 B3.
__ rlwimi(temp1, input, 24, 0, 7);
// Same rotation, insert bits 16-23 (B2): temp1 = B0 B1 B2 B3.
__ rlwimi(temp1, input, 24, 16, 23);
// Sign-extend the reversed word to the full register width.
__ extsw(output, temp1);
break;
}
#ifdef V8_TARGET_ARCH_PPC64
case kPPC_ByteRev64: {
// Byte-reverse a 64-bit value: each 32-bit half is byte-reversed with the
// same rotlwi/rlwimi/rlwimi sequence used for kPPC_ByteRev32, and the two
// halves are then swapped. The two sequences are interleaved below.
Register input = i.InputRegister(0);
Register output = i.OutputRegister();
Register temp1 = r0;
Register temp2 = kScratchReg;
// NOTE(review): input 1 is written to here, so it is used as a scratch
// register; presumably it is the g.TempRegister() operand that
// VisitWord64ReverseBytes passes after the real input -- confirm.
Register temp3 = i.InputRegister(1);
// temp1 = high word of the input, moved into the low 32 bits (rotate left
// by 32, then clear the upper 32 bits).
__ rldicl(temp1, input, 32, 32);
// temp2 accumulates the byte-reversed low word of the input.
__ rotlwi(temp2, input, 8);
__ rlwimi(temp2, input, 24, 0, 7);
// temp3 accumulates the byte-reversed high word (taken from temp1).
__ rotlwi(temp3, temp1, 8);
__ rlwimi(temp2, input, 24, 16, 23);
__ rlwimi(temp3, temp1, 24, 0, 7);
__ rlwimi(temp3, temp1, 24, 16, 23);
// Move the reversed low word into the upper 32 bits of temp2, clearing the
// lower 32 bits.
__ rldicr(temp2, temp2, 32, 31);
// Combine the halves. NOTE(review): this relies on the upper 32 bits of
// temp3 being zero after the rotlwi/rlwimi sequence -- confirm against the
// ISA definition of rlwinm in 64-bit mode.
__ orx(output, temp2, temp3);
break;
}
#endif // V8_TARGET_ARCH_PPC64
default:
UNREACHABLE();
break;

View File

@ -124,7 +124,9 @@ namespace compiler {
V(PPC_StoreWord32) \
V(PPC_StoreWord64) \
V(PPC_StoreFloat32) \
V(PPC_StoreDouble)
V(PPC_StoreDouble) \
V(PPC_ByteRev32) \
V(PPC_ByteRev64)
// Addressing modes represent the "shape" of inputs to an instruction.
// Many instructions support multiple addressing modes. Addressing modes

View File

@ -109,6 +109,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kPPC_BitcastFloat32ToInt32:
case kPPC_BitcastInt64ToDouble:
case kPPC_BitcastDoubleToInt64:
case kPPC_ByteRev32:
case kPPC_ByteRev64:
return kNoOpcodeFlags;
case kPPC_LoadWordS8:

View File

@ -937,9 +937,17 @@ void InstructionSelector::VisitWord32ReverseBits(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord64ReverseBits(Node* node) { UNREACHABLE(); }
#endif
void InstructionSelector::VisitWord64ReverseBytes(Node* node) { UNREACHABLE(); }
// Selects kPPC_ByteRev64 for a Word64ReverseBytes node. Besides the value
// operand, an extra temp register operand is handed to the instruction.
void InstructionSelector::VisitWord64ReverseBytes(Node* node) {
  PPCOperandGenerator g(this);
  const auto result = g.DefineAsRegister(node);
  const auto value = g.UseRegister(node->InputAt(0));
  const auto scratch = g.TempRegister();
  Emit(kPPC_ByteRev64, result, value, scratch);
}
void InstructionSelector::VisitWord32ReverseBytes(Node* node) { UNREACHABLE(); }
// Selects kPPC_ByteRev32 for a Word32ReverseBytes node: one register input,
// one register output.
void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
  PPCOperandGenerator g(this);
  const auto result = g.DefineAsRegister(node);
  const auto value = g.UseRegister(node->InputAt(0));
  Emit(kPPC_ByteRev32, result, value);
}
void InstructionSelector::VisitSpeculationFence(Node* node) { UNREACHABLE(); }

View File

@ -2609,8 +2609,6 @@ InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTiesAway |
MachineOperatorBuilder::kWord32Popcnt |
MachineOperatorBuilder::kWord32ReverseBytes |
MachineOperatorBuilder::kWord64ReverseBytes |
MachineOperatorBuilder::kInt32AbsWithOverflow |
MachineOperatorBuilder::kInt64AbsWithOverflow |
MachineOperatorBuilder::kWord64Popcnt;