s390: TF Optimize 32-bit Mul/Div/Mod/Popcnt

R=joransiu@ca.ibm.com, bjaideep@ca.ibm.com
BUG=

Review-Url: https://codereview.chromium.org/2662963002
Cr-Commit-Position: refs/heads/master@{#42791}
This commit is contained in:
jyan 2017-01-30 13:13:18 -08:00 committed by Commit bot
parent d293656481
commit 0d9b0dcf43
8 changed files with 227 additions and 94 deletions

View File

@ -1531,22 +1531,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kS390_MulHigh32:
ASSEMBLE_BIN_OP(MulHigh32, MulHigh32, MulHigh32);
break;
case kS390_Mul32WithHigh32:
__ LoadRR(r1, i.InputRegister(0));
__ mr_z(r0, i.InputRegister(1));
__ LoadW(i.OutputRegister(0), r1); // low
__ LoadW(i.OutputRegister(1), r0); // high
break;
case kS390_MulHighU32:
__ LoadRR(r1, i.InputRegister(0));
if (HasRegisterInput(instr, 1)) {
__ mlr(r0, i.InputRegister(1));
} else if (HasStackSlotInput(instr, 1)) {
__ ml(r0, i.InputStackSlot32(1));
} else {
UNIMPLEMENTED();
}
__ LoadlW(i.OutputRegister(), r0);
ASSEMBLE_BIN_OP(MulHighU32, MulHighU32, MulHighU32);
break;
case kS390_MulFloat:
// Ensure we don't clobber right
@ -1576,20 +1562,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
#endif
case kS390_Div32: {
AddressingMode mode = AddressingModeField::decode(instr->opcode());
__ lgfr(r1, i.InputRegister(0));
if (mode != kMode_None) {
size_t first_index = 1;
MemOperand operand = i.MemoryOperand(&mode, &first_index);
__ dsgf(r0, operand);
} else if (HasRegisterInput(instr, 1)) {
__ dsgfr(r0, i.InputRegister(1));
} else if (HasStackSlotInput(instr, 1)) {
__ dsgf(r0, i.InputStackSlot32(1));
} else {
UNREACHABLE();
}
__ LoadlW(i.OutputRegister(), r1);
ASSEMBLE_BIN_OP(Div32, Div32, Div32);
break;
}
#if V8_TARGET_ARCH_S390X
@ -1601,21 +1574,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
#endif
case kS390_DivU32: {
__ lr(r0, i.InputRegister(0));
__ srdl(r0, Operand(32));
AddressingMode mode = AddressingModeField::decode(instr->opcode());
if (mode != kMode_None) {
size_t first_index = 1;
MemOperand operand = i.MemoryOperand(&mode, &first_index);
__ dl(r0, operand);
} else if (HasRegisterInput(instr, 1)) {
__ dlr(r0, i.InputRegister(1));
} else if (HasStackSlotInput(instr, 1)) {
__ dl(r0, i.InputStackSlot32(1));
} else {
UNREACHABLE();
}
__ LoadlW(i.OutputRegister(), r1);
ASSEMBLE_BIN_OP(DivU32, DivU32, DivU32);
break;
}
case kS390_DivFloat:
@ -1643,10 +1602,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
case kS390_Mod32:
ASSEMBLE_MODULO(dr, srda);
ASSEMBLE_BIN_OP(Mod32, Mod32, Mod32);
break;
case kS390_ModU32:
ASSEMBLE_MODULO(dlr, srdl);
ASSEMBLE_BIN_OP(ModU32, ModU32, ModU32);
break;
#if V8_TARGET_ARCH_S390X
case kS390_Mod64:
@ -1799,14 +1758,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kS390_Cntlz32: {
__ llgfr(i.OutputRegister(), i.InputRegister(0));
__ flogr(r0, i.OutputRegister());
__ LoadRR(i.OutputRegister(), r0);
__ SubP(i.OutputRegister(), Operand(32));
} break;
__ Add32(i.OutputRegister(), r0, Operand(-32));
// No zero-extension is needed here because the llgfr above has already been done.
break;
}
#if V8_TARGET_ARCH_S390X
case kS390_Cntlz64: {
__ flogr(r0, i.InputRegister(0));
__ LoadRR(i.OutputRegister(), r0);
} break;
break;
}
#endif
case kS390_Popcnt32:
__ Popcnt32(i.OutputRegister(), i.InputRegister(0));

View File

@ -48,7 +48,6 @@ namespace compiler {
V(S390_MulPair) \
V(S390_Mul32) \
V(S390_Mul32WithOverflow) \
V(S390_Mul32WithHigh32) \
V(S390_Mul64) \
V(S390_MulHigh32) \
V(S390_MulHighU32) \

View File

@ -49,7 +49,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_SubDouble:
case kS390_Mul32:
case kS390_Mul32WithOverflow:
case kS390_Mul32WithHigh32:
case kS390_Mul64:
case kS390_MulHigh32:
case kS390_MulHighU32:

View File

@ -246,8 +246,11 @@ bool AutoZeroExtendsWord32ToWord64(Node* node) {
case IrOpcode::kInt32Div:
case IrOpcode::kUint32Div:
case IrOpcode::kInt32MulHigh:
case IrOpcode::kUint32MulHigh:
case IrOpcode::kInt32Mod:
case IrOpcode::kUint32Mod:
case IrOpcode::kWord32Clz:
case IrOpcode::kWord32Popcnt:
return true;
default:
return false;
@ -276,6 +279,7 @@ bool ZeroExtendsWord32ToWord64(Node* node) {
case IrOpcode::kInt32MulHigh:
case IrOpcode::kInt32Mod:
case IrOpcode::kUint32Mod:
case IrOpcode::kWord32Popcnt:
return true;
// TODO(john.yan): consider the following case to be valid
// case IrOpcode::kWord32Equal:
@ -1148,9 +1152,7 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
#endif
void InstructionSelector::VisitWord32Clz(Node* node) {
S390OperandGenerator g(this);
Emit(kS390_Cntlz32, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)));
VisitRR(this, kS390_Cntlz32, node);
}
#if V8_TARGET_ARCH_S390X
@ -1163,8 +1165,8 @@ void InstructionSelector::VisitWord64Clz(Node* node) {
void InstructionSelector::VisitWord32Popcnt(Node* node) {
S390OperandGenerator g(this);
Emit(kS390_Popcnt32, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)));
Node* value = node->InputAt(0);
Emit(kS390_Popcnt32, g.DefineAsRegister(node), g.UseRegister(value));
}
#if V8_TARGET_ARCH_S390X
@ -1331,15 +1333,8 @@ void InstructionSelector::VisitInt32MulHigh(Node* node) {
}
void InstructionSelector::VisitUint32MulHigh(Node* node) {
S390OperandGenerator g(this);
Int32BinopMatcher m(node);
Node* left = m.left().node();
Node* right = m.right().node();
if (g.CanBeBetterLeftOperand(right)) {
std::swap(left, right);
}
Emit(kS390_MulHighU32, g.DefineAsRegister(node), g.UseRegister(left),
g.Use(right));
VisitBin32op(this, node, kS390_MulHighU32,
OperandMode::kAllowRRM | OperandMode::kAllowRRR);
}
void InstructionSelector::VisitInt32Div(Node* node) {
@ -1365,7 +1360,8 @@ void InstructionSelector::VisitUint64Div(Node* node) {
#endif
void InstructionSelector::VisitInt32Mod(Node* node) {
VisitRRR(this, kS390_Mod32, node);
VisitBin32op(this, node, kS390_Mod32,
OperandMode::kAllowRRM | OperandMode::kAllowRRR);
}
#if V8_TARGET_ARCH_S390X
@ -1375,7 +1371,8 @@ void InstructionSelector::VisitInt64Mod(Node* node) {
#endif
void InstructionSelector::VisitUint32Mod(Node* node) {
VisitRRR(this, kS390_ModU32, node);
VisitBin32op(this, node, kS390_ModU32,
OperandMode::kAllowRRM | OperandMode::kAllowRRR);
}
#if V8_TARGET_ARCH_S390X

View File

@ -724,6 +724,15 @@ bool Decoder::DecodeFourByte(Instruction* instr) {
case LLGFR:
Format(instr, "llgfr\t'r5,'r6");
break;
case POPCNT_Z:
Format(instr, "popcnt\t'r5,'r6");
break;
case LLGCR:
Format(instr, "llgcr\t'r5,'r6");
break;
case LLCR:
Format(instr, "llcr\t'r5,'r6");
break;
case LBR:
Format(instr, "lbr\t'r5,'r6");
break;

View File

@ -3280,34 +3280,68 @@ void MacroAssembler::Mul32(Register dst, const Operand& src1) {
msfi(dst, src1);
}
// Shared body for the MulHigh32 overloads. Expects `dst`, `src1` and `src2`
// to be in scope at the expansion site; `instr` is the multiply emitter that
// is applied to (dst, src2).
// Sequence: lgfr copies src1 into dst, `instr` multiplies dst by src2, and
// srlg shifts dst right by 32 so that the upper half of the product ends up
// in the low word of dst.
#define Generate_MulHigh32(instr) \
{ \
lgfr(dst, src1); \
instr(dst, src2); \
srlg(dst, dst, Operand(32)); \
}
void MacroAssembler::MulHigh32(Register dst, Register src1,
const MemOperand& src2) {
lgfr(dst, src1);
msgf(dst, src2);
srlg(dst, dst, Operand(32));
Generate_MulHigh32(msgf);
}
void MacroAssembler::MulHigh32(Register dst, Register src1, Register src2) {
if (dst.is(src2)) {
std::swap(src1, src2);
}
lgfr(dst, src1);
msgfr(dst, src2);
srlg(dst, dst, Operand(32));
Generate_MulHigh32(msgfr);
}
void MacroAssembler::MulHigh32(Register dst, Register src1,
const Operand& src2) {
lgfr(dst, src1);
msgfi(dst, src2);
srlg(dst, dst, Operand(32));
Generate_MulHigh32(msgfi);
}
#undef Generate_MulHigh32
// Shared body for the MulHighU32 overloads. Expects `dst`, `src1` and `src2`
// to be in scope at the expansion site; `instr` is the multiply emitter
// (ml or mlr below) applied to (r0, src2).
// Sequence: src1 is copied into r1, the multiply is issued on r0, and the
// result is then read back from r0 into dst via LoadlW.
// Overwrites r0 and r1.
#define Generate_MulHighU32(instr) \
{ \
lr(r1, src1); \
instr(r0, src2); \
LoadlW(dst, r0); \
}
// MulHighU32 with the second factor taken from memory (uses ml).
void MacroAssembler::MulHighU32(Register dst, Register src1,
const MemOperand& src2) {
Generate_MulHighU32(ml);
}
// MulHighU32 with both factors in registers (uses mlr).
void MacroAssembler::MulHighU32(Register dst, Register src1, Register src2) {
Generate_MulHighU32(mlr);
}
// Immediate form: not supported. The overload exists only so that callers
// dispatching over all three operand kinds compile; reaching it is a bug.
void MacroAssembler::MulHighU32(Register dst, Register src1,
const Operand& src2) {
USE(dst);
USE(src1);
USE(src2);
UNREACHABLE();
}
#undef Generate_MulHighU32
// Shared body for the Mul32WithOverflowIfCCUnequal overloads. Expects `dst`,
// `src1` and `src2` to be in scope at the expansion site; `instr` is the
// multiply emitter applied to (dst, src2).
// Sequence: lgfr copies src1 into dst, `instr` multiplies dst by src2, and
// cgfr(dst, dst) then compares dst against itself so that, as the name
// indicates, an "unequal" condition code signals that the product overflowed
// 32 bits.
#define Generate_Mul32WithOverflowIfCCUnequal(instr) \
{ \
lgfr(dst, src1); \
instr(dst, src2); \
cgfr(dst, dst); \
}
void MacroAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
const MemOperand& src2) {
lgfr(dst, src1);
msgf(dst, src2);
cgfr(dst, dst);
Generate_Mul32WithOverflowIfCCUnequal(msgf);
}
void MacroAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
@ -3315,18 +3349,16 @@ void MacroAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
if (dst.is(src2)) {
std::swap(src1, src2);
}
lgfr(dst, src1);
msgfr(dst, src2);
cgfr(dst, dst);
Generate_Mul32WithOverflowIfCCUnequal(msgfr);
}
void MacroAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
const Operand& src2) {
lgfr(dst, src1);
msgfi(dst, src2);
cgfr(dst, dst);
Generate_Mul32WithOverflowIfCCUnequal(msgfi);
}
#undef Generate_Mul32WithOverflowIfCCUnequal
void MacroAssembler::Mul64(Register dst, const MemOperand& src1) {
if (is_int20(src1.offset())) {
msg(dst, src1);
@ -3362,6 +3394,108 @@ void MacroAssembler::DivP(Register dividend, Register divider) {
#endif
}
// Shared body for the Div32 overloads. Expects `dst`, `src1` and `src2` to be
// in scope at the expansion site; `instr` is the divide emitter (dsgf or
// dsgfr below) applied to (r0, src2).
// Sequence: the dividend src1 is loaded into r1 via lgfr, the divide is
// issued on r0, and the result is read back from r1 into dst via LoadlW.
// Overwrites r0 and r1.
#define Generate_Div32(instr) \
{ \
lgfr(r1, src1); \
instr(r0, src2); \
LoadlW(dst, r1); \
}
// Div32 with the divisor taken from memory (uses dsgf).
void MacroAssembler::Div32(Register dst, Register src1,
const MemOperand& src2) {
Generate_Div32(dsgf);
}
// Div32 with the divisor in a register (uses dsgfr).
void MacroAssembler::Div32(Register dst, Register src1, Register src2) {
Generate_Div32(dsgfr);
}
// Immediate form: not supported. The overload exists only so that callers
// dispatching over all three operand kinds compile; reaching it is a bug.
void MacroAssembler::Div32(Register dst, Register src1, const Operand& src2) {
USE(dst);
USE(src1);
USE(src2);
UNREACHABLE();
}
#undef Generate_Div32
// Shared body for the DivU32 overloads. Expects `dst`, `src1` and `src2` to
// be in scope at the expansion site; `instr` is the divide emitter (dl or
// dlr below) applied to (r0, src2).
// Sequence: src1 is copied into r0, srdl shifts r0 right by 32 (presumably
// moving the dividend into r1 of the r0:r1 pair and clearing r0 -- confirm
// against the z/Architecture description of SRDL), the divide is issued on
// r0, and the result is read back from r1 into dst via LoadlW.
// Overwrites r0 and r1.
#define Generate_DivU32(instr) \
{ \
lr(r0, src1); \
srdl(r0, Operand(32)); \
instr(r0, src2); \
LoadlW(dst, r1); \
}
// DivU32 with the divisor taken from memory (uses dl).
void MacroAssembler::DivU32(Register dst, Register src1,
const MemOperand& src2) {
Generate_DivU32(dl);
}
// DivU32 with the divisor in a register (uses dlr).
void MacroAssembler::DivU32(Register dst, Register src1, Register src2) {
Generate_DivU32(dlr);
}
// Immediate form: not supported. The overload exists only so that callers
// dispatching over all three operand kinds compile; reaching it is a bug.
void MacroAssembler::DivU32(Register dst, Register src1, const Operand& src2) {
USE(dst);
USE(src1);
USE(src2);
UNREACHABLE();
}
#undef Generate_DivU32
// Shared body for the Mod32 overloads. Expects `dst`, `src1` and `src2` to be
// in scope at the expansion site; `instr` is the divide emitter (dsgf or
// dsgfr below) applied to (r0, src2).
// Same instruction sequence as Div32, except that the result is read back
// from r0 instead of r1.
// Overwrites r0 and r1.
#define Generate_Mod32(instr) \
{ \
lgfr(r1, src1); \
instr(r0, src2); \
LoadlW(dst, r0); \
}
// Mod32 with the divisor taken from memory (uses dsgf).
void MacroAssembler::Mod32(Register dst, Register src1,
const MemOperand& src2) {
Generate_Mod32(dsgf);
}
// Mod32 with the divisor in a register (uses dsgfr).
void MacroAssembler::Mod32(Register dst, Register src1, Register src2) {
Generate_Mod32(dsgfr);
}
// Immediate form: not supported. The overload exists only so that callers
// dispatching over all three operand kinds compile; reaching it is a bug.
void MacroAssembler::Mod32(Register dst, Register src1, const Operand& src2) {
USE(dst);
USE(src1);
USE(src2);
UNREACHABLE();
}
#undef Generate_Mod32
// Shared body for the ModU32 overloads. Expects `dst`, `src1` and `src2` to
// be in scope at the expansion site; `instr` is the divide emitter (dl or
// dlr below) applied to (r0, src2).
// Same instruction sequence as DivU32, except that the result is read back
// from r0 instead of r1.
// Overwrites r0 and r1.
#define Generate_ModU32(instr) \
{ \
lr(r0, src1); \
srdl(r0, Operand(32)); \
instr(r0, src2); \
LoadlW(dst, r0); \
}
// ModU32 with the divisor taken from memory (uses dl).
void MacroAssembler::ModU32(Register dst, Register src1,
const MemOperand& src2) {
Generate_ModU32(dl);
}
// ModU32 with the divisor in a register (uses dlr).
void MacroAssembler::ModU32(Register dst, Register src1, Register src2) {
Generate_ModU32(dlr);
}
// Immediate form: not supported. The overload exists only so that callers
// dispatching over all three operand kinds compile; reaching it is a bug.
void MacroAssembler::ModU32(Register dst, Register src1, const Operand& src2) {
USE(dst);
USE(src1);
USE(src2);
UNREACHABLE();
}
#undef Generate_ModU32
void MacroAssembler::MulP(Register dst, const Operand& opnd) {
#if V8_TARGET_ARCH_S390X
msgfi(dst, opnd);
@ -4765,6 +4899,14 @@ void MacroAssembler::LoadlB(Register dst, const MemOperand& mem) {
#endif
}
// Register-to-register variant of LoadlB: emits llgcr when building for
// V8_TARGET_ARCH_S390X and llcr otherwise.
void MacroAssembler::LoadlB(Register dst, Register src) {
#if V8_TARGET_ARCH_S390X
llgcr(dst, src);
#else
llcr(dst, src);
#endif
}
void MacroAssembler::LoadLogicalReversedWordP(Register dst,
const MemOperand& mem) {
lrv(dst, mem);
@ -5120,7 +5262,7 @@ void MacroAssembler::Popcnt32(Register dst, Register src) {
ar(dst, r0);
ShiftRight(r0, dst, Operand(8));
ar(dst, r0);
LoadB(dst, dst);
LoadlB(dst, dst);
}
#ifdef V8_TARGET_ARCH_S390X
@ -5135,7 +5277,7 @@ void MacroAssembler::Popcnt64(Register dst, Register src) {
AddP(dst, r0);
ShiftRightP(r0, dst, Operand(8));
AddP(dst, r0);
LoadB(dst, dst);
LoadlB(dst, dst);
}
#endif

View File

@ -325,6 +325,9 @@ class MacroAssembler : public Assembler {
void MulHigh32(Register dst, Register src1, const MemOperand& src2);
void MulHigh32(Register dst, Register src1, Register src2);
void MulHigh32(Register dst, Register src1, const Operand& src2);
void MulHighU32(Register dst, Register src1, const MemOperand& src2);
void MulHighU32(Register dst, Register src1, Register src2);
void MulHighU32(Register dst, Register src1, const Operand& src2);
void Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
const MemOperand& src2);
void Mul32WithOverflowIfCCUnequal(Register dst, Register src1, Register src2);
@ -336,6 +339,20 @@ class MacroAssembler : public Assembler {
// Divide
void DivP(Register dividend, Register divider);
void Div32(Register dst, Register src1, const MemOperand& src2);
void Div32(Register dst, Register src1, Register src2);
void Div32(Register dst, Register src1, const Operand& src2);
void DivU32(Register dst, Register src1, const MemOperand& src2);
void DivU32(Register dst, Register src1, Register src2);
void DivU32(Register dst, Register src1, const Operand& src2);
// Mod
void Mod32(Register dst, Register src1, const MemOperand& src2);
void Mod32(Register dst, Register src1, Register src2);
void Mod32(Register dst, Register src1, const Operand& src2);
void ModU32(Register dst, Register src1, const MemOperand& src2);
void ModU32(Register dst, Register src1, Register src2);
void ModU32(Register dst, Register src1, const Operand& src2);
// Square root
void Sqrt(DoubleRegister result, DoubleRegister input);
@ -372,6 +389,7 @@ class MacroAssembler : public Assembler {
void LoadB(Register dst, const MemOperand& opnd);
void LoadB(Register dst, Register src);
void LoadlB(Register dst, const MemOperand& opnd);
void LoadlB(Register dst, Register src);
void LoadLogicalReversedWordP(Register dst, const MemOperand& opnd);
void LoadLogicalReversedHalfWordP(Register dst, const MemOperand& opnd);

View File

@ -10367,9 +10367,13 @@ EVALUATE(FLOGR) {
}
EVALUATE(LLGCR) {
UNIMPLEMENTED();
USE(instr);
return 0;
DCHECK_OPCODE(LLGCR);
DECODE_RRE_INSTRUCTION(r1, r2);
uint64_t r2_val = get_low_register<uint64_t>(r2);
r2_val <<= 56;
r2_val >>= 56;
set_register(r1, r2_val);
return length;
}
EVALUATE(LLGHR) {
@ -10447,9 +10451,13 @@ EVALUATE(TROO) {
}
EVALUATE(LLCR) {
UNIMPLEMENTED();
USE(instr);
return 0;
DCHECK_OPCODE(LLCR);
DECODE_RRE_INSTRUCTION(r1, r2);
uint32_t r2_val = get_low_register<uint32_t>(r2);
r2_val <<= 24;
r2_val >>= 24;
set_low_register(r1, r2_val);
return length;
}
EVALUATE(LLHR) {