[riscv64] support 64bit mul high and Int64MulWithOverflow

Port 222007bc11
Port 78570f7826

Bug: v8:9407
Change-Id: I5de127dad747bd94b7129476acd67cb2ce8af09d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3940601
Auto-Submit: ji qiu <qiuji@iscas.ac.cn>
Reviewed-by: Yahan Lu <yahan@iscas.ac.cn>
Commit-Queue: ji qiu <qiuji@iscas.ac.cn>
Cr-Commit-Position: refs/heads/main@{#83582}
This commit is contained in:
QiuJi 2022-10-09 20:19:23 +08:00 committed by V8 LUCI CQ
parent 7f0c777d9e
commit ed6ea9cecb
6 changed files with 102 additions and 2 deletions

View File

@ -699,6 +699,18 @@ void TurboAssembler::Mulh64(Register rd, Register rs, const Operand& rt) {
}
}
// Mulhu64: rd = upper 64 bits of the unsigned 128-bit product rs * rt.
void TurboAssembler::Mulhu64(Register rd, Register rs, const Operand& rt) {
  if (rt.is_reg()) {
    mulhu(rd, rs, rt.rm());
  } else {
    // Materialize the immediate into a scratch register; li handles the
    // relocation.  (Lower-case `li` for consistency with the comment and
    // with the sibling MulOverflow64 sequence.)
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    li(scratch, Operand(rt.immediate()));
    mulhu(rd, rs, scratch);
  }
}
void TurboAssembler::Div32(Register res, Register rs, const Operand& rt) {
if (rt.is_reg()) {
divw(res, rs, rt.rm());
@ -5200,6 +5212,37 @@ void TurboAssembler::MulOverflow32(Register dst, Register left,
sext_w(dst, overflow);
xor_(overflow, overflow, dst);
}
// Computes dst = left * right (64-bit) and writes an indicator to `overflow`
// that is zero iff the full signed 64x64-bit product fits in 64 bits.
void TurboAssembler::MulOverflow64(Register dst, Register left,
                                   const Operand& right, Register overflow) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  BlockTrampolinePoolScope block_trampoline_pool(this);
  Register right_reg = no_reg;
  Register scratch = temps.Acquire();
  Register scratch2 = temps.Acquire();
  if (!right.is_reg()) {
    // Materialize the immediate operand; li handles the relocation.
    li(scratch, Operand(right));
    right_reg = scratch;
  } else {
    right_reg = right.rm();
  }
  // The scratch register and `overflow` must not alias the values they are
  // combined with below, or an intermediate result would be clobbered.
  DCHECK(left != scratch2 && right_reg != scratch2 && dst != scratch2 &&
         overflow != scratch2);
  DCHECK(overflow != left && overflow != right_reg);
  // use this sequence of "mulh/mul" according to recommendation of ISA Spec 7.1
  // upper part of the 128-bit product
  mulh(scratch2, left, right_reg);
  // lower part of the 128-bit product
  mul(dst, left, right_reg);
  // expand the sign of the lower part to 64bit
  srai(overflow, dst, 63);
  // if the upper part is not equal to the expanded sign bit of the lower part,
  // overflow happens, and the xor leaves a non-zero value in `overflow`
  xor_(overflow, overflow, scratch2);
}
#elif V8_TARGET_ARCH_RISCV32
void TurboAssembler::AddOverflow(Register dst, Register left,
const Operand& right, Register overflow) {

View File

@ -447,6 +447,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
DEFINE_INSTRUCTION(Mulh32)
DEFINE_INSTRUCTION(Mul64)
DEFINE_INSTRUCTION(Mulh64)
DEFINE_INSTRUCTION(Mulhu64)
DEFINE_INSTRUCTION2(Div32)
DEFINE_INSTRUCTION2(Div64)
DEFINE_INSTRUCTION2(Divu32)
@ -871,7 +872,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
// MulOverflow32 sets overflow register to zero if no overflow occurred
void MulOverflow32(Register dst, Register left, const Operand& right,
Register overflow);
// MulOverflow64 sets overflow register to zero if no overflow occurred
void MulOverflow64(Register dst, Register left, const Operand& right,
Register overflow);
// Number of instructions needed for calculation of switch table entry address
static const int kSwitchTablePrologueSize = 6;

View File

@ -1036,6 +1036,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Mulh64(i.OutputRegister(), i.InputOrZeroRegister(0),
i.InputOperand(1));
break;
case kRiscvMulHighU64:
__ Mulhu64(i.OutputRegister(), i.InputOrZeroRegister(0),
i.InputOperand(1));
break;
case kRiscvMulOvf64:
__ MulOverflow64(i.OutputRegister(), i.InputOrZeroRegister(0),
i.InputOperand(1), kScratchReg);
break;
case kRiscvDiv32: {
__ Div32(i.OutputRegister(), i.InputOrZeroRegister(0), i.InputOperand(1));
// Set output to zero if divisor == 0
@ -3745,7 +3753,13 @@ void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm,
default:
UNSUPPORTED_COND(instr->arch_opcode(), condition);
}
#if V8_TARGET_ARCH_RISCV64
// kRiscvMulOvf64 is only for RISCV64
} else if (instr->arch_opcode() == kRiscvMulOvf32 ||
instr->arch_opcode() == kRiscvMulOvf64) {
#elif V8_TARGET_ARCH_RISCV32
} else if (instr->arch_opcode() == kRiscvMulOvf32) {
#endif
// Overflow occurs if overflow register is not zero
switch (condition) {
case kOverflow:
@ -3755,7 +3769,7 @@ void AssembleBranchToLabels(CodeGenerator* gen, TurboAssembler* tasm,
__ Branch(tlabel, eq, kScratchReg, Operand(zero_reg));
break;
default:
UNSUPPORTED_COND(kRiscvMulOvf32, condition);
UNSUPPORTED_COND(instr->arch_opcode(), condition);
}
} else if (instr->arch_opcode() == kRiscvCmp) {
Condition cc = FlagsConditionToConditionCmp(condition);
@ -3908,7 +3922,13 @@ void CodeGenerator::AssembleArchBoolean(Instruction* instr,
#endif
// Overflow occurs if overflow register is negative
__ Slt(result, kScratchReg, zero_reg);
#if V8_TARGET_ARCH_RISCV64
// kRiscvMulOvf64 is only for RISCV64
} else if (instr->arch_opcode() == kRiscvMulOvf32 ||
instr->arch_opcode() == kRiscvMulOvf64) {
#elif V8_TARGET_ARCH_RISCV32
} else if (instr->arch_opcode() == kRiscvMulOvf32) {
#endif
// Overflow occurs if overflow register is not zero
__ Sgtu(result, kScratchReg, zero_reg);
} else if (instr->arch_opcode() == kRiscvCmp) {

View File

@ -18,7 +18,9 @@ namespace compiler {
V(RiscvSub64) \
V(RiscvSubOvf64) \
V(RiscvMulHigh64) \
V(RiscvMulHighU64) \
V(RiscvMul64) \
V(RiscvMulOvf64) \
V(RiscvDiv64) \
V(RiscvDivU64) \
V(RiscvMod64) \

View File

@ -24,6 +24,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kRiscvCvtSL:
case kRiscvCvtSUl:
case kRiscvMulHigh64:
case kRiscvMulHighU64:
case kRiscvAdd64:
case kRiscvAddOvf64:
case kRiscvClz64:
@ -35,6 +36,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kRiscvMod64:
case kRiscvModU64:
case kRiscvMul64:
case kRiscvMulOvf64:
case kRiscvPopcnt64:
case kRiscvRor64:
case kRiscvSar64:
@ -946,6 +948,11 @@ int MulOverflow32Latency() {
return Mul32Latency() + Mulh32Latency() + 2;
}
// Latency estimate for the MulOverflow64 pseudo-instruction.
int MulOverflow64Latency() {
  // Estimated max: the mul/mulh pair plus the srai/xor overflow check.
  int cycles = Mul64Latency() + Mulh64Latency();
  return cycles + 2;
}
// TODO(RISCV): This is incorrect for RISC-V.
int Clz64Latency() {
  // Placeholder: assume a single-cycle 64-bit count-leading-zeros.
  return 1;
}
@ -1216,6 +1223,8 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
return Mulh64Latency();
case kRiscvMul64:
return Mul64Latency();
case kRiscvMulOvf64:
return MulOverflow64Latency();
case kRiscvDiv64: {
int latency = Div64Latency();
return latency + MovzLatency();

View File

@ -667,10 +667,18 @@ void InstructionSelector::VisitInt32MulHigh(Node* node) {
VisitRRR(this, kRiscvMulHigh32, node);
}
// Lowers Int64MulHigh (upper 64 bits of the signed 128-bit product) to the
// three-register kRiscvMulHigh64 instruction.
void InstructionSelector::VisitInt64MulHigh(Node* node) {
  VisitRRR(this, kRiscvMulHigh64, node);
}
// Lowers Uint32MulHigh (upper 32 bits of the unsigned product) to the
// three-register kRiscvMulHighU32 instruction.
void InstructionSelector::VisitUint32MulHigh(Node* node) {
  VisitRRR(this, kRiscvMulHighU32, node);
}
// Lowers Uint64MulHigh (upper 64 bits of the unsigned 128-bit product) to the
// three-register kRiscvMulHighU64 instruction.
void InstructionSelector::VisitUint64MulHigh(Node* node) {
  VisitRRR(this, kRiscvMulHighU64, node);
}
void InstructionSelector::VisitInt64Mul(Node* node) {
RiscvOperandGenerator g(this);
Int64BinopMatcher m(node);
@ -1820,6 +1828,21 @@ void InstructionSelector::VisitInt64SubWithOverflow(Node* node) {
VisitBinop(this, node, kRiscvSubOvf64, &cont);
}
// Lowers Int64MulWithOverflow to kRiscvMulOvf64, attaching a flags
// continuation only when the overflow projection is actually used.
void InstructionSelector::VisitInt64MulWithOverflow(Node* node) {
  Node* const ovf = NodeProperties::FindProjection(node, 1);
  if (ovf == nullptr) {
    // The overflow output is unused: emit the multiply with an empty
    // continuation.
    FlagsContinuation cont;
    VisitBinop(this, node, kRiscvMulOvf64, &cont);
    return;
  }
  // RISCV64 doesn't set the overflow flag for multiplication, so we need to
  // test on kNotEqual. Here is the code sequence used:
  //   mulh rdh, left, right
  //   mul rdl, left, right
  //   srai temp, rdl, 63
  //   xor overflow, rdl, temp
  FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf);
  VisitBinop(this, node, kRiscvMulOvf64, &cont);
}
void InstructionSelector::VisitWord64Equal(Node* const node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
Int64BinopMatcher m(node);