Support for SDIV and MLS ARM instructions, and implement DoModI using them.
Also added runtime detection of whether the hardware supports SDIV/UDIV. Other new opportunities to exploit SDIV/UDIV will be addressed in separate issues. Review URL: https://chromiumcodereview.appspot.com/10977051 Patch from Subrato K De <subratokde@codeaurora.org>. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12646 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
12d4075c03
commit
08747c3a16
@ -110,6 +110,10 @@ void CpuFeatures::Probe() {
|
||||
if (FLAG_enable_armv7) {
|
||||
supported_ |= 1u << ARMv7;
|
||||
}
|
||||
|
||||
if (FLAG_enable_sudiv) {
|
||||
supported_ |= 1u << SUDIV;
|
||||
}
|
||||
#else // __arm__
|
||||
// Probe for additional features not already known to be available.
|
||||
if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
|
||||
@ -125,6 +129,10 @@ void CpuFeatures::Probe() {
|
||||
found_by_runtime_probing_ |= 1u << ARMv7;
|
||||
}
|
||||
|
||||
if (!IsSupported(SUDIV) && OS::ArmCpuHasFeature(SUDIV)) {
|
||||
found_by_runtime_probing_ |= 1u << SUDIV;
|
||||
}
|
||||
|
||||
supported_ |= found_by_runtime_probing_;
|
||||
#endif
|
||||
|
||||
@ -1207,6 +1215,22 @@ void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
|
||||
}
|
||||
|
||||
|
||||
// Emits a multiply-and-subtract instruction: dst = srcA - src1 * src2.
// None of the register operands may be pc. Unlike mla, this emitter takes no
// SBit argument.
void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
                    Condition cond) {
  ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc));
  // Constant part of the encoding that identifies the instruction.
  const int opcode_bits = B22 | B21 | B7 | B4;
  // dst is placed at B16, srcA at B12, src2 at B8 and src1 in the lowest bits.
  const int register_bits = dst.code()*B16 | srcA.code()*B12 |
                            src2.code()*B8 | src1.code();
  emit(cond | opcode_bits | register_bits);
}
|
||||
|
||||
|
||||
// Emits a signed integer divide: dst = src1 / src2.
// None of the register operands may be pc. Callers are expected to have
// checked that the SUDIV CPU feature is available before using this.
void Assembler::sdiv(Register dst, Register src1, Register src2,
                     Condition cond) {
  ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
  // Constant part of the encoding, including the all-ones field at B12.
  const int opcode_bits = B26 | B25 | B24 | B20 | 0xf * B12 | B4;
  // dst is placed at B16, src2 at B8 and src1 in the lowest bits.
  const int register_bits = dst.code()*B16 | src2.code()*B8 | src1.code();
  emit(cond | opcode_bits | register_bits);
}
|
||||
|
||||
|
||||
void Assembler::mul(Register dst, Register src1, Register src2,
|
||||
SBit s, Condition cond) {
|
||||
ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
|
||||
|
@ -511,6 +511,7 @@ class CpuFeatures : public AllStatic {
|
||||
ASSERT(initialized_);
|
||||
if (f == VFP3 && !FLAG_enable_vfp3) return false;
|
||||
if (f == VFP2 && !FLAG_enable_vfp2) return false;
|
||||
if (f == SUDIV && !FLAG_enable_sudiv) return false;
|
||||
return (supported_ & (1u << f)) != 0;
|
||||
}
|
||||
|
||||
@ -869,6 +870,12 @@ class Assembler : public AssemblerBase {
|
||||
void mla(Register dst, Register src1, Register src2, Register srcA,
|
||||
SBit s = LeaveCC, Condition cond = al);
|
||||
|
||||
// Multiply and subtract: dst = srcA - src1 * src2.
// Unlike mla, there is no SBit parameter; cond defaults to al.
void mls(Register dst, Register src1, Register src2, Register srcA,
|
||||
Condition cond = al);
|
||||
|
||||
// Signed integer divide: dst = src1 / src2. cond defaults to al.
// NOTE(review): presumably only valid when CpuFeatures reports SUDIV as
// supported -- confirm that every caller guards on it.
void sdiv(Register dst, Register src1, Register src2,
|
||||
Condition cond = al);
|
||||
|
||||
void mul(Register dst, Register src1, Register src2,
|
||||
SBit s = LeaveCC, Condition cond = al);
|
||||
|
||||
|
@ -692,11 +692,19 @@ void Decoder::DecodeType01(Instruction* instr) {
|
||||
// Rn field to encode it.
|
||||
Format(instr, "mul'cond's 'rn, 'rm, 'rs");
|
||||
} else {
|
||||
// The MLA instruction description (A 4.1.28) refers to the order
|
||||
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
|
||||
// Rn field to encode the Rd register and the Rd field to encode
|
||||
// the Rn register.
|
||||
Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
|
||||
if (instr->Bit(22) == 0) {
|
||||
// The MLA instruction description (A 4.1.28) refers to the order
|
||||
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
|
||||
// Rn field to encode the Rd register and the Rd field to encode
|
||||
// the Rn register.
|
||||
Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
|
||||
} else {
|
||||
// The MLS instruction description (A 4.1.29) refers to the order
|
||||
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
|
||||
// Rn field to encode the Rd register and the Rd field to encode
|
||||
// the Rn register.
|
||||
Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// The signed/long multiply instructions use the terms RdHi and RdLo
|
||||
@ -974,6 +982,17 @@ void Decoder::DecodeType3(Instruction* instr) {
|
||||
break;
|
||||
}
|
||||
case db_x: {
|
||||
if (FLAG_enable_sudiv) {
|
||||
if (!instr->HasW()) {
|
||||
if (instr->Bits(5, 4) == 0x1) {
|
||||
if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
|
||||
// SDIV (in V8 notation matching ARM ISA format) rn = rm/rs
|
||||
Format(instr, "sdiv'cond'b 'rn, 'rm, 'rs");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
|
||||
break;
|
||||
}
|
||||
|
@ -979,109 +979,132 @@ void LCodeGen::DoModI(LModI* instr) {
|
||||
Register left = ToRegister(instr->left());
|
||||
Register right = ToRegister(instr->right());
|
||||
Register result = ToRegister(instr->result());
|
||||
Label done;
|
||||
|
||||
Register scratch = scratch0();
|
||||
Register scratch2 = ToRegister(instr->temp());
|
||||
DwVfpRegister dividend = ToDoubleRegister(instr->temp2());
|
||||
DwVfpRegister divisor = ToDoubleRegister(instr->temp3());
|
||||
DwVfpRegister quotient = double_scratch0();
|
||||
if (CpuFeatures::IsSupported(SUDIV)) {
|
||||
CpuFeatures::Scope scope(SUDIV);
|
||||
// Check for x % 0.
|
||||
if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
|
||||
__ cmp(right, Operand(0));
|
||||
DeoptimizeIf(eq, instr->environment());
|
||||
}
|
||||
|
||||
ASSERT(!dividend.is(divisor));
|
||||
ASSERT(!dividend.is(quotient));
|
||||
ASSERT(!divisor.is(quotient));
|
||||
ASSERT(!scratch.is(left));
|
||||
ASSERT(!scratch.is(right));
|
||||
ASSERT(!scratch.is(result));
|
||||
// For r3 = r1 % r2; we can have the following ARM code
|
||||
// sdiv r3, r1, r2
|
||||
// mls r3, r3, r2, r1
|
||||
|
||||
Label done, vfp_modulo, both_positive, right_negative;
|
||||
__ sdiv(result, left, right);
|
||||
__ mls(result, result, right, left);
|
||||
__ cmp(result, Operand(0));
|
||||
__ b(ne, &done);
|
||||
|
||||
// Check for x % 0.
|
||||
if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
|
||||
__ cmp(right, Operand(0));
|
||||
DeoptimizeIf(eq, instr->environment());
|
||||
}
|
||||
|
||||
__ Move(result, left);
|
||||
|
||||
// (0 % x) must yield 0 (if x is finite, which is the case here).
|
||||
__ cmp(left, Operand(0));
|
||||
__ b(eq, &done);
|
||||
// Preload right in a vfp register.
|
||||
__ vmov(divisor.low(), right);
|
||||
__ b(lt, &vfp_modulo);
|
||||
|
||||
__ cmp(left, Operand(right));
|
||||
__ b(lt, &done);
|
||||
|
||||
// Check for (positive) power of two on the right hand side.
|
||||
__ JumpIfNotPowerOfTwoOrZeroAndNeg(right,
|
||||
scratch,
|
||||
&right_negative,
|
||||
&both_positive);
|
||||
// Perform modulo operation (scratch contains right - 1).
|
||||
__ and_(result, scratch, Operand(left));
|
||||
__ b(&done);
|
||||
|
||||
__ bind(&right_negative);
|
||||
// Negate right. The sign of the divisor does not matter.
|
||||
__ rsb(right, right, Operand(0));
|
||||
|
||||
__ bind(&both_positive);
|
||||
const int kUnfolds = 3;
|
||||
// If the right hand side is smaller than the (nonnegative)
|
||||
// left hand side, the left hand side is the result.
|
||||
// Else try a few subtractions of the left hand side.
|
||||
__ mov(scratch, left);
|
||||
for (int i = 0; i < kUnfolds; i++) {
|
||||
// Check if the left hand side is less or equal than the
|
||||
// the right hand side.
|
||||
__ cmp(scratch, Operand(right));
|
||||
__ mov(result, scratch, LeaveCC, lt);
|
||||
__ b(lt, &done);
|
||||
// If not, reduce the left hand side by the right hand
|
||||
// side and check again.
|
||||
if (i < kUnfolds - 1) __ sub(scratch, scratch, right);
|
||||
}
|
||||
|
||||
__ bind(&vfp_modulo);
|
||||
// Load the arguments in VFP registers.
|
||||
// The divisor value is preloaded before. Be careful that 'right' is only live
|
||||
// on entry.
|
||||
__ vmov(dividend.low(), left);
|
||||
// From here on don't use right as it may have been reallocated (for example
|
||||
// to scratch2).
|
||||
right = no_reg;
|
||||
|
||||
__ vcvt_f64_s32(dividend, dividend.low());
|
||||
__ vcvt_f64_s32(divisor, divisor.low());
|
||||
|
||||
// We do not care about the sign of the divisor.
|
||||
__ vabs(divisor, divisor);
|
||||
// Compute the quotient and round it to a 32bit integer.
|
||||
__ vdiv(quotient, dividend, divisor);
|
||||
__ vcvt_s32_f64(quotient.low(), quotient);
|
||||
__ vcvt_f64_s32(quotient, quotient.low());
|
||||
|
||||
// Compute the remainder in result.
|
||||
DwVfpRegister double_scratch = dividend;
|
||||
__ vmul(double_scratch, divisor, quotient);
|
||||
__ vcvt_s32_f64(double_scratch.low(), double_scratch);
|
||||
__ vmov(scratch, double_scratch.low());
|
||||
|
||||
if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
|
||||
__ sub(result, left, scratch);
|
||||
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
|
||||
__ cmp(left, Operand(0));
|
||||
DeoptimizeIf(lt, instr->environment());
|
||||
}
|
||||
} else {
|
||||
Label ok;
|
||||
// Check for -0.
|
||||
__ sub(scratch2, left, scratch, SetCC);
|
||||
__ b(ne, &ok);
|
||||
__ cmp(left, Operand(0));
|
||||
DeoptimizeIf(mi, instr->environment());
|
||||
__ bind(&ok);
|
||||
// Load the result and we are done.
|
||||
__ mov(result, scratch2);
|
||||
}
|
||||
Register scratch = scratch0();
|
||||
Register scratch2 = ToRegister(instr->temp());
|
||||
DwVfpRegister dividend = ToDoubleRegister(instr->temp2());
|
||||
DwVfpRegister divisor = ToDoubleRegister(instr->temp3());
|
||||
DwVfpRegister quotient = double_scratch0();
|
||||
|
||||
ASSERT(!dividend.is(divisor));
|
||||
ASSERT(!dividend.is(quotient));
|
||||
ASSERT(!divisor.is(quotient));
|
||||
ASSERT(!scratch.is(left));
|
||||
ASSERT(!scratch.is(right));
|
||||
ASSERT(!scratch.is(result));
|
||||
|
||||
Label done, vfp_modulo, both_positive, right_negative;
|
||||
|
||||
// Check for x % 0.
|
||||
if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
|
||||
__ cmp(right, Operand(0));
|
||||
DeoptimizeIf(eq, instr->environment());
|
||||
}
|
||||
|
||||
__ Move(result, left);
|
||||
|
||||
// (0 % x) must yield 0 (if x is finite, which is the case here).
|
||||
__ cmp(left, Operand(0));
|
||||
__ b(eq, &done);
|
||||
// Preload right in a vfp register.
|
||||
__ vmov(divisor.low(), right);
|
||||
__ b(lt, &vfp_modulo);
|
||||
|
||||
__ cmp(left, Operand(right));
|
||||
__ b(lt, &done);
|
||||
|
||||
// Check for (positive) power of two on the right hand side.
|
||||
__ JumpIfNotPowerOfTwoOrZeroAndNeg(right,
|
||||
scratch,
|
||||
&right_negative,
|
||||
&both_positive);
|
||||
// Perform modulo operation (scratch contains right - 1).
|
||||
__ and_(result, scratch, Operand(left));
|
||||
__ b(&done);
|
||||
|
||||
__ bind(&right_negative);
|
||||
// Negate right. The sign of the divisor does not matter.
|
||||
__ rsb(right, right, Operand(0));
|
||||
|
||||
__ bind(&both_positive);
|
||||
const int kUnfolds = 3;
|
||||
// If the right hand side is smaller than the (nonnegative)
|
||||
// left hand side, the left hand side is the result.
|
||||
// Else try a few subtractions of the left hand side.
|
||||
__ mov(scratch, left);
|
||||
for (int i = 0; i < kUnfolds; i++) {
|
||||
// Check if the left hand side is less or equal than the
|
||||
// the right hand side.
|
||||
__ cmp(scratch, Operand(right));
|
||||
__ mov(result, scratch, LeaveCC, lt);
|
||||
__ b(lt, &done);
|
||||
// If not, reduce the left hand side by the right hand
|
||||
// side and check again.
|
||||
if (i < kUnfolds - 1) __ sub(scratch, scratch, right);
|
||||
}
|
||||
|
||||
__ bind(&vfp_modulo);
|
||||
// Load the arguments in VFP registers.
|
||||
// The divisor value is preloaded before. Be careful that 'right'
|
||||
// is only live on entry.
|
||||
__ vmov(dividend.low(), left);
|
||||
// From here on don't use right as it may have been reallocated
|
||||
// (for example to scratch2).
|
||||
right = no_reg;
|
||||
|
||||
__ vcvt_f64_s32(dividend, dividend.low());
|
||||
__ vcvt_f64_s32(divisor, divisor.low());
|
||||
|
||||
// We do not care about the sign of the divisor.
|
||||
__ vabs(divisor, divisor);
|
||||
// Compute the quotient and round it to a 32bit integer.
|
||||
__ vdiv(quotient, dividend, divisor);
|
||||
__ vcvt_s32_f64(quotient.low(), quotient);
|
||||
__ vcvt_f64_s32(quotient, quotient.low());
|
||||
|
||||
// Compute the remainder in result.
|
||||
DwVfpRegister double_scratch = dividend;
|
||||
__ vmul(double_scratch, divisor, quotient);
|
||||
__ vcvt_s32_f64(double_scratch.low(), double_scratch);
|
||||
__ vmov(scratch, double_scratch.low());
|
||||
|
||||
if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
|
||||
__ sub(result, left, scratch);
|
||||
} else {
|
||||
Label ok;
|
||||
// Check for -0.
|
||||
__ sub(scratch2, left, scratch, SetCC);
|
||||
__ b(ne, &ok);
|
||||
__ cmp(left, Operand(0));
|
||||
DeoptimizeIf(mi, instr->environment());
|
||||
__ bind(&ok);
|
||||
// Load the result and we are done.
|
||||
__ mov(result, scratch2);
|
||||
}
|
||||
}
|
||||
__ bind(&done);
|
||||
}
|
||||
|
||||
|
@ -1986,11 +1986,23 @@ void Simulator::DecodeType01(Instruction* instr) {
|
||||
SetNZFlags(alu_out);
|
||||
}
|
||||
} else {
|
||||
// The MLA instruction description (A 4.1.28) refers to the order
|
||||
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
|
||||
// Rn field to encode the Rd register and the Rd field to encode
|
||||
// the Rn register.
|
||||
Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
|
||||
int rd = instr->RdValue();
|
||||
int32_t acc_value = get_register(rd);
|
||||
if (instr->Bit(22) == 0) {
|
||||
// The MLA instruction description (A 4.1.28) refers to the order
|
||||
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
|
||||
// Rn field to encode the Rd register and the Rd field to encode
|
||||
// the Rn register.
|
||||
// Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
|
||||
int32_t mul_out = rm_val * rs_val;
|
||||
int32_t result = acc_value + mul_out;
|
||||
set_register(rn, result);
|
||||
} else {
|
||||
// Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
|
||||
int32_t mul_out = rm_val * rs_val;
|
||||
int32_t result = acc_value - mul_out;
|
||||
set_register(rn, result);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// The signed/long multiply instructions use the terms RdHi and RdLo
|
||||
@ -2546,6 +2558,25 @@ void Simulator::DecodeType3(Instruction* instr) {
|
||||
break;
|
||||
}
|
||||
case db_x: {
|
||||
if (FLAG_enable_sudiv) {
|
||||
if (!instr->HasW()) {
|
||||
if (instr->Bits(5, 4) == 0x1) {
|
||||
if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
|
||||
// sdiv (in V8 notation matching ARM ISA format) rn = rm/rs
|
||||
// Format(instr, "'sdiv'cond'b 'rn, 'rm, 'rs);
|
||||
int rm = instr->RmValue();
|
||||
int32_t rm_val = get_register(rm);
|
||||
int rs = instr->RsValue();
|
||||
int32_t rs_val = get_register(rs);
|
||||
int32_t ret_val = 0;
|
||||
ASSERT(rs_val != 0);
|
||||
ret_val = rm_val/rs_val;
|
||||
set_register(rn, ret_val);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
|
||||
addr = rn_val - shifter_operand;
|
||||
if (instr->HasW()) {
|
||||
|
@ -284,6 +284,8 @@ DEFINE_bool(enable_vfp2, true,
|
||||
"enable use of VFP2 instructions if available")
|
||||
DEFINE_bool(enable_armv7, true,
|
||||
"enable use of ARMv7 instructions if available (ARM only)")
|
||||
DEFINE_bool(enable_sudiv, true,
|
||||
"enable use of SDIV and UDIV instructions if available (ARM only)")
|
||||
DEFINE_bool(enable_fpu, true,
|
||||
"enable use of MIPS FPU instructions if available (MIPS only)")
|
||||
|
||||
|
@ -148,6 +148,9 @@ bool OS::ArmCpuHasFeature(CpuFeature feature) {
|
||||
case ARMv7:
|
||||
search_string = "ARMv7";
|
||||
break;
|
||||
case SUDIV:
|
||||
search_string = "idiva";
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -438,6 +438,7 @@ enum CpuFeature { SSE4_1 = 32 + 19, // x86
|
||||
VFP3 = 1, // ARM
|
||||
ARMv7 = 2, // ARM
|
||||
VFP2 = 3, // ARM
|
||||
SUDIV = 4, // ARM
|
||||
SAHF = 0, // x86
|
||||
FPU = 1}; // MIPS
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user