Support for SDIV and MLS ARM instructions, and implement DoModI using them.

Also added support for the runtime detection to check if hardware supports SDIV/UDIV
Other new opportunities to exploit SDIV/UDIV will be done in separate issues.

Review URL: https://chromiumcodereview.appspot.com/10977051
Patch from Subrato K De <subratokde@codeaurora.org>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12646 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
danno@chromium.org 2012-10-01 21:27:33 +00:00
parent 12d4075c03
commit 08747c3a16
8 changed files with 217 additions and 107 deletions

View File

@ -110,6 +110,10 @@ void CpuFeatures::Probe() {
if (FLAG_enable_armv7) {
supported_ |= 1u << ARMv7;
}
if (FLAG_enable_sudiv) {
supported_ |= 1u << SUDIV;
}
#else // __arm__
// Probe for additional features not already known to be available.
if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
@ -125,6 +129,10 @@ void CpuFeatures::Probe() {
found_by_runtime_probing_ |= 1u << ARMv7;
}
if (!IsSupported(SUDIV) && OS::ArmCpuHasFeature(SUDIV)) {
found_by_runtime_probing_ |= 1u << SUDIV;
}
supported_ |= found_by_runtime_probing_;
#endif
@ -1207,6 +1215,22 @@ void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
}
void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
Condition cond) {
ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc));
emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 |
src2.code()*B8 | B7 | B4 | src1.code());
}
void Assembler::sdiv(Register dst, Register src1, Register src2,
Condition cond) {
ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));
emit(cond | B26 | B25| B24 | B20 | dst.code()*B16 | 0xf * B12 |
src2.code()*B8 | B4 | src1.code());
}
void Assembler::mul(Register dst, Register src1, Register src2,
SBit s, Condition cond) {
ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc));

View File

@ -511,6 +511,7 @@ class CpuFeatures : public AllStatic {
ASSERT(initialized_);
if (f == VFP3 && !FLAG_enable_vfp3) return false;
if (f == VFP2 && !FLAG_enable_vfp2) return false;
if (f == SUDIV && !FLAG_enable_sudiv) return false;
return (supported_ & (1u << f)) != 0;
}
@ -869,6 +870,12 @@ class Assembler : public AssemblerBase {
void mla(Register dst, Register src1, Register src2, Register srcA,
SBit s = LeaveCC, Condition cond = al);
void mls(Register dst, Register src1, Register src2, Register srcA,
Condition cond = al);
void sdiv(Register dst, Register src1, Register src2,
Condition cond = al);
void mul(Register dst, Register src1, Register src2,
SBit s = LeaveCC, Condition cond = al);

View File

@ -692,11 +692,19 @@ void Decoder::DecodeType01(Instruction* instr) {
// Rn field to encode it.
Format(instr, "mul'cond's 'rn, 'rm, 'rs");
} else {
// The MLA instruction description (A 4.1.28) refers to the order
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
// Rn field to encode the Rd register and the Rd field to encode
// the Rn register.
Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
if (instr->Bit(22) == 0) {
// The MLA instruction description (A 4.1.28) refers to the order
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
// Rn field to encode the Rd register and the Rd field to encode
// the Rn register.
Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
} else {
// The MLS instruction description (A 4.1.29) refers to the order
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
// Rn field to encode the Rd register and the Rd field to encode
// the Rn register.
Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
}
}
} else {
// The signed/long multiply instructions use the terms RdHi and RdLo
@ -974,6 +982,17 @@ void Decoder::DecodeType3(Instruction* instr) {
break;
}
case db_x: {
if (FLAG_enable_sudiv) {
if (!instr->HasW()) {
if (instr->Bits(5, 4) == 0x1) {
if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
// SDIV (in V8 notation matching ARM ISA format) rn = rm/rs
Format(instr, "sdiv'cond'b 'rn, 'rm, 'rs");
break;
}
}
}
}
Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
break;
}

View File

@ -979,109 +979,132 @@ void LCodeGen::DoModI(LModI* instr) {
Register left = ToRegister(instr->left());
Register right = ToRegister(instr->right());
Register result = ToRegister(instr->result());
Label done;
Register scratch = scratch0();
Register scratch2 = ToRegister(instr->temp());
DwVfpRegister dividend = ToDoubleRegister(instr->temp2());
DwVfpRegister divisor = ToDoubleRegister(instr->temp3());
DwVfpRegister quotient = double_scratch0();
if (CpuFeatures::IsSupported(SUDIV)) {
CpuFeatures::Scope scope(SUDIV);
// Check for x % 0.
if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
__ cmp(right, Operand(0));
DeoptimizeIf(eq, instr->environment());
}
ASSERT(!dividend.is(divisor));
ASSERT(!dividend.is(quotient));
ASSERT(!divisor.is(quotient));
ASSERT(!scratch.is(left));
ASSERT(!scratch.is(right));
ASSERT(!scratch.is(result));
// For r3 = r1 % r2; we can have the following ARM code
// sdiv r3, r1, r2
// mls r3, r3, r2, r1
Label done, vfp_modulo, both_positive, right_negative;
__ sdiv(result, left, right);
__ mls(result, result, right, left);
__ cmp(result, Operand(0));
__ b(ne, &done);
// Check for x % 0.
if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
__ cmp(right, Operand(0));
DeoptimizeIf(eq, instr->environment());
}
__ Move(result, left);
// (0 % x) must yield 0 (if x is finite, which is the case here).
__ cmp(left, Operand(0));
__ b(eq, &done);
// Preload right in a vfp register.
__ vmov(divisor.low(), right);
__ b(lt, &vfp_modulo);
__ cmp(left, Operand(right));
__ b(lt, &done);
// Check for (positive) power of two on the right hand side.
__ JumpIfNotPowerOfTwoOrZeroAndNeg(right,
scratch,
&right_negative,
&both_positive);
// Perform modulo operation (scratch contains right - 1).
__ and_(result, scratch, Operand(left));
__ b(&done);
__ bind(&right_negative);
// Negate right. The sign of the divisor does not matter.
__ rsb(right, right, Operand(0));
__ bind(&both_positive);
const int kUnfolds = 3;
// If the right hand side is smaller than the (nonnegative)
// left hand side, the left hand side is the result.
// Else try a few subtractions of the left hand side.
__ mov(scratch, left);
for (int i = 0; i < kUnfolds; i++) {
// Check if the left hand side is less or equal than the
// the right hand side.
__ cmp(scratch, Operand(right));
__ mov(result, scratch, LeaveCC, lt);
__ b(lt, &done);
// If not, reduce the left hand side by the right hand
// side and check again.
if (i < kUnfolds - 1) __ sub(scratch, scratch, right);
}
__ bind(&vfp_modulo);
// Load the arguments in VFP registers.
// The divisor value is preloaded before. Be careful that 'right' is only live
// on entry.
__ vmov(dividend.low(), left);
// From here on don't use right as it may have been reallocated (for example
// to scratch2).
right = no_reg;
__ vcvt_f64_s32(dividend, dividend.low());
__ vcvt_f64_s32(divisor, divisor.low());
// We do not care about the sign of the divisor.
__ vabs(divisor, divisor);
// Compute the quotient and round it to a 32bit integer.
__ vdiv(quotient, dividend, divisor);
__ vcvt_s32_f64(quotient.low(), quotient);
__ vcvt_f64_s32(quotient, quotient.low());
// Compute the remainder in result.
DwVfpRegister double_scratch = dividend;
__ vmul(double_scratch, divisor, quotient);
__ vcvt_s32_f64(double_scratch.low(), double_scratch);
__ vmov(scratch, double_scratch.low());
if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
__ sub(result, left, scratch);
if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
__ cmp(left, Operand(0));
DeoptimizeIf(lt, instr->environment());
}
} else {
Label ok;
// Check for -0.
__ sub(scratch2, left, scratch, SetCC);
__ b(ne, &ok);
__ cmp(left, Operand(0));
DeoptimizeIf(mi, instr->environment());
__ bind(&ok);
// Load the result and we are done.
__ mov(result, scratch2);
}
Register scratch = scratch0();
Register scratch2 = ToRegister(instr->temp());
DwVfpRegister dividend = ToDoubleRegister(instr->temp2());
DwVfpRegister divisor = ToDoubleRegister(instr->temp3());
DwVfpRegister quotient = double_scratch0();
ASSERT(!dividend.is(divisor));
ASSERT(!dividend.is(quotient));
ASSERT(!divisor.is(quotient));
ASSERT(!scratch.is(left));
ASSERT(!scratch.is(right));
ASSERT(!scratch.is(result));
Label done, vfp_modulo, both_positive, right_negative;
// Check for x % 0.
if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) {
__ cmp(right, Operand(0));
DeoptimizeIf(eq, instr->environment());
}
__ Move(result, left);
// (0 % x) must yield 0 (if x is finite, which is the case here).
__ cmp(left, Operand(0));
__ b(eq, &done);
// Preload right in a vfp register.
__ vmov(divisor.low(), right);
__ b(lt, &vfp_modulo);
__ cmp(left, Operand(right));
__ b(lt, &done);
// Check for (positive) power of two on the right hand side.
__ JumpIfNotPowerOfTwoOrZeroAndNeg(right,
scratch,
&right_negative,
&both_positive);
// Perform modulo operation (scratch contains right - 1).
__ and_(result, scratch, Operand(left));
__ b(&done);
__ bind(&right_negative);
// Negate right. The sign of the divisor does not matter.
__ rsb(right, right, Operand(0));
__ bind(&both_positive);
const int kUnfolds = 3;
// If the right hand side is smaller than the (nonnegative)
// left hand side, the left hand side is the result.
// Else try a few subtractions of the left hand side.
__ mov(scratch, left);
for (int i = 0; i < kUnfolds; i++) {
// Check if the left hand side is less or equal than the
// the right hand side.
__ cmp(scratch, Operand(right));
__ mov(result, scratch, LeaveCC, lt);
__ b(lt, &done);
// If not, reduce the left hand side by the right hand
// side and check again.
if (i < kUnfolds - 1) __ sub(scratch, scratch, right);
}
__ bind(&vfp_modulo);
// Load the arguments in VFP registers.
// The divisor value is preloaded before. Be careful that 'right'
// is only live on entry.
__ vmov(dividend.low(), left);
// From here on don't use right as it may have been reallocated
// (for example to scratch2).
right = no_reg;
__ vcvt_f64_s32(dividend, dividend.low());
__ vcvt_f64_s32(divisor, divisor.low());
// We do not care about the sign of the divisor.
__ vabs(divisor, divisor);
// Compute the quotient and round it to a 32bit integer.
__ vdiv(quotient, dividend, divisor);
__ vcvt_s32_f64(quotient.low(), quotient);
__ vcvt_f64_s32(quotient, quotient.low());
// Compute the remainder in result.
DwVfpRegister double_scratch = dividend;
__ vmul(double_scratch, divisor, quotient);
__ vcvt_s32_f64(double_scratch.low(), double_scratch);
__ vmov(scratch, double_scratch.low());
if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
__ sub(result, left, scratch);
} else {
Label ok;
// Check for -0.
__ sub(scratch2, left, scratch, SetCC);
__ b(ne, &ok);
__ cmp(left, Operand(0));
DeoptimizeIf(mi, instr->environment());
__ bind(&ok);
// Load the result and we are done.
__ mov(result, scratch2);
}
}
__ bind(&done);
}

View File

@ -1986,11 +1986,23 @@ void Simulator::DecodeType01(Instruction* instr) {
SetNZFlags(alu_out);
}
} else {
// The MLA instruction description (A 4.1.28) refers to the order
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
// Rn field to encode the Rd register and the Rd field to encode
// the Rn register.
Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
int rd = instr->RdValue();
int32_t acc_value = get_register(rd);
if (instr->Bit(22) == 0) {
// The MLA instruction description (A 4.1.28) refers to the order
// of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the
// Rn field to encode the Rd register and the Rd field to encode
// the Rn register.
// Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd");
int32_t mul_out = rm_val * rs_val;
int32_t result = acc_value + mul_out;
set_register(rn, result);
} else {
// Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd");
int32_t mul_out = rm_val * rs_val;
int32_t result = acc_value - mul_out;
set_register(rn, result);
}
}
} else {
// The signed/long multiply instructions use the terms RdHi and RdLo
@ -2546,6 +2558,25 @@ void Simulator::DecodeType3(Instruction* instr) {
break;
}
case db_x: {
if (FLAG_enable_sudiv) {
if (!instr->HasW()) {
if (instr->Bits(5, 4) == 0x1) {
if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) {
// sdiv (in V8 notation matching ARM ISA format) rn = rm/rs
// Format(instr, "'sdiv'cond'b 'rn, 'rm, 'rs);
int rm = instr->RmValue();
int32_t rm_val = get_register(rm);
int rs = instr->RsValue();
int32_t rs_val = get_register(rs);
int32_t ret_val = 0;
ASSERT(rs_val != 0);
ret_val = rm_val/rs_val;
set_register(rn, ret_val);
return;
}
}
}
}
// Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w");
addr = rn_val - shifter_operand;
if (instr->HasW()) {

View File

@ -284,6 +284,8 @@ DEFINE_bool(enable_vfp2, true,
"enable use of VFP2 instructions if available")
DEFINE_bool(enable_armv7, true,
"enable use of ARMv7 instructions if available (ARM only)")
DEFINE_bool(enable_sudiv, true,
"enable use of SDIV and UDIV instructions if available (ARM only)")
DEFINE_bool(enable_fpu, true,
"enable use of MIPS FPU instructions if available (MIPS only)")

View File

@ -148,6 +148,9 @@ bool OS::ArmCpuHasFeature(CpuFeature feature) {
case ARMv7:
search_string = "ARMv7";
break;
case SUDIV:
search_string = "idiva";
break;
default:
UNREACHABLE();
}

View File

@ -438,6 +438,7 @@ enum CpuFeature { SSE4_1 = 32 + 19, // x86
VFP3 = 1, // ARM
ARMv7 = 2, // ARM
VFP2 = 3, // ARM
SUDIV = 4, // ARM
SAHF = 0, // x86
FPU = 1}; // MIPS