From 08747c3a16942bb3c98bdc06a5f99035faaf58d7 Mon Sep 17 00:00:00 2001 From: "danno@chromium.org" Date: Mon, 1 Oct 2012 21:27:33 +0000 Subject: [PATCH] Support for SDIV and MLS ARM instructions, and implement DoModI using them. Also added support for the runtime detection to check if hardware supports SDIV/UDIV Other new opportunities to exploit SDIV/UDIV will be done in separate issues. Review URL: https://chromiumcodereview.appspot.com/10977051 Patch from Subrato K De . git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12646 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/assembler-arm.cc | 24 ++++ src/arm/assembler-arm.h | 7 ++ src/arm/disasm-arm.cc | 29 ++++- src/arm/lithium-codegen-arm.cc | 217 ++++++++++++++++++--------------- src/arm/simulator-arm.cc | 41 ++++++- src/flag-definitions.h | 2 + src/platform-linux.cc | 3 + src/v8globals.h | 1 + 8 files changed, 217 insertions(+), 107 deletions(-) diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index 78db06d267..7a65ab25cb 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -110,6 +110,10 @@ void CpuFeatures::Probe() { if (FLAG_enable_armv7) { supported_ |= 1u << ARMv7; } + + if (FLAG_enable_sudiv) { + supported_ |= 1u << SUDIV; + } #else // __arm__ // Probe for additional features not already known to be available. 
if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) { @@ -125,6 +129,10 @@ void CpuFeatures::Probe() { found_by_runtime_probing_ |= 1u << ARMv7; } + if (!IsSupported(SUDIV) && OS::ArmCpuHasFeature(SUDIV)) { + found_by_runtime_probing_ |= 1u << SUDIV; + } + supported_ |= found_by_runtime_probing_; #endif @@ -1207,6 +1215,22 @@ void Assembler::mla(Register dst, Register src1, Register src2, Register srcA, } +void Assembler::mls(Register dst, Register src1, Register src2, Register srcA, + Condition cond) { + ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc) && !srcA.is(pc)); + emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 | + src2.code()*B8 | B7 | B4 | src1.code()); +} + + +void Assembler::sdiv(Register dst, Register src1, Register src2, + Condition cond) { + ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc)); + emit(cond | B26 | B25| B24 | B20 | dst.code()*B16 | 0xf * B12 | + src2.code()*B8 | B4 | src1.code()); +} + + void Assembler::mul(Register dst, Register src1, Register src2, SBit s, Condition cond) { ASSERT(!dst.is(pc) && !src1.is(pc) && !src2.is(pc)); diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index c62d480eba..005c78d5d8 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -511,6 +511,7 @@ class CpuFeatures : public AllStatic { ASSERT(initialized_); if (f == VFP3 && !FLAG_enable_vfp3) return false; if (f == VFP2 && !FLAG_enable_vfp2) return false; + if (f == SUDIV && !FLAG_enable_sudiv) return false; return (supported_ & (1u << f)) != 0; } @@ -869,6 +870,12 @@ class Assembler : public AssemblerBase { void mla(Register dst, Register src1, Register src2, Register srcA, SBit s = LeaveCC, Condition cond = al); + void mls(Register dst, Register src1, Register src2, Register srcA, + Condition cond = al); + + void sdiv(Register dst, Register src1, Register src2, + Condition cond = al); + void mul(Register dst, Register src1, Register src2, SBit s = LeaveCC, Condition cond = al); diff --git a/src/arm/disasm-arm.cc 
b/src/arm/disasm-arm.cc index 96a7d3ce6b..7c5f63edc9 100644 --- a/src/arm/disasm-arm.cc +++ b/src/arm/disasm-arm.cc @@ -692,11 +692,19 @@ void Decoder::DecodeType01(Instruction* instr) { // Rn field to encode it. Format(instr, "mul'cond's 'rn, 'rm, 'rs"); } else { - // The MLA instruction description (A 4.1.28) refers to the order - // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the - // Rn field to encode the Rd register and the Rd field to encode - // the Rn register. - Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd"); + if (instr->Bit(22) == 0) { + // The MLA instruction description (A 4.1.28) refers to the order + // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the + // Rn field to encode the Rd register and the Rd field to encode + // the Rn register. + Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd"); + } else { + // The MLS instruction description (A 4.1.29) refers to the order + // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the + // Rn field to encode the Rd register and the Rd field to encode + // the Rn register. 
+ Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd"); + } } } else { // The signed/long multiply instructions use the terms RdHi and RdLo @@ -974,6 +982,17 @@ void Decoder::DecodeType3(Instruction* instr) { break; } case db_x: { + if (FLAG_enable_sudiv) { + if (!instr->HasW()) { + if (instr->Bits(5, 4) == 0x1) { + if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) { + // SDIV (in V8 notation matching ARM ISA format) rn = rm/rs + Format(instr, "sdiv'cond'b 'rn, 'rm, 'rs"); + break; + } + } + } + } Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w"); break; } diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc index e07118317c..1216486e68 100644 --- a/src/arm/lithium-codegen-arm.cc +++ b/src/arm/lithium-codegen-arm.cc @@ -979,109 +979,132 @@ void LCodeGen::DoModI(LModI* instr) { Register left = ToRegister(instr->left()); Register right = ToRegister(instr->right()); Register result = ToRegister(instr->result()); + Label done; - Register scratch = scratch0(); - Register scratch2 = ToRegister(instr->temp()); - DwVfpRegister dividend = ToDoubleRegister(instr->temp2()); - DwVfpRegister divisor = ToDoubleRegister(instr->temp3()); - DwVfpRegister quotient = double_scratch0(); + if (CpuFeatures::IsSupported(SUDIV)) { + CpuFeatures::Scope scope(SUDIV); + // Check for x % 0. + if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) { + __ cmp(right, Operand(0)); + DeoptimizeIf(eq, instr->environment()); + } - ASSERT(!dividend.is(divisor)); - ASSERT(!dividend.is(quotient)); - ASSERT(!divisor.is(quotient)); - ASSERT(!scratch.is(left)); - ASSERT(!scratch.is(right)); - ASSERT(!scratch.is(result)); + // For r3 = r1 % r2; we can have the following ARM code + // sdiv r3, r1, r2 + // mls r3, r3, r2, r1 - Label done, vfp_modulo, both_positive, right_negative; + __ sdiv(result, left, right); + __ mls(result, result, right, left); + __ cmp(result, Operand(0)); + __ b(ne, &done); - // Check for x % 0. 
- if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) { - __ cmp(right, Operand(0)); - DeoptimizeIf(eq, instr->environment()); - } - - __ Move(result, left); - - // (0 % x) must yield 0 (if x is finite, which is the case here). - __ cmp(left, Operand(0)); - __ b(eq, &done); - // Preload right in a vfp register. - __ vmov(divisor.low(), right); - __ b(lt, &vfp_modulo); - - __ cmp(left, Operand(right)); - __ b(lt, &done); - - // Check for (positive) power of two on the right hand side. - __ JumpIfNotPowerOfTwoOrZeroAndNeg(right, - scratch, - &right_negative, - &both_positive); - // Perform modulo operation (scratch contains right - 1). - __ and_(result, scratch, Operand(left)); - __ b(&done); - - __ bind(&right_negative); - // Negate right. The sign of the divisor does not matter. - __ rsb(right, right, Operand(0)); - - __ bind(&both_positive); - const int kUnfolds = 3; - // If the right hand side is smaller than the (nonnegative) - // left hand side, the left hand side is the result. - // Else try a few subtractions of the left hand side. - __ mov(scratch, left); - for (int i = 0; i < kUnfolds; i++) { - // Check if the left hand side is less or equal than the - // the right hand side. - __ cmp(scratch, Operand(right)); - __ mov(result, scratch, LeaveCC, lt); - __ b(lt, &done); - // If not, reduce the left hand side by the right hand - // side and check again. - if (i < kUnfolds - 1) __ sub(scratch, scratch, right); - } - - __ bind(&vfp_modulo); - // Load the arguments in VFP registers. - // The divisor value is preloaded before. Be careful that 'right' is only live - // on entry. - __ vmov(dividend.low(), left); - // From here on don't use right as it may have been reallocated (for example - // to scratch2). - right = no_reg; - - __ vcvt_f64_s32(dividend, dividend.low()); - __ vcvt_f64_s32(divisor, divisor.low()); - - // We do not care about the sign of the divisor. - __ vabs(divisor, divisor); - // Compute the quotient and round it to a 32bit integer. 
- __ vdiv(quotient, dividend, divisor); - __ vcvt_s32_f64(quotient.low(), quotient); - __ vcvt_f64_s32(quotient, quotient.low()); - - // Compute the remainder in result. - DwVfpRegister double_scratch = dividend; - __ vmul(double_scratch, divisor, quotient); - __ vcvt_s32_f64(double_scratch.low(), double_scratch); - __ vmov(scratch, double_scratch.low()); - - if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { - __ sub(result, left, scratch); + if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { + __ cmp(left, Operand(0)); + DeoptimizeIf(lt, instr->environment()); + } } else { - Label ok; - // Check for -0. - __ sub(scratch2, left, scratch, SetCC); - __ b(ne, &ok); - __ cmp(left, Operand(0)); - DeoptimizeIf(mi, instr->environment()); - __ bind(&ok); - // Load the result and we are done. - __ mov(result, scratch2); - } + Register scratch = scratch0(); + Register scratch2 = ToRegister(instr->temp()); + DwVfpRegister dividend = ToDoubleRegister(instr->temp2()); + DwVfpRegister divisor = ToDoubleRegister(instr->temp3()); + DwVfpRegister quotient = double_scratch0(); + ASSERT(!dividend.is(divisor)); + ASSERT(!dividend.is(quotient)); + ASSERT(!divisor.is(quotient)); + ASSERT(!scratch.is(left)); + ASSERT(!scratch.is(right)); + ASSERT(!scratch.is(result)); + // Branches to 'done' target the function-level label bound at the end. + Label vfp_modulo, both_positive, right_negative; + + // Check for x % 0. + if (instr->hydrogen()->CheckFlag(HValue::kCanBeDivByZero)) { + __ cmp(right, Operand(0)); + DeoptimizeIf(eq, instr->environment()); + } + + __ Move(result, left); + + // (0 % x) must yield 0 (if x is finite, which is the case here). + __ cmp(left, Operand(0)); + __ b(eq, &done); + // Preload right in a vfp register. + __ vmov(divisor.low(), right); + __ b(lt, &vfp_modulo); + + __ cmp(left, Operand(right)); + __ b(lt, &done); + + // Check for (positive) power of two on the right hand side. 
+ __ JumpIfNotPowerOfTwoOrZeroAndNeg(right, + scratch, + &right_negative, + &both_positive); + // Perform modulo operation (scratch contains right - 1). + __ and_(result, scratch, Operand(left)); + __ b(&done); + + __ bind(&right_negative); + // Negate right. The sign of the divisor does not matter. + __ rsb(right, right, Operand(0)); + + __ bind(&both_positive); + const int kUnfolds = 3; + // If the right hand side is smaller than the (nonnegative) + // left hand side, the left hand side is the result. + // Else try a few subtractions of the left hand side. + __ mov(scratch, left); + for (int i = 0; i < kUnfolds; i++) { + // Check if the left hand side is less or equal than the + // the right hand side. + __ cmp(scratch, Operand(right)); + __ mov(result, scratch, LeaveCC, lt); + __ b(lt, &done); + // If not, reduce the left hand side by the right hand + // side and check again. + if (i < kUnfolds - 1) __ sub(scratch, scratch, right); + } + + __ bind(&vfp_modulo); + // Load the arguments in VFP registers. + // The divisor value is preloaded before. Be careful that 'right' + // is only live on entry. + __ vmov(dividend.low(), left); + // From here on don't use right as it may have been reallocated + // (for example to scratch2). + right = no_reg; + + __ vcvt_f64_s32(dividend, dividend.low()); + __ vcvt_f64_s32(divisor, divisor.low()); + + // We do not care about the sign of the divisor. + __ vabs(divisor, divisor); + // Compute the quotient and round it to a 32bit integer. + __ vdiv(quotient, dividend, divisor); + __ vcvt_s32_f64(quotient.low(), quotient); + __ vcvt_f64_s32(quotient, quotient.low()); + + // Compute the remainder in result. + DwVfpRegister double_scratch = dividend; + __ vmul(double_scratch, divisor, quotient); + __ vcvt_s32_f64(double_scratch.low(), double_scratch); + __ vmov(scratch, double_scratch.low()); + + if (!instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) { + __ sub(result, left, scratch); + } else { + Label ok; + // Check for -0. 
+ __ sub(scratch2, left, scratch, SetCC); + __ b(ne, &ok); + __ cmp(left, Operand(0)); + DeoptimizeIf(mi, instr->environment()); + __ bind(&ok); + // Load the result and we are done. + __ mov(result, scratch2); + } + } __ bind(&done); } diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index a057de58cc..91df404f9a 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -1986,11 +1986,23 @@ void Simulator::DecodeType01(Instruction* instr) { SetNZFlags(alu_out); } } else { - // The MLA instruction description (A 4.1.28) refers to the order - // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the - // Rn field to encode the Rd register and the Rd field to encode - // the Rn register. - Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd"); + int rd = instr->RdValue(); + int32_t acc_value = get_register(rd); + if (instr->Bit(22) == 0) { + // The MLA instruction description (A 4.1.28) refers to the order + // of registers as "Rd, Rm, Rs, Rn". But confusingly it uses the + // Rn field to encode the Rd register and the Rd field to encode + // the Rn register. 
+ // Format(instr, "mla'cond's 'rn, 'rm, 'rs, 'rd"); + int32_t mul_out = rm_val * rs_val; + int32_t result = acc_value + mul_out; + set_register(rn, result); + } else { + // Format(instr, "mls'cond's 'rn, 'rm, 'rs, 'rd"); + int32_t mul_out = rm_val * rs_val; + int32_t result = acc_value - mul_out; + set_register(rn, result); + } } } else { // The signed/long multiply instructions use the terms RdHi and RdLo @@ -2546,6 +2558,25 @@ void Simulator::DecodeType3(Instruction* instr) { break; } case db_x: { + if (FLAG_enable_sudiv) { + if (!instr->HasW()) { + if (instr->Bits(5, 4) == 0x1) { + if ((instr->Bit(22) == 0x0) && (instr->Bit(20) == 0x1)) { + // sdiv (in V8 notation matching ARM ISA format) rn = rm/rs + // Format(instr, "sdiv'cond'b 'rn, 'rm, 'rs"); + int32_t rm_val = get_register(instr->RmValue()); + int32_t rs_val = get_register(instr->RsValue()); + ASSERT(rs_val != 0); + // INT32_MIN / -1 overflows int32_t, which is undefined behavior + // on the host; the hardware result is INT32_MIN, so special-case it. + const int32_t min_int = -0x7fffffff - 1; + bool overflow = (rm_val == min_int) && (rs_val == -1); + set_register(rn, overflow ? min_int : rm_val / rs_val); + return; + } + } + } + } // Format(instr, "'memop'cond'b 'rd, ['rn, -'shift_rm]'w"); addr = rn_val - shifter_operand; if (instr->HasW()) { diff --git a/src/flag-definitions.h b/src/flag-definitions.h index 685559a53c..d9fa126069 100644 --- a/src/flag-definitions.h +++ b/src/flag-definitions.h @@ -284,6 +284,8 @@ DEFINE_bool(enable_vfp2, true, "enable use of VFP2 instructions if available") DEFINE_bool(enable_armv7, true, "enable use of ARMv7 instructions if available (ARM only)") +DEFINE_bool(enable_sudiv, true, + "enable use of SDIV and UDIV instructions if available (ARM only)") DEFINE_bool(enable_fpu, true, "enable use of MIPS FPU instructions if available (MIPS only)") diff --git a/src/platform-linux.cc b/src/platform-linux.cc index 606d10236e..b9ce9d966d 100644 --- a/src/platform-linux.cc +++ b/src/platform-linux.cc @@ -148,6 +148,9 @@ bool OS::ArmCpuHasFeature(CpuFeature feature) { case ARMv7: search_string = "ARMv7"; break; + case SUDIV: + search_string = "idiva"; + break; 
default: UNREACHABLE(); } diff --git a/src/v8globals.h b/src/v8globals.h index 3d214f8dd3..1d22a11032 100644 --- a/src/v8globals.h +++ b/src/v8globals.h @@ -438,6 +438,7 @@ enum CpuFeature { SSE4_1 = 32 + 19, // x86 VFP3 = 1, // ARM ARMv7 = 2, // ARM VFP2 = 3, // ARM + SUDIV = 4, // ARM SAHF = 0, // x86 FPU = 1}; // MIPS