[turbofan] Implemented the optional Float32RoundDown operator.

I implemented the optional Float32RoundDown operator on x64, ia32, arm,
and arm64.

For arm I also had to adjust the simulator.

R=titzer@chromium.org

Review URL: https://codereview.chromium.org/1471913006

Cr-Commit-Position: refs/heads/master@{#32261}
This commit is contained in:
ahaas 2015-11-25 03:05:22 -08:00 committed by Commit bot
parent b9d5126930
commit 74434403f6
34 changed files with 227 additions and 52 deletions

View File

@ -3404,6 +3404,20 @@ void Assembler::vrintp(const DwVfpRegister dst, const DwVfpRegister src) {
}
// Emits the single-precision (sz=0) form of VRINTM, reading |src| and writing
// |dst|. The rounding-mode field RM is fixed to 0b11, which the simulator in
// this change decodes as "vrintm - floor".
// Debug builds assert that ARMv8 is supported before emitting.
void Assembler::vrintm(const SwVfpRegister dst, const SwVfpRegister src) {
// cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
// 10(19-18) | RM=11(17-16) | Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
// M(5) | 0(4) | Vm(3-0)
DCHECK(CpuFeatures::IsSupported(ARMv8));
// Split each register code into the field value (vd / vm) and the extra
// bit (d / m) that is placed at D(22) / M(5) in the encoding below.
int vd, d;
dst.split_code(&vd, &d);
int vm, m;
src.split_code(&vm, &m);
// Bit 8 (sz) is deliberately left clear: this is the single-precision form.
emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}
void Assembler::vrintm(const DwVfpRegister dst, const DwVfpRegister src) {
// cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
// 10(19-18) | RM=11(17-16) | Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |

View File

@ -1213,6 +1213,7 @@ class Assembler : public AssemblerBase {
// ARMv8 rounding instructions.
void vrinta(const DwVfpRegister dst, const DwVfpRegister src);
void vrintn(const DwVfpRegister dst, const DwVfpRegister src);
void vrintm(const SwVfpRegister dst, const SwVfpRegister src);
void vrintm(const DwVfpRegister dst, const DwVfpRegister src);
void vrintp(const DwVfpRegister dst, const DwVfpRegister src);
void vrintz(const DwVfpRegister dst, const DwVfpRegister src,

View File

@ -3869,44 +3869,71 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
break;
case 0x1D:
if (instr->Opc1Value() == 0x7 && instr->Opc3Value() == 0x1 &&
instr->Bits(11, 9) == 0x5 && instr->Bits(19, 18) == 0x2 &&
instr->Bit(8) == 0x1) {
int vm = instr->VFPMRegValue(kDoublePrecision);
int vd = instr->VFPDRegValue(kDoublePrecision);
double dm_value = get_double_from_d_register(vm);
double dd_value = 0.0;
int rounding_mode = instr->Bits(17, 16);
switch (rounding_mode) {
case 0x0: // vrinta - round with ties to away from zero
dd_value = round(dm_value);
break;
case 0x1: { // vrintn - round with ties to even
dd_value = std::floor(dm_value);
double error = dm_value - dd_value;
// Take care of correctly handling the range [-0.5, -0.0], which
// must yield -0.0.
if ((-0.5 <= dm_value) && (dm_value < 0.0)) {
dd_value = -0.0;
// If the error is greater than 0.5, or is equal to 0.5 and the
// integer result is odd, round up.
} else if ((error > 0.5) ||
((error == 0.5) && (fmod(dd_value, 2) != 0))) {
dd_value++;
instr->Bits(11, 9) == 0x5 && instr->Bits(19, 18) == 0x2) {
if (instr->SzValue() == 0x1) {
int vm = instr->VFPMRegValue(kDoublePrecision);
int vd = instr->VFPDRegValue(kDoublePrecision);
double dm_value = get_double_from_d_register(vm);
double dd_value = 0.0;
int rounding_mode = instr->Bits(17, 16);
switch (rounding_mode) {
case 0x0: // vrinta - round with ties to away from zero
dd_value = round(dm_value);
break;
case 0x1: { // vrintn - round with ties to even
dd_value = std::floor(dm_value);
double error = dm_value - dd_value;
// Take care of correctly handling the range [-0.5, -0.0], which
// must yield -0.0.
if ((-0.5 <= dm_value) && (dm_value < 0.0)) {
dd_value = -0.0;
// If the error is greater than 0.5, or is equal to 0.5 and the
// integer result is odd, round up.
} else if ((error > 0.5) ||
((error == 0.5) && (fmod(dd_value, 2) != 0))) {
dd_value++;
}
break;
}
break;
case 0x2: // vrintp - ceil
dd_value = std::ceil(dm_value);
break;
case 0x3: // vrintm - floor
dd_value = std::floor(dm_value);
break;
default:
UNREACHABLE(); // Case analysis is exhaustive.
break;
}
case 0x2: // vrintp - ceil
dd_value = std::ceil(dm_value);
break;
case 0x3: // vrintm - floor
dd_value = std::floor(dm_value);
break;
default:
UNREACHABLE(); // Case analysis is exhaustive.
break;
dd_value = canonicalizeNaN(dd_value);
set_d_register_from_double(vd, dd_value);
} else {
int m = instr->VFPMRegValue(kSinglePrecision);
int d = instr->VFPDRegValue(kSinglePrecision);
float sm_value = get_float_from_s_register(m);
float sd_value = 0.0;
int rounding_mode = instr->Bits(17, 16);
switch (rounding_mode) {
case 0x0: // vrinta - round with ties to away from zero
sd_value = roundf(sm_value);
break;
case 0x1: { // vrintn - round with ties to even
sd_value = nearbyintf(sm_value);
break;
}
case 0x2: // vrintp - ceil
sd_value = ceilf(sm_value);
break;
case 0x3: // vrintm - floor
sd_value = floorf(sm_value);
break;
default:
UNREACHABLE(); // Case analysis is exhaustive.
break;
}
sd_value = canonicalizeNaN(sd_value);
set_s_register_from_float(d, sd_value);
}
dd_value = canonicalizeNaN(dd_value);
set_d_register_from_double(vd, dd_value);
} else {
UNIMPLEMENTED();
}

View File

@ -726,6 +726,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
case kArmVnegF32:
__ vneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
break;
case kArmVrintmF32:
__ vrintm(i.OutputFloat32Register(), i.InputFloat32Register(0));
break;
case kArmVcmpF64:
if (instr->InputAt(1)->IsDoubleRegister()) {
__ VFPCompareAndSetFlags(i.InputFloat64Register(0),

View File

@ -54,6 +54,7 @@ namespace compiler {
V(ArmVabsF32) \
V(ArmVnegF32) \
V(ArmVsqrtF32) \
V(ArmVrintmF32) \
V(ArmVcmpF64) \
V(ArmVaddF64) \
V(ArmVsubF64) \

View File

@ -1115,6 +1115,11 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
}
// Lowers a Float32RoundDown node by handing it to VisitRR together with the
// kArmVrintmF32 opcode.
// NOTE(review): VisitRR presumably emits a register-to-register instruction;
// its definition is not visible here - confirm.
void InstructionSelector::VisitFloat32RoundDown(Node* node) {
VisitRR(this, kArmVrintmF32, node);
}
// Lowers a Float64RoundDown node by handing it to VisitRR together with the
// kArmVrintmF64 opcode (the double-precision counterpart of kArmVrintmF32).
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
VisitRR(this, kArmVrintmF64, node);
}
@ -1569,7 +1574,8 @@ InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::kFloat64RoundUp |
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTiesAway |
MachineOperatorBuilder::kFloat64RoundTiesEven;
MachineOperatorBuilder::kFloat64RoundTiesEven |
MachineOperatorBuilder::kFloat32RoundDown;
}
return flags;
}

View File

@ -623,6 +623,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ Bind(ool->exit());
break;
}
case kArm64Float32RoundDown:
__ Frintm(i.OutputFloat32Register(), i.InputFloat32Register(0));
break;
case kArm64Float64RoundDown:
__ Frintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;

View File

@ -88,6 +88,7 @@ namespace compiler {
V(Arm64Float32Min) \
V(Arm64Float32Abs) \
V(Arm64Float32Sqrt) \
V(Arm64Float32RoundDown) \
V(Arm64Float64Cmp) \
V(Arm64Float64Add) \
V(Arm64Float64Sub) \

View File

@ -1473,6 +1473,11 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
}
// Lowers a Float32RoundDown node by handing it to VisitRR together with the
// kArm64Float32RoundDown opcode.
// NOTE(review): VisitRR presumably emits a register-to-register instruction;
// its definition is not visible here - confirm.
void InstructionSelector::VisitFloat32RoundDown(Node* node) {
VisitRR(this, kArm64Float32RoundDown, node);
}
// Lowers a Float64RoundDown node by handing it to VisitRR together with the
// kArm64Float64RoundDown opcode.
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
VisitRR(this, kArm64Float64RoundDown, node);
}
@ -2064,6 +2069,7 @@ MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
return MachineOperatorBuilder::kFloat32Max |
MachineOperatorBuilder::kFloat32Min |
MachineOperatorBuilder::kFloat32RoundDown |
MachineOperatorBuilder::kFloat64Max |
MachineOperatorBuilder::kFloat64Min |
MachineOperatorBuilder::kFloat64RoundDown |

View File

@ -650,6 +650,13 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
break;
}
case kSSEFloat32Round: {
CpuFeatureScope sse_scope(masm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
break;
}
case kSSEFloat64Cmp:
__ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
break;

View File

@ -43,6 +43,7 @@ namespace compiler {
V(SSEFloat32Abs) \
V(SSEFloat32Neg) \
V(SSEFloat32Sqrt) \
V(SSEFloat32Round) \
V(SSEFloat64Cmp) \
V(SSEFloat64Add) \
V(SSEFloat64Sub) \

View File

@ -851,6 +851,11 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
}
// Lowers a Float32RoundDown node to kSSEFloat32Round. The rounding mode
// (kRoundDown) is packed into the opcode through MiscField so that the code
// generator can decode it again from instr->opcode().
void InstructionSelector::VisitFloat32RoundDown(Node* node) {
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundDown));
}
// Lowers a Float64RoundDown node to kSSEFloat64Round, with the kRoundDown
// rounding mode packed into the opcode through MiscField.
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundDown));
}
@ -1287,7 +1292,8 @@ InstructionSelector::SupportedMachineOperatorFlags() {
flags |= MachineOperatorBuilder::kFloat64RoundDown |
MachineOperatorBuilder::kFloat64RoundUp |
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTiesEven;
MachineOperatorBuilder::kFloat64RoundTiesEven |
MachineOperatorBuilder::kFloat32RoundDown;
}
return flags;
}

View File

@ -923,6 +923,8 @@ void InstructionSelector::VisitNode(Node* node) {
return VisitFloat64LessThan(node);
case IrOpcode::kFloat64LessThanOrEqual:
return VisitFloat64LessThanOrEqual(node);
case IrOpcode::kFloat32RoundDown:
return MarkAsFloat32(node), VisitFloat32RoundDown(node);
case IrOpcode::kFloat64RoundDown:
return MarkAsFloat64(node), VisitFloat64RoundDown(node);
case IrOpcode::kFloat64RoundUp:

View File

@ -186,6 +186,7 @@ CheckedStoreRepresentation CheckedStoreRepresentationOf(Operator const* op) {
V(Float32Min, Operator::kNoProperties, 2, 0, 1) \
V(Float64Max, Operator::kNoProperties, 2, 0, 1) \
V(Float64Min, Operator::kNoProperties, 2, 0, 1) \
V(Float32RoundDown, Operator::kNoProperties, 1, 0, 1) \
V(Float64RoundDown, Operator::kNoProperties, 1, 0, 1) \
V(Float64RoundUp, Operator::kNoProperties, 1, 0, 1) \
V(Float64RoundTruncate, Operator::kNoProperties, 1, 0, 1) \

View File

@ -116,23 +116,24 @@ class MachineOperatorBuilder final : public ZoneObject {
kFloat32Min = 1u << 1,
kFloat64Max = 1u << 2,
kFloat64Min = 1u << 3,
kFloat64RoundDown = 1u << 4,
kFloat64RoundUp = 1u << 5,
kFloat64RoundTruncate = 1u << 6,
kFloat64RoundTiesEven = 1u << 7,
kFloat64RoundTiesAway = 1u << 8,
kInt32DivIsSafe = 1u << 9,
kUint32DivIsSafe = 1u << 10,
kWord32ShiftIsSafe = 1u << 11,
kWord32Ctz = 1u << 12,
kWord64Ctz = 1u << 13,
kWord32Popcnt = 1u << 14,
kWord64Popcnt = 1u << 15,
kFloat32RoundDown = 1u << 4,
kFloat64RoundDown = 1u << 5,
kFloat64RoundUp = 1u << 6,
kFloat64RoundTruncate = 1u << 7,
kFloat64RoundTiesEven = 1u << 8,
kFloat64RoundTiesAway = 1u << 9,
kInt32DivIsSafe = 1u << 10,
kUint32DivIsSafe = 1u << 11,
kWord32ShiftIsSafe = 1u << 12,
kWord32Ctz = 1u << 13,
kWord64Ctz = 1u << 14,
kWord32Popcnt = 1u << 15,
kWord64Popcnt = 1u << 16,
kAllOptionalOps = kFloat32Max | kFloat32Min | kFloat64Max | kFloat64Min |
kFloat64RoundDown | kFloat64RoundUp |
kFloat64RoundTruncate | kFloat64RoundTiesAway |
kFloat64RoundTiesEven | kWord32Ctz | kWord64Ctz |
kWord32Popcnt | kWord64Popcnt
kWord32Popcnt | kWord64Popcnt | kFloat32RoundDown
};
typedef base::Flags<Flag, unsigned> Flags;
@ -268,6 +269,7 @@ class MachineOperatorBuilder final : public ZoneObject {
const Operator* Float64Abs();
// Floating point rounding.
const OptionalOperator Float32RoundDown();
const OptionalOperator Float64RoundDown();
const OptionalOperator Float64RoundUp();
const OptionalOperator Float64RoundTruncate();

View File

@ -647,6 +647,9 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
}
// No lowering is provided for Float32RoundDown in this selector; the visitor
// only hits UNREACHABLE().
// NOTE(review): the other ports in this change that lack an implementation
// use UNIMPLEMENTED() instead - confirm which marker is intended here.
void InstructionSelector::VisitFloat32RoundDown(Node* node) { UNREACHABLE(); }
// Lowers a Float64RoundDown node by handing it to VisitRR together with the
// kMipsFloat64RoundDown opcode.
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
VisitRR(this, kMipsFloat64RoundDown, node);
}

View File

@ -981,6 +981,9 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
}
// Float32RoundDown has no lowering in this selector yet; the visitor only
// hits UNIMPLEMENTED().
void InstructionSelector::VisitFloat32RoundDown(Node* node) { UNIMPLEMENTED(); }
// Lowers a Float64RoundDown node by handing it to VisitRR together with the
// kMips64Float64RoundDown opcode.
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
VisitRR(this, kMips64Float64RoundDown, node);
}

View File

@ -294,6 +294,7 @@
V(Float32Min) \
V(Float32Abs) \
V(Float32Sqrt) \
V(Float32RoundDown) \
V(Float64Add) \
V(Float64Sub) \
V(Float64Mul) \

View File

@ -1128,6 +1128,9 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
}
// Float32RoundDown has no lowering in this selector yet; the visitor only
// hits UNIMPLEMENTED().
void InstructionSelector::VisitFloat32RoundDown(Node* node) { UNIMPLEMENTED(); }
// Lowers a Float64RoundDown node by handing it to VisitRR together with the
// kPPC_FloorDouble opcode.
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
VisitRR(this, kPPC_FloorDouble, node);
}

View File

@ -483,6 +483,9 @@ class RawMachineAssembler {
Node* BitcastInt64ToFloat64(Node* a) {
return AddNode(machine()->BitcastInt64ToFloat64(), a);
}
// Adds a Float32RoundDown node with the single input |a| and returns it.
// NOTE(review): Float32RoundDown() is an optional machine operator and .op()
// is taken here without a support check; presumably callers must test
// IsSupported() first - confirm.
Node* Float32RoundDown(Node* a) {
return AddNode(machine()->Float32RoundDown().op(), a);
}
// Adds a Float64RoundDown node with the single input |a| and returns it.
// NOTE(review): Float64RoundDown() is an optional machine operator and .op()
// is taken here without a support check; presumably callers must test
// IsSupported() first - confirm.
Node* Float64RoundDown(Node* a) {
return AddNode(machine()->Float64RoundDown().op(), a);
}

View File

@ -2265,6 +2265,12 @@ Type* Typer::Visitor::TypeFloat64LessThanOrEqual(Node* node) {
}
// Returns the type assigned to a Float32RoundDown node. The result is the
// conservative Type::Number() regardless of |node|'s input.
Type* Typer::Visitor::TypeFloat32RoundDown(Node* node) {
// TODO(sigurds): We could have a tighter bound here.
return Type::Number();
}
Type* Typer::Visitor::TypeFloat64RoundDown(Node* node) {
// TODO(sigurds): We could have a tighter bound here.
return Type::Number();

View File

@ -888,6 +888,7 @@ void Verifier::Visitor::Check(Node* node) {
case IrOpcode::kFloat64Min:
case IrOpcode::kFloat64Abs:
case IrOpcode::kFloat64Sqrt:
case IrOpcode::kFloat32RoundDown:
case IrOpcode::kFloat64RoundDown:
case IrOpcode::kFloat64RoundUp:
case IrOpcode::kFloat64RoundTruncate:

View File

@ -937,6 +937,13 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
case kSSEFloat32ToFloat64:
ASSEMBLE_SSE_UNOP(Cvtss2sd);
break;
case kSSEFloat32Round: {
CpuFeatureScope sse_scope(masm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
break;
}
case kSSEFloat64Cmp:
ASSEMBLE_SSE_BINOP(Ucomisd);
break;

View File

@ -63,6 +63,7 @@ namespace compiler {
V(SSEFloat32Max) \
V(SSEFloat32Min) \
V(SSEFloat32ToFloat64) \
V(SSEFloat32Round) \
V(SSEFloat64Cmp) \
V(SSEFloat64Add) \
V(SSEFloat64Sub) \

View File

@ -1138,6 +1138,11 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
}
// Lowers a Float32RoundDown node to kSSEFloat32Round. The rounding mode
// (kRoundDown) is packed into the opcode through MiscField so that the code
// generator can decode it again from instr->opcode().
void InstructionSelector::VisitFloat32RoundDown(Node* node) {
VisitRR(this, node, kSSEFloat32Round | MiscField::encode(kRoundDown));
}
// Lowers a Float64RoundDown node to kSSEFloat64Round, with the kRoundDown
// rounding mode packed into the opcode through MiscField.
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
VisitRR(this, node, kSSEFloat64Round | MiscField::encode(kRoundDown));
}
@ -1694,7 +1699,8 @@ InstructionSelector::SupportedMachineOperatorFlags() {
flags |= MachineOperatorBuilder::kFloat64RoundDown |
MachineOperatorBuilder::kFloat64RoundUp |
MachineOperatorBuilder::kFloat64RoundTruncate |
MachineOperatorBuilder::kFloat64RoundTiesEven;
MachineOperatorBuilder::kFloat64RoundTiesEven |
MachineOperatorBuilder::kFloat32RoundDown;
}
return flags;
}

View File

@ -842,6 +842,9 @@ void InstructionSelector::VisitFloat64Sqrt(Node* node) {
}
// Float32RoundDown has no lowering in this selector yet; the visitor only
// hits UNIMPLEMENTED().
void InstructionSelector::VisitFloat32RoundDown(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitFloat64RoundDown(Node* node) {
X87OperandGenerator g(this);
Emit(kX87Float64Round | MiscField::encode(kRoundDown),

View File

@ -2194,6 +2194,19 @@ void Assembler::ucomisd(XMMRegister dst, const Operand& src) {
}
// Emits ROUNDSS dst, src, imm8: the byte sequence 66 0F 3A 0A, followed by
// the register operand and an immediate byte derived from |mode|.
// Debug builds assert that SSE4_1 has been enabled on this assembler.
void Assembler::roundss(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
EMIT(0x66);
EMIT(0x0F);
EMIT(0x3A);
EMIT(0x0A);
emit_sse_operand(dst, src);
// Mask precision exception: bit 3 (0x8) of the immediate is always set on
// top of the requested rounding mode.
EMIT(static_cast<byte>(mode) | 0x8);
}
void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);

View File

@ -989,6 +989,7 @@ class Assembler : public AssemblerBase {
void ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); }
void ucomisd(XMMRegister dst, const Operand& src);
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void movmskpd(Register dst, XMMRegister src);

View File

@ -3481,6 +3481,21 @@ void Assembler::cmpltsd(XMMRegister dst, XMMRegister src) {
}
// Emits the non-AVX ROUNDSS dst, src, imm8: the 0x66 prefix, an optional REX
// prefix for the operands, the opcode bytes 0F 3A 0A, the register operand,
// and an immediate byte derived from |mode|.
// Debug builds assert that AVX is NOT enabled and that SSE4_1 is enabled.
void Assembler::roundss(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(!IsEnabled(AVX));
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
emit(0x66);
emit_optional_rex_32(dst, src);
emit(0x0f);
emit(0x3a);
emit(0x0a);
emit_sse_operand(dst, src);
// Mask precision exception: bit 3 (0x8) of the immediate is always set on
// top of the requested rounding mode.
emit(static_cast<byte>(mode) | 0x8);
}
void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(!IsEnabled(AVX));
DCHECK(IsEnabled(SSE4_1));

View File

@ -1136,6 +1136,7 @@ class Assembler : public AssemblerBase {
void pinsrd(XMMRegister dst, Register src, int8_t imm8);
void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
// AVX instruction
@ -1407,6 +1408,11 @@ class Assembler : public AssemblerBase {
void vucomisd(XMMRegister dst, const Operand& src) {
vsd(0x2e, dst, xmm0, src, k66, k0F, kWIG);
}
// AVX form of the single-precision round: emits opcode 0x0a through vsd()
// with the k66 / k0F3A / kWIG encoding parameters, then appends the
// immediate byte built from |mode|.
void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode) {
vsd(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode) {
vsd(0x0b, dst, src1, src2, k66, k0F3A, kWIG);

View File

@ -2764,6 +2764,17 @@ void MacroAssembler::Movmskpd(Register dst, XMMRegister src) {
}
// Rounds the single-precision value in |src| into |dst| using |mode|.
// Emits the plain roundss encoding when AVX is not supported; otherwise
// emits vroundss inside an AVX feature scope, with |dst| doubling as the
// first source operand.
void MacroAssembler::Roundss(XMMRegister dst, XMMRegister src,
                             RoundingMode mode) {
  if (!CpuFeatures::IsSupported(AVX)) {
    roundss(dst, src, mode);
    return;
  }
  CpuFeatureScope scope(this, AVX);
  vroundss(dst, dst, src, mode);
}
void MacroAssembler::Roundsd(XMMRegister dst, XMMRegister src,
RoundingMode mode) {
if (CpuFeatures::IsSupported(AVX)) {

View File

@ -970,6 +970,7 @@ class MacroAssembler: public Assembler {
void Movapd(XMMRegister dst, XMMRegister src);
void Movmskpd(Register dst, XMMRegister src);
void Roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Sqrtsd(XMMRegister dst, XMMRegister src);
void Sqrtsd(XMMRegister dst, const Operand& src);

View File

@ -512,7 +512,7 @@ static inline void CheckFloatEq(volatile float x, volatile float y) {
if (std::isnan(x)) {
CHECK(std::isnan(y));
} else {
CHECK(x == y);
CHECK_EQ(x, y);
}
}

View File

@ -5144,6 +5144,16 @@ static double kValues[] = {0.1,
-two_52 + 1 - 0.7};
// Builds a one-parameter float function that returns Float32RoundDown of its
// argument and compares it against floorf() for every FOR_FLOAT32_INPUTS
// value. The body is skipped entirely when the machine does not support the
// optional operator.
TEST(RunFloat32RoundDown) {
  BufferedRawMachineAssemblerTester<float> m(kMachFloat32);
  if (m.machine()->Float32RoundDown().IsSupported()) {
    m.Return(m.Float32RoundDown(m.Parameter(0)));
    FOR_FLOAT32_INPUTS(i) {
      CheckFloatEq(floorf(*i), m.Call(*i));
    }
  }
}
TEST(RunFloat64RoundDown1) {
BufferedRawMachineAssemblerTester<double> m(kMachFloat64);
if (!m.machine()->Float64RoundDown().IsSupported()) return;