[maglev] Fix several bugs in codegen for x % x

.. which should result in 0 if x is non-negative, and -0.0 otherwise.

- Fix two invalid modulus-related folds.
- Handle aliased inputs in Int32ModulusWithOverflow.
- Drive-by: rename left/right to lhs/rhs to match the algorithm
  description.

Note there is no deopt loop here since a result of -0.0 will update
feedback to kSignedSmallInputs.

Bug: v8:7700
Change-Id: I84fca0e43ded152d3520cbe73cc43299ff1c4230
Fixed: chromium:1403575
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4128081
Auto-Submit: Jakob Linke <jgruber@chromium.org>
Reviewed-by: Darius Mercadier <dmercadier@chromium.org>
Commit-Queue: Darius Mercadier <dmercadier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#85039}
This commit is contained in:
Jakob Linke 2022-12-29 12:14:00 +01:00 committed by V8 LUCI CQ
parent 0aa0b44a40
commit ba1fed5ccb
4 changed files with 118 additions and 57 deletions

View File

@ -906,6 +906,10 @@ void Int32ModulusWithOverflow::SetValueLocationConstraints() {
}
void Int32ModulusWithOverflow::GenerateCode(MaglevAssembler* masm,
const ProcessingState& state) {
// If AreAliased(lhs, rhs):
// deopt if lhs < 0 // Minus zero.
// 0
//
// Using same algorithm as in EffectControlLinearizer:
// if rhs <= 0 then
// rhs = -rhs
@ -922,62 +926,78 @@ void Int32ModulusWithOverflow::GenerateCode(MaglevAssembler* masm,
// else
// lhs % rhs
Register left = ToRegister(left_input()).W();
Register right = ToRegister(right_input()).W();
Register lhs = ToRegister(left_input()).W();
Register rhs = ToRegister(right_input()).W();
Register out = ToRegister(result()).W();
static constexpr DeoptimizeReason deopt_reason =
DeoptimizeReason::kDivisionByZero;
if (lhs == rhs) {
// For the modulus algorithm described above, lhs and rhs must not alias
// each other.
__ Tst(lhs, lhs);
// TODO(victorgomes): This ideally should be kMinusZero, but Maglev only
// allows one deopt reason per IR.
__ EmitEagerDeoptIf(mi, deopt_reason, this);
__ Move(ToRegister(result()), 0);
return;
}
DCHECK(!AreAliased(lhs, rhs));
ZoneLabelRef done(masm);
ZoneLabelRef rhs_checked(masm);
__ Cmp(right, Immediate(0));
__ Cmp(rhs, Immediate(0));
__ JumpToDeferredIf(
le,
[](MaglevAssembler* masm, ZoneLabelRef rhs_checked, Register right,
[](MaglevAssembler* masm, ZoneLabelRef rhs_checked, Register rhs,
Int32ModulusWithOverflow* node) {
__ Negs(right, right);
__ EmitEagerDeoptIf(eq, DeoptimizeReason::kDivisionByZero, node);
__ Negs(rhs, rhs);
__ EmitEagerDeoptIf(eq, deopt_reason, node);
__ Jump(*rhs_checked);
},
rhs_checked, right, this);
rhs_checked, rhs, this);
__ bind(*rhs_checked);
__ Cmp(left, Immediate(0));
__ Cmp(lhs, Immediate(0));
__ JumpToDeferredIf(
lt,
[](MaglevAssembler* masm, ZoneLabelRef done, Register left,
Register right, Register out, Int32ModulusWithOverflow* node) {
[](MaglevAssembler* masm, ZoneLabelRef done, Register lhs, Register rhs,
Register out, Int32ModulusWithOverflow* node) {
UseScratchRegisterScope temps(masm);
Register res = temps.AcquireW();
__ neg(left, left);
__ udiv(res, left, right);
__ msub(out, res, right, left);
__ neg(lhs, lhs);
__ udiv(res, lhs, rhs);
__ msub(out, res, rhs, lhs);
__ cmp(out, Immediate(0));
// TODO(victorgomes): This ideally should be kMinusZero, but Maglev
// only allows one deopt reason per IR.
__ EmitEagerDeoptIf(eq, DeoptimizeReason::kDivisionByZero, node);
__ EmitEagerDeoptIf(eq, deopt_reason, node);
__ neg(out, out);
__ b(*done);
},
done, left, right, out, this);
done, lhs, rhs, out, this);
Label right_not_power_of_2;
Label rhs_not_power_of_2;
UseScratchRegisterScope temps(masm);
Register mask = temps.AcquireW();
__ Add(mask, right, Immediate(-1));
__ Tst(mask, right);
__ JumpIf(ne, &right_not_power_of_2);
__ Add(mask, rhs, Immediate(-1));
__ Tst(mask, rhs);
__ JumpIf(ne, &rhs_not_power_of_2);
// {right} is power of 2.
__ And(out, mask, left);
// {rhs} is power of 2.
__ And(out, mask, lhs);
__ Jump(*done);
__ bind(&right_not_power_of_2);
__ bind(&rhs_not_power_of_2);
// We store the result of the Udiv in a temporary register in case {out} is
// the same as {left} or {right}: we'll still need those 2 registers intact to
// the same as {lhs} or {rhs}: we'll still need those 2 registers intact to
// get the remainder.
Register res = mask;
__ Udiv(res, left, right);
__ Msub(out, res, right, left);
__ Udiv(res, lhs, rhs);
__ Msub(out, res, rhs, lhs);
__ bind(*done);
}

View File

@ -516,9 +516,11 @@ ValueNode* MaglevGraphBuilder::TryFoldInt32BinaryOperation(ValueNode* left,
ValueNode* right) {
switch (kOperation) {
case Operation::kModulus:
// x % x = 0
if (right == left) return GetInt32Constant(0);
break;
// Note the `x % x = 0` fold is invalid since for negative x values the
// result is -0.0.
// TODO(v8:7700): Consider re-enabling this fold if the result is used
// only in contexts where -0.0 is semantically equivalent to 0.0, or if x
// is known to be non-negative.
default:
// TODO(victorgomes): Implement more folds.
break;
@ -531,13 +533,14 @@ ValueNode* MaglevGraphBuilder::TryFoldInt32BinaryOperation(ValueNode* left,
int right) {
switch (kOperation) {
case Operation::kModulus:
// x % 1 = 0
// x % -1 = 0
if (right == 1 || right == -1) return GetInt32Constant(0);
// Note the `x % 1 = 0` and `x % -1 = 0` folds are invalid since for
// negative x values the result is -0.0.
// TODO(v8:7700): Consider re-enabling this fold if the result is used
// only in contexts where -0.0 is semantically equivalent to 0.0, or if x
// is known to be non-negative.
// TODO(victorgomes): We can emit faster mod operation if {right} is power
// of 2, unfortunately we need to know if {left} is negative or not.
// Maybe emit a Int32ModulusRightIsPowerOf2?
break;
default:
// TODO(victorgomes): Implement more folds.
break;

View File

@ -1432,12 +1432,16 @@ void Int32ModulusWithOverflow::SetValueLocationConstraints() {
void Int32ModulusWithOverflow::GenerateCode(MaglevAssembler* masm,
const ProcessingState& state) {
// Using same algorithm as in EffectControlLinearizer:
// If AreAliased(lhs, rhs):
// deopt if lhs < 0 // Minus zero.
// 0
//
// Otherwise, use the same algorithm as in EffectControlLinearizer:
// if rhs <= 0 then
// rhs = -rhs
// deopt if rhs == 0
// if lhs < 0 then
// let lhs_abs = -lsh in
// let lhs_abs = -lhs in
// let res = lhs_abs % rhs in
// deopt if res == 0
// -res
@ -1450,57 +1454,77 @@ void Int32ModulusWithOverflow::GenerateCode(MaglevAssembler* masm,
DCHECK(general_temporaries().has(rax));
DCHECK(general_temporaries().has(rdx));
Register left = ToRegister(left_input());
Register right = ToRegister(right_input());
Register lhs = ToRegister(left_input());
Register rhs = ToRegister(right_input());
static constexpr DeoptimizeReason deopt_reason =
DeoptimizeReason::kDivisionByZero;
if (lhs == rhs) {
// For the modulus algorithm described above, lhs and rhs must not alias
// each other.
__ testl(lhs, lhs);
// TODO(victorgomes): This ideally should be kMinusZero, but Maglev only
// allows one deopt reason per IR.
__ EmitEagerDeoptIf(negative, deopt_reason, this);
__ Move(ToRegister(result()), 0);
return;
}
DCHECK(!AreAliased(lhs, rhs, rax, rdx));
ZoneLabelRef done(masm);
ZoneLabelRef rhs_checked(masm);
__ cmpl(right, Immediate(0));
__ cmpl(rhs, Immediate(0));
__ JumpToDeferredIf(
less_equal,
[](MaglevAssembler* masm, ZoneLabelRef rhs_checked, Register right,
[](MaglevAssembler* masm, ZoneLabelRef rhs_checked, Register rhs,
Int32ModulusWithOverflow* node) {
__ negl(right);
__ EmitEagerDeoptIf(zero, DeoptimizeReason::kDivisionByZero, node);
__ negl(rhs);
__ EmitEagerDeoptIf(zero, deopt_reason, node);
__ jmp(*rhs_checked);
},
rhs_checked, right, this);
rhs_checked, rhs, this);
__ bind(*rhs_checked);
__ cmpl(left, Immediate(0));
__ cmpl(lhs, Immediate(0));
__ JumpToDeferredIf(
less,
[](MaglevAssembler* masm, ZoneLabelRef done, Register left,
Register right, Int32ModulusWithOverflow* node) {
__ movl(rax, left);
[](MaglevAssembler* masm, ZoneLabelRef done, Register lhs, Register rhs,
Int32ModulusWithOverflow* node) {
// `divl(divisor)` divides rdx:rax by the divisor and stores the
// quotient in rax, the remainder in rdx.
__ movl(rax, lhs);
__ negl(rax);
__ xorl(rdx, rdx);
__ divl(right);
__ divl(rhs);
__ testl(rdx, rdx);
// TODO(victorgomes): This ideally should be kMinusZero, but Maglev only
// allows one deopt reason per IR.
__ EmitEagerDeoptIf(equal, DeoptimizeReason::kDivisionByZero, node);
__ EmitEagerDeoptIf(equal, deopt_reason, node);
__ negl(rdx);
__ jmp(*done);
},
done, left, right, this);
done, lhs, rhs, this);
Label right_not_power_of_2;
Label rhs_not_power_of_2;
Register mask = rax;
__ leal(mask, Operand(right, -1));
__ testl(right, mask);
__ j(not_zero, &right_not_power_of_2, Label::kNear);
__ leal(mask, Operand(rhs, -1));
__ testl(rhs, mask);
__ j(not_zero, &rhs_not_power_of_2, Label::kNear);
// {right} is power of 2.
__ andl(mask, left);
// {rhs} is power of 2.
__ andl(mask, lhs);
__ movl(ToRegister(result()), mask);
__ jmp(*done, Label::kNear);
__ bind(&right_not_power_of_2);
__ movl(rax, left);
__ bind(&rhs_not_power_of_2);
// `divl(divisor)` divides rdx:rax by the divisor and stores the
// quotient in rax, the remainder in rdx.
__ movl(rax, lhs);
__ xorl(rdx, rdx);
__ divl(right);
__ divl(rhs);
// Result is implicitly written to rdx.
DCHECK_EQ(ToRegister(result()), rdx);

View File

@ -0,0 +1,14 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Flags: --allow-natives-syntax
function f(y) {
const x = y % y;
return 1 / x;
}
%PrepareFunctionForOptimization(f);
assertEquals(f(2), Infinity);
%OptimizeMaglevOnNextCall(f);
assertEquals(f(-2), -Infinity);