[turbofan] Fix CsaLoadElimination for small types

CsaLoadElimination failed to account for truncation when optimizing
loads. This CL extends the notion of compatible Loads and Stores to
include ({store}, {load}) pairs which both have integral representation
and {store}'s representation is no smaller than {load}'s. In case the
representations are not identical, it truncates and possibly
sign-extends {store} before forwarding it to {load}.

Additional change: Extend ObjectMayAlias with wasm allocating builtin
calls.

Bug: v8:11504
Change-Id: I43f89a13793b54477a33be18aaf346462aefa8e5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2739975
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Manos Koukoutos <manoskouk@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73340}
This commit is contained in:
Manos Koukoutos 2021-03-10 18:56:53 +00:00 committed by Commit Bot
parent b01869cabd
commit f7bb9267eb
5 changed files with 287 additions and 43 deletions

View File

@ -17,6 +17,7 @@ namespace internal {
enum class MachineRepresentation : uint8_t {
kNone,
kBit,
// Integral representations must be consecutive, in order of increasing order.
kWord8,
kWord16,
kWord32,
@ -26,7 +27,7 @@ enum class MachineRepresentation : uint8_t {
kTagged, // (uncompressed) Object (Smi or HeapObject)
kCompressedPointer, // (compressed) HeapObject
kCompressed, // (compressed) Object (Smi or HeapObject)
// FP representations must be last, and in order of increasing size.
// FP and SIMD representations must be last, and in order of increasing size.
kFloat32,
kFloat64,
kSimd128,
@ -36,6 +37,22 @@ enum class MachineRepresentation : uint8_t {
bool IsSubtype(MachineRepresentation rep1, MachineRepresentation rep2);
#define ASSERT_CONSECUTIVE(rep1, rep2) \
static_assert(static_cast<uint8_t>(MachineRepresentation::k##rep1) + 1 == \
static_cast<uint8_t>(MachineRepresentation::k##rep2), \
#rep1 " and " #rep2 " must be consecutive.");
ASSERT_CONSECUTIVE(Word8, Word16)
ASSERT_CONSECUTIVE(Word16, Word32)
ASSERT_CONSECUTIVE(Word32, Word64)
ASSERT_CONSECUTIVE(Float32, Float64)
ASSERT_CONSECUTIVE(Float64, Simd128)
#undef ASSERT_CONSECUTIVE
static_assert(MachineRepresentation::kLastRepresentation ==
MachineRepresentation::kSimd128,
"FP and SIMD representations must be last.");
static_assert(static_cast<int>(MachineRepresentation::kLastRepresentation) <
kIntSize * kBitsPerByte,
"Bit masks of MachineRepresentation should fit in an int");
@ -255,6 +272,11 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os,
std::ostream& operator<<(std::ostream& os, MachineSemantic type);
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os, MachineType type);
inline bool IsIntegral(MachineRepresentation rep) {
return rep >= MachineRepresentation::kWord8 &&
rep <= MachineRepresentation::kWord64;
}
inline bool IsFloatingPoint(MachineRepresentation rep) {
return rep >= MachineRepresentation::kFirstFPRepresentation;
}

View File

@ -54,42 +54,59 @@ Reduction CsaLoadElimination::Reduce(Node* node) {
case IrOpcode::kEffectPhi:
return ReduceEffectPhi(node);
case IrOpcode::kDead:
break;
return NoChange();
case IrOpcode::kStart:
return ReduceStart(node);
default:
return ReduceOtherNode(node);
}
return NoChange();
UNREACHABLE();
}
namespace CsaLoadEliminationHelpers {
bool IsCompatible(MachineRepresentation r1, MachineRepresentation r2) {
// TODO(manoskouk): Temporary patch-up to get wasm i8 and i16 working until we
// properly fix the compatibility logic.
if (ElementSizeInBytes(r1) <
ElementSizeInBytes(MachineRepresentation::kWord32)) {
bool Subsumes(MachineRepresentation from, MachineRepresentation to) {
if (from == to) return true;
if (IsAnyTagged(from)) return IsAnyTagged(to);
if (IsIntegral(from)) {
return IsIntegral(to) && ElementSizeInBytes(from) >= ElementSizeInBytes(to);
}
return false;
}
inline bool IsFreshObject(Node* node) {
if (node->opcode() == IrOpcode::kAllocate) return true;
if (node->opcode() == IrOpcode::kCall) {
NumberMatcher matcher(node->InputAt(0));
if (matcher.HasResolvedValue()) {
Builtins::Name callee =
static_cast<Builtins::Name>(matcher.ResolvedValue());
return callee == Builtins::kWasmAllocateArrayWithRtt ||
callee == Builtins::kWasmAllocatePair ||
callee == Builtins::kWasmAllocateFixedArray ||
callee == Builtins::kWasmAllocateJSArray ||
callee == Builtins::kWasmAllocateObjectWrapper ||
callee == Builtins::kWasmAllocateStructWithRtt;
}
}
return false;
}
inline bool IsPointerConstant(Node* node) {
switch (node->opcode()) {
case IrOpcode::kHeapConstant:
case IrOpcode::kParameter:
return true;
default:
return false;
}
if (r1 == r2) return true;
return IsAnyTagged(r1) && IsAnyTagged(r2);
}
bool ObjectMayAlias(Node* a, Node* b) {
if (a != b) {
if (b->opcode() == IrOpcode::kAllocate) {
std::swap(a, b);
}
if (a->opcode() == IrOpcode::kAllocate) {
switch (b->opcode()) {
case IrOpcode::kAllocate:
case IrOpcode::kHeapConstant:
case IrOpcode::kParameter:
if (IsFreshObject(b)) std::swap(a, b);
if (IsFreshObject(a) && (IsFreshObject(b) || IsPointerConstant(b))) {
return false;
default:
break;
}
}
}
return true;
@ -187,9 +204,11 @@ Reduction CsaLoadElimination::ReduceLoadFromObject(Node* node,
if (!lookup_result.IsEmpty()) {
// Make sure we don't reuse values that were recorded with a different
// representation or resurrect dead {replacement} nodes.
Node* replacement = lookup_result.value;
if (Helpers::IsCompatible(representation, lookup_result.representation) &&
!replacement->IsDead()) {
MachineRepresentation from = lookup_result.representation;
if (Helpers::Subsumes(from, representation) &&
!lookup_result.value->IsDead()) {
Node* replacement =
TruncateAndExtend(lookup_result.value, from, access.machine_type);
ReplaceWithValue(node, replacement, effect);
return Replace(replacement);
}
@ -261,24 +280,20 @@ Reduction CsaLoadElimination::ReduceCall(Node* node) {
}
Reduction CsaLoadElimination::ReduceOtherNode(Node* node) {
if (node->op()->EffectInputCount() == 1) {
if (node->op()->EffectOutputCount() == 1) {
if (node->op()->EffectInputCount() == 1 &&
node->op()->EffectOutputCount() == 1) {
Node* const effect = NodeProperties::GetEffectInput(node);
AbstractState const* state = node_states_.Get(effect);
// If we do not know anything about the predecessor, do not propagate
// just yet because we will have to recompute anyway once we compute
// the predecessor.
// If we do not know anything about the predecessor, do not propagate just
// yet because we will have to recompute anyway once we compute the
// predecessor.
if (state == nullptr) return NoChange();
// Check if this {node} has some uncontrolled side effects.
if (!node->op()->HasProperty(Operator::kNoWrite)) {
state = empty_state();
// If this {node} has some uncontrolled side effects, set its state to
// {empty_state()}, otherwise to its input state.
return UpdateState(node, node->op()->HasProperty(Operator::kNoWrite)
? state
: empty_state());
}
return UpdateState(node, state);
} else {
return NoChange();
}
}
DCHECK_EQ(0, node->op()->EffectInputCount());
DCHECK_EQ(0, node->op()->EffectOutputCount());
return NoChange();
}
@ -329,10 +344,58 @@ CsaLoadElimination::AbstractState const* CsaLoadElimination::ComputeLoopState(
return state;
}
Node* CsaLoadElimination::TruncateAndExtend(Node* node,
MachineRepresentation from,
MachineType to) {
DCHECK(Helpers::Subsumes(from, to.representation()));
DCHECK_GE(ElementSizeInBytes(from), ElementSizeInBytes(to.representation()));
if (to == MachineType::Int8() || to == MachineType::Int16()) {
// 1st case: We want to eliminate a signed 8/16-bit load using the value
// from a previous subsuming load or store. Since that value might be
// outside 8/16-bit range, we first truncate it accordingly. Then we
// sign-extend the result to 32-bit.
DCHECK_EQ(to.semantic(), MachineSemantic::kInt32);
if (from == MachineRepresentation::kWord64) {
node = graph()->NewNode(machine()->TruncateInt64ToInt32(), node);
}
int shift = 32 - 8 * ElementSizeInBytes(to.representation());
return graph()->NewNode(machine()->Word32Sar(),
graph()->NewNode(machine()->Word32Shl(), node,
jsgraph()->Int32Constant(shift)),
jsgraph()->Int32Constant(shift));
} else if (to == MachineType::Uint8() || to == MachineType::Uint16()) {
// 2nd case: We want to eliminate an unsigned 8/16-bit load using the value
// from a previous subsuming load or store. Since that value might be
// outside 8/16-bit range, we first truncate it accordingly.
if (from == MachineRepresentation::kWord64) {
node = graph()->NewNode(machine()->TruncateInt64ToInt32(), node);
}
int mask = (1 << 8 * ElementSizeInBytes(to.representation())) - 1;
return graph()->NewNode(machine()->Word32And(), node,
jsgraph()->Int32Constant(mask));
} else if (from == MachineRepresentation::kWord64 &&
to.representation() == MachineRepresentation::kWord32) {
// 3rd case: Truncate 64-bits into 32-bits.
return graph()->NewNode(machine()->TruncateInt64ToInt32(), node);
} else {
// 4th case: No need for truncation.
DCHECK((from == to.representation() &&
(from == MachineRepresentation::kWord32 ||
from == MachineRepresentation::kWord64 || !IsIntegral(from))) ||
(IsAnyTagged(from) && IsAnyTagged(to.representation())));
return node;
}
}
CommonOperatorBuilder* CsaLoadElimination::common() const {
return jsgraph()->common();
}
MachineOperatorBuilder* CsaLoadElimination::machine() const {
return jsgraph()->machine();
}
Graph* CsaLoadElimination::graph() const { return jsgraph()->graph(); }
Isolate* CsaLoadElimination::isolate() const { return jsgraph()->isolate(); }

View File

@ -97,8 +97,11 @@ class V8_EXPORT_PRIVATE CsaLoadElimination final
AbstractState const* ComputeLoopState(Node* node,
AbstractState const* state) const;
Node* TruncateAndExtend(Node* node, MachineRepresentation from,
MachineType to);
CommonOperatorBuilder* common() const;
MachineOperatorBuilder* machine() const;
Isolate* isolate() const;
Graph* graph() const;
JSGraph* jsgraph() const { return jsgraph_; }

View File

@ -248,6 +248,7 @@ v8_source_set("unittests_sources") {
"compiler/constant-folding-reducer-unittest.cc",
"compiler/control-equivalence-unittest.cc",
"compiler/control-flow-optimizer-unittest.cc",
"compiler/csa-load-elimination-unittest.cc",
"compiler/dead-code-elimination-unittest.cc",
"compiler/decompression-optimizer-unittest.cc",
"compiler/diamond-unittest.cc",

View File

@ -0,0 +1,155 @@
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/csa-load-elimination.h"
#include "src/compiler/graph-reducer.h"
#include "src/compiler/js-graph.h"
#include "src/compiler/machine-operator-reducer.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"
#include "test/unittests/compiler/graph-reducer-unittest.h"
#include "test/unittests/compiler/graph-unittest.h"
#include "test/unittests/compiler/node-test-utils.h"
#include "testing/gmock-support.h"
using testing::_;
using testing::StrictMock;
namespace v8 {
namespace internal {
namespace compiler {
class CsaLoadEliminationTest : public GraphTest {
public:
CsaLoadEliminationTest()
: GraphTest(3),
simplified_(zone()),
machine_(zone()),
jsgraph_(isolate(), graph(), common(), nullptr, simplified(),
machine()),
reducer_(zone(), graph(), tick_counter(), broker()),
csa_(reducer(), jsgraph(), zone()),
mcr_(reducer(), jsgraph()) {
reducer()->AddReducer(&csa_);
reducer()->AddReducer(&mcr_);
}
~CsaLoadEliminationTest() override = default;
protected:
JSGraph* jsgraph() { return &jsgraph_; }
SimplifiedOperatorBuilder* simplified() { return &simplified_; }
MachineOperatorBuilder* machine() { return &machine_; }
GraphReducer* reducer() { return &reducer_; }
Node* param1() {
return graph()->NewNode(common()->Parameter(1), graph()->start());
}
Node* constant(int32_t value) {
return graph()->NewNode(common()->Int32Constant(value));
}
private:
SimplifiedOperatorBuilder simplified_;
MachineOperatorBuilder machine_;
JSGraph jsgraph_;
GraphReducer reducer_;
CsaLoadElimination csa_;
MachineOperatorReducer mcr_;
};
#define SETUP_SIMPLE_TEST(store_type, load_type, value_) \
Node* object = graph()->NewNode(common()->Parameter(0), graph()->start()); \
Node* offset = graph()->NewNode(common()->Int32Constant(5)); \
Node* value = value_; \
Node* control = graph()->start(); \
\
ObjectAccess store_access(MachineType::store_type(), kNoWriteBarrier); \
ObjectAccess load_access(MachineType::load_type(), kNoWriteBarrier); \
\
Node* store = \
graph()->NewNode(simplified()->StoreToObject(store_access), object, \
offset, value, graph()->start(), control); \
\
Node* load = graph()->NewNode(simplified()->LoadFromObject(load_access), \
object, offset, store, control); \
\
Node* ret = graph()->NewNode(common()->Return(0), load, load, control); \
\
graph()->end()->InsertInput(zone(), 0, ret); \
\
reducer()->ReduceGraph();
TEST_F(CsaLoadEliminationTest, Int32) {
SETUP_SIMPLE_TEST(Int32, Int32, param1())
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kParameter);
}
TEST_F(CsaLoadEliminationTest, Int64) {
SETUP_SIMPLE_TEST(Int64, Int64, param1())
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kParameter);
}
TEST_F(CsaLoadEliminationTest, Int64_to_Int32) {
SETUP_SIMPLE_TEST(Int64, Int32, param1())
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kTruncateInt64ToInt32);
}
TEST_F(CsaLoadEliminationTest, Int16_to_Int16) {
SETUP_SIMPLE_TEST(Int16, Int16, param1())
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kWord32Sar);
}
TEST_F(CsaLoadEliminationTest, Int16_to_Uint8) {
SETUP_SIMPLE_TEST(Int16, Uint8, param1())
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kWord32And);
}
TEST_F(CsaLoadEliminationTest, Int8_to_Uint16) {
SETUP_SIMPLE_TEST(Int8, Uint16, param1())
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kLoadFromObject);
}
TEST_F(CsaLoadEliminationTest, Int8_to_Uint64) {
SETUP_SIMPLE_TEST(Int8, Uint64, param1())
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kLoadFromObject);
}
TEST_F(CsaLoadEliminationTest, Int32_to_Int64) {
SETUP_SIMPLE_TEST(Int32, Int64, param1())
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kLoadFromObject);
}
TEST_F(CsaLoadEliminationTest, Int16_constant) {
SETUP_SIMPLE_TEST(Int32, Int16, constant(0xfedcba98))
Int32Matcher m(ret->InputAt(0));
EXPECT_TRUE(m.HasResolvedValue());
EXPECT_EQ(m.ResolvedValue(), int32_t(0xffffba98));
}
TEST_F(CsaLoadEliminationTest, Uint8_constant) {
SETUP_SIMPLE_TEST(Int32, Uint8, constant(0xfedcba98))
Uint32Matcher m(ret->InputAt(0));
EXPECT_TRUE(m.HasResolvedValue());
EXPECT_EQ(m.ResolvedValue(), uint32_t(0x98));
}
#undef SETUP_SIMPLE_TEST
} // namespace compiler
} // namespace internal
} // namespace v8