[turbofan] Fix CsaLoadElimination for small types
CsaLoadElimination failed to account for truncation when optimizing loads. This CL extends the notion of compatible Loads and Stores to include ({store}, {load}) pairs which both have integral representation and {store}'s representation is no smaller than {load}'s. In case the representations are not identical, it truncates and possibly sign-extends {store} before forwarding it to {load}. Additional change: Extend ObjectMayAlias with wasm allocating builtin calls. Bug: v8:11504 Change-Id: I43f89a13793b54477a33be18aaf346462aefa8e5 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2739975 Reviewed-by: Georg Neis <neis@chromium.org> Commit-Queue: Manos Koukoutos <manoskouk@chromium.org> Cr-Commit-Position: refs/heads/master@{#73340}
This commit is contained in:
parent
b01869cabd
commit
f7bb9267eb
@ -17,6 +17,7 @@ namespace internal {
|
||||
enum class MachineRepresentation : uint8_t {
|
||||
kNone,
|
||||
kBit,
|
||||
// Integral representations must be consecutive, in order of increasing order.
|
||||
kWord8,
|
||||
kWord16,
|
||||
kWord32,
|
||||
@ -26,7 +27,7 @@ enum class MachineRepresentation : uint8_t {
|
||||
kTagged, // (uncompressed) Object (Smi or HeapObject)
|
||||
kCompressedPointer, // (compressed) HeapObject
|
||||
kCompressed, // (compressed) Object (Smi or HeapObject)
|
||||
// FP representations must be last, and in order of increasing size.
|
||||
// FP and SIMD representations must be last, and in order of increasing size.
|
||||
kFloat32,
|
||||
kFloat64,
|
||||
kSimd128,
|
||||
@ -36,6 +37,22 @@ enum class MachineRepresentation : uint8_t {
|
||||
|
||||
bool IsSubtype(MachineRepresentation rep1, MachineRepresentation rep2);
|
||||
|
||||
#define ASSERT_CONSECUTIVE(rep1, rep2) \
|
||||
static_assert(static_cast<uint8_t>(MachineRepresentation::k##rep1) + 1 == \
|
||||
static_cast<uint8_t>(MachineRepresentation::k##rep2), \
|
||||
#rep1 " and " #rep2 " must be consecutive.");
|
||||
|
||||
ASSERT_CONSECUTIVE(Word8, Word16)
|
||||
ASSERT_CONSECUTIVE(Word16, Word32)
|
||||
ASSERT_CONSECUTIVE(Word32, Word64)
|
||||
ASSERT_CONSECUTIVE(Float32, Float64)
|
||||
ASSERT_CONSECUTIVE(Float64, Simd128)
|
||||
#undef ASSERT_CONSECUTIVE
|
||||
|
||||
static_assert(MachineRepresentation::kLastRepresentation ==
|
||||
MachineRepresentation::kSimd128,
|
||||
"FP and SIMD representations must be last.");
|
||||
|
||||
static_assert(static_cast<int>(MachineRepresentation::kLastRepresentation) <
|
||||
kIntSize * kBitsPerByte,
|
||||
"Bit masks of MachineRepresentation should fit in an int");
|
||||
@ -255,6 +272,11 @@ V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os,
|
||||
std::ostream& operator<<(std::ostream& os, MachineSemantic type);
|
||||
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os, MachineType type);
|
||||
|
||||
inline bool IsIntegral(MachineRepresentation rep) {
|
||||
return rep >= MachineRepresentation::kWord8 &&
|
||||
rep <= MachineRepresentation::kWord64;
|
||||
}
|
||||
|
||||
inline bool IsFloatingPoint(MachineRepresentation rep) {
|
||||
return rep >= MachineRepresentation::kFirstFPRepresentation;
|
||||
}
|
||||
|
@ -54,42 +54,59 @@ Reduction CsaLoadElimination::Reduce(Node* node) {
|
||||
case IrOpcode::kEffectPhi:
|
||||
return ReduceEffectPhi(node);
|
||||
case IrOpcode::kDead:
|
||||
break;
|
||||
return NoChange();
|
||||
case IrOpcode::kStart:
|
||||
return ReduceStart(node);
|
||||
default:
|
||||
return ReduceOtherNode(node);
|
||||
}
|
||||
return NoChange();
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
namespace CsaLoadEliminationHelpers {
|
||||
|
||||
bool IsCompatible(MachineRepresentation r1, MachineRepresentation r2) {
|
||||
// TODO(manoskouk): Temporary patch-up to get wasm i8 and i16 working until we
|
||||
// properly fix the compatibility logic.
|
||||
if (ElementSizeInBytes(r1) <
|
||||
ElementSizeInBytes(MachineRepresentation::kWord32)) {
|
||||
bool Subsumes(MachineRepresentation from, MachineRepresentation to) {
|
||||
if (from == to) return true;
|
||||
if (IsAnyTagged(from)) return IsAnyTagged(to);
|
||||
if (IsIntegral(from)) {
|
||||
return IsIntegral(to) && ElementSizeInBytes(from) >= ElementSizeInBytes(to);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool IsFreshObject(Node* node) {
|
||||
if (node->opcode() == IrOpcode::kAllocate) return true;
|
||||
if (node->opcode() == IrOpcode::kCall) {
|
||||
NumberMatcher matcher(node->InputAt(0));
|
||||
if (matcher.HasResolvedValue()) {
|
||||
Builtins::Name callee =
|
||||
static_cast<Builtins::Name>(matcher.ResolvedValue());
|
||||
return callee == Builtins::kWasmAllocateArrayWithRtt ||
|
||||
callee == Builtins::kWasmAllocatePair ||
|
||||
callee == Builtins::kWasmAllocateFixedArray ||
|
||||
callee == Builtins::kWasmAllocateJSArray ||
|
||||
callee == Builtins::kWasmAllocateObjectWrapper ||
|
||||
callee == Builtins::kWasmAllocateStructWithRtt;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool IsPointerConstant(Node* node) {
|
||||
switch (node->opcode()) {
|
||||
case IrOpcode::kHeapConstant:
|
||||
case IrOpcode::kParameter:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
if (r1 == r2) return true;
|
||||
return IsAnyTagged(r1) && IsAnyTagged(r2);
|
||||
}
|
||||
|
||||
bool ObjectMayAlias(Node* a, Node* b) {
|
||||
if (a != b) {
|
||||
if (b->opcode() == IrOpcode::kAllocate) {
|
||||
std::swap(a, b);
|
||||
}
|
||||
if (a->opcode() == IrOpcode::kAllocate) {
|
||||
switch (b->opcode()) {
|
||||
case IrOpcode::kAllocate:
|
||||
case IrOpcode::kHeapConstant:
|
||||
case IrOpcode::kParameter:
|
||||
if (IsFreshObject(b)) std::swap(a, b);
|
||||
if (IsFreshObject(a) && (IsFreshObject(b) || IsPointerConstant(b))) {
|
||||
return false;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
@ -187,9 +204,11 @@ Reduction CsaLoadElimination::ReduceLoadFromObject(Node* node,
|
||||
if (!lookup_result.IsEmpty()) {
|
||||
// Make sure we don't reuse values that were recorded with a different
|
||||
// representation or resurrect dead {replacement} nodes.
|
||||
Node* replacement = lookup_result.value;
|
||||
if (Helpers::IsCompatible(representation, lookup_result.representation) &&
|
||||
!replacement->IsDead()) {
|
||||
MachineRepresentation from = lookup_result.representation;
|
||||
if (Helpers::Subsumes(from, representation) &&
|
||||
!lookup_result.value->IsDead()) {
|
||||
Node* replacement =
|
||||
TruncateAndExtend(lookup_result.value, from, access.machine_type);
|
||||
ReplaceWithValue(node, replacement, effect);
|
||||
return Replace(replacement);
|
||||
}
|
||||
@ -261,24 +280,20 @@ Reduction CsaLoadElimination::ReduceCall(Node* node) {
|
||||
}
|
||||
|
||||
Reduction CsaLoadElimination::ReduceOtherNode(Node* node) {
|
||||
if (node->op()->EffectInputCount() == 1) {
|
||||
if (node->op()->EffectOutputCount() == 1) {
|
||||
if (node->op()->EffectInputCount() == 1 &&
|
||||
node->op()->EffectOutputCount() == 1) {
|
||||
Node* const effect = NodeProperties::GetEffectInput(node);
|
||||
AbstractState const* state = node_states_.Get(effect);
|
||||
// If we do not know anything about the predecessor, do not propagate
|
||||
// just yet because we will have to recompute anyway once we compute
|
||||
// the predecessor.
|
||||
// If we do not know anything about the predecessor, do not propagate just
|
||||
// yet because we will have to recompute anyway once we compute the
|
||||
// predecessor.
|
||||
if (state == nullptr) return NoChange();
|
||||
// Check if this {node} has some uncontrolled side effects.
|
||||
if (!node->op()->HasProperty(Operator::kNoWrite)) {
|
||||
state = empty_state();
|
||||
// If this {node} has some uncontrolled side effects, set its state to
|
||||
// {empty_state()}, otherwise to its input state.
|
||||
return UpdateState(node, node->op()->HasProperty(Operator::kNoWrite)
|
||||
? state
|
||||
: empty_state());
|
||||
}
|
||||
return UpdateState(node, state);
|
||||
} else {
|
||||
return NoChange();
|
||||
}
|
||||
}
|
||||
DCHECK_EQ(0, node->op()->EffectInputCount());
|
||||
DCHECK_EQ(0, node->op()->EffectOutputCount());
|
||||
return NoChange();
|
||||
}
|
||||
@ -329,10 +344,58 @@ CsaLoadElimination::AbstractState const* CsaLoadElimination::ComputeLoopState(
|
||||
return state;
|
||||
}
|
||||
|
||||
Node* CsaLoadElimination::TruncateAndExtend(Node* node,
|
||||
MachineRepresentation from,
|
||||
MachineType to) {
|
||||
DCHECK(Helpers::Subsumes(from, to.representation()));
|
||||
DCHECK_GE(ElementSizeInBytes(from), ElementSizeInBytes(to.representation()));
|
||||
|
||||
if (to == MachineType::Int8() || to == MachineType::Int16()) {
|
||||
// 1st case: We want to eliminate a signed 8/16-bit load using the value
|
||||
// from a previous subsuming load or store. Since that value might be
|
||||
// outside 8/16-bit range, we first truncate it accordingly. Then we
|
||||
// sign-extend the result to 32-bit.
|
||||
DCHECK_EQ(to.semantic(), MachineSemantic::kInt32);
|
||||
if (from == MachineRepresentation::kWord64) {
|
||||
node = graph()->NewNode(machine()->TruncateInt64ToInt32(), node);
|
||||
}
|
||||
int shift = 32 - 8 * ElementSizeInBytes(to.representation());
|
||||
return graph()->NewNode(machine()->Word32Sar(),
|
||||
graph()->NewNode(machine()->Word32Shl(), node,
|
||||
jsgraph()->Int32Constant(shift)),
|
||||
jsgraph()->Int32Constant(shift));
|
||||
} else if (to == MachineType::Uint8() || to == MachineType::Uint16()) {
|
||||
// 2nd case: We want to eliminate an unsigned 8/16-bit load using the value
|
||||
// from a previous subsuming load or store. Since that value might be
|
||||
// outside 8/16-bit range, we first truncate it accordingly.
|
||||
if (from == MachineRepresentation::kWord64) {
|
||||
node = graph()->NewNode(machine()->TruncateInt64ToInt32(), node);
|
||||
}
|
||||
int mask = (1 << 8 * ElementSizeInBytes(to.representation())) - 1;
|
||||
return graph()->NewNode(machine()->Word32And(), node,
|
||||
jsgraph()->Int32Constant(mask));
|
||||
} else if (from == MachineRepresentation::kWord64 &&
|
||||
to.representation() == MachineRepresentation::kWord32) {
|
||||
// 3rd case: Truncate 64-bits into 32-bits.
|
||||
return graph()->NewNode(machine()->TruncateInt64ToInt32(), node);
|
||||
} else {
|
||||
// 4th case: No need for truncation.
|
||||
DCHECK((from == to.representation() &&
|
||||
(from == MachineRepresentation::kWord32 ||
|
||||
from == MachineRepresentation::kWord64 || !IsIntegral(from))) ||
|
||||
(IsAnyTagged(from) && IsAnyTagged(to.representation())));
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
CommonOperatorBuilder* CsaLoadElimination::common() const {
|
||||
return jsgraph()->common();
|
||||
}
|
||||
|
||||
MachineOperatorBuilder* CsaLoadElimination::machine() const {
|
||||
return jsgraph()->machine();
|
||||
}
|
||||
|
||||
Graph* CsaLoadElimination::graph() const { return jsgraph()->graph(); }
|
||||
|
||||
Isolate* CsaLoadElimination::isolate() const { return jsgraph()->isolate(); }
|
||||
|
@ -97,8 +97,11 @@ class V8_EXPORT_PRIVATE CsaLoadElimination final
|
||||
|
||||
AbstractState const* ComputeLoopState(Node* node,
|
||||
AbstractState const* state) const;
|
||||
Node* TruncateAndExtend(Node* node, MachineRepresentation from,
|
||||
MachineType to);
|
||||
|
||||
CommonOperatorBuilder* common() const;
|
||||
MachineOperatorBuilder* machine() const;
|
||||
Isolate* isolate() const;
|
||||
Graph* graph() const;
|
||||
JSGraph* jsgraph() const { return jsgraph_; }
|
||||
|
@ -248,6 +248,7 @@ v8_source_set("unittests_sources") {
|
||||
"compiler/constant-folding-reducer-unittest.cc",
|
||||
"compiler/control-equivalence-unittest.cc",
|
||||
"compiler/control-flow-optimizer-unittest.cc",
|
||||
"compiler/csa-load-elimination-unittest.cc",
|
||||
"compiler/dead-code-elimination-unittest.cc",
|
||||
"compiler/decompression-optimizer-unittest.cc",
|
||||
"compiler/diamond-unittest.cc",
|
||||
|
155
test/unittests/compiler/csa-load-elimination-unittest.cc
Normal file
155
test/unittests/compiler/csa-load-elimination-unittest.cc
Normal file
@ -0,0 +1,155 @@
|
||||
// Copyright 2021 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/compiler/csa-load-elimination.h"
|
||||
|
||||
#include "src/compiler/graph-reducer.h"
|
||||
#include "src/compiler/js-graph.h"
|
||||
#include "src/compiler/machine-operator-reducer.h"
|
||||
#include "src/compiler/node-matchers.h"
|
||||
#include "src/compiler/node.h"
|
||||
#include "src/compiler/simplified-operator.h"
|
||||
#include "test/unittests/compiler/graph-reducer-unittest.h"
|
||||
#include "test/unittests/compiler/graph-unittest.h"
|
||||
#include "test/unittests/compiler/node-test-utils.h"
|
||||
#include "testing/gmock-support.h"
|
||||
|
||||
using testing::_;
|
||||
using testing::StrictMock;
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
namespace compiler {
|
||||
|
||||
class CsaLoadEliminationTest : public GraphTest {
|
||||
public:
|
||||
CsaLoadEliminationTest()
|
||||
: GraphTest(3),
|
||||
simplified_(zone()),
|
||||
machine_(zone()),
|
||||
jsgraph_(isolate(), graph(), common(), nullptr, simplified(),
|
||||
machine()),
|
||||
reducer_(zone(), graph(), tick_counter(), broker()),
|
||||
csa_(reducer(), jsgraph(), zone()),
|
||||
mcr_(reducer(), jsgraph()) {
|
||||
reducer()->AddReducer(&csa_);
|
||||
reducer()->AddReducer(&mcr_);
|
||||
}
|
||||
|
||||
~CsaLoadEliminationTest() override = default;
|
||||
|
||||
protected:
|
||||
JSGraph* jsgraph() { return &jsgraph_; }
|
||||
SimplifiedOperatorBuilder* simplified() { return &simplified_; }
|
||||
MachineOperatorBuilder* machine() { return &machine_; }
|
||||
GraphReducer* reducer() { return &reducer_; }
|
||||
Node* param1() {
|
||||
return graph()->NewNode(common()->Parameter(1), graph()->start());
|
||||
}
|
||||
Node* constant(int32_t value) {
|
||||
return graph()->NewNode(common()->Int32Constant(value));
|
||||
}
|
||||
|
||||
private:
|
||||
SimplifiedOperatorBuilder simplified_;
|
||||
MachineOperatorBuilder machine_;
|
||||
JSGraph jsgraph_;
|
||||
GraphReducer reducer_;
|
||||
CsaLoadElimination csa_;
|
||||
MachineOperatorReducer mcr_;
|
||||
};
|
||||
|
||||
#define SETUP_SIMPLE_TEST(store_type, load_type, value_) \
|
||||
Node* object = graph()->NewNode(common()->Parameter(0), graph()->start()); \
|
||||
Node* offset = graph()->NewNode(common()->Int32Constant(5)); \
|
||||
Node* value = value_; \
|
||||
Node* control = graph()->start(); \
|
||||
\
|
||||
ObjectAccess store_access(MachineType::store_type(), kNoWriteBarrier); \
|
||||
ObjectAccess load_access(MachineType::load_type(), kNoWriteBarrier); \
|
||||
\
|
||||
Node* store = \
|
||||
graph()->NewNode(simplified()->StoreToObject(store_access), object, \
|
||||
offset, value, graph()->start(), control); \
|
||||
\
|
||||
Node* load = graph()->NewNode(simplified()->LoadFromObject(load_access), \
|
||||
object, offset, store, control); \
|
||||
\
|
||||
Node* ret = graph()->NewNode(common()->Return(0), load, load, control); \
|
||||
\
|
||||
graph()->end()->InsertInput(zone(), 0, ret); \
|
||||
\
|
||||
reducer()->ReduceGraph();
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int32) {
|
||||
SETUP_SIMPLE_TEST(Int32, Int32, param1())
|
||||
|
||||
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kParameter);
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int64) {
|
||||
SETUP_SIMPLE_TEST(Int64, Int64, param1())
|
||||
|
||||
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kParameter);
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int64_to_Int32) {
|
||||
SETUP_SIMPLE_TEST(Int64, Int32, param1())
|
||||
|
||||
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kTruncateInt64ToInt32);
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int16_to_Int16) {
|
||||
SETUP_SIMPLE_TEST(Int16, Int16, param1())
|
||||
|
||||
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kWord32Sar);
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int16_to_Uint8) {
|
||||
SETUP_SIMPLE_TEST(Int16, Uint8, param1())
|
||||
|
||||
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kWord32And);
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int8_to_Uint16) {
|
||||
SETUP_SIMPLE_TEST(Int8, Uint16, param1())
|
||||
|
||||
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kLoadFromObject);
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int8_to_Uint64) {
|
||||
SETUP_SIMPLE_TEST(Int8, Uint64, param1())
|
||||
|
||||
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kLoadFromObject);
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int32_to_Int64) {
|
||||
SETUP_SIMPLE_TEST(Int32, Int64, param1())
|
||||
|
||||
EXPECT_EQ(ret->InputAt(0)->opcode(), IrOpcode::kLoadFromObject);
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Int16_constant) {
|
||||
SETUP_SIMPLE_TEST(Int32, Int16, constant(0xfedcba98))
|
||||
|
||||
Int32Matcher m(ret->InputAt(0));
|
||||
|
||||
EXPECT_TRUE(m.HasResolvedValue());
|
||||
EXPECT_EQ(m.ResolvedValue(), int32_t(0xffffba98));
|
||||
}
|
||||
|
||||
TEST_F(CsaLoadEliminationTest, Uint8_constant) {
|
||||
SETUP_SIMPLE_TEST(Int32, Uint8, constant(0xfedcba98))
|
||||
|
||||
Uint32Matcher m(ret->InputAt(0));
|
||||
|
||||
EXPECT_TRUE(m.HasResolvedValue());
|
||||
EXPECT_EQ(m.ResolvedValue(), uint32_t(0x98));
|
||||
}
|
||||
|
||||
#undef SETUP_SIMPLE_TEST
|
||||
|
||||
} // namespace compiler
|
||||
} // namespace internal
|
||||
} // namespace v8
|
Loading…
Reference in New Issue
Block a user