// File: v8/src/compiler/effect-control-linearizer.h (244 lines, 10 KiB, C).
// Note: this text was captured from a web blame view that ignores the
// revisions listed in .git-blame-ignore-revs.

// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_COMPILER_EFFECT_CONTROL_LINEARIZER_H_
#define V8_COMPILER_EFFECT_CONTROL_LINEARIZER_H_
#include "src/compiler/common-operator.h"
#include "src/compiler/graph-assembler.h"
#include "src/compiler/node.h"
#include "src/compiler/simplified-operator.h"
#include "src/globals.h"
namespace v8 {
namespace internal {
// Forward declarations. Within this header, Callable is only taken by const
// reference and Zone is only handled through pointers.
class Callable;
class Zone;
namespace compiler {
// Forward declarations of compiler types. Within this header they appear
// only as pointer types, either as constructor arguments, as stored members
// or as accessor return types of EffectControlLinearizer.
class CommonOperatorBuilder;
class SimplifiedOperatorBuilder;
class MachineOperatorBuilder;
class JSGraph;
class Graph;
class Schedule;
class SourcePositionTable;
class NodeOriginTable;
class V8_EXPORT_PRIVATE EffectControlLinearizer {
public:
enum MaskArrayIndexEnable { kDoNotMaskArrayIndex, kMaskArrayIndex };
EffectControlLinearizer(JSGraph* graph, Schedule* schedule, Zone* temp_zone,
SourcePositionTable* source_positions,
NodeOriginTable* node_origins,
MaskArrayIndexEnable mask_array_index);
void Run();
private:
void ProcessNode(Node* node, Node** frame_state, Node** effect,
Node** control);
bool TryWireInStateEffect(Node* node, Node* frame_state, Node** effect,
Node** control);
Node* LowerChangeBitToTagged(Node* node);
Node* LowerChangeInt31ToTaggedSigned(Node* node);
Node* LowerChangeInt32ToTagged(Node* node);
Node* LowerChangeInt64ToTagged(Node* node);
Node* LowerChangeUint32ToTagged(Node* node);
Node* LowerChangeUint64ToTagged(Node* node);
Node* LowerChangeFloat64ToTagged(Node* node);
Node* LowerChangeFloat64ToTaggedPointer(Node* node);
Node* LowerChangeTaggedSignedToInt32(Node* node);
Node* LowerChangeTaggedSignedToInt64(Node* node);
Node* LowerChangeTaggedToBit(Node* node);
Node* LowerChangeTaggedToInt32(Node* node);
Node* LowerChangeTaggedToUint32(Node* node);
Node* LowerChangeTaggedToInt64(Node* node);
Node* LowerChangeTaggedToTaggedSigned(Node* node);
Node* LowerCheckBounds(Node* node, Node* frame_state);
Node* LowerPoisonIndex(Node* node);
Node* LowerCheckInternalizedString(Node* node, Node* frame_state);
void LowerCheckMaps(Node* node, Node* frame_state);
Node* LowerCompareMaps(Node* node);
Node* LowerCheckNumber(Node* node, Node* frame_state);
Node* LowerCheckReceiver(Node* node, Node* frame_state);
[turbofan] ReceiverOrNullOrUndefined feedback for JSEqual. This changes the ReceiverOrOddball feedback on JSStrictEqual to ReceiverOrNullOrUndefined feedback, which can also safely be consumed by JSEqual (we cannot generally accept any oddball here since booleans trigger implicit conversions, unfortunately). Thus we replace the previously introduced CheckReceiverOrOddball with CheckReceiverOrNullOrUndefined, and drop CheckOddball, since we will no longer collect Oddball feedback separately. TurboFan will then turn a JSEqual[ReceiverOrNullOrUndefined] into a sequence like this: ``` left = CheckReceiverOrNullOrUndefined(left); right = CheckReceiverOrNullOrUndefined(right); result = if ObjectIsUndetectable(left) then ObjectIsUndetectable(right) else ReferenceEqual(left, right); ``` This significantly improves the peak performance of abstract equality with Receiver, Null or Undefined inputs. On the test case outlined in http://crbug.com/v8/8356 we go from naive: 2946 ms. tenary: 2134 ms. to naive: 2230 ms. tenary: 2250 ms. which corresponds to a 25% improvement on the abstract equality case. For regular code this will probably yield more performance, since we get rid of the JSEqual operator, which might have arbitrary side effects and thus blocks all kinds of TurboFan optimizations. The JSStrictEqual case is slightly slower now, since it has to rule out booleans as well (even though that's not strictly necessary, but consistency is key here). This way developers can safely use `a == b` instead of doing a dance like `a == null ? b == null : a === b` (which is what dart2js does right now) when both `a` and `b` are known to be Receiver, Null or Undefined. The abstract equality is not only faster to parse than the tenary, but also generates a shorter bytecode sequence. 
In the test case referenced in http://crbug.com/v8/8356 the bytecode for `naive` is ``` StackCheck Ldar a1 TestEqual a0, [0] JumpIfFalse [5] LdaSmi [1] Return LdaSmi [2] Return ``` which is 14 bytes, whereas the `tenary` function generates ``` StackCheck Ldar a0 TestUndetectable JumpIfFalse [7] Ldar a1 TestUndetectable Jump [7] Ldar a1 TestEqualStrict a0, [0] JumpIfToBooleanFalse [5] LdaSmi [1] Return LdaSmi [2] Return ``` which is 24 bytes. So the `naive` version is 40% smaller and requires fewer bytecode dispatches. Bug: chromium:898455, v8:8356 Change-Id: If3961b2518b4438700706b3bd6071d546305e233 Reviewed-on: https://chromium-review.googlesource.com/c/1297315 Reviewed-by: Jaroslav Sevcik <jarin@chromium.org> Commit-Queue: Benedikt Meurer <bmeurer@chromium.org> Cr-Commit-Position: refs/heads/master@{#56948}
2018-10-24 12:09:34 +00:00
Node* LowerCheckReceiverOrNullOrUndefined(Node* node, Node* frame_state);
Node* LowerCheckString(Node* node, Node* frame_state);
Node* LowerCheckSymbol(Node* node, Node* frame_state);
void LowerCheckIf(Node* node, Node* frame_state);
Node* LowerCheckedInt32Add(Node* node, Node* frame_state);
Node* LowerCheckedInt32Sub(Node* node, Node* frame_state);
Node* LowerCheckedInt32Div(Node* node, Node* frame_state);
Node* LowerCheckedInt32Mod(Node* node, Node* frame_state);
Node* LowerCheckedUint32Div(Node* node, Node* frame_state);
Node* LowerCheckedUint32Mod(Node* node, Node* frame_state);
Node* LowerCheckedInt32Mul(Node* node, Node* frame_state);
Node* LowerCheckedInt32ToTaggedSigned(Node* node, Node* frame_state);
Node* LowerCheckedInt64ToInt32(Node* node, Node* frame_state);
Node* LowerCheckedInt64ToTaggedSigned(Node* node, Node* frame_state);
Node* LowerCheckedUint32ToInt32(Node* node, Node* frame_state);
Node* LowerCheckedUint32ToTaggedSigned(Node* node, Node* frame_state);
Node* LowerCheckedUint64ToInt32(Node* node, Node* frame_state);
Node* LowerCheckedUint64ToTaggedSigned(Node* node, Node* frame_state);
Node* LowerCheckedFloat64ToInt32(Node* node, Node* frame_state);
Node* LowerCheckedTaggedSignedToInt32(Node* node, Node* frame_state);
Node* LowerCheckedTaggedToInt32(Node* node, Node* frame_state);
Node* LowerCheckedTaggedToFloat64(Node* node, Node* frame_state);
Node* LowerCheckedTaggedToTaggedSigned(Node* node, Node* frame_state);
Node* LowerCheckedTaggedToTaggedPointer(Node* node, Node* frame_state);
Node* LowerChangeTaggedToFloat64(Node* node);
void TruncateTaggedPointerToBit(Node* node, GraphAssemblerLabel<1>* done);
Node* LowerTruncateTaggedToBit(Node* node);
Node* LowerTruncateTaggedPointerToBit(Node* node);
Node* LowerTruncateTaggedToFloat64(Node* node);
Node* LowerTruncateTaggedToWord32(Node* node);
Node* LowerCheckedTruncateTaggedToWord32(Node* node, Node* frame_state);
Node* LowerAllocate(Node* node);
Node* LowerNumberToString(Node* node);
Node* LowerObjectIsArrayBufferView(Node* node);
Node* LowerObjectIsBigInt(Node* node);
Node* LowerObjectIsCallable(Node* node);
Node* LowerObjectIsConstructor(Node* node);
Node* LowerObjectIsDetectableCallable(Node* node);
[es2015] Optimize Object.is baseline and interesting cases. The Object.is builtin provides an entry point to the abstract operation SameValue, which properly distinguishes -0 and 0, and also identifies NaNs. Most of the time you don't need these, but rather just regular strict equality, but when you do, Object.is(o, -0) is the most readable way to check for minus zero. This is for example used in Node.js by formatNumber to properly print -0 for negative zero. However since the builtin thus far implemented as C++ builtin and TurboFan didn't know anything about it, Node.js considering to go with a more performant, less readable version (which also makes assumptions about the input value) in https://github.com/nodejs/node/pull/15726 until the performance of Object.is will be on par (so hopefully we can go back to Object.is in Node 9). This CL ports the baseline implementation of Object.is to CSA, which is pretty straight-forward since SameValue is already available in CodeStubAssembler, and inlines a few interesting cases into TurboFan, i.e. comparing same SSA node, and checking for -0 and NaN explicitly. On the micro-benchmarks we go from testNumberIsMinusZero: 1000 ms. testObjectIsMinusZero: 929 ms. testObjectIsNaN: 954 ms. testObjectIsSame: 793 ms. testStrictEqualSame: 104 ms. to testNumberIsMinusZero: 89 ms. testObjectIsMinusZero: 88 ms. testObjectIsNaN: 88 ms. testObjectIsSame: 86 ms. testStrictEqualSame: 105 ms. which is a nice 10x to 11x improvement and brings Object.is on par with strict equality for most cases. Drive-by-fix: Also refactor and optimize the SameValue check in the CodeStubAssembler to avoid code bloat (by not inlining StrictEqual into every user of SameValue, and also avoiding useless checks). 
Bug: v8:6882 Change-Id: Ibffd8c36511f219fcce0d89ed4e1073f5d6c6344 Reviewed-on: https://chromium-review.googlesource.com/700254 Reviewed-by: Jaroslav Sevcik <jarin@chromium.org> Commit-Queue: Benedikt Meurer <bmeurer@chromium.org> Cr-Commit-Position: refs/heads/master@{#48275}
2017-10-04 06:25:26 +00:00
Node* LowerObjectIsMinusZero(Node* node);
Node* LowerObjectIsNaN(Node* node);
Node* LowerNumberIsNaN(Node* node);
Node* LowerObjectIsNonCallable(Node* node);
Node* LowerObjectIsNumber(Node* node);
Node* LowerObjectIsReceiver(Node* node);
Node* LowerObjectIsSmi(Node* node);
Node* LowerObjectIsString(Node* node);
Node* LowerObjectIsSymbol(Node* node);
Node* LowerObjectIsUndetectable(Node* node);
Node* LowerNumberIsFloat64Hole(Node* node);
Node* LowerNumberIsFinite(Node* node);
Node* LowerObjectIsFiniteNumber(Node* node);
Node* LowerNumberIsInteger(Node* node);
Node* LowerObjectIsInteger(Node* node);
Node* LowerNumberIsSafeInteger(Node* node);
Node* LowerObjectIsSafeInteger(Node* node);
[turbofan] escape analysis supports arguments object and rest elements The new NewUnmappedArgumentsElements node now takes two inputs: - the frame holding the arguments (current frame or arguments adaptor frame) - the length of the suffix of passed arguments to be copied into the backing store These inputs are computed with two new node types: ArgumentsFrame() ArgumentsLength[formal_parameter_count,is_rest_length](Node* arguments_frame) The node type NewRestParameterElements can now be expressed with NewUnmappedArgumentsElements and an appropriate length and is thus not needed anymore. In escape analysis, we lower loads from the length field of NewUnmappedArgumentsElements with its length input and if we find out that no write access to the arguments elements exists, we replace element loads with direct stack access and replace the NewUnmappedArgumentsElements node with a node of the new node type ArgumentsElementsState. This corresponds to an ObjectState node and gets translated into a deoptimizer instruction to allocate the backing store. Together with the already existing deoptimizer support for the actual arguments object/rest parameters, this allows to remove all allocations for arguments objects/rest parameters in this case. In the deoptimizer, we read the actual parameters from the stack while transforming the static deopt info into TranslatedValue objects. If escape analysis cannot remove the backing store allocation, NewUnmappedArgumentsElements gets lo BUG=v8:5726 Review-Url: https://codereview.chromium.org/2692753004 Cr-Commit-Position: refs/heads/master@{#43475}
2017-02-28 11:16:27 +00:00
Node* LowerArgumentsFrame(Node* node);
Node* LowerArgumentsLength(Node* node);
Node* LowerNewDoubleElements(Node* node);
Node* LowerNewSmiOrObjectElements(Node* node);
Node* LowerNewArgumentsElements(Node* node);
Node* LowerNewConsString(Node* node);
Node* LowerSameValue(Node* node);
Node* LowerDeadValue(Node* node);
Node* LowerStringConcat(Node* node);
Node* LowerStringToNumber(Node* node);
Node* LowerStringCharCodeAt(Node* node);
Node* LowerStringCodePointAt(Node* node, UnicodeEncoding encoding);
Node* LowerStringToLowerCaseIntl(Node* node);
Node* LowerStringToUpperCaseIntl(Node* node);
Node* LowerStringFromSingleCharCode(Node* node);
Node* LowerStringFromSingleCodePoint(Node* node);
Node* LowerStringIndexOf(Node* node);
Node* LowerStringSubstring(Node* node);
Node* LowerStringLength(Node* node);
Node* LowerStringEqual(Node* node);
Node* LowerStringLessThan(Node* node);
Node* LowerStringLessThanOrEqual(Node* node);
Node* LowerCheckFloat64Hole(Node* node, Node* frame_state);
Node* LowerCheckNotTaggedHole(Node* node, Node* frame_state);
Node* LowerConvertTaggedHoleToUndefined(Node* node);
[ic] Ensure that we make progress on KeyedLoadIC polymorphic name. In the special case of KeyedLoadIC, where the key that is passed in is a Name that is always the same we only checked for identity in both the stub and the TurboFan case, which works fine for symbols and internalized strings, but doesn't really work with non-internalized strings, where the identity check will fail, the runtime will internalize the string, and the IC will then see the original internalized string again and not progress in the feedback lattice. This leads to tricky deoptimization loops in TurboFan and constantly missing ICs. This adds fixes the stub to always try to internalize strings first when the identity check fails and then doing the check again. If the name is not found in the string table we miss, since in that case the string cannot match the previously recorded feedback name (which is always a unique name). In TurboFan we represent this checks with new CheckEqualsSymbol and CheckEqualsInternalizedString operators, which validate the previously recorded feedback, and the CheckEqualsInternalizedString operator does the attempt to internalize the input. Bug: v8:6936, v8:6948, v8:6969 Change-Id: I3f3b4a587c67f00f7c4b60d239eb98a9626fe04a Reviewed-on: https://chromium-review.googlesource.com/730224 Reviewed-by: Toon Verwaest <verwaest@chromium.org> Reviewed-by: Jaroslav Sevcik <jarin@chromium.org> Commit-Queue: Benedikt Meurer <bmeurer@chromium.org> Cr-Commit-Position: refs/heads/master@{#48784}
2017-10-20 11:36:26 +00:00
void LowerCheckEqualsInternalizedString(Node* node, Node* frame_state);
void LowerCheckEqualsSymbol(Node* node, Node* frame_state);
Node* LowerTypeOf(Node* node);
Node* LowerToBoolean(Node* node);
Node* LowerPlainPrimitiveToNumber(Node* node);
Node* LowerPlainPrimitiveToWord32(Node* node);
Node* LowerPlainPrimitiveToFloat64(Node* node);
Node* LowerEnsureWritableFastElements(Node* node);
Node* LowerMaybeGrowFastElements(Node* node, Node* frame_state);
void LowerTransitionElementsKind(Node* node);
[turbofan] Optimize fast enum cache driven for..in. This CL adds support to optimize for..in in fast enum-cache mode to the same degree that it was optimized in Crankshaft, without adding the same deoptimization loop that Crankshaft had with missing enum cache indices. That means code like for (var k in o) { var v = o[k]; // ... } and code like for (var k in o) { if (Object.prototype.hasOwnProperty.call(o, k)) { var v = o[k]; // ... } } which follows the https://eslint.org/docs/rules/guard-for-in linter rule, can now utilize the enum cache indices if o has only fast properties on the receiver, which speeds up the access o[k] significantly and reduces the pollution of the global megamorphic stub cache. For example the micro-benchmark in the tracking bug v8:6702 now runs faster than ever before: forIn: 1516 ms. forInHasOwnProperty: 1674 ms. forInHasOwnPropertySafe: 1595 ms. forInSum: 2051 ms. forInSumSafe: 2215 ms. Compared to numbers from V8 5.8 which is the last version running with Crankshaft forIn: 1641 ms. forInHasOwnProperty: 1719 ms. forInHasOwnPropertySafe: 1802 ms. forInSum: 2226 ms. forInSumSafe: 2409 ms. and V8 6.0 which is the current stable version with TurboFan: forIn: 1713 ms. forInHasOwnProperty: 5417 ms. forInHasOwnPropertySafe: 5324 ms. forInSum: 7556 ms. forInSumSafe: 11067 ms. It also improves the throughput on the string-fasta benchmark by around 7-10%, and there seems to be a ~5% improvement on the Speedometer/React benchmark locally. For this to work, the ForInPrepare bytecode was split into ForInEnumerate and ForInPrepare, which is very similar to how it was handled in Fullcodegen initially. In TurboFan we introduce a new operator LoadFieldByIndex that does the dynamic property load. This also removes the CheckMapValue operator again in favor of just using LoadField, ReferenceEqual and CheckIf, which work automatically with the EscapeAnalysis and the BranchConditionElimination. 
Bug: v8:6702 Change-Id: I91235413eea478ba77ace7bd14bb2f62e155dd9a Reviewed-on: https://chromium-review.googlesource.com/645949 Commit-Queue: Benedikt Meurer <bmeurer@chromium.org> Reviewed-by: Yang Guo <yangguo@chromium.org> Reviewed-by: Jaroslav Sevcik <jarin@chromium.org> Reviewed-by: Leszek Swirski <leszeks@chromium.org> Cr-Commit-Position: refs/heads/master@{#47768}
2017-09-01 10:49:06 +00:00
Node* LowerLoadFieldByIndex(Node* node);
Node* LowerLoadTypedElement(Node* node);
Node* LowerLoadDataViewElement(Node* node);
void LowerStoreTypedElement(Node* node);
void LowerStoreDataViewElement(Node* node);
void LowerStoreSignedSmallElement(Node* node);
Node* LowerFindOrderedHashMapEntry(Node* node);
Revert "[turbofan] Introduce FindOrderedHashMapEntryForReceiverKey operator." This reverts commit 877de37676eb2f60dac5e80316935133ef157687. Reason for revert: Looks like this doesn't really move the needle (only w/ high iteration count). So let's not do the extra complexity unless there's a good reason to do so. Original change's description: > [turbofan] Introduce FindOrderedHashMapEntryForReceiverKey operator. > > This optimizes Map#get and Map#has for the case where the key is known > to be a JSReceiver. This generalizes the existing logic for the > FindOrderedHashMapEntryForSigned32Key operator to also deal with > receivers. This gives a nice 33% boost on the map-set-lookup-es6 test > of the six-speed benchmark suite. > > Drive-by-fix: Rename the FindOrderedHashMapEntryForInt32Key operator to > FindOrderedHashMapEntryForSigned32Key to match the naming of the types. > > R=​jarin@chromium.org > > Bug: v8:5267, v8:7001 > Change-Id: Ifab8414f26adee7ec833d8cb94ae0ac49f2c3d35 > Reviewed-on: https://chromium-review.googlesource.com/738180 > Reviewed-by: Jaroslav Sevcik <jarin@chromium.org> > Commit-Queue: Benedikt Meurer <bmeurer@chromium.org> > Cr-Commit-Position: refs/heads/master@{#48938} TBR=jarin@chromium.org,bmeurer@chromium.org Change-Id: Icaf9e22cb3412a97342c4e4cdc422d4aaa2d0ef9 No-Presubmit: true No-Tree-Checks: true No-Try: true Bug: v8:5267, v8:7001 Reviewed-on: https://chromium-review.googlesource.com/738052 Reviewed-by: Benedikt Meurer <bmeurer@chromium.org> Commit-Queue: Benedikt Meurer <bmeurer@chromium.org> Cr-Commit-Position: refs/heads/master@{#48941}
2017-10-25 17:35:15 +00:00
Node* LowerFindOrderedHashMapEntryForInt32Key(Node* node);
void LowerTransitionAndStoreElement(Node* node);
void LowerTransitionAndStoreNumberElement(Node* node);
void LowerTransitionAndStoreNonNumberElement(Node* node);
void LowerRuntimeAbort(Node* node);
Node* LowerConvertReceiver(Node* node);
Node* LowerDateNow(Node* node);
// Lowering of optional operators.
Maybe<Node*> LowerFloat64RoundUp(Node* node);
Maybe<Node*> LowerFloat64RoundDown(Node* node);
Maybe<Node*> LowerFloat64RoundTiesEven(Node* node);
Maybe<Node*> LowerFloat64RoundTruncate(Node* node);
Node* AllocateHeapNumberWithValue(Node* node);
Node* BuildCheckedFloat64ToInt32(CheckForMinusZeroMode mode,
const VectorSlotPair& feedback, Node* value,
Node* frame_state);
Node* BuildCheckedHeapNumberOrOddballToFloat64(CheckTaggedInputMode mode,
const VectorSlotPair& feedback,
Node* value,
Node* frame_state);
Node* BuildReverseBytes(ExternalArrayType type, Node* value);
Node* BuildFloat64RoundDown(Node* value);
Node* BuildFloat64RoundTruncate(Node* input);
Node* BuildUint32Mod(Node* lhs, Node* rhs);
Node* ComputeUnseededHash(Node* value);
Node* LowerStringComparison(Callable const& callable, Node* node);
Node* IsElementsKindGreaterThan(Node* kind, ElementsKind reference_kind);
Node* ChangeInt32ToSmi(Node* value);
Node* ChangeInt32ToIntPtr(Node* value);
Node* ChangeInt64ToSmi(Node* value);
Node* ChangeIntPtrToInt32(Node* value);
Node* ChangeIntPtrToSmi(Node* value);
Node* ChangeUint32ToUintPtr(Node* value);
Node* ChangeUint32ToSmi(Node* value);
Node* ChangeSmiToIntPtr(Node* value);
Node* ChangeSmiToInt32(Node* value);
Node* ChangeSmiToInt64(Node* value);
Node* ObjectIsSmi(Node* value);
Node* LoadFromSeqString(Node* receiver, Node* position, Node* is_one_byte);
Node* SmiMaxValueConstant();
Node* SmiShiftBitsConstant();
void TransitionElementsTo(Node* node, Node* array, ElementsKind from,
ElementsKind to);
Factory* factory() const;
Isolate* isolate() const;
JSGraph* jsgraph() const { return js_graph_; }
Graph* graph() const;
Schedule* schedule() const { return schedule_; }
Zone* temp_zone() const { return temp_zone_; }
CommonOperatorBuilder* common() const;
SimplifiedOperatorBuilder* simplified() const;
MachineOperatorBuilder* machine() const;
GraphAssembler* gasm() { return &graph_assembler_; }
JSGraph* js_graph_;
Schedule* schedule_;
Zone* temp_zone_;
MaskArrayIndexEnable mask_array_index_;
RegionObservability region_observability_ = RegionObservability::kObservable;
SourcePositionTable* source_positions_;
NodeOriginTable* node_origins_;
GraphAssembler graph_assembler_;
Node* frame_state_zapper_; // For tracking down compiler::Node::New crashes.
};
} // namespace compiler
} // namespace internal
} // namespace v8
#endif // V8_COMPILER_EFFECT_CONTROL_LINEARIZER_H_