[regexp] Extract more parts of the regexp compiler
Bug: v8:9359 Change-Id: I06a4ccc53abff25237a1113774a0b17bdf861c86 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1658157 Reviewed-by: Peter Marshall <petermarshall@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#62198}
This commit is contained in:
parent
dbfe54b12b
commit
def9aa5d0a
8
BUILD.gn
8
BUILD.gn
@ -2686,7 +2686,10 @@ v8_source_set("v8_base_without_compiler") {
|
||||
"src/regexp/regexp-ast.h",
|
||||
"src/regexp/regexp-bytecodes.h",
|
||||
"src/regexp/regexp-compiler-tonode.cc",
|
||||
"src/regexp/regexp-compiler.cc",
|
||||
"src/regexp/regexp-compiler.h",
|
||||
"src/regexp/regexp-dotprinter.cc",
|
||||
"src/regexp/regexp-dotprinter.h",
|
||||
"src/regexp/regexp-interpreter.cc",
|
||||
"src/regexp/regexp-interpreter.h",
|
||||
"src/regexp/regexp-macro-assembler-arch.h",
|
||||
@ -2697,6 +2700,7 @@ v8_source_set("v8_base_without_compiler") {
|
||||
"src/regexp/regexp-macro-assembler-tracer.h",
|
||||
"src/regexp/regexp-macro-assembler.cc",
|
||||
"src/regexp/regexp-macro-assembler.h",
|
||||
"src/regexp/regexp-nodes.h",
|
||||
"src/regexp/regexp-parser.cc",
|
||||
"src/regexp/regexp-parser.h",
|
||||
"src/regexp/regexp-stack.cc",
|
||||
@ -2949,6 +2953,10 @@ v8_source_set("v8_base_without_compiler") {
|
||||
"src/objects/elements.cc",
|
||||
"src/objects/objects.cc",
|
||||
"src/parsing/parser.cc",
|
||||
|
||||
# Explicit template instantiation clash (these files are also very large).
|
||||
"src/regexp/regexp-compiler-tonode.cc",
|
||||
"src/regexp/regexp-compiler.cc",
|
||||
]
|
||||
|
||||
if (v8_current_cpu == "x86") {
|
||||
|
@ -75,11 +75,6 @@ int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() {
|
||||
return &register_array_[index];
|
||||
}
|
||||
|
||||
RegExpEngine::CompilationResult::CompilationResult(Isolate* isolate,
|
||||
const char* error_message)
|
||||
: error_message(error_message),
|
||||
code(ReadOnlyRoots(isolate).the_hole_value()) {}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
3714
src/regexp/regexp-compiler.cc
Normal file
3714
src/regexp/regexp-compiler.cc
Normal file
File diff suppressed because it is too large
Load Diff
@ -5,8 +5,8 @@
|
||||
#ifndef V8_REGEXP_REGEXP_COMPILER_H_
|
||||
#define V8_REGEXP_REGEXP_COMPILER_H_
|
||||
|
||||
#include "src/regexp/jsregexp.h" // TODO(jgruber): Remove if possible.
|
||||
#include "src/regexp/regexp-macro-assembler-arch.h"
|
||||
#include "src/regexp/regexp-nodes.h"
|
||||
#include "src/zone/zone-splay-tree.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
@ -37,8 +37,530 @@ constexpr int kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E,
|
||||
0x2028, 0x202A, kRangeEndMarker};
|
||||
constexpr int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
|
||||
|
||||
// More makes code generation slower, less makes V8 benchmark score lower.
|
||||
constexpr int kMaxLookaheadForBoyerMoore = 8;
|
||||
// In a 3-character pattern you can maximally step forwards 3 characters
|
||||
// at a time, which is not always enough to pay for the extra logic.
|
||||
constexpr int kPatternTooShortForBoyerMoore = 2;
|
||||
|
||||
} // namespace regexp_compiler_constants
|
||||
|
||||
// A set of unsigned integers that behaves especially well on small
|
||||
// integers (< 32). May do zone-allocation.
|
||||
class OutSet : public ZoneObject {
|
||||
public:
|
||||
OutSet() : first_(0), remaining_(nullptr), successors_(nullptr) {}
|
||||
OutSet* Extend(unsigned value, Zone* zone);
|
||||
V8_EXPORT_PRIVATE bool Get(unsigned value) const;
|
||||
static const unsigned kFirstLimit = 32;
|
||||
|
||||
private:
|
||||
// Destructively set a value in this set. In most cases you want
|
||||
// to use Extend instead to ensure that only one instance exists
|
||||
// that contains the same values.
|
||||
void Set(unsigned value, Zone* zone);
|
||||
|
||||
// The successors are a list of sets that contain the same values
|
||||
// as this set and the one more value that is not present in this
|
||||
// set.
|
||||
ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
|
||||
|
||||
OutSet(uint32_t first, ZoneList<unsigned>* remaining)
|
||||
: first_(first), remaining_(remaining), successors_(nullptr) {}
|
||||
uint32_t first_;
|
||||
ZoneList<unsigned>* remaining_;
|
||||
ZoneList<OutSet*>* successors_;
|
||||
friend class Trace;
|
||||
};
|
||||
|
||||
// A mapping from integers, specified as ranges, to a set of integers.
|
||||
// Used for mapping character ranges to choices.
|
||||
class DispatchTable : public ZoneObject {
|
||||
public:
|
||||
explicit DispatchTable(Zone* zone) : tree_(zone) {}
|
||||
|
||||
class Entry {
|
||||
public:
|
||||
Entry() : from_(0), to_(0), out_set_(nullptr) {}
|
||||
Entry(uc32 from, uc32 to, OutSet* out_set)
|
||||
: from_(from), to_(to), out_set_(out_set) {
|
||||
DCHECK(from <= to);
|
||||
}
|
||||
uc32 from() { return from_; }
|
||||
uc32 to() { return to_; }
|
||||
void set_to(uc32 value) { to_ = value; }
|
||||
void AddValue(int value, Zone* zone) {
|
||||
out_set_ = out_set_->Extend(value, zone);
|
||||
}
|
||||
OutSet* out_set() { return out_set_; }
|
||||
|
||||
private:
|
||||
uc32 from_;
|
||||
uc32 to_;
|
||||
OutSet* out_set_;
|
||||
};
|
||||
|
||||
class Config {
|
||||
public:
|
||||
using Key = uc32;
|
||||
using Value = Entry;
|
||||
static const uc32 kNoKey;
|
||||
static const Entry NoValue() { return Value(); }
|
||||
static inline int Compare(uc32 a, uc32 b) {
|
||||
if (a == b)
|
||||
return 0;
|
||||
else if (a < b)
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
V8_EXPORT_PRIVATE void AddRange(CharacterRange range, int value, Zone* zone);
|
||||
V8_EXPORT_PRIVATE OutSet* Get(uc32 value);
|
||||
void Dump();
|
||||
|
||||
template <typename Callback>
|
||||
void ForEach(Callback* callback) {
|
||||
return tree()->ForEach(callback);
|
||||
}
|
||||
|
||||
private:
|
||||
// There can't be a static empty set since it allocates its
|
||||
// successors in a zone and caches them.
|
||||
OutSet* empty() { return &empty_; }
|
||||
OutSet empty_;
|
||||
ZoneSplayTree<Config>* tree() { return &tree_; }
|
||||
ZoneSplayTree<Config> tree_;
|
||||
};
|
||||
|
||||
// Node visitor used to add the start set of the alternatives to the
|
||||
// dispatch table of a choice node.
|
||||
class V8_EXPORT_PRIVATE DispatchTableConstructor : public NodeVisitor {
|
||||
public:
|
||||
DispatchTableConstructor(DispatchTable* table, bool ignore_case, Zone* zone)
|
||||
: table_(table),
|
||||
choice_index_(-1),
|
||||
ignore_case_(ignore_case),
|
||||
zone_(zone) {}
|
||||
|
||||
void BuildTable(ChoiceNode* node);
|
||||
|
||||
void AddRange(CharacterRange range) {
|
||||
table()->AddRange(range, choice_index_, zone_);
|
||||
}
|
||||
|
||||
void AddInverse(ZoneList<CharacterRange>* ranges);
|
||||
|
||||
#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
|
||||
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
#undef DECLARE_VISIT
|
||||
|
||||
DispatchTable* table() { return table_; }
|
||||
void set_choice_index(int value) { choice_index_ = value; }
|
||||
|
||||
protected:
|
||||
DispatchTable* table_;
|
||||
int choice_index_;
|
||||
bool ignore_case_;
|
||||
Zone* zone_;
|
||||
};
|
||||
|
||||
// Details of a quick mask-compare check that can look ahead in the
|
||||
// input stream.
|
||||
class QuickCheckDetails {
|
||||
public:
|
||||
QuickCheckDetails()
|
||||
: characters_(0), mask_(0), value_(0), cannot_match_(false) {}
|
||||
explicit QuickCheckDetails(int characters)
|
||||
: characters_(characters), mask_(0), value_(0), cannot_match_(false) {}
|
||||
bool Rationalize(bool one_byte);
|
||||
// Merge in the information from another branch of an alternation.
|
||||
void Merge(QuickCheckDetails* other, int from_index);
|
||||
// Advance the current position by some amount.
|
||||
void Advance(int by, bool one_byte);
|
||||
void Clear();
|
||||
bool cannot_match() { return cannot_match_; }
|
||||
void set_cannot_match() { cannot_match_ = true; }
|
||||
struct Position {
|
||||
Position() : mask(0), value(0), determines_perfectly(false) {}
|
||||
uc16 mask;
|
||||
uc16 value;
|
||||
bool determines_perfectly;
|
||||
};
|
||||
int characters() { return characters_; }
|
||||
void set_characters(int characters) { characters_ = characters; }
|
||||
Position* positions(int index) {
|
||||
DCHECK_LE(0, index);
|
||||
DCHECK_GT(characters_, index);
|
||||
return positions_ + index;
|
||||
}
|
||||
uint32_t mask() { return mask_; }
|
||||
uint32_t value() { return value_; }
|
||||
|
||||
private:
|
||||
// How many characters do we have quick check information from. This is
|
||||
// the same for all branches of a choice node.
|
||||
int characters_;
|
||||
Position positions_[4];
|
||||
// These values are the condensate of the above array after Rationalize().
|
||||
uint32_t mask_;
|
||||
uint32_t value_;
|
||||
// If set to true, there is no way this quick check can match at all.
|
||||
// E.g., if it requires to be at the start of the input, and isn't.
|
||||
bool cannot_match_;
|
||||
};
|
||||
|
||||
// Improve the speed that we scan for an initial point where a non-anchored
|
||||
// regexp can match by using a Boyer-Moore-like table. This is done by
|
||||
// identifying non-greedy non-capturing loops in the nodes that eat any
|
||||
// character one at a time. For example in the middle of the regexp
|
||||
// /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly
|
||||
// inserted at the start of any non-anchored regexp.
|
||||
//
|
||||
// When we have found such a loop we look ahead in the nodes to find the set of
|
||||
// characters that can come at given distances. For example for the regexp
|
||||
// /.?foo/ we know that there are at least 3 characters ahead of us, and the
|
||||
// sets of characters that can occur are [any, [f, o], [o]]. We find a range in
|
||||
// the lookahead info where the set of characters is reasonably constrained. In
|
||||
// our example this is from index 1 to 2 (0 is not constrained). We can now
|
||||
// look 3 characters ahead and if we don't find one of [f, o] (the union of
|
||||
// [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
|
||||
//
|
||||
// For Unicode input strings we do the same, but modulo 128.
|
||||
//
|
||||
// We also look at the first string fed to the regexp and use that to get a hint
|
||||
// of the character frequencies in the inputs. This affects the assessment of
|
||||
// whether the set of characters is 'reasonably constrained'.
|
||||
//
|
||||
// We also have another lookahead mechanism (called quick check in the code),
|
||||
// which uses a wide load of multiple characters followed by a mask and compare
|
||||
// to determine whether a match is possible at this point.
|
||||
// Two-bit lattice: bit 0 records "in", bit 1 records "out".  Having both bits
// set yields kLatticeUnknown.
enum ContainedInLattice {
  kNotYet = 0,
  kLatticeIn = 1,
  kLatticeOut = 2,
  kLatticeUnknown = 3  // Can also mean both in and out.
};

// Joins two lattice values by OR-ing their bit patterns.
inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
  const int merged = static_cast<int>(a) | static_cast<int>(b);
  return static_cast<ContainedInLattice>(merged);
}
|
||||
|
||||
ContainedInLattice AddRange(ContainedInLattice a, const int* ranges,
|
||||
int ranges_size, Interval new_range);
|
||||
|
||||
class BoyerMoorePositionInfo : public ZoneObject {
|
||||
public:
|
||||
explicit BoyerMoorePositionInfo(Zone* zone)
|
||||
: map_(new (zone) ZoneList<bool>(kMapSize, zone)),
|
||||
map_count_(0),
|
||||
w_(kNotYet),
|
||||
s_(kNotYet),
|
||||
d_(kNotYet),
|
||||
surrogate_(kNotYet) {
|
||||
for (int i = 0; i < kMapSize; i++) {
|
||||
map_->Add(false, zone);
|
||||
}
|
||||
}
|
||||
|
||||
bool& at(int i) { return map_->at(i); }
|
||||
|
||||
static const int kMapSize = 128;
|
||||
static const int kMask = kMapSize - 1;
|
||||
|
||||
int map_count() const { return map_count_; }
|
||||
|
||||
void Set(int character);
|
||||
void SetInterval(const Interval& interval);
|
||||
void SetAll();
|
||||
bool is_non_word() { return w_ == kLatticeOut; }
|
||||
bool is_word() { return w_ == kLatticeIn; }
|
||||
|
||||
private:
|
||||
ZoneList<bool>* map_;
|
||||
int map_count_; // Number of set bits in the map.
|
||||
ContainedInLattice w_; // The \w character class.
|
||||
ContainedInLattice s_; // The \s character class.
|
||||
ContainedInLattice d_; // The \d character class.
|
||||
ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
|
||||
};
|
||||
|
||||
class BoyerMooreLookahead : public ZoneObject {
|
||||
public:
|
||||
BoyerMooreLookahead(int length, RegExpCompiler* compiler, Zone* zone);
|
||||
|
||||
int length() { return length_; }
|
||||
int max_char() { return max_char_; }
|
||||
RegExpCompiler* compiler() { return compiler_; }
|
||||
|
||||
int Count(int map_number) { return bitmaps_->at(map_number)->map_count(); }
|
||||
|
||||
BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); }
|
||||
|
||||
void Set(int map_number, int character) {
|
||||
if (character > max_char_) return;
|
||||
BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
|
||||
info->Set(character);
|
||||
}
|
||||
|
||||
void SetInterval(int map_number, const Interval& interval) {
|
||||
if (interval.from() > max_char_) return;
|
||||
BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
|
||||
if (interval.to() > max_char_) {
|
||||
info->SetInterval(Interval(interval.from(), max_char_));
|
||||
} else {
|
||||
info->SetInterval(interval);
|
||||
}
|
||||
}
|
||||
|
||||
void SetAll(int map_number) { bitmaps_->at(map_number)->SetAll(); }
|
||||
|
||||
void SetRest(int from_map) {
|
||||
for (int i = from_map; i < length_; i++) SetAll(i);
|
||||
}
|
||||
void EmitSkipInstructions(RegExpMacroAssembler* masm);
|
||||
|
||||
private:
|
||||
// This is the value obtained by EatsAtLeast. If we do not have at least this
|
||||
// many characters left in the sample string then the match is bound to fail.
|
||||
// Therefore it is OK to read a character this far ahead of the current match
|
||||
// point.
|
||||
int length_;
|
||||
RegExpCompiler* compiler_;
|
||||
// 0xff for Latin1, 0xffff for UTF-16.
|
||||
int max_char_;
|
||||
ZoneList<BoyerMoorePositionInfo*>* bitmaps_;
|
||||
|
||||
int GetSkipTable(int min_lookahead, int max_lookahead,
|
||||
Handle<ByteArray> boolean_skip_table);
|
||||
bool FindWorthwhileInterval(int* from, int* to);
|
||||
int FindBestInterval(int max_number_of_chars, int old_biggest_points,
|
||||
int* from, int* to);
|
||||
};
|
||||
|
||||
// There are many ways to generate code for a node. This class encapsulates
|
||||
// the current way we should be generating. In other words it encapsulates
|
||||
// the current state of the code generator. The effect of this is that we
|
||||
// generate code for paths that the matcher can take through the regular
|
||||
// expression. A given node in the regexp can be code-generated several times
|
||||
// as it can be part of several traces. For example for the regexp:
|
||||
// /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
|
||||
// of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code
|
||||
// to match foo is generated only once (the traces have a common prefix). The
|
||||
// code to store the capture is deferred and generated (twice) after the places
|
||||
// where baz has been matched.
|
||||
class Trace {
|
||||
public:
|
||||
// A value for a property that is either known to be true, know to be false,
|
||||
// or not known.
|
||||
enum TriBool { UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1 };
|
||||
|
||||
class DeferredAction {
|
||||
public:
|
||||
DeferredAction(ActionNode::ActionType action_type, int reg)
|
||||
: action_type_(action_type), reg_(reg), next_(nullptr) {}
|
||||
DeferredAction* next() { return next_; }
|
||||
bool Mentions(int reg);
|
||||
int reg() { return reg_; }
|
||||
ActionNode::ActionType action_type() { return action_type_; }
|
||||
|
||||
private:
|
||||
ActionNode::ActionType action_type_;
|
||||
int reg_;
|
||||
DeferredAction* next_;
|
||||
friend class Trace;
|
||||
};
|
||||
|
||||
class DeferredCapture : public DeferredAction {
|
||||
public:
|
||||
DeferredCapture(int reg, bool is_capture, Trace* trace)
|
||||
: DeferredAction(ActionNode::STORE_POSITION, reg),
|
||||
cp_offset_(trace->cp_offset()),
|
||||
is_capture_(is_capture) {}
|
||||
int cp_offset() { return cp_offset_; }
|
||||
bool is_capture() { return is_capture_; }
|
||||
|
||||
private:
|
||||
int cp_offset_;
|
||||
bool is_capture_;
|
||||
void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
|
||||
};
|
||||
|
||||
class DeferredSetRegister : public DeferredAction {
|
||||
public:
|
||||
DeferredSetRegister(int reg, int value)
|
||||
: DeferredAction(ActionNode::SET_REGISTER, reg), value_(value) {}
|
||||
int value() { return value_; }
|
||||
|
||||
private:
|
||||
int value_;
|
||||
};
|
||||
|
||||
class DeferredClearCaptures : public DeferredAction {
|
||||
public:
|
||||
explicit DeferredClearCaptures(Interval range)
|
||||
: DeferredAction(ActionNode::CLEAR_CAPTURES, -1), range_(range) {}
|
||||
Interval range() { return range_; }
|
||||
|
||||
private:
|
||||
Interval range_;
|
||||
};
|
||||
|
||||
class DeferredIncrementRegister : public DeferredAction {
|
||||
public:
|
||||
explicit DeferredIncrementRegister(int reg)
|
||||
: DeferredAction(ActionNode::INCREMENT_REGISTER, reg) {}
|
||||
};
|
||||
|
||||
Trace()
|
||||
: cp_offset_(0),
|
||||
actions_(nullptr),
|
||||
backtrack_(nullptr),
|
||||
stop_node_(nullptr),
|
||||
loop_label_(nullptr),
|
||||
characters_preloaded_(0),
|
||||
bound_checked_up_to_(0),
|
||||
flush_budget_(100),
|
||||
at_start_(UNKNOWN) {}
|
||||
|
||||
// End the trace. This involves flushing the deferred actions in the trace
|
||||
// and pushing a backtrack location onto the backtrack stack. Once this is
|
||||
// done we can start a new trace or go to one that has already been
|
||||
// generated.
|
||||
void Flush(RegExpCompiler* compiler, RegExpNode* successor);
|
||||
int cp_offset() { return cp_offset_; }
|
||||
DeferredAction* actions() { return actions_; }
|
||||
// A trivial trace is one that has no deferred actions or other state that
|
||||
// affects the assumptions used when generating code. There is no recorded
|
||||
// backtrack location in a trivial trace, so with a trivial trace we will
|
||||
// generate code that, on a failure to match, gets the backtrack location
|
||||
// from the backtrack stack rather than using a direct jump instruction. We
|
||||
// always start code generation with a trivial trace and non-trivial traces
|
||||
// are created as we emit code for nodes or add to the list of deferred
|
||||
// actions in the trace. The location of the code generated for a node using
|
||||
// a trivial trace is recorded in a label in the node so that gotos can be
|
||||
// generated to that code.
|
||||
bool is_trivial() {
|
||||
return backtrack_ == nullptr && actions_ == nullptr && cp_offset_ == 0 &&
|
||||
characters_preloaded_ == 0 && bound_checked_up_to_ == 0 &&
|
||||
quick_check_performed_.characters() == 0 && at_start_ == UNKNOWN;
|
||||
}
|
||||
TriBool at_start() { return at_start_; }
|
||||
void set_at_start(TriBool at_start) { at_start_ = at_start; }
|
||||
Label* backtrack() { return backtrack_; }
|
||||
Label* loop_label() { return loop_label_; }
|
||||
RegExpNode* stop_node() { return stop_node_; }
|
||||
int characters_preloaded() { return characters_preloaded_; }
|
||||
int bound_checked_up_to() { return bound_checked_up_to_; }
|
||||
int flush_budget() { return flush_budget_; }
|
||||
QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
|
||||
bool mentions_reg(int reg);
|
||||
// Returns true if a deferred position store exists to the specified
|
||||
// register and stores the offset in the out-parameter. Otherwise
|
||||
// returns false.
|
||||
bool GetStoredPosition(int reg, int* cp_offset);
|
||||
// These set methods and AdvanceCurrentPositionInTrace should be used only on
|
||||
// new traces - the intention is that traces are immutable after creation.
|
||||
void add_action(DeferredAction* new_action) {
|
||||
DCHECK(new_action->next_ == nullptr);
|
||||
new_action->next_ = actions_;
|
||||
actions_ = new_action;
|
||||
}
|
||||
void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
|
||||
void set_stop_node(RegExpNode* node) { stop_node_ = node; }
|
||||
void set_loop_label(Label* label) { loop_label_ = label; }
|
||||
void set_characters_preloaded(int count) { characters_preloaded_ = count; }
|
||||
void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
|
||||
void set_flush_budget(int to) { flush_budget_ = to; }
|
||||
void set_quick_check_performed(QuickCheckDetails* d) {
|
||||
quick_check_performed_ = *d;
|
||||
}
|
||||
void InvalidateCurrentCharacter();
|
||||
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
|
||||
|
||||
private:
|
||||
int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
|
||||
void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register,
|
||||
const OutSet& affected_registers,
|
||||
OutSet* registers_to_pop,
|
||||
OutSet* registers_to_clear, Zone* zone);
|
||||
void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register,
|
||||
const OutSet& registers_to_pop,
|
||||
const OutSet& registers_to_clear);
|
||||
int cp_offset_;
|
||||
DeferredAction* actions_;
|
||||
Label* backtrack_;
|
||||
RegExpNode* stop_node_;
|
||||
Label* loop_label_;
|
||||
int characters_preloaded_;
|
||||
int bound_checked_up_to_;
|
||||
QuickCheckDetails quick_check_performed_;
|
||||
int flush_budget_;
|
||||
TriBool at_start_;
|
||||
};
|
||||
|
||||
class GreedyLoopState {
|
||||
public:
|
||||
explicit GreedyLoopState(bool not_at_start);
|
||||
|
||||
Label* label() { return &label_; }
|
||||
Trace* counter_backtrack_trace() { return &counter_backtrack_trace_; }
|
||||
|
||||
private:
|
||||
Label label_;
|
||||
Trace counter_backtrack_trace_;
|
||||
};
|
||||
|
||||
// Plain record of preload bookkeeping.
struct PreloadState {
  // Sentinel stored in eats_at_least_ by init().
  static const int kEatsAtLeastNotYetInitialized = -1;

  // Resets eats_at_least_ to the sentinel.  The other fields are left
  // untouched.
  void init() { eats_at_least_ = kEatsAtLeastNotYetInitialized; }

  bool preload_is_current_;
  bool preload_has_checked_bounds_;
  int preload_characters_;
  int eats_at_least_;
};
|
||||
|
||||
// Assertion propagation moves information about assertions such as
|
||||
// \b to the affected nodes. For instance, in /.\b./ information must
|
||||
// be propagated to the first '.' that whatever follows needs to know
|
||||
// if it matched a word or a non-word, and to the second '.' that it
|
||||
// has to check if it succeeds a word or non-word. In this case the
|
||||
// result will be something like:
|
||||
//
|
||||
// +-------+ +------------+
|
||||
// | . | | . |
|
||||
// +-------+ ---> +------------+
|
||||
// | word? | | check word |
|
||||
// +-------+ +------------+
|
||||
class Analysis : public NodeVisitor {
|
||||
public:
|
||||
Analysis(Isolate* isolate, bool is_one_byte)
|
||||
: isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
|
||||
void EnsureAnalyzed(RegExpNode* node);
|
||||
|
||||
#define DECLARE_VISIT(Type) void Visit##Type(Type##Node* that) override;
|
||||
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
#undef DECLARE_VISIT
|
||||
void VisitLoopChoice(LoopChoiceNode* that) override;
|
||||
|
||||
bool has_failed() { return error_message_ != nullptr; }
|
||||
const char* error_message() {
|
||||
DCHECK(error_message_ != nullptr);
|
||||
return error_message_;
|
||||
}
|
||||
void fail(const char* error_message) { error_message_ = error_message; }
|
||||
|
||||
Isolate* isolate() const { return isolate_; }
|
||||
|
||||
private:
|
||||
Isolate* isolate_;
|
||||
bool is_one_byte_;
|
||||
const char* error_message_;
|
||||
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
|
||||
};
|
||||
|
||||
class FrequencyCollator {
|
||||
public:
|
||||
FrequencyCollator() : total_samples_(0) {
|
||||
@ -113,10 +635,30 @@ class RegExpCompiler {
|
||||
return unicode_lookaround_position_register_;
|
||||
}
|
||||
|
||||
RegExpEngine::CompilationResult Assemble(Isolate* isolate,
|
||||
RegExpMacroAssembler* assembler,
|
||||
RegExpNode* start, int capture_count,
|
||||
Handle<String> pattern);
|
||||
struct CompilationResult final {
|
||||
explicit CompilationResult(const char* error_message)
|
||||
: error_message(error_message) {}
|
||||
CompilationResult(Object code, int registers)
|
||||
: code(code), num_registers(registers) {}
|
||||
|
||||
static CompilationResult RegExpTooBig() {
|
||||
return CompilationResult("RegExp too big");
|
||||
}
|
||||
|
||||
const char* const error_message = nullptr;
|
||||
Object code;
|
||||
int num_registers = 0;
|
||||
};
|
||||
|
||||
CompilationResult Assemble(Isolate* isolate, RegExpMacroAssembler* assembler,
|
||||
RegExpNode* start, int capture_count,
|
||||
Handle<String> pattern);
|
||||
|
||||
// If the regexp matching starts within a surrogate pair, step back to the
|
||||
// lead surrogate and start matching from there.
|
||||
static RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
JSRegExp::Flags flags);
|
||||
|
||||
inline void AddWork(RegExpNode* node) {
|
||||
if (!node->on_work_list() && !node->label()->is_bound()) {
|
||||
|
339
src/regexp/regexp-dotprinter.cc
Normal file
339
src/regexp/regexp-dotprinter.cc
Normal file
@ -0,0 +1,339 @@
|
||||
// Copyright 2019 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/regexp/regexp-dotprinter.h"
|
||||
|
||||
#include "src/regexp/regexp-compiler.h"
|
||||
#include "src/utils/ostreams.h"
|
||||
#include "src/utils/splay-tree-inl.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Dot/dotty output
|
||||
|
||||
#ifdef DEBUG
|
||||
|
||||
class DotPrinterImpl : public NodeVisitor {
|
||||
public:
|
||||
DotPrinterImpl(std::ostream& os, bool ignore_case) // NOLINT
|
||||
: os_(os), ignore_case_(ignore_case) {}
|
||||
void PrintNode(const char* label, RegExpNode* node);
|
||||
void Visit(RegExpNode* node);
|
||||
void PrintAttributes(RegExpNode* from);
|
||||
void PrintOnFailure(RegExpNode* from, RegExpNode* to);
|
||||
#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
|
||||
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
#undef DECLARE_VISIT
|
||||
private:
|
||||
std::ostream& os_;
|
||||
bool ignore_case_;
|
||||
};
|
||||
|
||||
// Writes a complete DOT graph: the "digraph" header with |label| as the
// graph label, the nodes reachable from |node|, and the closing brace.
// Backslashes and double quotes in |label| are escaped so that the label
// stays a single well-formed DOT string.
void DotPrinterImpl::PrintNode(const char* label, RegExpNode* node) {
  os_ << "digraph G {\n graph [label=\"";
  for (int i = 0; label[i]; i++) {
    switch (label[i]) {
      case '\\':
        os_ << "\\\\";
        break;
      case '"':
        // Must be written as backslash + quote; a bare quote would close the
        // label string early and corrupt the rest of the graph statement.
        os_ << "\\\"";
        break;
      default:
        os_ << label[i];
        break;
    }
  }
  os_ << "\"];\n";
  Visit(node);
  os_ << "}" << std::endl;
}
|
||||
|
||||
// Dispatches to the node's Accept() the first time the node is seen, and
// marks it visited beforehand so that repeated calls are no-ops.
void DotPrinterImpl::Visit(RegExpNode* node) {
  auto* node_info = node->info();
  if (!node_info->visited) {
    node_info->visited = true;
    node->Accept(this);
  }
}
|
||||
|
||||
// Writes a dotted edge from |from| to |on_failure| and then visits the
// failure target.
void DotPrinterImpl::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
  os_ << " n" << from;
  os_ << " -> n" << on_failure;
  os_ << " [style=dotted];\n";
  Visit(on_failure);
}
|
||||
|
||||
class TableEntryBodyPrinter {
|
||||
public:
|
||||
TableEntryBodyPrinter(std::ostream& os, ChoiceNode* choice) // NOLINT
|
||||
: os_(os), choice_(choice) {}
|
||||
void Call(uc16 from, DispatchTable::Entry entry) {
|
||||
OutSet* out_set = entry.out_set();
|
||||
for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
|
||||
if (out_set->Get(i)) {
|
||||
os_ << " n" << choice() << ":s" << from << "o" << i << " -> n"
|
||||
<< choice()->alternatives()->at(i).node() << ";\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
ChoiceNode* choice() { return choice_; }
|
||||
std::ostream& os_;
|
||||
ChoiceNode* choice_;
|
||||
};
|
||||
|
||||
class TableEntryHeaderPrinter {
|
||||
public:
|
||||
explicit TableEntryHeaderPrinter(std::ostream& os) // NOLINT
|
||||
: first_(true), os_(os) {}
|
||||
void Call(uc16 from, DispatchTable::Entry entry) {
|
||||
if (first_) {
|
||||
first_ = false;
|
||||
} else {
|
||||
os_ << "|";
|
||||
}
|
||||
os_ << "{\\" << AsUC16(from) << "-\\" << AsUC16(entry.to()) << "|{";
|
||||
OutSet* out_set = entry.out_set();
|
||||
int priority = 0;
|
||||
for (unsigned i = 0; i < OutSet::kFirstLimit; i++) {
|
||||
if (out_set->Get(i)) {
|
||||
if (priority > 0) os_ << "|";
|
||||
os_ << "<s" << from << "o" << i << "> " << priority;
|
||||
priority++;
|
||||
}
|
||||
}
|
||||
os_ << "}}";
|
||||
}
|
||||
|
||||
private:
|
||||
bool first_;
|
||||
std::ostream& os_;
|
||||
};
|
||||
|
||||
// Writes "|"-separated record fields to a stream.  No separator is written
// before the first field.
class AttributePrinter {
 public:
  explicit AttributePrinter(std::ostream& os)  // NOLINT
      : os_(os), first_(true) {}

  // Writes "|" unless this is the first call.
  void PrintSeparator() {
    if (!first_) os_ << "|";
    first_ = false;
  }

  // Writes "{name}" when |value| is true; otherwise writes nothing.
  void PrintBit(const char* name, bool value) {
    if (value) {
      PrintSeparator();
      os_ << "{" << name << "}";
    }
  }

  // Writes "{name|value}" when |value| is not negative; otherwise writes
  // nothing.
  void PrintPositive(const char* name, int value) {
    if (value >= 0) {
      PrintSeparator();
      os_ << "{" << name << "|" << value << "}";
    }
  }

 private:
  std::ostream& os_;
  bool first_;
};
|
||||
|
||||
// Writes a grey attribute record "a<node>" listing the node's interest bits
// and, if its label is bound, the label position, then links the record to
// the node with a dashed edge.
void DotPrinterImpl::PrintAttributes(RegExpNode* that) {
  os_ << " a" << that << " [shape=Mrecord, color=grey, fontcolor=grey, "
      << "margin=0.1, fontsize=10, label=\"{";

  AttributePrinter printer(os_);
  NodeInfo* info = that->info();
  printer.PrintBit("NI", info->follows_newline_interest);
  printer.PrintBit("WI", info->follows_word_interest);
  printer.PrintBit("SI", info->follows_start_interest);

  Label* label = that->label();
  if (label->is_bound()) {
    printer.PrintPositive("@", label->pos());
  }

  os_ << "}\"];\n";
  os_ << " a" << that << " -> n" << that
      << " [style=dashed, color=grey, arrowhead=none];\n";
}
|
||||
|
||||
static const bool kPrintDispatchTable = false;
|
||||
void DotPrinterImpl::VisitChoice(ChoiceNode* that) {
|
||||
if (kPrintDispatchTable) {
|
||||
os_ << " n" << that << " [shape=Mrecord, label=\"";
|
||||
TableEntryHeaderPrinter header_printer(os_);
|
||||
that->GetTable(ignore_case_)->ForEach(&header_printer);
|
||||
os_ << "\"]\n";
|
||||
PrintAttributes(that);
|
||||
TableEntryBodyPrinter body_printer(os_, that);
|
||||
that->GetTable(ignore_case_)->ForEach(&body_printer);
|
||||
} else {
|
||||
os_ << " n" << that << " [shape=Mrecord, label=\"?\"];\n";
|
||||
for (int i = 0; i < that->alternatives()->length(); i++) {
|
||||
GuardedAlternative alt = that->alternatives()->at(i);
|
||||
os_ << " n" << that << " -> n" << alt.node();
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < that->alternatives()->length(); i++) {
|
||||
GuardedAlternative alt = that->alternatives()->at(i);
|
||||
alt.node()->Accept(this);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a double-bordered box for a text node. The label is the
// space-separated list of the node's text elements: atoms are written
// character by character (each code unit cast to char), character classes
// as "[from-to...]" with a leading "^" when negated. Afterwards the
// attribute record and the edge to the successor are printed and the
// successor is visited.
void DotPrinterImpl::VisitText(TextNode* that) {
  Zone* zone = that->zone();
  os_ << " n" << that << " [label=\"";

  const int element_count = that->elements()->length();
  for (int index = 0; index < element_count; index++) {
    if (index != 0) os_ << " ";
    TextElement element = that->elements()->at(index);
    switch (element.text_type()) {
      case TextElement::ATOM: {
        Vector<const uc16> chars = element.atom()->data();
        for (int k = 0; k < chars.length(); k++) {
          os_ << static_cast<char>(chars[k]);
        }
        break;
      }
      case TextElement::CHAR_CLASS: {
        RegExpCharacterClass* char_class = element.char_class();
        os_ << "[";
        if (char_class->is_negated()) os_ << "^";
        for (int k = 0; k < char_class->ranges(zone)->length(); k++) {
          CharacterRange range = char_class->ranges(zone)->at(k);
          os_ << AsUC16(range.from()) << "-" << AsUC16(range.to());
        }
        os_ << "]";
        break;
      }
      default:
        UNREACHABLE();
    }
  }

  os_ << "\", shape=box, peripheries=2];\n";
  PrintAttributes(that);

  RegExpNode* successor = that->on_success();
  os_ << " n" << that << " -> n" << successor << ";\n";
  Visit(that->on_success());
}
|
||||
|
||||
// Emits a double-octagon node labelled "$<start>..$<end>" for a back
// reference, followed by its attribute record and the edge to its
// successor, then visits the successor.
void DotPrinterImpl::VisitBackReference(BackReferenceNode* that) {
  os_ << " n" << that << " [label=\"$" << that->start_register();
  os_ << "..$" << that->end_register() << "\", shape=doubleoctagon];\n";

  PrintAttributes(that);

  os_ << " n" << that << " -> n" << that->on_success() << ";\n";
  Visit(that->on_success());
}
|
||||
|
||||
// Emits a bold point for an end node together with its attribute record.
// No outgoing edge is printed and nothing further is visited.
void DotPrinterImpl::VisitEnd(EndNode* that) {
  os_ << " n" << that;
  os_ << " [style=bold, shape=point];\n";
  PrintAttributes(that);
}
|
||||
|
||||
// Emits a septagon node whose label is the regexp notation of the assertion
// ("$", "^", "\b", "\B" or "(?<=\n)"), followed by the attribute record and
// the edge to the successor, then visits the successor.
void DotPrinterImpl::VisitAssertion(AssertionNode* that) {
  // Pick the label text for the assertion kind; stays null (and nothing is
  // printed between the brackets) if no case applies.
  const char* symbol = nullptr;
  switch (that->assertion_type()) {
    case AssertionNode::AT_END:
      symbol = "$";
      break;
    case AssertionNode::AT_START:
      symbol = "^";
      break;
    case AssertionNode::AT_BOUNDARY:
      symbol = "\\b";
      break;
    case AssertionNode::AT_NON_BOUNDARY:
      symbol = "\\B";
      break;
    case AssertionNode::AFTER_NEWLINE:
      symbol = "(?<=\\n)";
      break;
  }

  os_ << " n" << that << " [";
  if (symbol != nullptr) {
    os_ << "label=\"" << symbol << "\", shape=septagon";
  }
  os_ << "];\n";

  PrintAttributes(that);

  RegExpNode* next = that->on_success();
  os_ << " n" << that << " -> n" << next << ";\n";
  Visit(next);
}
|
||||
|
||||
// Emits a node describing an action. The label and shape depend on the
// action type and are built from the matching member of the node's data
// union. Afterwards the attribute record and the edge to the successor are
// printed and the successor is visited.
void DotPrinterImpl::VisitAction(ActionNode* that) {
  const auto& data = that->data_;

  os_ << " n" << that << " [";
  switch (that->action_type_) {
    case ActionNode::SET_REGISTER: {
      os_ << "label=\"$" << data.u_store_register.reg;
      os_ << ":=" << data.u_store_register.value << "\", shape=octagon";
      break;
    }
    case ActionNode::INCREMENT_REGISTER: {
      os_ << "label=\"$" << data.u_increment_register.reg;
      os_ << "++\", shape=octagon";
      break;
    }
    case ActionNode::STORE_POSITION: {
      os_ << "label=\"$" << data.u_position_register.reg;
      os_ << ":=$pos\", shape=octagon";
      break;
    }
    case ActionNode::BEGIN_SUBMATCH: {
      os_ << "label=\"$" << data.u_submatch.current_position_register;
      os_ << ":=$pos,begin\", shape=septagon";
      break;
    }
    case ActionNode::POSITIVE_SUBMATCH_SUCCESS: {
      os_ << "label=\"escape\", shape=septagon";
      break;
    }
    case ActionNode::EMPTY_MATCH_CHECK: {
      os_ << "label=\"$" << data.u_empty_match_check.start_register;
      os_ << "=$pos?,$" << data.u_empty_match_check.repetition_register;
      os_ << "<" << data.u_empty_match_check.repetition_limit;
      os_ << "?\", shape=septagon";
      break;
    }
    case ActionNode::CLEAR_CAPTURES: {
      os_ << "label=\"clear $" << data.u_clear_captures.range_from;
      os_ << " to $" << data.u_clear_captures.range_to;
      os_ << "\", shape=septagon";
      break;
    }
  }
  os_ << "];\n";

  PrintAttributes(that);

  RegExpNode* next = that->on_success();
  os_ << " n" << that << " -> n" << next << ";\n";
  Visit(next);
}
|
||||
|
||||
class DispatchTableDumper {
|
||||
public:
|
||||
explicit DispatchTableDumper(std::ostream& os) : os_(os) {}
|
||||
void Call(uc16 key, DispatchTable::Entry entry);
|
||||
|
||||
private:
|
||||
std::ostream& os_;
|
||||
};
|
||||
|
||||
// Writes one line of the form "[key-to]: {i, j, ...}" listing the indices
// below OutSet::kFirstLimit that are present in the entry's out set.
void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
  os_ << "[" << AsUC16(key) << "-" << AsUC16(entry.to()) << "]: {";

  OutSet* out_set = entry.out_set();
  // Empty before the first printed index, ", " before every later one.
  const char* separator = "";
  for (unsigned index = 0; index < OutSet::kFirstLimit; index++) {
    if (!out_set->Get(index)) continue;
    os_ << separator << index;
    separator = ", ";
  }

  os_ << "}\n";
}
|
||||
|
||||
void DispatchTable::Dump() {
|
||||
OFStream os(stderr);
|
||||
DispatchTableDumper dumper(os);
|
||||
tree()->ForEach(&dumper);
|
||||
}
|
||||
|
||||
#endif // DEBUG
|
||||
|
||||
// Prints the graph rooted at |node| to stdout under the given |label|.
// The body is compiled only in DEBUG builds; otherwise this is a no-op.
void DotPrinter::DotPrint(const char* label, RegExpNode* node,
                          bool ignore_case) {
#ifdef DEBUG
  StdoutStream out;
  DotPrinterImpl impl(out, ignore_case);
  impl.PrintNode(label, node);
#endif  // DEBUG
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
23
src/regexp/regexp-dotprinter.h
Normal file
23
src/regexp/regexp-dotprinter.h
Normal file
@ -0,0 +1,23 @@
|
||||
// Copyright 2019 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_REGEXP_REGEXP_DOTPRINTER_H_
|
||||
#define V8_REGEXP_REGEXP_DOTPRINTER_H_
|
||||
|
||||
#include "src/common/globals.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
class RegExpNode;
|
||||
|
||||
class DotPrinter final : public AllStatic {
|
||||
public:
|
||||
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#endif // V8_REGEXP_REGEXP_DOTPRINTER_H_
|
678
src/regexp/regexp-nodes.h
Normal file
678
src/regexp/regexp-nodes.h
Normal file
@ -0,0 +1,678 @@
|
||||
// Copyright 2019 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_REGEXP_REGEXP_NODES_H_
|
||||
#define V8_REGEXP_REGEXP_NODES_H_
|
||||
|
||||
#include "src/regexp/regexp-macro-assembler.h"
|
||||
#include "src/zone/zone.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
class AlternativeGenerationList;
|
||||
class BoyerMooreLookahead;
|
||||
class DispatchTable;
|
||||
class GreedyLoopState;
|
||||
class Label;
|
||||
class NodeVisitor;
|
||||
class QuickCheckDetails;
|
||||
class RegExpCompiler;
|
||||
class Trace;
|
||||
struct PreloadState;
|
||||
|
||||
// Applies VISIT once to each node type name: End, Action, Choice,
// BackReference, Assertion and Text.
// NOTE(review): presumably used to generate per-type declarations (e.g. the
// visitor's Visit* methods) - confirm against the users of this macro.
#define FOR_EACH_NODE_TYPE(VISIT) \
|
||||
VISIT(End) \
|
||||
VISIT(Action) \
|
||||
VISIT(Choice) \
|
||||
VISIT(BackReference) \
|
||||
VISIT(Assertion) \
|
||||
VISIT(Text)
|
||||
|
||||
// Per-node bookkeeping flags, packed into single-bit fields. All flags start
// out false.
struct NodeInfo final {
  NodeInfo()
      : being_analyzed(false),
        been_analyzed(false),
        follows_word_interest(false),
        follows_newline_interest(false),
        follows_start_interest(false),
        at_end(false),
        visited(false),
        replacement_calculated(false) {}

  // Returns true when this node and |that| agree on at_end and on all three
  // follows_*_interest flags.
  bool Matches(NodeInfo* that) {
    if (at_end != that->at_end) return false;
    if (follows_word_interest != that->follows_word_interest) return false;
    if (follows_newline_interest != that->follows_newline_interest) {
      return false;
    }
    return follows_start_interest == that->follows_start_interest;
  }

  // Merges in the interests of the node preceding this one: at_end and the
  // three follows_*_interest flags are OR-ed with those of |that|.
  void AddFromPreceding(NodeInfo* that) {
    at_end = at_end || that->at_end;
    follows_word_interest =
        follows_word_interest || that->follows_word_interest;
    follows_newline_interest =
        follows_newline_interest || that->follows_newline_interest;
    follows_start_interest =
        follows_start_interest || that->follows_start_interest;
  }

  // True if any of the follows_*_interest flags is set.
  bool HasLookbehind() {
    if (follows_word_interest) return true;
    if (follows_newline_interest) return true;
    return follows_start_interest;
  }

  // Merges in the interests of the node following this one: only the three
  // follows_*_interest flags are OR-ed; at_end is left untouched.
  void AddFromFollowing(NodeInfo* that) {
    follows_word_interest =
        follows_word_interest || that->follows_word_interest;
    follows_newline_interest =
        follows_newline_interest || that->follows_newline_interest;
    follows_start_interest =
        follows_start_interest || that->follows_start_interest;
  }

  // Clears the two analysis-progress flags; all other flags are kept.
  void ResetCompilationState() {
    been_analyzed = false;
    being_analyzed = false;
  }

  bool being_analyzed : 1;
  bool been_analyzed : 1;

  // These bits are set if this node has to know what the preceding
  // character was.
  bool follows_word_interest : 1;
  bool follows_newline_interest : 1;
  bool follows_start_interest : 1;

  bool at_end : 1;
  bool visited : 1;
  bool replacement_calculated : 1;
};
|
||||
|
||||
class RegExpNode : public ZoneObject {
|
||||
public:
|
||||
explicit RegExpNode(Zone* zone)
|
||||
: replacement_(nullptr),
|
||||
on_work_list_(false),
|
||||
trace_count_(0),
|
||||
zone_(zone) {
|
||||
bm_info_[0] = bm_info_[1] = nullptr;
|
||||
}
|
||||
virtual ~RegExpNode();
|
||||
virtual void Accept(NodeVisitor* visitor) = 0;
|
||||
// Generates a goto to this node or actually generates the code at this point.
|
||||
virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
|
||||
// How many characters must this node consume at a minimum in order to
|
||||
// succeed. If we have found at least 'still_to_find' characters that
|
||||
// must be consumed there is no need to ask any following nodes whether
|
||||
// they are sure to eat any more characters. The not_at_start argument is
|
||||
// used to indicate that we know we are not at the start of the input. In
|
||||
// this case anchored branches will always fail and can be ignored when
|
||||
// determining how many characters are consumed on success.
|
||||
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start) = 0;
|
||||
// Emits some quick code that checks whether the preloaded characters match.
|
||||
// Falls through on certain failure, jumps to the label on possible success.
|
||||
// If the node cannot make a quick check it does nothing and returns false.
|
||||
bool EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace,
|
||||
Trace* trace, bool preload_has_checked_bounds,
|
||||
Label* on_possible_success,
|
||||
QuickCheckDetails* details_return,
|
||||
bool fall_through_on_failure);
|
||||
// For a given number of characters this returns a mask and a value. The
|
||||
// next n characters are anded with the mask and compared with the value.
|
||||
// A comparison failure indicates the node cannot match the next n characters.
|
||||
// A comparison success indicates the node may match.
|
||||
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler,
|
||||
int characters_filled_in,
|
||||
bool not_at_start) = 0;
|
||||
static const int kNodeIsTooComplexForGreedyLoops = kMinInt;
|
||||
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
|
||||
// Only returns the successor for a text node of length 1 that matches any
|
||||
// character and that has no guards on it.
|
||||
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
|
||||
RegExpCompiler* compiler) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Collects information on the possible code units (mod 128) that can match if
|
||||
// we look forward. This is used for a Boyer-Moore-like string searching
|
||||
// implementation. TODO(erikcorry): This should share more code with
|
||||
// EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
|
||||
// the number of nodes we are willing to look at in order to create this data.
|
||||
static const int kRecursionBudget = 200;
|
||||
bool KeepRecursing(RegExpCompiler* compiler);
|
||||
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
// If we know that the input is one-byte then there are some nodes that can
|
||||
// never match. This method returns a node that can be substituted for
|
||||
// itself, or nullptr if the node can never match.
|
||||
virtual RegExpNode* FilterOneByte(int depth) { return this; }
|
||||
// Helper for FilterOneByte.
|
||||
RegExpNode* replacement() {
|
||||
DCHECK(info()->replacement_calculated);
|
||||
return replacement_;
|
||||
}
|
||||
RegExpNode* set_replacement(RegExpNode* replacement) {
|
||||
info()->replacement_calculated = true;
|
||||
replacement_ = replacement;
|
||||
return replacement; // For convenience.
|
||||
}
|
||||
|
||||
// We want to avoid recalculating the lookahead info, so we store it on the
|
||||
// node. Only info that is for this node is stored. We can tell that the
|
||||
// info is for this node when offset == 0, so the information is calculated
|
||||
// relative to this node.
|
||||
void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
|
||||
if (offset == 0) set_bm_info(not_at_start, bm);
|
||||
}
|
||||
|
||||
Label* label() { return &label_; }
|
||||
// If non-generic code is generated for a node (i.e. the node is not at the
|
||||
// start of the trace) then it cannot be reused. This variable sets a limit
|
||||
// on how often we allow that to happen before we insist on starting a new
|
||||
// trace and generating generic code for a node that can be reused by flushing
|
||||
// the deferred actions in the current trace and generating a goto.
|
||||
static const int kMaxCopiesCodeGenerated = 10;
|
||||
|
||||
bool on_work_list() { return on_work_list_; }
|
||||
void set_on_work_list(bool value) { on_work_list_ = value; }
|
||||
|
||||
NodeInfo* info() { return &info_; }
|
||||
|
||||
BoyerMooreLookahead* bm_info(bool not_at_start) {
|
||||
return bm_info_[not_at_start ? 1 : 0];
|
||||
}
|
||||
|
||||
Zone* zone() const { return zone_; }
|
||||
|
||||
protected:
|
||||
enum LimitResult { DONE, CONTINUE };
|
||||
RegExpNode* replacement_;
|
||||
|
||||
LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
|
||||
|
||||
void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
|
||||
bm_info_[not_at_start ? 1 : 0] = bm;
|
||||
}
|
||||
|
||||
private:
|
||||
static const int kFirstCharBudget = 10;
|
||||
Label label_;
|
||||
bool on_work_list_;
|
||||
NodeInfo info_;
|
||||
// This variable keeps track of how many times code has been generated for
|
||||
// this node (in different traces). We don't keep track of where the
|
||||
// generated code is located unless the code is generated at the start of
|
||||
// a trace, in which case it is generic and can be reused by flushing the
|
||||
// deferred operations in the current trace and generating a goto.
|
||||
int trace_count_;
|
||||
BoyerMooreLookahead* bm_info_[2];
|
||||
|
||||
Zone* zone_;
|
||||
};
|
||||
|
||||
class SeqRegExpNode : public RegExpNode {
|
||||
public:
|
||||
explicit SeqRegExpNode(RegExpNode* on_success)
|
||||
: RegExpNode(on_success->zone()), on_success_(on_success) {}
|
||||
RegExpNode* on_success() { return on_success_; }
|
||||
void set_on_success(RegExpNode* node) { on_success_ = node; }
|
||||
RegExpNode* FilterOneByte(int depth) override;
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override {
|
||||
on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
|
||||
if (offset == 0) set_bm_info(not_at_start, bm);
|
||||
}
|
||||
|
||||
protected:
|
||||
RegExpNode* FilterSuccessor(int depth);
|
||||
|
||||
private:
|
||||
RegExpNode* on_success_;
|
||||
};
|
||||
|
||||
class ActionNode : public SeqRegExpNode {
|
||||
public:
|
||||
enum ActionType {
|
||||
SET_REGISTER,
|
||||
INCREMENT_REGISTER,
|
||||
STORE_POSITION,
|
||||
BEGIN_SUBMATCH,
|
||||
POSITIVE_SUBMATCH_SUCCESS,
|
||||
EMPTY_MATCH_CHECK,
|
||||
CLEAR_CAPTURES
|
||||
};
|
||||
static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
|
||||
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
|
||||
static ActionNode* StorePosition(int reg, bool is_capture,
|
||||
RegExpNode* on_success);
|
||||
static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
|
||||
static ActionNode* BeginSubmatch(int stack_pointer_reg, int position_reg,
|
||||
RegExpNode* on_success);
|
||||
static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
|
||||
int restore_reg,
|
||||
int clear_capture_count,
|
||||
int clear_capture_from,
|
||||
RegExpNode* on_success);
|
||||
static ActionNode* EmptyMatchCheck(int start_register,
|
||||
int repetition_register,
|
||||
int repetition_limit,
|
||||
RegExpNode* on_success);
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler, int filled_in,
|
||||
bool not_at_start) override {
|
||||
return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
|
||||
not_at_start);
|
||||
}
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override;
|
||||
ActionType action_type() { return action_type_; }
|
||||
// TODO(erikcorry): We should allow some action nodes in greedy loops.
|
||||
int GreedyLoopTextLength() override {
|
||||
return kNodeIsTooComplexForGreedyLoops;
|
||||
}
|
||||
|
||||
private:
|
||||
union {
|
||||
struct {
|
||||
int reg;
|
||||
int value;
|
||||
} u_store_register;
|
||||
struct {
|
||||
int reg;
|
||||
} u_increment_register;
|
||||
struct {
|
||||
int reg;
|
||||
bool is_capture;
|
||||
} u_position_register;
|
||||
struct {
|
||||
int stack_pointer_register;
|
||||
int current_position_register;
|
||||
int clear_register_count;
|
||||
int clear_register_from;
|
||||
} u_submatch;
|
||||
struct {
|
||||
int start_register;
|
||||
int repetition_register;
|
||||
int repetition_limit;
|
||||
} u_empty_match_check;
|
||||
struct {
|
||||
int range_from;
|
||||
int range_to;
|
||||
} u_clear_captures;
|
||||
} data_;
|
||||
ActionNode(ActionType action_type, RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success), action_type_(action_type) {}
|
||||
ActionType action_type_;
|
||||
friend class DotPrinterImpl;
|
||||
};
|
||||
|
||||
class TextNode : public SeqRegExpNode {
|
||||
public:
|
||||
TextNode(ZoneList<TextElement>* elms, bool read_backward,
|
||||
RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
|
||||
TextNode(RegExpCharacterClass* that, bool read_backward,
|
||||
RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success),
|
||||
elms_(new (zone()) ZoneList<TextElement>(1, zone())),
|
||||
read_backward_(read_backward) {
|
||||
elms_->Add(TextElement::CharClass(that), zone());
|
||||
}
|
||||
// Create TextNode for a single character class for the given ranges.
|
||||
static TextNode* CreateForCharacterRanges(Zone* zone,
|
||||
ZoneList<CharacterRange>* ranges,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success,
|
||||
JSRegExp::Flags flags);
|
||||
// Create TextNode for a surrogate pair with a range given for the
|
||||
// lead and the trail surrogate each.
|
||||
static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
|
||||
CharacterRange trail,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success,
|
||||
JSRegExp::Flags flags);
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler, int characters_filled_in,
|
||||
bool not_at_start) override;
|
||||
ZoneList<TextElement>* elements() { return elms_; }
|
||||
bool read_backward() { return read_backward_; }
|
||||
void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
|
||||
int GreedyLoopTextLength() override;
|
||||
RegExpNode* GetSuccessorOfOmnivorousTextNode(
|
||||
RegExpCompiler* compiler) override;
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override;
|
||||
void CalculateOffsets();
|
||||
RegExpNode* FilterOneByte(int depth) override;
|
||||
|
||||
private:
|
||||
enum TextEmitPassType {
|
||||
NON_LATIN1_MATCH, // Check for characters that can't match.
|
||||
SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
|
||||
NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
|
||||
CASE_CHARACTER_MATCH, // Case-independent single character check.
|
||||
CHARACTER_CLASS_MATCH // Character class.
|
||||
};
|
||||
static bool SkipPass(TextEmitPassType pass, bool ignore_case);
|
||||
static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
|
||||
static const int kLastPass = CHARACTER_CLASS_MATCH;
|
||||
void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
|
||||
bool preloaded, Trace* trace, bool first_element_checked,
|
||||
int* checked_up_to);
|
||||
int Length();
|
||||
ZoneList<TextElement>* elms_;
|
||||
bool read_backward_;
|
||||
};
|
||||
|
||||
class AssertionNode : public SeqRegExpNode {
|
||||
public:
|
||||
enum AssertionType {
|
||||
AT_END,
|
||||
AT_START,
|
||||
AT_BOUNDARY,
|
||||
AT_NON_BOUNDARY,
|
||||
AFTER_NEWLINE
|
||||
};
|
||||
static AssertionNode* AtEnd(RegExpNode* on_success) {
|
||||
return new (on_success->zone()) AssertionNode(AT_END, on_success);
|
||||
}
|
||||
static AssertionNode* AtStart(RegExpNode* on_success) {
|
||||
return new (on_success->zone()) AssertionNode(AT_START, on_success);
|
||||
}
|
||||
static AssertionNode* AtBoundary(RegExpNode* on_success) {
|
||||
return new (on_success->zone()) AssertionNode(AT_BOUNDARY, on_success);
|
||||
}
|
||||
static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
|
||||
return new (on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success);
|
||||
}
|
||||
static AssertionNode* AfterNewline(RegExpNode* on_success) {
|
||||
return new (on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success);
|
||||
}
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler, int filled_in,
|
||||
bool not_at_start) override;
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override;
|
||||
AssertionType assertion_type() { return assertion_type_; }
|
||||
|
||||
private:
|
||||
void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
|
||||
enum IfPrevious { kIsNonWord, kIsWord };
|
||||
void BacktrackIfPrevious(RegExpCompiler* compiler, Trace* trace,
|
||||
IfPrevious backtrack_if_previous);
|
||||
AssertionNode(AssertionType t, RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success), assertion_type_(t) {}
|
||||
AssertionType assertion_type_;
|
||||
};
|
||||
|
||||
class BackReferenceNode : public SeqRegExpNode {
|
||||
public:
|
||||
BackReferenceNode(int start_reg, int end_reg, JSRegExp::Flags flags,
|
||||
bool read_backward, RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success),
|
||||
start_reg_(start_reg),
|
||||
end_reg_(end_reg),
|
||||
flags_(flags),
|
||||
read_backward_(read_backward) {}
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
int start_register() { return start_reg_; }
|
||||
int end_register() { return end_reg_; }
|
||||
bool read_backward() { return read_backward_; }
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
int EatsAtLeast(int still_to_find, int recursion_depth,
|
||||
bool not_at_start) override;
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler, int characters_filled_in,
|
||||
bool not_at_start) override {
|
||||
return;
|
||||
}
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override;
|
||||
|
||||
private:
|
||||
int start_reg_;
|
||||
int end_reg_;
|
||||
JSRegExp::Flags flags_;
|
||||
bool read_backward_;
|
||||
};
|
||||
|
||||
class EndNode : public RegExpNode {
|
||||
public:
|
||||
enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
|
||||
EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {}
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
int EatsAtLeast(int still_to_find, int recursion_depth,
|
||||
bool not_at_start) override {
|
||||
return 0;
|
||||
}
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler, int characters_filled_in,
|
||||
bool not_at_start) override {
|
||||
// Returning 0 from EatsAtLeast should ensure we never get here.
|
||||
UNREACHABLE();
|
||||
}
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override {
|
||||
// Returning 0 from EatsAtLeast should ensure we never get here.
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
private:
|
||||
Action action_;
|
||||
};
|
||||
|
||||
class NegativeSubmatchSuccess : public EndNode {
|
||||
public:
|
||||
NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg,
|
||||
int clear_capture_count, int clear_capture_start,
|
||||
Zone* zone)
|
||||
: EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone),
|
||||
stack_pointer_register_(stack_pointer_reg),
|
||||
current_position_register_(position_reg),
|
||||
clear_capture_count_(clear_capture_count),
|
||||
clear_capture_start_(clear_capture_start) {}
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
|
||||
private:
|
||||
int stack_pointer_register_;
|
||||
int current_position_register_;
|
||||
int clear_capture_count_;
|
||||
int clear_capture_start_;
|
||||
};
|
||||
|
||||
// An immutable triple of a register number, a relation (LT or GEQ) and an
// integer value.
class Guard : public ZoneObject {
 public:
  enum Relation { LT, GEQ };

  Guard(int reg, Relation op, int value)
      : reg_(reg), op_(op), value_(value) {}

  // Read-only accessors for the three components.
  int reg() { return reg_; }
  Relation op() { return op_; }
  int value() { return value_; }

 private:
  int reg_;
  Relation op_;
  int value_;
};
|
||||
|
||||
class GuardedAlternative {
|
||||
public:
|
||||
explicit GuardedAlternative(RegExpNode* node)
|
||||
: node_(node), guards_(nullptr) {}
|
||||
void AddGuard(Guard* guard, Zone* zone);
|
||||
RegExpNode* node() { return node_; }
|
||||
void set_node(RegExpNode* node) { node_ = node; }
|
||||
ZoneList<Guard*>* guards() { return guards_; }
|
||||
|
||||
private:
|
||||
RegExpNode* node_;
|
||||
ZoneList<Guard*>* guards_;
|
||||
};
|
||||
|
||||
class AlternativeGeneration;
|
||||
|
||||
class ChoiceNode : public RegExpNode {
|
||||
public:
|
||||
explicit ChoiceNode(int expected_size, Zone* zone)
|
||||
: RegExpNode(zone),
|
||||
alternatives_(new (zone)
|
||||
ZoneList<GuardedAlternative>(expected_size, zone)),
|
||||
table_(nullptr),
|
||||
not_at_start_(false),
|
||||
being_calculated_(false) {}
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
void AddAlternative(GuardedAlternative node) {
|
||||
alternatives()->Add(node, zone());
|
||||
}
|
||||
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
|
||||
DispatchTable* GetTable(bool ignore_case);
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
|
||||
int EatsAtLeastHelper(int still_to_find, int budget,
|
||||
RegExpNode* ignore_this_node, bool not_at_start);
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler, int characters_filled_in,
|
||||
bool not_at_start) override;
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override;
|
||||
|
||||
bool being_calculated() { return being_calculated_; }
|
||||
bool not_at_start() { return not_at_start_; }
|
||||
void set_not_at_start() { not_at_start_ = true; }
|
||||
void set_being_calculated(bool b) { being_calculated_ = b; }
|
||||
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
|
||||
return true;
|
||||
}
|
||||
RegExpNode* FilterOneByte(int depth) override;
|
||||
virtual bool read_backward() { return false; }
|
||||
|
||||
protected:
|
||||
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
|
||||
ZoneList<GuardedAlternative>* alternatives_;
|
||||
|
||||
private:
|
||||
friend class DispatchTableConstructor;
|
||||
friend class Analysis;
|
||||
void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard,
|
||||
Trace* trace);
|
||||
int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
|
||||
void EmitOutOfLineContinuation(RegExpCompiler* compiler, Trace* trace,
|
||||
GuardedAlternative alternative,
|
||||
AlternativeGeneration* alt_gen,
|
||||
int preload_characters,
|
||||
bool next_expects_preload);
|
||||
void SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
|
||||
PreloadState* preloads);
|
||||
void AssertGuardsMentionRegisters(Trace* trace);
|
||||
int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace);
|
||||
Trace* EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
|
||||
AlternativeGenerationList* alt_gens,
|
||||
PreloadState* preloads,
|
||||
GreedyLoopState* greedy_loop_state, int text_length);
|
||||
void EmitChoices(RegExpCompiler* compiler,
|
||||
AlternativeGenerationList* alt_gens, int first_choice,
|
||||
Trace* trace, PreloadState* preloads);
|
||||
DispatchTable* table_;
|
||||
// If true, this node is never checked at the start of the input.
|
||||
// Allows a new trace to start with at_start() set to false.
|
||||
bool not_at_start_;
|
||||
bool being_calculated_;
|
||||
};
|
||||
|
||||
class NegativeLookaroundChoiceNode : public ChoiceNode {
|
||||
public:
|
||||
explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,
|
||||
GuardedAlternative then_do_this,
|
||||
Zone* zone)
|
||||
: ChoiceNode(2, zone) {
|
||||
AddAlternative(this_must_fail);
|
||||
AddAlternative(then_do_this);
|
||||
}
|
||||
int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler, int characters_filled_in,
|
||||
bool not_at_start) override;
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override {
|
||||
alternatives_->at(1).node()->FillInBMInfo(isolate, offset, budget - 1, bm,
|
||||
not_at_start);
|
||||
if (offset == 0) set_bm_info(not_at_start, bm);
|
||||
}
|
||||
// For a negative lookahead we don't emit the quick check for the
|
||||
// alternative that is expected to fail. This is because quick check code
|
||||
// starts by loading enough characters for the alternative that takes fewest
|
||||
// characters, but on a negative lookahead the negative branch did not take
|
||||
// part in that calculation (EatsAtLeast) so the assumptions don't hold.
|
||||
bool try_to_emit_quick_check_for_alternative(bool is_first) override {
|
||||
return !is_first;
|
||||
}
|
||||
RegExpNode* FilterOneByte(int depth) override;
|
||||
};
|
||||
|
||||
class LoopChoiceNode : public ChoiceNode {
|
||||
public:
|
||||
LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, Zone* zone)
|
||||
: ChoiceNode(2, zone),
|
||||
loop_node_(nullptr),
|
||||
continue_node_(nullptr),
|
||||
body_can_be_zero_length_(body_can_be_zero_length),
|
||||
read_backward_(read_backward) {}
|
||||
void AddLoopAlternative(GuardedAlternative alt);
|
||||
void AddContinueAlternative(GuardedAlternative alt);
|
||||
void Emit(RegExpCompiler* compiler, Trace* trace) override;
|
||||
int EatsAtLeast(int still_to_find, int budget, bool not_at_start) override;
|
||||
void GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
RegExpCompiler* compiler, int characters_filled_in,
|
||||
bool not_at_start) override;
|
||||
void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) override;
|
||||
RegExpNode* loop_node() { return loop_node_; }
|
||||
RegExpNode* continue_node() { return continue_node_; }
|
||||
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
|
||||
bool read_backward() override { return read_backward_; }
|
||||
void Accept(NodeVisitor* visitor) override;
|
||||
RegExpNode* FilterOneByte(int depth) override;
|
||||
|
||||
private:
|
||||
// AddAlternative is made private for loop nodes because alternatives
|
||||
// should not be added freely, we need to keep track of which node
|
||||
// goes back to the node itself.
|
||||
void AddAlternative(GuardedAlternative node) {
|
||||
ChoiceNode::AddAlternative(node);
|
||||
}
|
||||
|
||||
RegExpNode* loop_node_;
|
||||
RegExpNode* continue_node_;
|
||||
bool body_can_be_zero_length_;
|
||||
bool read_backward_;
|
||||
};
|
||||
|
||||
class NodeVisitor {
|
||||
public:
|
||||
virtual ~NodeVisitor() = default;
|
||||
#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0;
|
||||
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
#undef DECLARE_VISIT
|
||||
virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#endif // V8_REGEXP_REGEXP_NODES_H_
|
@ -11,6 +11,7 @@
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "src/regexp/jsregexp.h"
|
||||
#include "src/regexp/property-sequences.h"
|
||||
#include "src/regexp/regexp-macro-assembler.h"
|
||||
#include "src/strings/char-predicates-inl.h"
|
||||
#include "src/utils/ostreams.h"
|
||||
#include "src/utils/utils.h"
|
||||
|
Loading…
Reference in New Issue
Block a user