[regexp] Refactor OutSet and BoyerMoorePositionInfo
OutSet: The more advanced features of OutSet are no longer used, thus the rename to DynamicBitSet to reflect its current purpose. BoyerMoorePositionInfo: Use a std::bitset backing store in BoyerMoorePositionInfo (previously this was based on a (statically-sized) ZoneList<bool>). Bug: v8:9359 Change-Id: I40ca89467ae90ee90c616be5fd0d51e54e94e157 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1664064 Commit-Queue: Jakob Gruber <jgruber@chromium.org> Reviewed-by: Peter Marshall <petermarshall@chromium.org> Cr-Commit-Position: refs/heads/master@{#62277}
This commit is contained in:
parent
983ab01a68
commit
4fe611ec58
@ -320,7 +320,41 @@ bool Trace::GetStoredPosition(int reg, int* cp_offset) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) {
|
||||
// A (dynamically-sized) set of unsigned integers that behaves especially well
|
||||
// on small integers (< kFirstLimit). May do zone-allocation.
|
||||
class DynamicBitSet : public ZoneObject {
|
||||
public:
|
||||
V8_EXPORT_PRIVATE bool Get(unsigned value) const {
|
||||
if (value < kFirstLimit) {
|
||||
return (first_ & (1 << value)) != 0;
|
||||
} else if (remaining_ == nullptr) {
|
||||
return false;
|
||||
} else {
|
||||
return remaining_->Contains(value);
|
||||
}
|
||||
}
|
||||
|
||||
// Destructively set a value in this set.
|
||||
void Set(unsigned value, Zone* zone) {
|
||||
if (value < kFirstLimit) {
|
||||
first_ |= (1 << value);
|
||||
} else {
|
||||
if (remaining_ == nullptr)
|
||||
remaining_ = new (zone) ZoneList<unsigned>(1, zone);
|
||||
if (remaining_->is_empty() || !remaining_->Contains(value))
|
||||
remaining_->Add(value, zone);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr unsigned kFirstLimit = 32;
|
||||
|
||||
uint32_t first_ = 0;
|
||||
ZoneList<unsigned>* remaining_ = nullptr;
|
||||
};
|
||||
|
||||
int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
|
||||
Zone* zone) {
|
||||
int max_register = RegExpCompiler::kNoRegister;
|
||||
for (DeferredAction* action = actions_; action != nullptr;
|
||||
action = action->next()) {
|
||||
@ -339,8 +373,8 @@ int Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) {
|
||||
|
||||
void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
|
||||
int max_register,
|
||||
const OutSet& registers_to_pop,
|
||||
const OutSet& registers_to_clear) {
|
||||
const DynamicBitSet& registers_to_pop,
|
||||
const DynamicBitSet& registers_to_clear) {
|
||||
for (int reg = max_register; reg >= 0; reg--) {
|
||||
if (registers_to_pop.Get(reg)) {
|
||||
assembler->PopRegister(reg);
|
||||
@ -356,9 +390,10 @@ void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
|
||||
|
||||
void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
||||
int max_register,
|
||||
const OutSet& affected_registers,
|
||||
OutSet* registers_to_pop,
|
||||
OutSet* registers_to_clear, Zone* zone) {
|
||||
const DynamicBitSet& affected_registers,
|
||||
DynamicBitSet* registers_to_pop,
|
||||
DynamicBitSet* registers_to_clear,
|
||||
Zone* zone) {
|
||||
// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
|
||||
const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
|
||||
|
||||
@ -502,7 +537,7 @@ void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
|
||||
}
|
||||
|
||||
// Generate deferred actions here along with code to undo them again.
|
||||
OutSet affected_registers;
|
||||
DynamicBitSet affected_registers;
|
||||
|
||||
if (backtrack() != nullptr) {
|
||||
// Here we have a concrete backtrack location. These are set up by choice
|
||||
@ -513,8 +548,8 @@ void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
|
||||
|
||||
int max_register =
|
||||
FindAffectedRegisters(&affected_registers, compiler->zone());
|
||||
OutSet registers_to_pop;
|
||||
OutSet registers_to_clear;
|
||||
DynamicBitSet registers_to_pop;
|
||||
DynamicBitSet registers_to_clear;
|
||||
PerformDeferredActions(assembler, max_register, affected_registers,
|
||||
&registers_to_pop, &registers_to_clear,
|
||||
compiler->zone());
|
||||
@ -2540,15 +2575,15 @@ void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
|
||||
if (interval.to() - interval.from() >= kMapSize - 1) {
|
||||
if (map_count_ != kMapSize) {
|
||||
map_count_ = kMapSize;
|
||||
for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
|
||||
for (int i = 0; i < kMapSize; i++) map_.set(i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
for (int i = interval.from(); i <= interval.to(); i++) {
|
||||
int mod_character = (i & kMask);
|
||||
if (!map_->at(mod_character)) {
|
||||
if (!map_[mod_character]) {
|
||||
map_count_++;
|
||||
map_->at(mod_character) = true;
|
||||
map_.set(mod_character);
|
||||
}
|
||||
if (map_count_ == kMapSize) return;
|
||||
}
|
||||
@ -2558,7 +2593,7 @@ void BoyerMoorePositionInfo::SetAll() {
|
||||
s_ = w_ = d_ = kLatticeUnknown;
|
||||
if (map_count_ != kMapSize) {
|
||||
map_count_ = kMapSize;
|
||||
for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
|
||||
map_.set();
|
||||
}
|
||||
}
|
||||
|
||||
@ -2572,7 +2607,7 @@ BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
|
||||
}
|
||||
bitmaps_ = new (zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
|
||||
for (int i = 0; i < length; i++) {
|
||||
bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone), zone);
|
||||
bitmaps_->Add(new (zone) BoyerMoorePositionInfo(), zone);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3278,46 +3313,6 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
on_success()->Emit(compiler, trace);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Splay tree
|
||||
|
||||
// Returns a set that contains |value| in addition to the contents of this
// set. If this set already contains |value| it is returned unchanged;
// otherwise a previously created successor containing |value| is reused, or
// a new successor is allocated in |zone| and remembered for later calls.
OutSet* OutSet::Extend(unsigned value, Zone* zone) {
  if (Get(value)) return this;

  ZoneList<OutSet*>* known = successors(zone);
  if (known == nullptr) {
    // First extension of this set: create the successor list lazily.
    successors_ = new (zone) ZoneList<OutSet*>(2, zone);
  } else {
    const int count = known->length();
    for (int index = 0; index < count; ++index) {
      OutSet* candidate = known->at(index);
      if (candidate->Get(value)) return candidate;
    }
  }

  // NOTE(review): the new set is constructed from the same |remaining_|
  // pointer as this set, so for values >= kFirstLimit the Set() call below
  // appears to append to a list shared with this set - confirm intent.
  OutSet* extended = new (zone) OutSet(first_, remaining_);
  extended->Set(value, zone);
  successors(zone)->Add(extended, zone);
  return extended;
}
|
||||
|
||||
void OutSet::Set(unsigned value, Zone* zone) {
|
||||
if (value < kFirstLimit) {
|
||||
first_ |= (1 << value);
|
||||
} else {
|
||||
if (remaining_ == nullptr)
|
||||
remaining_ = new (zone) ZoneList<unsigned>(1, zone);
|
||||
if (remaining_->is_empty() || !remaining_->Contains(value))
|
||||
remaining_->Add(value, zone);
|
||||
}
|
||||
}
|
||||
|
||||
bool OutSet::Get(unsigned value) const {
|
||||
if (value < kFirstLimit) {
|
||||
return (first_ & (1 << value)) != 0;
|
||||
} else if (remaining_ == nullptr) {
|
||||
return false;
|
||||
} else {
|
||||
return remaining_->Contains(value);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Analysis
|
||||
|
||||
|
@ -5,12 +5,15 @@
|
||||
#ifndef V8_REGEXP_REGEXP_COMPILER_H_
|
||||
#define V8_REGEXP_REGEXP_COMPILER_H_
|
||||
|
||||
#include <bitset>
|
||||
|
||||
#include "src/base/small-vector.h"
|
||||
#include "src/regexp/regexp-nodes.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
class DynamicBitSet;
|
||||
class Isolate;
|
||||
|
||||
namespace regexp_compiler_constants {
|
||||
@ -75,34 +78,6 @@ inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
|
||||
return IsUnicode(flags) && IgnoreCase(flags);
|
||||
}
|
||||
|
||||
// A set of unsigned integers that behaves especially well on small
|
||||
// integers (< 32). May do zone-allocation.
|
||||
class OutSet : public ZoneObject {
|
||||
public:
|
||||
OutSet() : first_(0), remaining_(nullptr), successors_(nullptr) {}
|
||||
OutSet* Extend(unsigned value, Zone* zone);
|
||||
V8_EXPORT_PRIVATE bool Get(unsigned value) const;
|
||||
static const unsigned kFirstLimit = 32;
|
||||
|
||||
private:
|
||||
// Destructively set a value in this set. In most cases you want
|
||||
// to use Extend instead to ensure that only one instance exists
|
||||
// that contains the same values.
|
||||
void Set(unsigned value, Zone* zone);
|
||||
|
||||
// The successors are a list of sets that contain the same values
|
||||
// as this set and the one more value that is not present in this
|
||||
// set.
|
||||
ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
|
||||
|
||||
OutSet(uint32_t first, ZoneList<unsigned>* remaining)
|
||||
: first_(first), remaining_(remaining), successors_(nullptr) {}
|
||||
uint32_t first_;
|
||||
ZoneList<unsigned>* remaining_;
|
||||
ZoneList<OutSet*>* successors_;
|
||||
friend class Trace;
|
||||
};
|
||||
|
||||
// Details of a quick mask-compare check that can look ahead in the
|
||||
// input stream.
|
||||
class QuickCheckDetails {
|
||||
@ -186,22 +161,10 @@ inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
|
||||
|
||||
class BoyerMoorePositionInfo : public ZoneObject {
|
||||
public:
|
||||
explicit BoyerMoorePositionInfo(Zone* zone)
|
||||
: map_(new (zone) ZoneList<bool>(kMapSize, zone)),
|
||||
map_count_(0),
|
||||
w_(kNotYet),
|
||||
s_(kNotYet),
|
||||
d_(kNotYet),
|
||||
surrogate_(kNotYet) {
|
||||
for (int i = 0; i < kMapSize; i++) {
|
||||
map_->Add(false, zone);
|
||||
}
|
||||
}
|
||||
bool at(int i) const { return map_[i]; }
|
||||
|
||||
bool& at(int i) { return map_->at(i); }
|
||||
|
||||
static const int kMapSize = 128;
|
||||
static const int kMask = kMapSize - 1;
|
||||
static constexpr int kMapSize = 128;
|
||||
static constexpr int kMask = kMapSize - 1;
|
||||
|
||||
int map_count() const { return map_count_; }
|
||||
|
||||
@ -212,12 +175,12 @@ class BoyerMoorePositionInfo : public ZoneObject {
|
||||
bool is_word() { return w_ == kLatticeIn; }
|
||||
|
||||
private:
|
||||
ZoneList<bool>* map_;
|
||||
int map_count_; // Number of set bits in the map.
|
||||
ContainedInLattice w_; // The \w character class.
|
||||
ContainedInLattice s_; // The \s character class.
|
||||
ContainedInLattice d_; // The \d character class.
|
||||
ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
|
||||
std::bitset<kMapSize> map_;
|
||||
int map_count_ = 0; // Number of set bits in the map.
|
||||
ContainedInLattice w_ = kNotYet; // The \w character class.
|
||||
ContainedInLattice s_ = kNotYet; // The \s character class.
|
||||
ContainedInLattice d_ = kNotYet; // The \d character class.
|
||||
ContainedInLattice surrogate_ = kNotYet; // Surrogate UTF-16 code units.
|
||||
};
|
||||
|
||||
class BoyerMooreLookahead : public ZoneObject {
|
||||
@ -414,14 +377,14 @@ class Trace {
|
||||
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
|
||||
|
||||
private:
|
||||
int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
|
||||
int FindAffectedRegisters(DynamicBitSet* affected_registers, Zone* zone);
|
||||
void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register,
|
||||
const OutSet& affected_registers,
|
||||
OutSet* registers_to_pop,
|
||||
OutSet* registers_to_clear, Zone* zone);
|
||||
const DynamicBitSet& affected_registers,
|
||||
DynamicBitSet* registers_to_pop,
|
||||
DynamicBitSet* registers_to_clear, Zone* zone);
|
||||
void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register,
|
||||
const OutSet& registers_to_pop,
|
||||
const OutSet& registers_to_clear);
|
||||
const DynamicBitSet& registers_to_pop,
|
||||
const DynamicBitSet& registers_to_clear);
|
||||
int cp_offset_;
|
||||
DeferredAction* actions_;
|
||||
Label* backtrack_;
|
||||
|
Loading…
Reference in New Issue
Block a user