[regexp] Refactor OutSet and BoyerMoorePositionInfo

OutSet:
The more advanced features of OutSet are no longer used, thus the
rename to DynamicBitSet to reflect its current purpose.

BoyerMoorePositionInfo:
Use a std::bitset as the backing store in BoyerMoorePositionInfo
(previously this was a statically-sized ZoneList<bool>).

Bug: v8:9359
Change-Id: I40ca89467ae90ee90c616be5fd0d51e54e94e157
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1664064
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62277}
This commit is contained in:
Jakob Gruber 2019-06-18 12:43:26 +02:00 committed by Commit Bot
parent 983ab01a68
commit 4fe611ec58
2 changed files with 67 additions and 109 deletions

View File

@ -320,7 +320,41 @@ bool Trace::GetStoredPosition(int reg, int* cp_offset) {
return false; return false;
} }
int Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) { // A (dynamically-sized) set of unsigned integers that behaves especially well
// on small integers (< kFirstLimit). May do zone-allocation.
class DynamicBitSet : public ZoneObject {
  // Storage is split in two: values below kFirstLimit are kept as single bits
  // of first_, all other values are appended to the lazily created
  // remaining_ list.
 public:
  // Returns true if |value| is a member of this set.
  V8_EXPORT_PRIVATE bool Get(unsigned value) const {
    if (value < kFirstLimit) {
      // Build the mask in the same unsigned type as first_. A plain
      // `1 << value` is a signed int shift that reaches the sign bit for
      // value == 31.
      return (first_ & (uint32_t{1} << value)) != 0;
    } else if (remaining_ == nullptr) {
      // No large value has ever been stored.
      return false;
    } else {
      return remaining_->Contains(value);
    }
  }

  // Destructively set a value in this set. |zone| is only used when |value|
  // is >= kFirstLimit and therefore has to be stored in the overflow list.
  void Set(unsigned value, Zone* zone) {
    if (value < kFirstLimit) {
      first_ |= (uint32_t{1} << value);
    } else {
      // Create the overflow list on first use.
      if (remaining_ == nullptr)
        remaining_ = new (zone) ZoneList<unsigned>(1, zone);
      // Keep the list free of duplicates.
      if (remaining_->is_empty() || !remaining_->Contains(value))
        remaining_->Add(value, zone);
    }
  }

 private:
  // Values strictly below this limit are stored inline in first_.
  static constexpr unsigned kFirstLimit = 32;
  static_assert(kFirstLimit <= sizeof(uint32_t) * 8,
                "first_ must provide one bit per value below kFirstLimit");

  uint32_t first_ = 0;                        // Bit i set <=> value i present.
  ZoneList<unsigned>* remaining_ = nullptr;   // Values >= kFirstLimit, or null.
};
int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
Zone* zone) {
int max_register = RegExpCompiler::kNoRegister; int max_register = RegExpCompiler::kNoRegister;
for (DeferredAction* action = actions_; action != nullptr; for (DeferredAction* action = actions_; action != nullptr;
action = action->next()) { action = action->next()) {
@ -339,8 +373,8 @@ int Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) {
void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler, void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
int max_register, int max_register,
const OutSet& registers_to_pop, const DynamicBitSet& registers_to_pop,
const OutSet& registers_to_clear) { const DynamicBitSet& registers_to_clear) {
for (int reg = max_register; reg >= 0; reg--) { for (int reg = max_register; reg >= 0; reg--) {
if (registers_to_pop.Get(reg)) { if (registers_to_pop.Get(reg)) {
assembler->PopRegister(reg); assembler->PopRegister(reg);
@ -356,9 +390,10 @@ void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler, void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
int max_register, int max_register,
const OutSet& affected_registers, const DynamicBitSet& affected_registers,
OutSet* registers_to_pop, DynamicBitSet* registers_to_pop,
OutSet* registers_to_clear, Zone* zone) { DynamicBitSet* registers_to_clear,
Zone* zone) {
// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
const int push_limit = (assembler->stack_limit_slack() + 1) / 2; const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
@ -502,7 +537,7 @@ void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
} }
// Generate deferred actions here along with code to undo them again. // Generate deferred actions here along with code to undo them again.
OutSet affected_registers; DynamicBitSet affected_registers;
if (backtrack() != nullptr) { if (backtrack() != nullptr) {
// Here we have a concrete backtrack location. These are set up by choice // Here we have a concrete backtrack location. These are set up by choice
@ -513,8 +548,8 @@ void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
int max_register = int max_register =
FindAffectedRegisters(&affected_registers, compiler->zone()); FindAffectedRegisters(&affected_registers, compiler->zone());
OutSet registers_to_pop; DynamicBitSet registers_to_pop;
OutSet registers_to_clear; DynamicBitSet registers_to_clear;
PerformDeferredActions(assembler, max_register, affected_registers, PerformDeferredActions(assembler, max_register, affected_registers,
&registers_to_pop, &registers_to_clear, &registers_to_pop, &registers_to_clear,
compiler->zone()); compiler->zone());
@ -2540,15 +2575,15 @@ void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
if (interval.to() - interval.from() >= kMapSize - 1) { if (interval.to() - interval.from() >= kMapSize - 1) {
if (map_count_ != kMapSize) { if (map_count_ != kMapSize) {
map_count_ = kMapSize; map_count_ = kMapSize;
for (int i = 0; i < kMapSize; i++) map_->at(i) = true; for (int i = 0; i < kMapSize; i++) map_.set(i);
} }
return; return;
} }
for (int i = interval.from(); i <= interval.to(); i++) { for (int i = interval.from(); i <= interval.to(); i++) {
int mod_character = (i & kMask); int mod_character = (i & kMask);
if (!map_->at(mod_character)) { if (!map_[mod_character]) {
map_count_++; map_count_++;
map_->at(mod_character) = true; map_.set(mod_character);
} }
if (map_count_ == kMapSize) return; if (map_count_ == kMapSize) return;
} }
@ -2558,7 +2593,7 @@ void BoyerMoorePositionInfo::SetAll() {
s_ = w_ = d_ = kLatticeUnknown; s_ = w_ = d_ = kLatticeUnknown;
if (map_count_ != kMapSize) { if (map_count_ != kMapSize) {
map_count_ = kMapSize; map_count_ = kMapSize;
for (int i = 0; i < kMapSize; i++) map_->at(i) = true; map_.set();
} }
} }
@ -2572,7 +2607,7 @@ BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
} }
bitmaps_ = new (zone) ZoneList<BoyerMoorePositionInfo*>(length, zone); bitmaps_ = new (zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone), zone); bitmaps_->Add(new (zone) BoyerMoorePositionInfo(), zone);
} }
} }
@ -3278,46 +3313,6 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
on_success()->Emit(compiler, trace); on_success()->Emit(compiler, trace);
} }
// -------------------------------------------------------------------
// Splay tree
// Returns a set holding every value of this set plus |value|. If |value| is
// already present, this set itself is returned; if a previously created
// successor already contains |value|, that successor is reused; otherwise a
// new successor is zone-allocated, recorded, and returned.
OutSet* OutSet::Extend(unsigned value, Zone* zone) {
  if (Get(value)) return this;

  ZoneList<OutSet*>* known = successors(zone);
  if (known == nullptr) {
    // First extension of this set: create the successor list.
    successors_ = new (zone) ZoneList<OutSet*>(2, zone);
  } else {
    for (int index = 0; index < known->length(); index++) {
      OutSet* candidate = known->at(index);
      if (candidate->Get(value)) return candidate;
    }
  }

  // NOTE(review): the new set is constructed from this set's remaining_
  // pointer, so the two sets appear to share that list -- confirm whether
  // that sharing is intended.
  OutSet* extended = new (zone) OutSet(first_, remaining_);
  extended->Set(value, zone);
  successors(zone)->Add(extended, zone);
  return extended;
}
// Destructively adds |value| to this set. Values below kFirstLimit are
// recorded as a bit of first_; larger values are appended to the remaining_
// list, which is zone-allocated on first use and kept free of duplicates.
void OutSet::Set(unsigned value, Zone* zone) {
  if (value < kFirstLimit) {
    // Build the mask in the same unsigned type as first_. A plain
    // `1 << value` is a signed int shift that reaches the sign bit for
    // value == 31.
    first_ |= (uint32_t{1} << value);
  } else {
    if (remaining_ == nullptr)
      remaining_ = new (zone) ZoneList<unsigned>(1, zone);
    if (remaining_->is_empty() || !remaining_->Contains(value))
      remaining_->Add(value, zone);
  }
}
// Returns true if |value| is a member of this set. Values below kFirstLimit
// are looked up as a bit of first_; larger values are searched for in the
// remaining_ list, if one has been allocated.
bool OutSet::Get(unsigned value) const {
  if (value < kFirstLimit) {
    // Build the mask in the same unsigned type as first_. A plain
    // `1 << value` is a signed int shift that reaches the sign bit for
    // value == 31.
    return (first_ & (uint32_t{1} << value)) != 0;
  } else if (remaining_ == nullptr) {
    return false;
  } else {
    return remaining_->Contains(value);
  }
}
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// Analysis // Analysis

View File

@ -5,12 +5,15 @@
#ifndef V8_REGEXP_REGEXP_COMPILER_H_ #ifndef V8_REGEXP_REGEXP_COMPILER_H_
#define V8_REGEXP_REGEXP_COMPILER_H_ #define V8_REGEXP_REGEXP_COMPILER_H_
#include <bitset>
#include "src/base/small-vector.h" #include "src/base/small-vector.h"
#include "src/regexp/regexp-nodes.h" #include "src/regexp/regexp-nodes.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
class DynamicBitSet;
class Isolate; class Isolate;
namespace regexp_compiler_constants { namespace regexp_compiler_constants {
@ -75,34 +78,6 @@ inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
return IsUnicode(flags) && IgnoreCase(flags); return IsUnicode(flags) && IgnoreCase(flags);
} }
// A set of unsigned integers that behaves especially well on small
// integers (< 32). May do zone-allocation.
class OutSet : public ZoneObject {
public:
// Creates an empty set: no bits set, no overflow list, no successors.
OutSet() : first_(0), remaining_(nullptr), successors_(nullptr) {}
// Returns a set containing this set's values plus |value|; defined out of
// line.
OutSet* Extend(unsigned value, Zone* zone);
// Membership test for |value|; defined out of line.
V8_EXPORT_PRIVATE bool Get(unsigned value) const;
// Values strictly below this limit are stored as bits of first_.
static const unsigned kFirstLimit = 32;
private:
// Destructively set a value in this set. In most cases you want
// to use Extend instead to ensure that only one instance exists
// that contains the same values.
void Set(unsigned value, Zone* zone);
// The successors are a list of sets that contain the same values
// as this set and the one more value that is not present in this
// set.
// The |zone| argument is not used by this accessor.
ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
// Creates a set from existing storage. The |remaining| pointer is stored
// as-is, not copied.
OutSet(uint32_t first, ZoneList<unsigned>* remaining)
: first_(first), remaining_(remaining), successors_(nullptr) {}
// Bit i is set when value i (< kFirstLimit) is in the set.
uint32_t first_;
// Values >= kFirstLimit; nullptr until the first such value is added.
ZoneList<unsigned>* remaining_;
// Sets derived from this one via Extend; nullptr until first needed.
ZoneList<OutSet*>* successors_;
friend class Trace;
};
// Details of a quick mask-compare check that can look ahead in the // Details of a quick mask-compare check that can look ahead in the
// input stream. // input stream.
class QuickCheckDetails { class QuickCheckDetails {
@ -186,22 +161,10 @@ inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
class BoyerMoorePositionInfo : public ZoneObject { class BoyerMoorePositionInfo : public ZoneObject {
public: public:
explicit BoyerMoorePositionInfo(Zone* zone) bool at(int i) const { return map_[i]; }
: map_(new (zone) ZoneList<bool>(kMapSize, zone)),
map_count_(0),
w_(kNotYet),
s_(kNotYet),
d_(kNotYet),
surrogate_(kNotYet) {
for (int i = 0; i < kMapSize; i++) {
map_->Add(false, zone);
}
}
bool& at(int i) { return map_->at(i); } static constexpr int kMapSize = 128;
static constexpr int kMask = kMapSize - 1;
static const int kMapSize = 128;
static const int kMask = kMapSize - 1;
int map_count() const { return map_count_; } int map_count() const { return map_count_; }
@ -212,12 +175,12 @@ class BoyerMoorePositionInfo : public ZoneObject {
bool is_word() { return w_ == kLatticeIn; } bool is_word() { return w_ == kLatticeIn; }
private: private:
ZoneList<bool>* map_; std::bitset<kMapSize> map_;
int map_count_; // Number of set bits in the map. int map_count_ = 0; // Number of set bits in the map.
ContainedInLattice w_; // The \w character class. ContainedInLattice w_ = kNotYet; // The \w character class.
ContainedInLattice s_; // The \s character class. ContainedInLattice s_ = kNotYet; // The \s character class.
ContainedInLattice d_; // The \d character class. ContainedInLattice d_ = kNotYet; // The \d character class.
ContainedInLattice surrogate_; // Surrogate UTF-16 code units. ContainedInLattice surrogate_ = kNotYet; // Surrogate UTF-16 code units.
}; };
class BoyerMooreLookahead : public ZoneObject { class BoyerMooreLookahead : public ZoneObject {
@ -414,14 +377,14 @@ class Trace {
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler); void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
private: private:
int FindAffectedRegisters(OutSet* affected_registers, Zone* zone); int FindAffectedRegisters(DynamicBitSet* affected_registers, Zone* zone);
void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register, void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register,
const OutSet& affected_registers, const DynamicBitSet& affected_registers,
OutSet* registers_to_pop, DynamicBitSet* registers_to_pop,
OutSet* registers_to_clear, Zone* zone); DynamicBitSet* registers_to_clear, Zone* zone);
void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register, void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register,
const OutSet& registers_to_pop, const DynamicBitSet& registers_to_pop,
const OutSet& registers_to_clear); const DynamicBitSet& registers_to_clear);
int cp_offset_; int cp_offset_;
DeferredAction* actions_; DeferredAction* actions_;
Label* backtrack_; Label* backtrack_;