[regexp] Refactor OutSet and BoyerMoorePositionInfo
OutSet: The more advanced features of OutSet are no longer used, hence the rename to DynamicBitSet to reflect its current purpose. BoyerMoorePositionInfo: Use a std::bitset backing store in BoyerMoorePositionInfo (previously this was based on a (statically-sized) ZoneList<bool>). Bug: v8:9359 Change-Id: I40ca89467ae90ee90c616be5fd0d51e54e94e157 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1664064 Commit-Queue: Jakob Gruber <jgruber@chromium.org> Reviewed-by: Peter Marshall <petermarshall@chromium.org> Cr-Commit-Position: refs/heads/master@{#62277}
This commit is contained in:
parent
983ab01a68
commit
4fe611ec58
@ -320,7 +320,41 @@ bool Trace::GetStoredPosition(int reg, int* cp_offset) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) {
|
// A (dynamically-sized) set of unsigned integers that behaves especially well
|
||||||
|
// on small integers (< kFirstLimit). May do zone-allocation.
|
||||||
|
class DynamicBitSet : public ZoneObject {
|
||||||
|
public:
|
||||||
|
V8_EXPORT_PRIVATE bool Get(unsigned value) const {
|
||||||
|
if (value < kFirstLimit) {
|
||||||
|
return (first_ & (1 << value)) != 0;
|
||||||
|
} else if (remaining_ == nullptr) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return remaining_->Contains(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Destructively set a value in this set.
|
||||||
|
void Set(unsigned value, Zone* zone) {
|
||||||
|
if (value < kFirstLimit) {
|
||||||
|
first_ |= (1 << value);
|
||||||
|
} else {
|
||||||
|
if (remaining_ == nullptr)
|
||||||
|
remaining_ = new (zone) ZoneList<unsigned>(1, zone);
|
||||||
|
if (remaining_->is_empty() || !remaining_->Contains(value))
|
||||||
|
remaining_->Add(value, zone);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static constexpr unsigned kFirstLimit = 32;
|
||||||
|
|
||||||
|
uint32_t first_ = 0;
|
||||||
|
ZoneList<unsigned>* remaining_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
|
||||||
|
Zone* zone) {
|
||||||
int max_register = RegExpCompiler::kNoRegister;
|
int max_register = RegExpCompiler::kNoRegister;
|
||||||
for (DeferredAction* action = actions_; action != nullptr;
|
for (DeferredAction* action = actions_; action != nullptr;
|
||||||
action = action->next()) {
|
action = action->next()) {
|
||||||
@ -339,8 +373,8 @@ int Trace::FindAffectedRegisters(OutSet* affected_registers, Zone* zone) {
|
|||||||
|
|
||||||
void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
|
void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
|
||||||
int max_register,
|
int max_register,
|
||||||
const OutSet& registers_to_pop,
|
const DynamicBitSet& registers_to_pop,
|
||||||
const OutSet& registers_to_clear) {
|
const DynamicBitSet& registers_to_clear) {
|
||||||
for (int reg = max_register; reg >= 0; reg--) {
|
for (int reg = max_register; reg >= 0; reg--) {
|
||||||
if (registers_to_pop.Get(reg)) {
|
if (registers_to_pop.Get(reg)) {
|
||||||
assembler->PopRegister(reg);
|
assembler->PopRegister(reg);
|
||||||
@ -356,9 +390,10 @@ void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
|
|||||||
|
|
||||||
void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
||||||
int max_register,
|
int max_register,
|
||||||
const OutSet& affected_registers,
|
const DynamicBitSet& affected_registers,
|
||||||
OutSet* registers_to_pop,
|
DynamicBitSet* registers_to_pop,
|
||||||
OutSet* registers_to_clear, Zone* zone) {
|
DynamicBitSet* registers_to_clear,
|
||||||
|
Zone* zone) {
|
||||||
// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
|
// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
|
||||||
const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
|
const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
|
||||||
|
|
||||||
@ -502,7 +537,7 @@ void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate deferred actions here along with code to undo them again.
|
// Generate deferred actions here along with code to undo them again.
|
||||||
OutSet affected_registers;
|
DynamicBitSet affected_registers;
|
||||||
|
|
||||||
if (backtrack() != nullptr) {
|
if (backtrack() != nullptr) {
|
||||||
// Here we have a concrete backtrack location. These are set up by choice
|
// Here we have a concrete backtrack location. These are set up by choice
|
||||||
@ -513,8 +548,8 @@ void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
|
|||||||
|
|
||||||
int max_register =
|
int max_register =
|
||||||
FindAffectedRegisters(&affected_registers, compiler->zone());
|
FindAffectedRegisters(&affected_registers, compiler->zone());
|
||||||
OutSet registers_to_pop;
|
DynamicBitSet registers_to_pop;
|
||||||
OutSet registers_to_clear;
|
DynamicBitSet registers_to_clear;
|
||||||
PerformDeferredActions(assembler, max_register, affected_registers,
|
PerformDeferredActions(assembler, max_register, affected_registers,
|
||||||
&registers_to_pop, &registers_to_clear,
|
&registers_to_pop, &registers_to_clear,
|
||||||
compiler->zone());
|
compiler->zone());
|
||||||
@ -2540,15 +2575,15 @@ void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
|
|||||||
if (interval.to() - interval.from() >= kMapSize - 1) {
|
if (interval.to() - interval.from() >= kMapSize - 1) {
|
||||||
if (map_count_ != kMapSize) {
|
if (map_count_ != kMapSize) {
|
||||||
map_count_ = kMapSize;
|
map_count_ = kMapSize;
|
||||||
for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
|
for (int i = 0; i < kMapSize; i++) map_.set(i);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (int i = interval.from(); i <= interval.to(); i++) {
|
for (int i = interval.from(); i <= interval.to(); i++) {
|
||||||
int mod_character = (i & kMask);
|
int mod_character = (i & kMask);
|
||||||
if (!map_->at(mod_character)) {
|
if (!map_[mod_character]) {
|
||||||
map_count_++;
|
map_count_++;
|
||||||
map_->at(mod_character) = true;
|
map_.set(mod_character);
|
||||||
}
|
}
|
||||||
if (map_count_ == kMapSize) return;
|
if (map_count_ == kMapSize) return;
|
||||||
}
|
}
|
||||||
@ -2558,7 +2593,7 @@ void BoyerMoorePositionInfo::SetAll() {
|
|||||||
s_ = w_ = d_ = kLatticeUnknown;
|
s_ = w_ = d_ = kLatticeUnknown;
|
||||||
if (map_count_ != kMapSize) {
|
if (map_count_ != kMapSize) {
|
||||||
map_count_ = kMapSize;
|
map_count_ = kMapSize;
|
||||||
for (int i = 0; i < kMapSize; i++) map_->at(i) = true;
|
map_.set();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2572,7 +2607,7 @@ BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
|
|||||||
}
|
}
|
||||||
bitmaps_ = new (zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
|
bitmaps_ = new (zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
|
||||||
for (int i = 0; i < length; i++) {
|
for (int i = 0; i < length; i++) {
|
||||||
bitmaps_->Add(new (zone) BoyerMoorePositionInfo(zone), zone);
|
bitmaps_->Add(new (zone) BoyerMoorePositionInfo(), zone);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3278,46 +3313,6 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|||||||
on_success()->Emit(compiler, trace);
|
on_success()->Emit(compiler, trace);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------
|
|
||||||
// Splay tree
|
|
||||||
|
|
||||||
OutSet* OutSet::Extend(unsigned value, Zone* zone) {
|
|
||||||
if (Get(value)) return this;
|
|
||||||
if (successors(zone) != nullptr) {
|
|
||||||
for (int i = 0; i < successors(zone)->length(); i++) {
|
|
||||||
OutSet* successor = successors(zone)->at(i);
|
|
||||||
if (successor->Get(value)) return successor;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
successors_ = new (zone) ZoneList<OutSet*>(2, zone);
|
|
||||||
}
|
|
||||||
OutSet* result = new (zone) OutSet(first_, remaining_);
|
|
||||||
result->Set(value, zone);
|
|
||||||
successors(zone)->Add(result, zone);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void OutSet::Set(unsigned value, Zone* zone) {
|
|
||||||
if (value < kFirstLimit) {
|
|
||||||
first_ |= (1 << value);
|
|
||||||
} else {
|
|
||||||
if (remaining_ == nullptr)
|
|
||||||
remaining_ = new (zone) ZoneList<unsigned>(1, zone);
|
|
||||||
if (remaining_->is_empty() || !remaining_->Contains(value))
|
|
||||||
remaining_->Add(value, zone);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool OutSet::Get(unsigned value) const {
|
|
||||||
if (value < kFirstLimit) {
|
|
||||||
return (first_ & (1 << value)) != 0;
|
|
||||||
} else if (remaining_ == nullptr) {
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
return remaining_->Contains(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------
|
// -------------------------------------------------------------------
|
||||||
// Analysis
|
// Analysis
|
||||||
|
|
||||||
|
@ -5,12 +5,15 @@
|
|||||||
#ifndef V8_REGEXP_REGEXP_COMPILER_H_
|
#ifndef V8_REGEXP_REGEXP_COMPILER_H_
|
||||||
#define V8_REGEXP_REGEXP_COMPILER_H_
|
#define V8_REGEXP_REGEXP_COMPILER_H_
|
||||||
|
|
||||||
|
#include <bitset>
|
||||||
|
|
||||||
#include "src/base/small-vector.h"
|
#include "src/base/small-vector.h"
|
||||||
#include "src/regexp/regexp-nodes.h"
|
#include "src/regexp/regexp-nodes.h"
|
||||||
|
|
||||||
namespace v8 {
|
namespace v8 {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
class DynamicBitSet;
|
||||||
class Isolate;
|
class Isolate;
|
||||||
|
|
||||||
namespace regexp_compiler_constants {
|
namespace regexp_compiler_constants {
|
||||||
@ -75,34 +78,6 @@ inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
|
|||||||
return IsUnicode(flags) && IgnoreCase(flags);
|
return IsUnicode(flags) && IgnoreCase(flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
// A set of unsigned integers that behaves especially well on small
|
|
||||||
// integers (< 32). May do zone-allocation.
|
|
||||||
class OutSet : public ZoneObject {
|
|
||||||
public:
|
|
||||||
OutSet() : first_(0), remaining_(nullptr), successors_(nullptr) {}
|
|
||||||
OutSet* Extend(unsigned value, Zone* zone);
|
|
||||||
V8_EXPORT_PRIVATE bool Get(unsigned value) const;
|
|
||||||
static const unsigned kFirstLimit = 32;
|
|
||||||
|
|
||||||
private:
|
|
||||||
// Destructively set a value in this set. In most cases you want
|
|
||||||
// to use Extend instead to ensure that only one instance exists
|
|
||||||
// that contains the same values.
|
|
||||||
void Set(unsigned value, Zone* zone);
|
|
||||||
|
|
||||||
// The successors are a list of sets that contain the same values
|
|
||||||
// as this set and the one more value that is not present in this
|
|
||||||
// set.
|
|
||||||
ZoneList<OutSet*>* successors(Zone* zone) { return successors_; }
|
|
||||||
|
|
||||||
OutSet(uint32_t first, ZoneList<unsigned>* remaining)
|
|
||||||
: first_(first), remaining_(remaining), successors_(nullptr) {}
|
|
||||||
uint32_t first_;
|
|
||||||
ZoneList<unsigned>* remaining_;
|
|
||||||
ZoneList<OutSet*>* successors_;
|
|
||||||
friend class Trace;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Details of a quick mask-compare check that can look ahead in the
|
// Details of a quick mask-compare check that can look ahead in the
|
||||||
// input stream.
|
// input stream.
|
||||||
class QuickCheckDetails {
|
class QuickCheckDetails {
|
||||||
@ -186,22 +161,10 @@ inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
|
|||||||
|
|
||||||
class BoyerMoorePositionInfo : public ZoneObject {
|
class BoyerMoorePositionInfo : public ZoneObject {
|
||||||
public:
|
public:
|
||||||
explicit BoyerMoorePositionInfo(Zone* zone)
|
bool at(int i) const { return map_[i]; }
|
||||||
: map_(new (zone) ZoneList<bool>(kMapSize, zone)),
|
|
||||||
map_count_(0),
|
|
||||||
w_(kNotYet),
|
|
||||||
s_(kNotYet),
|
|
||||||
d_(kNotYet),
|
|
||||||
surrogate_(kNotYet) {
|
|
||||||
for (int i = 0; i < kMapSize; i++) {
|
|
||||||
map_->Add(false, zone);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool& at(int i) { return map_->at(i); }
|
static constexpr int kMapSize = 128;
|
||||||
|
static constexpr int kMask = kMapSize - 1;
|
||||||
static const int kMapSize = 128;
|
|
||||||
static const int kMask = kMapSize - 1;
|
|
||||||
|
|
||||||
int map_count() const { return map_count_; }
|
int map_count() const { return map_count_; }
|
||||||
|
|
||||||
@ -212,12 +175,12 @@ class BoyerMoorePositionInfo : public ZoneObject {
|
|||||||
bool is_word() { return w_ == kLatticeIn; }
|
bool is_word() { return w_ == kLatticeIn; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ZoneList<bool>* map_;
|
std::bitset<kMapSize> map_;
|
||||||
int map_count_; // Number of set bits in the map.
|
int map_count_ = 0; // Number of set bits in the map.
|
||||||
ContainedInLattice w_; // The \w character class.
|
ContainedInLattice w_ = kNotYet; // The \w character class.
|
||||||
ContainedInLattice s_; // The \s character class.
|
ContainedInLattice s_ = kNotYet; // The \s character class.
|
||||||
ContainedInLattice d_; // The \d character class.
|
ContainedInLattice d_ = kNotYet; // The \d character class.
|
||||||
ContainedInLattice surrogate_; // Surrogate UTF-16 code units.
|
ContainedInLattice surrogate_ = kNotYet; // Surrogate UTF-16 code units.
|
||||||
};
|
};
|
||||||
|
|
||||||
class BoyerMooreLookahead : public ZoneObject {
|
class BoyerMooreLookahead : public ZoneObject {
|
||||||
@ -414,14 +377,14 @@ class Trace {
|
|||||||
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
|
void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
|
int FindAffectedRegisters(DynamicBitSet* affected_registers, Zone* zone);
|
||||||
void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register,
|
void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register,
|
||||||
const OutSet& affected_registers,
|
const DynamicBitSet& affected_registers,
|
||||||
OutSet* registers_to_pop,
|
DynamicBitSet* registers_to_pop,
|
||||||
OutSet* registers_to_clear, Zone* zone);
|
DynamicBitSet* registers_to_clear, Zone* zone);
|
||||||
void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register,
|
void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register,
|
||||||
const OutSet& registers_to_pop,
|
const DynamicBitSet& registers_to_pop,
|
||||||
const OutSet& registers_to_clear);
|
const DynamicBitSet& registers_to_clear);
|
||||||
int cp_offset_;
|
int cp_offset_;
|
||||||
DeferredAction* actions_;
|
DeferredAction* actions_;
|
||||||
Label* backtrack_;
|
Label* backtrack_;
|
||||||
|
Loading…
Reference in New Issue
Block a user