Removed propagation of information about preceding nodes by expanding following nodes.  Found a better solution.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1000 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
christian.plesner.hansen@gmail.com 2008-12-18 14:30:53 +00:00
parent 392e6f849b
commit e5270bd6e4
3 changed files with 17 additions and 491 deletions

View File

@ -2453,17 +2453,6 @@ void DotPrinter::PrintAttributes(RegExpNode* that) {
printer.PrintBit("NI", info->follows_newline_interest);
printer.PrintBit("WI", info->follows_word_interest);
printer.PrintBit("SI", info->follows_start_interest);
printer.PrintBit("DN", info->determine_newline);
printer.PrintBit("DW", info->determine_word);
printer.PrintBit("DS", info->determine_start);
printer.PrintBit("DDN", info->does_determine_newline);
printer.PrintBit("DDW", info->does_determine_word);
printer.PrintBit("DDS", info->does_determine_start);
printer.PrintPositive("IW", info->is_word);
printer.PrintPositive("IN", info->is_newline);
printer.PrintPositive("FN", info->follows_newline);
printer.PrintPositive("FW", info->follows_word);
printer.PrintPositive("FS", info->follows_start);
Label* label = that->label();
if (label->is_bound())
printer.PrintPositive("@", label->pos());
@ -3075,7 +3064,6 @@ RegExpNode* RegExpNode::TryGetSibling(NodeInfo* info) {
RegExpNode* RegExpNode::EnsureSibling(NodeInfo* info, bool* cloned) {
ASSERT_EQ(false, *cloned);
ASSERT(!info->HasAssertions());
siblings_.Ensure(this);
RegExpNode* result = TryGetSibling(info);
if (result != NULL) return result;
@ -3307,7 +3295,7 @@ OutSet* DispatchTable::Get(uc16 value) {
// Analysis
void AssertionPropagation::EnsureAnalyzed(RegExpNode* that) {
void Analysis::EnsureAnalyzed(RegExpNode* that) {
if (that->info()->been_analyzed || that->info()->being_analyzed)
return;
that->info()->being_analyzed = true;
@ -3317,7 +3305,7 @@ void AssertionPropagation::EnsureAnalyzed(RegExpNode* that) {
}
void AssertionPropagation::VisitEnd(EndNode* that) {
void Analysis::VisitEnd(EndNode* that) {
// nothing to do
}
@ -3340,23 +3328,16 @@ void TextNode::CalculateOffsets() {
}
void AssertionPropagation::VisitText(TextNode* that) {
void Analysis::VisitText(TextNode* that) {
if (ignore_case_) {
that->MakeCaseIndependent();
}
EnsureAnalyzed(that->on_success());
NodeInfo* info = that->info();
NodeInfo* next_info = that->on_success()->info();
// If the following node is interested in what it follows then this
// node must determine it.
info->determine_newline = next_info->follows_newline_interest;
info->determine_word = next_info->follows_word_interest;
info->determine_start = next_info->follows_start_interest;
that->CalculateOffsets();
}
void AssertionPropagation::VisitAction(ActionNode* that) {
void Analysis::VisitAction(ActionNode* that) {
RegExpNode* target = that->on_success();
EnsureAnalyzed(target);
// If the next node is interested in what it follows then this node
@ -3365,7 +3346,7 @@ void AssertionPropagation::VisitAction(ActionNode* that) {
}
void AssertionPropagation::VisitChoice(ChoiceNode* that) {
void Analysis::VisitChoice(ChoiceNode* that) {
NodeInfo* info = that->info();
for (int i = 0; i < that->alternatives()->length(); i++) {
RegExpNode* node = that->alternatives()->at(i).node();
@ -3377,7 +3358,7 @@ void AssertionPropagation::VisitChoice(ChoiceNode* that) {
}
void AssertionPropagation::VisitLoopChoice(LoopChoiceNode* that) {
void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
NodeInfo* info = that->info();
for (int i = 0; i < that->alternatives()->length(); i++) {
RegExpNode* node = that->alternatives()->at(i).node();
@ -3393,211 +3374,11 @@ void AssertionPropagation::VisitLoopChoice(LoopChoiceNode* that) {
}
void AssertionPropagation::VisitBackReference(BackReferenceNode* that) {
void Analysis::VisitBackReference(BackReferenceNode* that) {
EnsureAnalyzed(that->on_success());
}
// -------------------------------------------------------------------
// Assumption expansion
// Returns the sibling of this node that matches this node's own info
// combined with the follows_word / follows_newline assumptions in |info|,
// creating it through ExpandLocal() when no such sibling exists yet.  The
// sibling's children are expanded unless that has already happened or is
// currently in progress.
RegExpNode* RegExpNode::EnsureExpanded(NodeInfo* info) {
  siblings_.Ensure(this);

  // Start from this node's info and adopt only the assumptions the node has
  // declared an interest in.
  NodeInfo wanted = *this->info();
  if (wanted.follows_word_interest) {
    wanted.follows_word = info->follows_word;
  }
  if (wanted.follows_newline_interest) {
    wanted.follows_newline = info->follows_newline;
  }

  // Whatever the node was asked to determine, the sibling must actually
  // determine.
  wanted.does_determine_newline = wanted.determine_newline;
  wanted.does_determine_word = wanted.determine_word;
  wanted.does_determine_start = wanted.determine_start;

  RegExpNode* sibling = TryGetSibling(&wanted);
  bool expand_children;
  if (sibling != NULL) {
    // Reuse the existing sibling; only expand it if nobody has started to.
    NodeInfo* existing = sibling->info();
    expand_children = !(existing->been_expanded || existing->being_expanded);
  } else {
    // A freshly created sibling is always expanded.
    sibling = ExpandLocal(&wanted);
    siblings_.Add(sibling);
    expand_children = true;
  }

  if (expand_children) {
    sibling->info()->being_expanded = true;
    sibling->ExpandChildren();
    sibling->info()->being_expanded = false;
    sibling->info()->been_expanded = true;
  }
  return sibling;
}
// Clones this choice node, resets the clone's compilation state and adds the
// assumptions in |info| to it.
RegExpNode* ChoiceNode::ExpandLocal(NodeInfo* info) {
  ChoiceNode* copy = this->Clone();
  NodeInfo* copy_info = copy->info();
  copy_info->ResetCompilationState();
  copy_info->AddAssumptions(info);
  return copy;
}
void ChoiceNode::ExpandChildren() {
ZoneList<GuardedAlternative>* alts = alternatives();
ZoneList<GuardedAlternative>* new_alts
= new ZoneList<GuardedAlternative>(alts->length());
for (int i = 0; i < alts->length(); i++) {
GuardedAlternative next = alts->at(i);
next.set_node(next.node()->EnsureExpanded(info()));
new_alts->Add(next);
}
alternatives_ = new_alts;
}
// Creates the node that stands in for this text node under the assumptions
// in |info|.  When the last text element is a character class and |info|
// asks for word-ness to be determined (does_determine_word), the class's
// ranges are split against CharacterRange::GetWordBounds(): a class that
// lies entirely on one side only records is_word and is then cloned as
// usual, while a class that straddles both sides is replaced by a two-way
// ChoiceNode.  In every other case the result is a clone of this node with
// its compilation state reset and the assumptions from |info| added.
RegExpNode* TextNode::ExpandLocal(NodeInfo* info) {
TextElement last = elements()->last();
if (last.type == TextElement::CHAR_CLASS) {
RegExpCharacterClass* char_class = last.data.u_char_class;
if (info->does_determine_word) {
// Filled in by Split(); a list left as NULL is treated below as "no
// characters of that kind".
ZoneList<CharacterRange>* word = NULL;
ZoneList<CharacterRange>* non_word = NULL;
CharacterRange::Split(char_class->ranges(),
CharacterRange::GetWordBounds(),
&word,
&non_word);
if (non_word == NULL) {
// This node contains no non-word characters so it must be
// all word.
// NOTE(review): this writes to the original node's info rather than the
// clone's; presumably the Clone() at the end copies it -- confirm that
// mutating |this| here is intended.
this->info()->is_word = NodeInfo::TRUE;
} else if (word == NULL) {
// Vice versa.
this->info()->is_word = NodeInfo::FALSE;
} else {
// If this character class contains both word and non-word
// characters we need to split it into two.
ChoiceNode* result = new ChoiceNode(2);
// Welcome to the family, son!
// (The choice node shares this node's sibling list.)
result->set_siblings(this->siblings());
*result->info() = *this->info();
result->info()->ResetCompilationState();
result->info()->AddAssumptions(info);
// First alternative: only the word ranges, continuing to this node's
// successor and flagged as determining (and being) a word character.
RegExpNode* word_node
= new TextNode(new RegExpCharacterClass(word, false),
on_success());
word_node->info()->determine_word = true;
word_node->info()->does_determine_word = true;
word_node->info()->is_word = NodeInfo::TRUE;
result->alternatives()->Add(GuardedAlternative(word_node));
// Second alternative: only the non-word ranges, same successor.
RegExpNode* non_word_node
= new TextNode(new RegExpCharacterClass(non_word, false),
on_success());
non_word_node->info()->determine_word = true;
non_word_node->info()->does_determine_word = true;
non_word_node->info()->is_word = NodeInfo::FALSE;
result->alternatives()->Add(GuardedAlternative(non_word_node));
return result;
}
}
}
// Default path: a plain clone carrying the new assumptions.
TextNode* clone = this->Clone();
clone->info()->ResetCompilationState();
clone->info()->AddAssumptions(info);
return clone;
}
// Expands the successor of a text node whose last element is the atom
// |that|.  The assumptions handed to the successor are derived from the
// atom's final character, but only for the properties this node has been
// asked to determine; everything else is passed on as UNKNOWN.
void TextNode::ExpandAtomChildren(RegExpAtom* that) {
  NodeInfo successor_info = *info();
  uc16 final_char = that->data()[that->data().length() - 1];

  // Word-ness of the final character.
  if (!info()->determine_word) {
    successor_info.follows_word = NodeInfo::UNKNOWN;
  } else if (IsRegExpWord(final_char)) {
    successor_info.follows_word = NodeInfo::TRUE;
  } else {
    successor_info.follows_word = NodeInfo::FALSE;
  }

  // Whether the final character is a newline.
  if (!info()->determine_newline) {
    successor_info.follows_newline = NodeInfo::UNKNOWN;
  } else if (IsRegExpNewline(final_char)) {
    successor_info.follows_newline = NodeInfo::TRUE;
  } else {
    successor_info.follows_newline = NodeInfo::FALSE;
  }

  // When asked to determine it, the successor is told that it does not
  // follow the start.
  successor_info.follows_start =
      info()->determine_start ? NodeInfo::FALSE : NodeInfo::UNKNOWN;

  RegExpNode* expanded = on_success()->EnsureExpanded(&successor_info);
  set_on_success(expanded);
}
// Expands the successor of a text node whose last element is a character
// class.  If this node determines word-ness, the successor is expanded
// against its own info with follows_word set to this node's is_word;
// otherwise it is expanded against this node's info unchanged.
void TextNode::ExpandCharClassChildren(RegExpCharacterClass* that) {
  if (!info()->does_determine_word) {
    set_on_success(on_success()->EnsureExpanded(info()));
    return;
  }
  // ASSERT(info()->is_word != NodeInfo::UNKNOWN);
  NodeInfo successor_info = *on_success()->info();
  successor_info.follows_word = info()->is_word;
  RegExpNode* expanded = on_success()->EnsureExpanded(&successor_info);
  set_on_success(expanded);
}
void TextNode::ExpandChildren() {
TextElement last = elements()->last();
switch (last.type) {
case TextElement::ATOM:
ExpandAtomChildren(last.data.u_atom);
break;
case TextElement::CHAR_CLASS:
ExpandCharClassChildren(last.data.u_char_class);
break;
default:
UNREACHABLE();
}
}
// Clones this action node, resets the clone's compilation state and adds the
// assumptions in |info| to it.
RegExpNode* ActionNode::ExpandLocal(NodeInfo* info) {
  ActionNode* copy = this->Clone();
  NodeInfo* copy_info = copy->info();
  copy_info->ResetCompilationState();
  copy_info->AddAssumptions(info);
  return copy;
}
void ActionNode::ExpandChildren() {
set_on_success(on_success()->EnsureExpanded(info()));
}
// Clones this back-reference node, resets the clone's compilation state and
// adds the assumptions in |info| to it.
RegExpNode* BackReferenceNode::ExpandLocal(NodeInfo* info) {
  BackReferenceNode* copy = this->Clone();
  NodeInfo* copy_info = copy->info();
  copy_info->ResetCompilationState();
  copy_info->AddAssumptions(info);
  return copy;
}
void BackReferenceNode::ExpandChildren() {
set_on_success(on_success()->EnsureExpanded(info()));
}
// Clones this end node, resets the clone's compilation state and adds the
// assumptions in |info| to it.
RegExpNode* EndNode::ExpandLocal(NodeInfo* info) {
  EndNode* copy = this->Clone();
  NodeInfo* copy_info = copy->info();
  copy_info->ResetCompilationState();
  copy_info->AddAssumptions(info);
  return copy;
}
// Intentionally empty: this implementation expands nothing.
void EndNode::ExpandChildren() {
}
// -------------------------------------------------------------------
// Dispatch table construction
@ -3708,110 +3489,6 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) {
}
#ifdef DEBUG
// Scope object that sets a node's |visited| bit on construction (asserting
// that it was clear) and clears it again on destruction.
class VisitNodeScope {
 public:
  explicit VisitNodeScope(RegExpNode* node) : node_(node) {
    ASSERT(!node->info()->visited);
    node_->info()->visited = true;
  }

  ~VisitNodeScope() { node_->info()->visited = false; }

 private:
  // The node whose |visited| bit this scope controls.
  RegExpNode* node_;
};
// Abstract visitor compiled only under DEBUG: subclasses supply
// ValidateInfo(), and one Visit<Type> override is declared for every node
// type listed by FOR_EACH_NODE_TYPE.
class NodeValidator : public NodeVisitor {
public:
// Checks a single node's info; implemented by each concrete validator.
virtual void ValidateInfo(NodeInfo* info) = 0;
// Expands to "virtual void Visit<Type>(<Type>Node* that);" per node type.
#define DECLARE_VISIT(Type) \
virtual void Visit##Type(Type##Node* that);
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
};
// Validator whose ValidateInfo() asserts that a node's info has been
// analyzed.
class PostAnalysisNodeValidator : public NodeValidator {
public:
virtual void ValidateInfo(NodeInfo* info);
};
// Validator whose ValidateInfo() asserts the consistency of the
// determine_* / does_determine_* and follows_* bits of a node's info.
class PostExpansionNodeValidator : public NodeValidator {
public:
virtual void ValidateInfo(NodeInfo* info);
};
// Asserts that the node owning |info| has completed analysis.
void PostAnalysisNodeValidator::ValidateInfo(NodeInfo* info) {
ASSERT(info->been_analyzed);
}
// Asserts that every determine_* request in |info| is matched by the
// corresponding does_determine_* bit, and that follows_word is known exactly
// when the node declared an interest in it.
void PostExpansionNodeValidator::ValidateInfo(NodeInfo* info) {
ASSERT_EQ(info->determine_newline, info->does_determine_newline);
ASSERT_EQ(info->determine_start, info->does_determine_start);
ASSERT_EQ(info->determine_word, info->does_determine_word);
ASSERT_EQ(info->follows_word_interest,
(info->follows_word != NodeInfo::UNKNOWN));
// Deliberately disabled: the equivalent checks for start and newline are
// never executed.
if (false) {
// These are still unimplemented.
ASSERT_EQ(info->follows_start_interest,
(info->follows_start != NodeInfo::UNKNOWN));
ASSERT_EQ(info->follows_newline_interest,
(info->follows_newline != NodeInfo::UNKNOWN));
}
}
// Validates an action node and then its successor, unless the node is
// already marked as visited.
void NodeValidator::VisitAction(ActionNode* that) {
  if (!that->info()->visited) {
    VisitNodeScope scope(that);
    ValidateInfo(that->info());
    RegExpNode* successor = that->on_success();
    successor->Accept(this);
  }
}
// Validates a back-reference node and then its successor, unless the node is
// already marked as visited.
void NodeValidator::VisitBackReference(BackReferenceNode* that) {
  if (!that->info()->visited) {
    VisitNodeScope scope(that);
    ValidateInfo(that->info());
    RegExpNode* successor = that->on_success();
    successor->Accept(this);
  }
}
// Validates a choice node and then each of its alternatives in order, unless
// the node is already marked as visited.
void NodeValidator::VisitChoice(ChoiceNode* that) {
  if (!that->info()->visited) {
    VisitNodeScope scope(that);
    ValidateInfo(that->info());
    ZoneList<GuardedAlternative>* alternatives = that->alternatives();
    for (int index = 0; index < alternatives->length(); index++) {
      RegExpNode* target = alternatives->at(index).node();
      target->Accept(this);
    }
  }
}
// Validates an end node unless it is already marked as visited.
void NodeValidator::VisitEnd(EndNode* that) {
  if (!that->info()->visited) {
    VisitNodeScope scope(that);
    ValidateInfo(that->info());
  }
}
// Validates a text node and then its successor, unless the node is already
// marked as visited.
void NodeValidator::VisitText(TextNode* that) {
  if (!that->info()->visited) {
    VisitNodeScope scope(that);
    ValidateInfo(that->info());
    RegExpNode* successor = that->on_success();
    successor->Accept(this);
  }
}
#endif
Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
bool ignore_case,
bool is_multiline,
@ -3834,43 +3511,16 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
new RegExpCharacterClass('*'),
&compiler,
captured_body);
AssertionPropagation analysis(ignore_case);
data->node = node;
Analysis analysis(ignore_case);
analysis.EnsureAnalyzed(node);
NodeInfo info = *node->info();
data->has_lookbehind = info.HasLookbehind();
if (data->has_lookbehind) {
// If this node needs information about the preceding text we let
// it start with a character class that consumes a single character
// and proceeds to wherever is appropriate. This means that if
// has_lookbehind is set the code generator must start one character
// before the start position.
node = new TextNode(new RegExpCharacterClass('*'), node);
analysis.EnsureAnalyzed(node);
}
#ifdef DEBUG
PostAnalysisNodeValidator post_analysis_validator;
node->Accept(&post_analysis_validator);
#endif
node = node->EnsureExpanded(&info);
#ifdef DEBUG
PostExpansionNodeValidator post_expansion_validator;
node->Accept(&post_expansion_validator);
#endif
data->node = node;
if (is_multiline && !FLAG_attempt_multiline_irregexp) {
return Handle<FixedArray>::null();
}
if (data->has_lookbehind) {
return Handle<FixedArray>::null();
}
if (FLAG_irregexp_native) {
#ifdef ARM
// Unimplemented, fall-through to bytecode implementation.

View File

@ -460,23 +460,10 @@ struct NodeInfo {
NodeInfo()
: being_analyzed(false),
been_analyzed(false),
being_expanded(false),
been_expanded(false),
determine_word(false),
determine_newline(false),
determine_start(false),
does_determine_word(false),
does_determine_newline(false),
does_determine_start(false),
follows_word_interest(false),
follows_newline_interest(false),
follows_start_interest(false),
is_word(UNKNOWN),
is_newline(UNKNOWN),
at_end(false),
follows_word(UNKNOWN),
follows_newline(UNKNOWN),
follows_start(UNKNOWN),
visited(false) { }
// Returns true if the interests and assumptions of this node
@ -485,19 +472,7 @@ struct NodeInfo {
return (at_end == that->at_end) &&
(follows_word_interest == that->follows_word_interest) &&
(follows_newline_interest == that->follows_newline_interest) &&
(follows_start_interest == that->follows_start_interest) &&
(follows_word == that->follows_word) &&
(follows_newline == that->follows_newline) &&
(follows_start == that->follows_start) &&
(does_determine_word == that->does_determine_word) &&
(does_determine_newline == that->does_determine_newline) &&
(does_determine_start == that->does_determine_start);
}
bool HasAssertions() {
return (follows_word != UNKNOWN) ||
(follows_newline != UNKNOWN) ||
(follows_start != UNKNOWN);
(follows_start_interest == that->follows_start_interest);
}
// Updates the interests of this node given the interests of the
@ -509,26 +484,6 @@ struct NodeInfo {
follows_start_interest |= that->follows_start_interest;
}
// Merges the assumptions of |that| into this info.  Each follows_* value
// that |that| knows (i.e. is not UNKNOWN) is copied over, after asserting
// that it does not contradict a value already known here.  The
// does_determine_* bits are overwritten unconditionally.
void AddAssumptions(NodeInfo* that) {
if (that->follows_word != UNKNOWN) {
ASSERT(follows_word == UNKNOWN || follows_word == that->follows_word);
follows_word = that->follows_word;
}
if (that->follows_newline != UNKNOWN) {
ASSERT(follows_newline == UNKNOWN ||
follows_newline == that->follows_newline);
follows_newline = that->follows_newline;
}
if (that->follows_start != UNKNOWN) {
ASSERT(follows_start == UNKNOWN ||
follows_start == that->follows_start);
follows_start = that->follows_start;
}
// Unlike the follows_* values these are copied without any consistency
// check.
does_determine_word = that->does_determine_word;
does_determine_newline = that->does_determine_newline;
does_determine_start = that->does_determine_start;
}
bool HasLookbehind() {
return follows_word_interest ||
follows_newline_interest ||
@ -546,25 +501,10 @@ struct NodeInfo {
// Clears the analysis and expansion progress bits.
void ResetCompilationState() {
  being_analyzed = been_analyzed = false;
  being_expanded = been_expanded = false;
}
bool being_analyzed: 1;
bool been_analyzed: 1;
bool being_expanded: 1;
bool been_expanded: 1;
// These bits are set if this node must propagate forward information
// about the last character it consumed (or, in the case of 'start',
// if it is at the start of the input).
bool determine_word: 1;
bool determine_newline: 1;
bool determine_start: 1;
bool does_determine_word: 1;
bool does_determine_newline: 1;
bool does_determine_start: 1;
// These bits are set if this node has to know what the preceding
// character was.
@ -572,35 +512,11 @@ struct NodeInfo {
bool follows_newline_interest: 1;
bool follows_start_interest: 1;
TriBool is_word: 2;
TriBool is_newline: 2;
bool at_end: 1;
// These bits are set if the node can make assumptions about what
// the previous character was.
TriBool follows_word: 2;
TriBool follows_newline: 2;
TriBool follows_start: 2;
bool visited: 1;
};
// Scope object that sets |being_expanded| on a NodeInfo on construction
// (asserting that it was clear) and clears it again on destruction.
class ExpansionGuard {
 public:
  explicit inline ExpansionGuard(NodeInfo* info) : info_(info) {
    ASSERT(!info->being_expanded);
    info_->being_expanded = true;
  }

  inline ~ExpansionGuard() { info_->being_expanded = false; }

 private:
  // The info whose |being_expanded| bit this guard controls.
  NodeInfo* info_;
};
class SiblingList {
public:
SiblingList() : list_(NULL) { }
@ -634,10 +550,6 @@ class RegExpNode: public ZoneObject {
Label* label() { return &label_; }
static const int kMaxVariantsGenerated = 10;
RegExpNode* EnsureExpanded(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0;
virtual void ExpandChildren() = 0;
// Propagates the given interest information forward. When seeing
// \bfoo for instance, the \b is implemented by propagating forward
// to the 'foo' string that it should only succeed if its first
@ -721,8 +633,6 @@ class ActionNode: public SeqRegExpNode {
RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
virtual RegExpNode* PropagateForward(NodeInfo* info);
Type type() { return type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops.
@ -768,8 +678,6 @@ class TextNode: public SeqRegExpNode {
}
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
ZoneList<TextElement>* elements() { return elms_; }
void MakeCaseIndependent();
@ -780,10 +688,8 @@ class TextNode: public SeqRegExpNode {
return result;
}
void CalculateOffsets();
private:
void ExpandAtomChildren(RegExpAtom* that);
void ExpandCharClassChildren(RegExpCharacterClass* that);
private:
ZoneList<TextElement>* elms_;
};
@ -801,8 +707,6 @@ class BackReferenceNode: public SeqRegExpNode {
int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
private:
@ -818,8 +722,6 @@ class EndNode: public RegExpNode {
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
virtual EndNode* Clone() { return new EndNode(*this); }
protected:
@ -888,8 +790,6 @@ class ChoiceNode: public RegExpNode {
DispatchTable* GetTable(bool ignore_case);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
bool being_calculated() { return being_calculated_; }
@ -901,7 +801,7 @@ class ChoiceNode: public RegExpNode {
private:
friend class DispatchTableConstructor;
friend class AssertionPropagation;
friend class Analysis;
void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard *guard,
GenerationVariant* variant);
@ -1091,33 +991,9 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
// +-------+ ---> +------------+
// | word? | | check word |
// +-------+ +------------+
//
// At a later phase all nodes that determine information for their
// following nodes are split into several 'sibling' nodes. In this
// case the first '.' is split into one node that only matches words
// and one that only matches non-words. The second '.' is also split,
// into one node that assumes that the previous character was a word
// character and one that assumes that it was non-word. In this case
// the result is
//
// +------------------+ +------------------+
// /--> | intersect(., \w) | ---> | intersect(., \W) |
// | +------------------+ +------------------+
// | | follows \w |
// | +------------------+
// --?
// | +------------------+ +------------------+
// \--> | intersect(., \W) | ---> | intersect(., \w) |
// +------------------+ +------------------+
// | follows \W |
// +------------------+
//
// This way we don't need to explicitly check the previous character
// but can always assume that whoever consumed the previous character
// has propagated the relevant information forward.
class AssertionPropagation: public NodeVisitor {
class Analysis: public NodeVisitor {
public:
explicit AssertionPropagation(bool ignore_case)
explicit Analysis(bool ignore_case)
: ignore_case_(ignore_case) { }
void EnsureAnalyzed(RegExpNode* node);
@ -1130,7 +1006,7 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
private:
bool ignore_case_;
DISALLOW_IMPLICIT_CONSTRUCTORS(AssertionPropagation);
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
@ -1138,12 +1014,10 @@ struct RegExpCompileData {
RegExpCompileData()
: tree(NULL),
node(NULL),
has_lookbehind(false),
simple(true),
capture_count(0) { }
RegExpTree* tree;
RegExpNode* node;
bool has_lookbehind;
bool simple;
Handle<String> error;
int capture_count;

View File

@ -4192,6 +4192,8 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
Advance(2);
return CharacterRange::Singleton(0); // Return dummy value.
}
case kEndMarker:
ReportError(CStrVector("\\ at end of pattern") CHECK_FAILED);
default:
uc32 c = ParseClassCharacterEscape(CHECK_FAILED);
return CharacterRange::Singleton(c);