Limit work done analyzing regexps with very large fanout.
BUG=128821
Review URL: https://chromiumcodereview.appspot.com/10448117
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11696 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
d87249945f
commit
ae4fcd9702
@ -2192,12 +2192,14 @@ int ActionNode::EatsAtLeast(int still_to_find,
|
|||||||
|
|
||||||
void ActionNode::FillInBMInfo(int offset,
|
void ActionNode::FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
if (type_ == BEGIN_SUBMATCH) {
|
if (type_ == BEGIN_SUBMATCH) {
|
||||||
bm->SetRest(offset);
|
bm->SetRest(offset);
|
||||||
} else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
|
} else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
|
||||||
on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
on_success()->FillInBMInfo(
|
||||||
|
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||||
}
|
}
|
||||||
SaveBMInfo(bm, not_at_start, offset);
|
SaveBMInfo(bm, not_at_start, offset);
|
||||||
}
|
}
|
||||||
@ -2221,11 +2223,13 @@ int AssertionNode::EatsAtLeast(int still_to_find,
|
|||||||
|
|
||||||
void AssertionNode::FillInBMInfo(int offset,
|
void AssertionNode::FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
// Match the behaviour of EatsAtLeast on this node.
|
// Match the behaviour of EatsAtLeast on this node.
|
||||||
if (type() == AT_START && not_at_start) return;
|
if (type() == AT_START && not_at_start) return;
|
||||||
on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
on_success()->FillInBMInfo(
|
||||||
|
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||||
SaveBMInfo(bm, not_at_start, offset);
|
SaveBMInfo(bm, not_at_start, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2808,15 +2812,18 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
|||||||
|
|
||||||
void LoopChoiceNode::FillInBMInfo(int offset,
|
void LoopChoiceNode::FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
if (body_can_be_zero_length_ ||
|
if (body_can_be_zero_length_ ||
|
||||||
recursion_depth > RegExpCompiler::kMaxRecursion) {
|
recursion_depth > RegExpCompiler::kMaxRecursion ||
|
||||||
|
budget <= 0) {
|
||||||
bm->SetRest(offset);
|
bm->SetRest(offset);
|
||||||
SaveBMInfo(bm, not_at_start, offset);
|
SaveBMInfo(bm, not_at_start, offset);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
ChoiceNode::FillInBMInfo(
|
||||||
|
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||||
SaveBMInfo(bm, not_at_start, offset);
|
SaveBMInfo(bm, not_at_start, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2918,7 +2925,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
|
|||||||
if (eats_at_least >= 1) {
|
if (eats_at_least >= 1) {
|
||||||
BoyerMooreLookahead* bm =
|
BoyerMooreLookahead* bm =
|
||||||
new BoyerMooreLookahead(eats_at_least, compiler);
|
new BoyerMooreLookahead(eats_at_least, compiler);
|
||||||
FillInBMInfo(0, 0, bm, not_at_start);
|
FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
|
||||||
if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
|
if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
|
||||||
if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
|
if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
|
||||||
}
|
}
|
||||||
@ -3856,7 +3863,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|||||||
BoyerMooreLookahead* bm =
|
BoyerMooreLookahead* bm =
|
||||||
new BoyerMooreLookahead(eats_at_least, compiler);
|
new BoyerMooreLookahead(eats_at_least, compiler);
|
||||||
GuardedAlternative alt0 = alternatives_->at(0);
|
GuardedAlternative alt0 = alternatives_->at(0);
|
||||||
alt0.node()->FillInBMInfo(0, 0, bm, not_at_start);
|
alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
|
||||||
skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
|
skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -5597,6 +5604,7 @@ void Analysis::VisitAssertion(AssertionNode* that) {
|
|||||||
|
|
||||||
void BackReferenceNode::FillInBMInfo(int offset,
|
void BackReferenceNode::FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
// Working out the set of characters that a backreference can match is too
|
// Working out the set of characters that a backreference can match is too
|
||||||
@ -5612,9 +5620,11 @@ STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
|
|||||||
|
|
||||||
void ChoiceNode::FillInBMInfo(int offset,
|
void ChoiceNode::FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
ZoneList<GuardedAlternative>* alts = alternatives();
|
ZoneList<GuardedAlternative>* alts = alternatives();
|
||||||
|
budget = (budget - 1) / alts->length();
|
||||||
for (int i = 0; i < alts->length(); i++) {
|
for (int i = 0; i < alts->length(); i++) {
|
||||||
GuardedAlternative& alt = alts->at(i);
|
GuardedAlternative& alt = alts->at(i);
|
||||||
if (alt.guards() != NULL && alt.guards()->length() != 0) {
|
if (alt.guards() != NULL && alt.guards()->length() != 0) {
|
||||||
@ -5622,7 +5632,8 @@ void ChoiceNode::FillInBMInfo(int offset,
|
|||||||
SaveBMInfo(bm, not_at_start, offset);
|
SaveBMInfo(bm, not_at_start, offset);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
alt.node()->FillInBMInfo(
|
||||||
|
offset, recursion_depth + 1, budget, bm, not_at_start);
|
||||||
}
|
}
|
||||||
SaveBMInfo(bm, not_at_start, offset);
|
SaveBMInfo(bm, not_at_start, offset);
|
||||||
}
|
}
|
||||||
@ -5630,6 +5641,7 @@ void ChoiceNode::FillInBMInfo(int offset,
|
|||||||
|
|
||||||
void TextNode::FillInBMInfo(int initial_offset,
|
void TextNode::FillInBMInfo(int initial_offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
if (initial_offset >= bm->length()) return;
|
if (initial_offset >= bm->length()) return;
|
||||||
@ -5686,6 +5698,7 @@ void TextNode::FillInBMInfo(int initial_offset,
|
|||||||
}
|
}
|
||||||
on_success()->FillInBMInfo(offset,
|
on_success()->FillInBMInfo(offset,
|
||||||
recursion_depth + 1,
|
recursion_depth + 1,
|
||||||
|
budget - 1,
|
||||||
bm,
|
bm,
|
||||||
true); // Not at start after a text node.
|
true); // Not at start after a text node.
|
||||||
if (initial_offset == 0) set_bm_info(not_at_start, bm);
|
if (initial_offset == 0) set_bm_info(not_at_start, bm);
|
||||||
|
@ -580,9 +580,12 @@ class RegExpNode: public ZoneObject {
|
|||||||
// Collects information on the possible code units (mod 128) that can match if
|
// Collects information on the possible code units (mod 128) that can match if
|
||||||
// we look forward. This is used for a Boyer-Moore-like string searching
|
// we look forward. This is used for a Boyer-Moore-like string searching
|
||||||
// implementation. TODO(erikcorry): This should share more code with
|
// implementation. TODO(erikcorry): This should share more code with
|
||||||
// EatsAtLeast, GetQuickCheckDetails.
|
// EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
|
||||||
|
// the number of nodes we are willing to look at in order to create this data.
|
||||||
|
static const int kFillInBMBudget = 200;
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
@ -685,9 +688,11 @@ class SeqRegExpNode: public RegExpNode {
|
|||||||
virtual RegExpNode* FilterASCII(int depth);
|
virtual RegExpNode* FilterASCII(int depth);
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
on_success_->FillInBMInfo(
|
||||||
|
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||||
if (offset == 0) set_bm_info(not_at_start, bm);
|
if (offset == 0) set_bm_info(not_at_start, bm);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -742,6 +747,7 @@ class ActionNode: public SeqRegExpNode {
|
|||||||
}
|
}
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
Type type() { return type_; }
|
Type type() { return type_; }
|
||||||
@ -813,6 +819,7 @@ class TextNode: public SeqRegExpNode {
|
|||||||
RegExpCompiler* compiler);
|
RegExpCompiler* compiler);
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
void CalculateOffsets();
|
void CalculateOffsets();
|
||||||
@ -875,6 +882,7 @@ class AssertionNode: public SeqRegExpNode {
|
|||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
AssertionNodeType type() { return type_; }
|
AssertionNodeType type() { return type_; }
|
||||||
@ -915,6 +923,7 @@ class BackReferenceNode: public SeqRegExpNode {
|
|||||||
}
|
}
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
|
|
||||||
@ -942,6 +951,7 @@ class EndNode: public RegExpNode {
|
|||||||
}
|
}
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
// Returning 0 from EatsAtLeast should ensure we never get here.
|
// Returning 0 from EatsAtLeast should ensure we never get here.
|
||||||
@ -1034,6 +1044,7 @@ class ChoiceNode: public RegExpNode {
|
|||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
|
|
||||||
@ -1086,10 +1097,11 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
|
|||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start) {
|
bool not_at_start) {
|
||||||
alternatives_->at(1).node()->FillInBMInfo(
|
alternatives_->at(1).node()->FillInBMInfo(
|
||||||
offset, recursion_depth + 1, bm, not_at_start);
|
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||||
if (offset == 0) set_bm_info(not_at_start, bm);
|
if (offset == 0) set_bm_info(not_at_start, bm);
|
||||||
}
|
}
|
||||||
// For a negative lookahead we don't emit the quick check for the
|
// For a negative lookahead we don't emit the quick check for the
|
||||||
@ -1121,6 +1133,7 @@ class LoopChoiceNode: public ChoiceNode {
|
|||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
virtual void FillInBMInfo(int offset,
|
virtual void FillInBMInfo(int offset,
|
||||||
int recursion_depth,
|
int recursion_depth,
|
||||||
|
int budget,
|
||||||
BoyerMooreLookahead* bm,
|
BoyerMooreLookahead* bm,
|
||||||
bool not_at_start);
|
bool not_at_start);
|
||||||
RegExpNode* loop_node() { return loop_node_; }
|
RegExpNode* loop_node() { return loop_node_; }
|
||||||
|
@ -162,7 +162,6 @@ assertEquals("*foo * baz", a);
|
|||||||
// string we can test that the relevant node is removed by verifying that
|
// string we can test that the relevant node is removed by verifying that
|
||||||
// there is no hang.
|
// there is no hang.
|
||||||
function NoHang(re) {
|
function NoHang(re) {
|
||||||
print(re);
|
|
||||||
"This is an ASCII string that could take forever".match(re);
|
"This is an ASCII string that could take forever".match(re);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,3 +56,5 @@ assertEquals(["bbc", "b"], /^(b+|a){1,2}?bc/.exec("bbc"));
|
|||||||
assertEquals(["bbaa", "a", "", "a"],
|
assertEquals(["bbaa", "a", "", "a"],
|
||||||
/((\3|b)\2(a)){2,}/.exec("bbaababbabaaaaabbaaaabba"));
|
/((\3|b)\2(a)){2,}/.exec("bbaababbabaaaaabbaaaabba"));
|
||||||
|
|
||||||
|
// From crbug.com/128821 - don't hang:
|
||||||
|
"".match(/((a|i|A|I|u|o|U|O)(s|c|b|c|d|f|g|h|j|k|l|m|n|p|q|r|s|t|v|w|x|y|z|B|C|D|F|G|H|J|K|L|M|N|P|Q|R|S|T|V|W|X|Y|Z)*) de\/da([.,!?\s]|$)/);
|
||||||
|
Loading…
Reference in New Issue
Block a user