Limit work done analyzing regexps with very large fanout.

BUG=128821
Review URL: https://chromiumcodereview.appspot.com/10448117

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11696 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2012-06-01 11:28:52 +00:00
parent d87249945f
commit ae4fcd9702
4 changed files with 38 additions and 11 deletions

View File

@ -2192,12 +2192,14 @@ int ActionNode::EatsAtLeast(int still_to_find,
void ActionNode::FillInBMInfo(int offset, void ActionNode::FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
if (type_ == BEGIN_SUBMATCH) { if (type_ == BEGIN_SUBMATCH) {
bm->SetRest(offset); bm->SetRest(offset);
} else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); on_success()->FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
} }
SaveBMInfo(bm, not_at_start, offset); SaveBMInfo(bm, not_at_start, offset);
} }
@ -2221,11 +2223,13 @@ int AssertionNode::EatsAtLeast(int still_to_find,
void AssertionNode::FillInBMInfo(int offset, void AssertionNode::FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
// Match the behaviour of EatsAtLeast on this node. // Match the behaviour of EatsAtLeast on this node.
if (type() == AT_START && not_at_start) return; if (type() == AT_START && not_at_start) return;
on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); on_success()->FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
SaveBMInfo(bm, not_at_start, offset); SaveBMInfo(bm, not_at_start, offset);
} }
@ -2808,15 +2812,18 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
void LoopChoiceNode::FillInBMInfo(int offset, void LoopChoiceNode::FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
if (body_can_be_zero_length_ || if (body_can_be_zero_length_ ||
recursion_depth > RegExpCompiler::kMaxRecursion) { recursion_depth > RegExpCompiler::kMaxRecursion ||
budget <= 0) {
bm->SetRest(offset); bm->SetRest(offset);
SaveBMInfo(bm, not_at_start, offset); SaveBMInfo(bm, not_at_start, offset);
return; return;
} }
ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); ChoiceNode::FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
SaveBMInfo(bm, not_at_start, offset); SaveBMInfo(bm, not_at_start, offset);
} }
@ -2918,7 +2925,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
if (eats_at_least >= 1) { if (eats_at_least >= 1) {
BoyerMooreLookahead* bm = BoyerMooreLookahead* bm =
new BoyerMooreLookahead(eats_at_least, compiler); new BoyerMooreLookahead(eats_at_least, compiler);
FillInBMInfo(0, 0, bm, not_at_start); FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
} }
@ -3856,7 +3863,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
BoyerMooreLookahead* bm = BoyerMooreLookahead* bm =
new BoyerMooreLookahead(eats_at_least, compiler); new BoyerMooreLookahead(eats_at_least, compiler);
GuardedAlternative alt0 = alternatives_->at(0); GuardedAlternative alt0 = alternatives_->at(0);
alt0.node()->FillInBMInfo(0, 0, bm, not_at_start); alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
} }
} else { } else {
@ -5597,6 +5604,7 @@ void Analysis::VisitAssertion(AssertionNode* that) {
void BackReferenceNode::FillInBMInfo(int offset, void BackReferenceNode::FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
// Working out the set of characters that a backreference can match is too // Working out the set of characters that a backreference can match is too
@ -5612,9 +5620,11 @@ STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
void ChoiceNode::FillInBMInfo(int offset, void ChoiceNode::FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
ZoneList<GuardedAlternative>* alts = alternatives(); ZoneList<GuardedAlternative>* alts = alternatives();
budget = (budget - 1) / alts->length();
for (int i = 0; i < alts->length(); i++) { for (int i = 0; i < alts->length(); i++) {
GuardedAlternative& alt = alts->at(i); GuardedAlternative& alt = alts->at(i);
if (alt.guards() != NULL && alt.guards()->length() != 0) { if (alt.guards() != NULL && alt.guards()->length() != 0) {
@ -5622,7 +5632,8 @@ void ChoiceNode::FillInBMInfo(int offset,
SaveBMInfo(bm, not_at_start, offset); SaveBMInfo(bm, not_at_start, offset);
return; return;
} }
alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); alt.node()->FillInBMInfo(
offset, recursion_depth + 1, budget, bm, not_at_start);
} }
SaveBMInfo(bm, not_at_start, offset); SaveBMInfo(bm, not_at_start, offset);
} }
@ -5630,6 +5641,7 @@ void ChoiceNode::FillInBMInfo(int offset,
void TextNode::FillInBMInfo(int initial_offset, void TextNode::FillInBMInfo(int initial_offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
if (initial_offset >= bm->length()) return; if (initial_offset >= bm->length()) return;
@ -5686,6 +5698,7 @@ void TextNode::FillInBMInfo(int initial_offset,
} }
on_success()->FillInBMInfo(offset, on_success()->FillInBMInfo(offset,
recursion_depth + 1, recursion_depth + 1,
budget - 1,
bm, bm,
true); // Not at start after a text node. true); // Not at start after a text node.
if (initial_offset == 0) set_bm_info(not_at_start, bm); if (initial_offset == 0) set_bm_info(not_at_start, bm);

View File

@ -580,9 +580,12 @@ class RegExpNode: public ZoneObject {
// Collects information on the possible code units (mod 128) that can match if // Collects information on the possible code units (mod 128) that can match if
// we look forward. This is used for a Boyer-Moore-like string searching // we look forward. This is used for a Boyer-Moore-like string searching
// implementation. TODO(erikcorry): This should share more code with // implementation. TODO(erikcorry): This should share more code with
// EatsAtLeast, GetQuickCheckDetails. // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
// the number of nodes we are willing to look at in order to create this data.
static const int kFillInBMBudget = 200;
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
UNREACHABLE(); UNREACHABLE();
@ -685,9 +688,11 @@ class SeqRegExpNode: public RegExpNode {
virtual RegExpNode* FilterASCII(int depth); virtual RegExpNode* FilterASCII(int depth);
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); on_success_->FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm); if (offset == 0) set_bm_info(not_at_start, bm);
} }
@ -742,6 +747,7 @@ class ActionNode: public SeqRegExpNode {
} }
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start); bool not_at_start);
Type type() { return type_; } Type type() { return type_; }
@ -813,6 +819,7 @@ class TextNode: public SeqRegExpNode {
RegExpCompiler* compiler); RegExpCompiler* compiler);
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start); bool not_at_start);
void CalculateOffsets(); void CalculateOffsets();
@ -875,6 +882,7 @@ class AssertionNode: public SeqRegExpNode {
bool not_at_start); bool not_at_start);
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start); bool not_at_start);
AssertionNodeType type() { return type_; } AssertionNodeType type() { return type_; }
@ -915,6 +923,7 @@ class BackReferenceNode: public SeqRegExpNode {
} }
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start); bool not_at_start);
@ -942,6 +951,7 @@ class EndNode: public RegExpNode {
} }
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
// Returning 0 from EatsAtLeast should ensure we never get here. // Returning 0 from EatsAtLeast should ensure we never get here.
@ -1034,6 +1044,7 @@ class ChoiceNode: public RegExpNode {
bool not_at_start); bool not_at_start);
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start); bool not_at_start);
@ -1086,10 +1097,11 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
bool not_at_start); bool not_at_start);
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start) { bool not_at_start) {
alternatives_->at(1).node()->FillInBMInfo( alternatives_->at(1).node()->FillInBMInfo(
offset, recursion_depth + 1, bm, not_at_start); offset, recursion_depth + 1, budget - 1, bm, not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm); if (offset == 0) set_bm_info(not_at_start, bm);
} }
// For a negative lookahead we don't emit the quick check for the // For a negative lookahead we don't emit the quick check for the
@ -1121,6 +1133,7 @@ class LoopChoiceNode: public ChoiceNode {
bool not_at_start); bool not_at_start);
virtual void FillInBMInfo(int offset, virtual void FillInBMInfo(int offset,
int recursion_depth, int recursion_depth,
int budget,
BoyerMooreLookahead* bm, BoyerMooreLookahead* bm,
bool not_at_start); bool not_at_start);
RegExpNode* loop_node() { return loop_node_; } RegExpNode* loop_node() { return loop_node_; }

View File

@ -162,7 +162,6 @@ assertEquals("*foo * baz", a);
// string we can test that the relevant node is removed by verifying that // string we can test that the relevant node is removed by verifying that
// there is no hang. // there is no hang.
function NoHang(re) { function NoHang(re) {
print(re);
"This is an ASCII string that could take forever".match(re); "This is an ASCII string that could take forever".match(re);
} }

View File

@ -56,3 +56,5 @@ assertEquals(["bbc", "b"], /^(b+|a){1,2}?bc/.exec("bbc"));
assertEquals(["bbaa", "a", "", "a"], assertEquals(["bbaa", "a", "", "a"],
/((\3|b)\2(a)){2,}/.exec("bbaababbabaaaaabbaaaabba")); /((\3|b)\2(a)){2,}/.exec("bbaababbabaaaaabbaaaabba"));
// From crbug.com/128821 - don't hang:
"".match(/((a|i|A|I|u|o|U|O)(s|c|b|c|d|f|g|h|j|k|l|m|n|p|q|r|s|t|v|w|x|y|z|B|C|D|F|G|H|J|K|L|M|N|P|Q|R|S|T|V|W|X|Y|Z)*) de\/da([.,!?\s]|$)/);