Limit work done analyzing regexps with very large fanout.
BUG=128821 Review URL: https://chromiumcodereview.appspot.com/10448117 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11696 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
d87249945f
commit
ae4fcd9702
@ -2192,12 +2192,14 @@ int ActionNode::EatsAtLeast(int still_to_find,
|
||||
|
||||
void ActionNode::FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
if (type_ == BEGIN_SUBMATCH) {
|
||||
bm->SetRest(offset);
|
||||
} else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
|
||||
on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
||||
on_success()->FillInBMInfo(
|
||||
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||
}
|
||||
SaveBMInfo(bm, not_at_start, offset);
|
||||
}
|
||||
@ -2221,11 +2223,13 @@ int AssertionNode::EatsAtLeast(int still_to_find,
|
||||
|
||||
void AssertionNode::FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
// Match the behaviour of EatsAtLeast on this node.
|
||||
if (type() == AT_START && not_at_start) return;
|
||||
on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
||||
on_success()->FillInBMInfo(
|
||||
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||
SaveBMInfo(bm, not_at_start, offset);
|
||||
}
|
||||
|
||||
@ -2808,15 +2812,18 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
|
||||
void LoopChoiceNode::FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
if (body_can_be_zero_length_ ||
|
||||
recursion_depth > RegExpCompiler::kMaxRecursion) {
|
||||
recursion_depth > RegExpCompiler::kMaxRecursion ||
|
||||
budget <= 0) {
|
||||
bm->SetRest(offset);
|
||||
SaveBMInfo(bm, not_at_start, offset);
|
||||
return;
|
||||
}
|
||||
ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
||||
ChoiceNode::FillInBMInfo(
|
||||
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||
SaveBMInfo(bm, not_at_start, offset);
|
||||
}
|
||||
|
||||
@ -2918,7 +2925,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
|
||||
if (eats_at_least >= 1) {
|
||||
BoyerMooreLookahead* bm =
|
||||
new BoyerMooreLookahead(eats_at_least, compiler);
|
||||
FillInBMInfo(0, 0, bm, not_at_start);
|
||||
FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
|
||||
if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
|
||||
if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
|
||||
}
|
||||
@ -3856,7 +3863,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
BoyerMooreLookahead* bm =
|
||||
new BoyerMooreLookahead(eats_at_least, compiler);
|
||||
GuardedAlternative alt0 = alternatives_->at(0);
|
||||
alt0.node()->FillInBMInfo(0, 0, bm, not_at_start);
|
||||
alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
|
||||
skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
|
||||
}
|
||||
} else {
|
||||
@ -5597,6 +5604,7 @@ void Analysis::VisitAssertion(AssertionNode* that) {
|
||||
|
||||
void BackReferenceNode::FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
// Working out the set of characters that a backreference can match is too
|
||||
@ -5612,9 +5620,11 @@ STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
|
||||
|
||||
void ChoiceNode::FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
ZoneList<GuardedAlternative>* alts = alternatives();
|
||||
budget = (budget - 1) / alts->length();
|
||||
for (int i = 0; i < alts->length(); i++) {
|
||||
GuardedAlternative& alt = alts->at(i);
|
||||
if (alt.guards() != NULL && alt.guards()->length() != 0) {
|
||||
@ -5622,7 +5632,8 @@ void ChoiceNode::FillInBMInfo(int offset,
|
||||
SaveBMInfo(bm, not_at_start, offset);
|
||||
return;
|
||||
}
|
||||
alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
||||
alt.node()->FillInBMInfo(
|
||||
offset, recursion_depth + 1, budget, bm, not_at_start);
|
||||
}
|
||||
SaveBMInfo(bm, not_at_start, offset);
|
||||
}
|
||||
@ -5630,6 +5641,7 @@ void ChoiceNode::FillInBMInfo(int offset,
|
||||
|
||||
void TextNode::FillInBMInfo(int initial_offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
if (initial_offset >= bm->length()) return;
|
||||
@ -5686,6 +5698,7 @@ void TextNode::FillInBMInfo(int initial_offset,
|
||||
}
|
||||
on_success()->FillInBMInfo(offset,
|
||||
recursion_depth + 1,
|
||||
budget - 1,
|
||||
bm,
|
||||
true); // Not at start after a text node.
|
||||
if (initial_offset == 0) set_bm_info(not_at_start, bm);
|
||||
|
@ -580,9 +580,12 @@ class RegExpNode: public ZoneObject {
|
||||
// Collects information on the possible code units (mod 128) that can match if
|
||||
// we look forward. This is used for a Boyer-Moore-like string searching
|
||||
// implementation. TODO(erikcorry): This should share more code with
|
||||
// EatsAtLeast, GetQuickCheckDetails.
|
||||
// EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
|
||||
// the number of nodes we are willing to look at in order to create this data.
|
||||
static const int kFillInBMBudget = 200;
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
UNREACHABLE();
|
||||
@ -685,9 +688,11 @@ class SeqRegExpNode: public RegExpNode {
|
||||
virtual RegExpNode* FilterASCII(int depth);
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start);
|
||||
on_success_->FillInBMInfo(
|
||||
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||
if (offset == 0) set_bm_info(not_at_start, bm);
|
||||
}
|
||||
|
||||
@ -742,6 +747,7 @@ class ActionNode: public SeqRegExpNode {
|
||||
}
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start);
|
||||
Type type() { return type_; }
|
||||
@ -813,6 +819,7 @@ class TextNode: public SeqRegExpNode {
|
||||
RegExpCompiler* compiler);
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start);
|
||||
void CalculateOffsets();
|
||||
@ -875,6 +882,7 @@ class AssertionNode: public SeqRegExpNode {
|
||||
bool not_at_start);
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start);
|
||||
AssertionNodeType type() { return type_; }
|
||||
@ -915,6 +923,7 @@ class BackReferenceNode: public SeqRegExpNode {
|
||||
}
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start);
|
||||
|
||||
@ -942,6 +951,7 @@ class EndNode: public RegExpNode {
|
||||
}
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
// Returning 0 from EatsAtLeast should ensure we never get here.
|
||||
@ -1034,6 +1044,7 @@ class ChoiceNode: public RegExpNode {
|
||||
bool not_at_start);
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start);
|
||||
|
||||
@ -1086,10 +1097,11 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
|
||||
bool not_at_start);
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start) {
|
||||
alternatives_->at(1).node()->FillInBMInfo(
|
||||
offset, recursion_depth + 1, bm, not_at_start);
|
||||
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
|
||||
if (offset == 0) set_bm_info(not_at_start, bm);
|
||||
}
|
||||
// For a negative lookahead we don't emit the quick check for the
|
||||
@ -1121,6 +1133,7 @@ class LoopChoiceNode: public ChoiceNode {
|
||||
bool not_at_start);
|
||||
virtual void FillInBMInfo(int offset,
|
||||
int recursion_depth,
|
||||
int budget,
|
||||
BoyerMooreLookahead* bm,
|
||||
bool not_at_start);
|
||||
RegExpNode* loop_node() { return loop_node_; }
|
||||
|
@ -162,7 +162,6 @@ assertEquals("*foo * baz", a);
|
||||
// string we can test that the relevant node is removed by verifying that
|
||||
// there is no hang.
|
||||
function NoHang(re) {
|
||||
print(re);
|
||||
"This is an ASCII string that could take forever".match(re);
|
||||
}
|
||||
|
||||
|
@ -56,3 +56,5 @@ assertEquals(["bbc", "b"], /^(b+|a){1,2}?bc/.exec("bbc"));
|
||||
assertEquals(["bbaa", "a", "", "a"],
|
||||
/((\3|b)\2(a)){2,}/.exec("bbaababbabaaaaabbaaaabba"));
|
||||
|
||||
// From crbug.com/128821 - don't hang:
|
||||
"".match(/((a|i|A|I|u|o|U|O)(s|c|b|c|d|f|g|h|j|k|l|m|n|p|q|r|s|t|v|w|x|y|z|B|C|D|F|G|H|J|K|L|M|N|P|Q|R|S|T|V|W|X|Y|Z)*) de\/da([.,!?\s]|$)/);
|
||||
|
Loading…
Reference in New Issue
Block a user