Limit EatsAtLeast recursion by a budget.

BUG=178790

Review URL: https://chromiumcodereview.appspot.com/12380026

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13788 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2013-03-01 14:50:14 +00:00
parent d7539af89a
commit 358311e8ec
4 changed files with 111 additions and 92 deletions

View File

@ -2299,35 +2299,33 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
// Lower bound on the number of characters consumed on success from this
// action node onwards. The node passes still_to_find through unchanged, so
// the answer comes entirely from on_success().
// Returns 0 when the traversal limit is reached or when the action is
// POSITIVE_SUBMATCH_SUCCESS.
// NOTE(review): the recursion_depth lines and the budget lines look like the
// removed and added sides of one change shown together - confirm against the
// raw patch.
int ActionNode::EatsAtLeast(int still_to_find,
int recursion_depth,
int budget,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
// Budget exhausted: give the conservative answer instead of walking further.
if (budget <= 0) return 0;
if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
// Each node visited costs one unit of budget.
return on_success()->EatsAtLeast(still_to_find,
recursion_depth + 1,
budget - 1,
not_at_start);
}
// Contributes this action node to the Boyer-Moore lookahead data in bm.
//  - BEGIN_SUBMATCH: calls bm->SetRest(offset) and does not visit the
//    successor.
//  - POSITIVE_SUBMATCH_SUCCESS: contributes nothing.
//  - any other action: forwards to on_success() with one unit less budget.
// In every case the result is then passed to SaveBMInfo.
// NOTE(review): two forwarding calls are shown (with and without
// recursion_depth); presumably the old and new form of the same call -
// confirm against the raw patch.
void ActionNode::FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
if (type_ == BEGIN_SUBMATCH) {
bm->SetRest(offset);
} else if (type_ != POSITIVE_SUBMATCH_SUCCESS) {
on_success()->FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
}
SaveBMInfo(bm, not_at_start, offset);
}
// Lower bound on the number of characters consumed on success from this
// assertion onwards. Returns 0 when the traversal limit is reached, returns
// still_to_find for an AT_START assertion when not_at_start is set, and
// otherwise defers to on_success() with one unit less budget.
// NOTE(review): the recursion_depth lines and the budget lines look like the
// removed and added sides of one change shown together - confirm against the
// raw patch.
int AssertionNode::EatsAtLeast(int still_to_find,
int recursion_depth,
int budget,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
if (budget <= 0) return 0;
// If we know we are not at the start and we are asked "how many characters
// will you match if you succeed?" then we can answer anything since false
// implies false. So let's just return the max answer (still_to_find) since
@ -2335,55 +2333,53 @@ int AssertionNode::EatsAtLeast(int still_to_find,
// branches in the node graph.
if (type() == AT_START && not_at_start) return still_to_find;
return on_success()->EatsAtLeast(still_to_find,
recursion_depth + 1,
budget - 1,
not_at_start);
}
// Contributes this assertion to the Boyer-Moore lookahead data in bm.
// An AT_START assertion with not_at_start set returns immediately, leaving
// bm untouched and skipping SaveBMInfo. Any other case forwards to
// on_success() with one unit less budget and then calls SaveBMInfo.
// NOTE(review): two forwarding calls are shown (with and without
// recursion_depth); presumably the old and new form of the same call -
// confirm against the raw patch.
void AssertionNode::FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
// Match the behaviour of EatsAtLeast on this node.
if (type() == AT_START && not_at_start) return;
on_success()->FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start);
SaveBMInfo(bm, not_at_start, offset);
}
// Lower bound on the number of characters consumed on success from this
// back reference onwards. The node adds nothing of its own to the bound:
// the result is whatever on_success() reports for the same still_to_find,
// or 0 once the traversal limit is reached.
// NOTE(review): the recursion_depth lines and the budget lines look like the
// removed and added sides of one change shown together - confirm against the
// raw patch.
int BackReferenceNode::EatsAtLeast(int still_to_find,
int recursion_depth,
int budget,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
if (budget <= 0) return 0;
return on_success()->EatsAtLeast(still_to_find,
recursion_depth + 1,
budget - 1,
not_at_start);
}
// Lower bound on the number of characters consumed on success from this
// text node onwards. The node itself accounts for Length() characters.
// If that already covers still_to_find, or the traversal limit is reached,
// Length() alone is returned. Otherwise the successor is asked for the
// remaining still_to_find - Length() characters and the two are added.
// NOTE(review): the recursion_depth lines and the budget lines look like the
// removed and added sides of one change shown together - confirm against the
// raw patch.
int TextNode::EatsAtLeast(int still_to_find,
int recursion_depth,
int budget,
bool not_at_start) {
int answer = Length();
// Enough characters found already; no need to look at the successor.
if (answer >= still_to_find) return answer;
if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
// Out of budget: report only what this node guarantees.
if (budget <= 0) return answer;
// We are not at start after this node so we set the last argument to 'true'.
return answer + on_success()->EatsAtLeast(still_to_find - answer,
recursion_depth + 1,
budget - 1,
true);
}
// Lower bound on the number of characters consumed on success from this
// negative lookahead onwards. Only alternative 1 (the continuation) is
// consulted; alternative 0 (the lookahead itself) is not visited.
// Returns 0 once the traversal limit is reached.
// NOTE(review): two return statements are shown (recursion_depth + 1 and
// budget - 1); presumably the old and new form of the same call - confirm
// against the raw patch.
int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,
int recursion_depth,
int budget,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
if (budget <= 0) return 0;
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = alternatives_->at(1).node();
return node->EatsAtLeast(still_to_find, recursion_depth + 1, not_at_start);
return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
}
@ -2400,39 +2396,40 @@ void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
// Shared implementation of EatsAtLeast for choice nodes: the smallest
// EatsAtLeast result over all alternatives, skipping ignore_this_node (pass
// NULL to skip none). Returns 0 once the traversal limit is reached, and
// stops early as soon as any alternative reports 0.
// NOTE(review): the recursion_depth lines and the budget lines look like the
// removed and added sides of one change shown together - confirm against the
// raw patch.
int ChoiceNode::EatsAtLeastHelper(int still_to_find,
int recursion_depth,
int budget,
RegExpNode* ignore_this_node,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
if (budget <= 0) return 0;
// Starting value for the minimum; it is also the result if no alternative
// lowers it.
int min = 100;
int choice_count = alternatives_->length();
// Charge one unit for this node, then split what is left evenly between the
// alternatives (integer division).
budget = (budget - 1) / choice_count;
for (int i = 0; i < choice_count; i++) {
RegExpNode* node = alternatives_->at(i).node();
if (node == ignore_this_node) continue;
int node_eats_at_least = node->EatsAtLeast(still_to_find,
recursion_depth + 1,
not_at_start);
int node_eats_at_least =
node->EatsAtLeast(still_to_find, budget, not_at_start);
if (node_eats_at_least < min) min = node_eats_at_least;
// Nothing can be smaller than 0, so the remaining alternatives are skipped.
if (min == 0) return 0;
}
return min;
}
// EatsAtLeast for a loop: delegates to EatsAtLeastHelper with loop_node_ as
// the alternative to ignore, and with one unit of budget already deducted.
// NOTE(review): the recursion_depth lines and the budget lines look like the
// removed and added sides of one change shown together - confirm against the
// raw patch.
int LoopChoiceNode::EatsAtLeast(int still_to_find,
int recursion_depth,
int budget,
bool not_at_start) {
return EatsAtLeastHelper(still_to_find,
recursion_depth,
budget - 1,
loop_node_,
not_at_start);
}
// EatsAtLeast for a plain choice: delegates to EatsAtLeastHelper with no
// alternative ignored (NULL) and the budget passed through unchanged.
// NOTE(review): the recursion_depth lines and the budget lines look like the
// removed and added sides of one change shown together - confirm against the
// raw patch.
int ChoiceNode::EatsAtLeast(int still_to_find,
int recursion_depth,
int budget,
bool not_at_start) {
return EatsAtLeastHelper(still_to_find,
recursion_depth,
budget,
NULL,
not_at_start);
}
@ -2988,19 +2985,15 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
// Contributes this loop to the Boyer-Moore lookahead data in bm.
// When the loop body can be zero length, or the traversal limit is reached,
// it calls bm->SetRest(offset), saves the info and returns without visiting
// the alternatives. Otherwise it forwards to ChoiceNode::FillInBMInfo with
// one unit less budget and then saves the info.
// NOTE(review): two versions of the guard condition and of the forwarding
// call are shown (with and without recursion_depth), so the braces do not
// balance as listed; presumably old and new lines together - confirm against
// the raw patch.
void LoopChoiceNode::FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
if (body_can_be_zero_length_ ||
recursion_depth > RegExpCompiler::kMaxRecursion ||
budget <= 0) {
if (body_can_be_zero_length_ || budget <= 0) {
bm->SetRest(offset);
SaveBMInfo(bm, not_at_start, offset);
return;
}
ChoiceNode::FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start);
SaveBMInfo(bm, not_at_start, offset);
}
@ -3097,12 +3090,13 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
BoyerMooreLookahead* lookahead = bm_info(not_at_start);
if (lookahead == NULL) {
int eats_at_least =
Min(kMaxLookaheadForBoyerMoore,
EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore,
kRecursionBudget,
not_at_start));
if (eats_at_least >= 1) {
BoyerMooreLookahead* bm =
new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE;
if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE;
}
@ -4034,16 +4028,17 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
ASSERT(trace->is_trivial()); // This is the case on LoopChoiceNodes.
BoyerMooreLookahead* lookahead = bm_info(not_at_start);
if (lookahead == NULL) {
eats_at_least =
Min(kMaxLookaheadForBoyerMoore,
EatsAtLeast(kMaxLookaheadForBoyerMoore, 0, not_at_start));
eats_at_least = Min(kMaxLookaheadForBoyerMoore,
EatsAtLeast(kMaxLookaheadForBoyerMoore,
kRecursionBudget,
not_at_start));
if (eats_at_least >= 1) {
BoyerMooreLookahead* bm =
new(zone()) BoyerMooreLookahead(eats_at_least,
compiler,
zone());
GuardedAlternative alt0 = alternatives_->at(0);
alt0.node()->FillInBMInfo(0, 0, kFillInBMBudget, bm, not_at_start);
alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
}
} else {
@ -4055,7 +4050,8 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
if (eats_at_least == kEatsAtLeastNotYetInitialized) {
// Save some time by looking at most one machine word ahead.
eats_at_least = EatsAtLeast(compiler->ascii() ? 4 : 2, 0, not_at_start);
eats_at_least =
EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);
}
int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
@ -5811,7 +5807,6 @@ void Analysis::VisitAssertion(AssertionNode* that) {
void BackReferenceNode::FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
@ -5827,7 +5822,6 @@ STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
void ChoiceNode::FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
@ -5840,15 +5834,13 @@ void ChoiceNode::FillInBMInfo(int offset,
SaveBMInfo(bm, not_at_start, offset);
return;
}
alt.node()->FillInBMInfo(
offset, recursion_depth + 1, budget, bm, not_at_start);
alt.node()->FillInBMInfo(offset, budget, bm, not_at_start);
}
SaveBMInfo(bm, not_at_start, offset);
}
void TextNode::FillInBMInfo(int initial_offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
@ -5905,7 +5897,6 @@ void TextNode::FillInBMInfo(int initial_offset,
return;
}
on_success()->FillInBMInfo(offset,
recursion_depth + 1,
budget - 1,
bm,
true); // Not at start after a text node.

View File

@ -582,9 +582,7 @@ class RegExpNode: public ZoneObject {
// used to indicate that we know we are not at the start of the input. In
// this case anchored branches will always fail and can be ignored when
// determining how many characters are consumed on success.
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start) = 0;
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start) = 0;
// Emits some quick code that checks whether the preloaded characters match.
// Falls through on certain failure, jumps to the label on possible success.
// If the node cannot make a quick check it does nothing and returns false.
@ -616,9 +614,8 @@ class RegExpNode: public ZoneObject {
// implementation. TODO(erikcorry): This should share more code with
// EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
// the number of nodes we are willing to look at in order to create this data.
static const int kFillInBMBudget = 200;
static const int kRecursionBudget = 200;
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
@ -725,12 +722,10 @@ class SeqRegExpNode: public RegExpNode {
void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
// Default FillInBMInfo for sequence nodes: forwards to on_success_ with one
// unit less budget, then stores bm on this node via set_bm_info, but only
// when offset is 0.
// NOTE(review): two forwarding calls are shown (with and without
// recursion_depth); presumably the old and new form of the same call -
// confirm against the raw patch.
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
on_success_->FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm);
}
@ -773,9 +768,7 @@ class ActionNode: public SeqRegExpNode {
RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int filled_in,
@ -784,7 +777,6 @@ class ActionNode: public SeqRegExpNode {
details, compiler, filled_in, not_at_start);
}
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -843,9 +835,7 @@ class TextNode: public SeqRegExpNode {
}
virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int characters_filled_in,
@ -856,7 +846,6 @@ class TextNode: public SeqRegExpNode {
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler);
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -911,15 +900,12 @@ class AssertionNode: public SeqRegExpNode {
}
virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int filled_in,
bool not_at_start);
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -960,7 +946,6 @@ class BackReferenceNode: public SeqRegExpNode {
return;
}
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -989,7 +974,6 @@ class EndNode: public RegExpNode {
UNREACHABLE();
}
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
@ -1075,11 +1059,9 @@ class ChoiceNode: public RegExpNode {
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
DispatchTable* GetTable(bool ignore_case);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
int EatsAtLeastHelper(int still_to_find,
int recursion_depth,
int budget,
RegExpNode* ignore_this_node,
bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
@ -1087,7 +1069,6 @@ class ChoiceNode: public RegExpNode {
int characters_filled_in,
bool not_at_start);
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);
@ -1133,20 +1114,17 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
AddAlternative(this_must_fail);
AddAlternative(then_do_this);
}
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
// FillInBMInfo for a negative lookahead: only alternative 1 is visited, with
// one unit less budget. bm is then stored on this node via set_bm_info, but
// only when offset is 0.
// NOTE(review): two argument lists are shown for the single forwarding call
// (with and without recursion_depth); presumably old and new lines together
// - confirm against the raw patch.
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start) {
alternatives_->at(1).node()->FillInBMInfo(
offset, recursion_depth + 1, budget - 1, bm, not_at_start);
offset, budget - 1, bm, not_at_start);
if (offset == 0) set_bm_info(not_at_start, bm);
}
// For a negative lookahead we don't emit the quick check for the
@ -1169,15 +1147,12 @@ class LoopChoiceNode: public ChoiceNode {
void AddLoopAlternative(GuardedAlternative alt);
void AddContinueAlternative(GuardedAlternative alt);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start);
virtual void FillInBMInfo(int offset,
int recursion_depth,
int budget,
BoyerMooreLookahead* bm,
bool not_at_start);

View File

@ -820,7 +820,8 @@ function CallSiteGetMethodName() {
%_CallFunction(this.receiver,
ownName,
ObjectLookupSetter) === this.fun ||
%GetDataProperty(this.receiver, ownName) === this.fun)) {
(IS_OBJECT(this.receiver) &&
%GetDataProperty(this.receiver, ownName) === this.fun))) {
// To handle DontEnum properties we guess that the method has
// the same name as the function.
return ownName;
@ -829,7 +830,8 @@ function CallSiteGetMethodName() {
for (var prop in this.receiver) {
if (%_CallFunction(this.receiver, prop, ObjectLookupGetter) === this.fun ||
%_CallFunction(this.receiver, prop, ObjectLookupSetter) === this.fun ||
%GetDataProperty(this.receiver, prop) === this.fun) {
(IS_OBJECT(this.receiver) &&
%GetDataProperty(this.receiver, prop) === this.fun)) {
// If we find more than one match bail out to avoid confusion.
if (name) {
return null;
@ -883,10 +885,9 @@ function CallSiteGetPosition() {
// Reports whether this call site's function (this.fun) is the value of the
// receiver's "constructor" data property. Returns false when no such value
// is found.
// NOTE(review): two declarations of `constructor` are shown (a truthiness
// check on receiver and an IS_OBJECT check), along with two forms of the
// early return; presumably old and new lines together - confirm against the
// raw patch.
function CallSiteIsConstructor() {
var receiver = this.receiver;
var constructor = receiver ? %GetDataProperty(receiver, "constructor") : null;
if (!constructor) {
return false;
}
// %GetDataProperty is only called when IS_OBJECT(receiver) holds.
var constructor =
IS_OBJECT(receiver) ? %GetDataProperty(receiver, "constructor") : null;
if (!constructor) return false;
return this.fun === constructor;
}

View File

@ -0,0 +1,52 @@
// Copyright 2013 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Create a regexp in the form of a?a?...a? (1000 optional characters) so
// that fully traversing the entire graph would be prohibitively expensive.
// This should not cause a timeout.
var r1 = "";
for (var i = 0; i < 1000; i++) {
r1 += "a?";
}
"test".match(RegExp(r1));
// Same idea with branching: 100 consecutive groups, each a seven-way
// alternation of optional characters.
var r2 = "";
for (var i = 0; i < 100; i++) {
r2 += "(a?|b?|c?|d?|e?|f?|g?)";
}
"test".match(RegExp(r2));
// Create a regexp in the form of ((..(a)a..)a, nested 1000 levels deep.
// Compiling it causes EatsAtLeast to reach the maximum
// recursion depth possible with a given budget.
// This should not cause a stack overflow.
var r3 = "a";
for (var i = 0; i < 1000; i++) {
r3 = "(" + r3 + ")a";
}
"test".match(RegExp(r3));