From 047a7cfee05a203ff44867ac371be9716ac65340 Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Thu, 31 May 2012 11:59:04 +0000 Subject: [PATCH] Avoid overdeep recursion in regexp where a guarded expression with a minimum repetition count is inside another quantifier. Bug=129926 Review URL: https://chromiumcodereview.appspot.com/10451092 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11686 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/jsregexp.cc | 47 ++++++++++++++--------- src/jsregexp.h | 65 +++++++++++++++++++++----------- test/mjsunit/regexp-capture-3.js | 2 + 3 files changed, 75 insertions(+), 39 deletions(-) diff --git a/src/jsregexp.cc b/src/jsregexp.cc index c5e73ed653..51683682fd 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -2191,12 +2191,13 @@ int ActionNode::EatsAtLeast(int still_to_find, void ActionNode::FillInBMInfo(int offset, + int recursion_depth, BoyerMooreLookahead* bm, bool not_at_start) { if (type_ == BEGIN_SUBMATCH) { bm->SetRest(offset); } else if (type_ != POSITIVE_SUBMATCH_SUCCESS) { - on_success()->FillInBMInfo(offset, bm, not_at_start); + on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); } SaveBMInfo(bm, not_at_start, offset); } @@ -2218,11 +2219,13 @@ int AssertionNode::EatsAtLeast(int still_to_find, } -void AssertionNode::FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { +void AssertionNode::FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { // Match the behaviour of EatsAtLeast on this node. if (type() == AT_START && not_at_start) return; - on_success()->FillInBMInfo(offset, bm, not_at_start); + on_success()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); SaveBMInfo(bm, not_at_start, offset); } @@ -2803,14 +2806,17 @@ void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details, } -void LoopChoiceNode::FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { - if (body_can_be_zero_length_) { +void LoopChoiceNode::FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { + if (body_can_be_zero_length_ || + recursion_depth > RegExpCompiler::kMaxRecursion) { bm->SetRest(offset); SaveBMInfo(bm, not_at_start, offset); return; } - ChoiceNode::FillInBMInfo(offset, bm, not_at_start); + ChoiceNode::FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); SaveBMInfo(bm, not_at_start, offset); } @@ -2912,7 +2918,7 @@ void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) { if (eats_at_least >= 1) { BoyerMooreLookahead* bm = new BoyerMooreLookahead(eats_at_least, compiler); - FillInBMInfo(0, bm, not_at_start); + FillInBMInfo(0, 0, bm, not_at_start); if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE; if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE; } @@ -3850,7 +3856,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { BoyerMooreLookahead* bm = new BoyerMooreLookahead(eats_at_least, compiler); GuardedAlternative alt0 = alternatives_->at(0); - alt0.node()->FillInBMInfo(0, bm, not_at_start); + alt0.node()->FillInBMInfo(0, 0, bm, not_at_start); skip_was_emitted = bm->EmitSkipInstructions(macro_assembler); } } else { @@ -5589,8 +5595,10 @@ void Analysis::VisitAssertion(AssertionNode* that) { } -void BackReferenceNode::FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { +void BackReferenceNode::FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { // Working out the set of characters that a backreference can match is too // hard, so we just say that any character can match. bm->SetRest(offset); @@ -5602,8 +5610,10 @@ STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize == RegExpMacroAssembler::kTableSize); -void ChoiceNode::FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { +void ChoiceNode::FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { ZoneList* alts = alternatives(); for (int i = 0; i < alts->length(); i++) { GuardedAlternative& alt = alts->at(i); @@ -5612,14 +5622,16 @@ void ChoiceNode::FillInBMInfo( SaveBMInfo(bm, not_at_start, offset); return; } - alt.node()->FillInBMInfo(offset, bm, not_at_start); + alt.node()->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); } SaveBMInfo(bm, not_at_start, offset); } -void TextNode::FillInBMInfo( - int initial_offset, BoyerMooreLookahead* bm, bool not_at_start) { +void TextNode::FillInBMInfo(int initial_offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { if (initial_offset >= bm->length()) return; int offset = initial_offset; int max_char = bm->max_char(); @@ -5673,6 +5685,7 @@ void TextNode::FillInBMInfo( return; } on_success()->FillInBMInfo(offset, + recursion_depth + 1, bm, true); // Not at start after a text node. if (initial_offset == 0) set_bm_info(not_at_start, bm); diff --git a/src/jsregexp.h b/src/jsregexp.h index 2e90e9ad5a..e438eab915 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -581,8 +581,10 @@ class RegExpNode: public ZoneObject { // we look forward. This is used for a Boyer-Moore-like string searching // implementation. TODO(erikcorry): This should share more code with // EatsAtLeast, GetQuickCheckDetails. - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { UNREACHABLE(); } @@ -681,9 +683,11 @@ class SeqRegExpNode: public RegExpNode { RegExpNode* on_success() { return on_success_; } void set_on_success(RegExpNode* node) { on_success_ = node; } virtual RegExpNode* FilterASCII(int depth); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { - on_success_->FillInBMInfo(offset, bm, not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { + on_success_->FillInBMInfo(offset, recursion_depth + 1, bm, not_at_start); if (offset == 0) set_bm_info(not_at_start, bm); } @@ -736,8 +740,10 @@ class ActionNode: public SeqRegExpNode { return on_success()->GetQuickCheckDetails( details, compiler, filled_in, not_at_start); } - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start); Type type() { return type_; } // TODO(erikcorry): We should allow some action nodes in greedy loops. virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } @@ -805,8 +811,10 @@ class TextNode: public SeqRegExpNode { virtual int GreedyLoopTextLength(); virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( RegExpCompiler* compiler); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start); void CalculateOffsets(); virtual RegExpNode* FilterASCII(int depth); @@ -865,8 +873,10 @@ class AssertionNode: public SeqRegExpNode { RegExpCompiler* compiler, int filled_in, bool not_at_start); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start); AssertionNodeType type() { return type_; } void set_type(AssertionNodeType type) { type_ = type; } @@ -903,8 +913,10 @@ class BackReferenceNode: public SeqRegExpNode { bool not_at_start) { return; } - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start); private: int start_reg_; @@ -928,8 +940,10 @@ class EndNode: public RegExpNode { // Returning 0 from EatsAtLeast should ensure we never get here. UNREACHABLE(); } - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { // Returning 0 from EatsAtLeast should ensure we never get here. UNREACHABLE(); } @@ -1018,8 +1032,10 @@ class ChoiceNode: public RegExpNode { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start); bool being_calculated() { return being_calculated_; } bool not_at_start() { return not_at_start_; } @@ -1068,9 +1084,12 @@ class NegativeLookaheadChoiceNode: public ChoiceNode { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start) { - alternatives_->at(1).node()->FillInBMInfo(offset, bm, not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start) { + alternatives_->at(1).node()->FillInBMInfo( + offset, recursion_depth + 1, bm, not_at_start); if (offset == 0) set_bm_info(not_at_start, bm); } // For a negative lookahead we don't emit the quick check for the @@ -1100,8 +1119,10 @@ class LoopChoiceNode: public ChoiceNode { RegExpCompiler* compiler, int characters_filled_in, bool not_at_start); - virtual void FillInBMInfo( - int offset, BoyerMooreLookahead* bm, bool not_at_start); + virtual void FillInBMInfo(int offset, + int recursion_depth, + BoyerMooreLookahead* bm, + bool not_at_start); RegExpNode* loop_node() { return loop_node_; } RegExpNode* continue_node() { return continue_node_; } bool body_can_be_zero_length() { return body_can_be_zero_length_; } diff --git a/test/mjsunit/regexp-capture-3.js b/test/mjsunit/regexp-capture-3.js index 9bdd600599..06e07e7488 100755 --- a/test/mjsunit/regexp-capture-3.js +++ b/test/mjsunit/regexp-capture-3.js @@ -216,3 +216,5 @@ regex10.exec(input0); var regex11 = /^(?:[^\u0000-\u0080]|[0-9a-z?,.!&\s#()])+$/i; regex11.exec(input0); +var regex12 = /u(\xf0{8}?\D*?|( ? !)$h??(|)*?(||)+?\6((?:\W\B|--\d-*-|)?$){0, }?|^Y( ? !1)\d+)+a/; +regex12.exec("");