Reduce regexp compiler stack size when not optimizing regexps

R=jkummerow@chromium.org
BUG=chromium:475705
LOG=y

Review URL: https://codereview.chromium.org/1082763002

Cr-Commit-Position: refs/heads/master@{#27851}
This commit is contained in:
erikcorry 2015-04-15 08:15:58 -07:00 committed by Commit bot
parent ccc7952e3b
commit e0be05036f
3 changed files with 108 additions and 17 deletions

View File

@ -986,7 +986,12 @@ class RegExpCompiler {
int capture_count,
Handle<String> pattern);
inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
inline void AddWork(RegExpNode* node) {
if (!node->on_work_list() && !node->label()->is_bound()) {
node->set_on_work_list(true);
work_list_->Add(node);
}
}
static const int kImplementationOffset = 0;
static const int kNumberOfRegistersOffset = 0;
@ -1006,6 +1011,10 @@ class RegExpCompiler {
inline bool one_byte() { return one_byte_; }
inline bool optimize() { return optimize_; }
inline void set_optimize(bool value) { optimize_ = value; }
inline bool limiting_recursion() { return limiting_recursion_; }
inline void set_limiting_recursion(bool value) {
limiting_recursion_ = value;
}
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
int current_expansion_factor() { return current_expansion_factor_; }
@ -1027,6 +1036,7 @@ class RegExpCompiler {
bool ignore_case_;
bool one_byte_;
bool reg_exp_too_big_;
bool limiting_recursion_;
bool optimize_;
int current_expansion_factor_;
FrequencyCollator frequency_collator_;
@ -1061,6 +1071,7 @@ RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
ignore_case_(ignore_case),
one_byte_(one_byte),
reg_exp_too_big_(false),
limiting_recursion_(false),
optimize_(FLAG_regexp_optimization),
current_expansion_factor_(1),
frequency_collator_(),
@ -1095,7 +1106,9 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble(
macro_assembler_->Bind(&fail);
macro_assembler_->Fail();
while (!work_list.is_empty()) {
work_list.RemoveLast()->Emit(this, &new_trace);
RegExpNode* node = work_list.RemoveLast();
node->set_on_work_list(false);
if (!node->label()->is_bound()) node->Emit(this, &new_trace);
}
if (reg_exp_too_big_) return IrregexpRegExpTooBig(isolate_);
@ -1371,8 +1384,13 @@ void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
// Create a new trivial state and generate the node with that.
Label undo;
assembler->PushBacktrack(&undo);
Trace new_state;
successor->Emit(compiler, &new_state);
if (successor->KeepRecursing(compiler)) {
Trace new_state;
successor->Emit(compiler, &new_state);
} else {
compiler->AddWork(successor);
assembler->GoTo(successor->label());
}
// On backtrack we need to restore state.
assembler->Bind(&undo);
@ -2213,17 +2231,13 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
if (trace->is_trivial()) {
if (label_.is_bound()) {
// We are being asked to generate a generic version, but that's already
// been done so just go to it.
if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
// If a generic version is already scheduled to be generated or we have
// recursed too deeply then just generate a jump to that code.
macro_assembler->GoTo(&label_);
return DONE;
}
if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
// To avoid too deep recursion we push the node to the work queue and just
// generate a goto here.
// This will queue it up for generation of a generic version if it hasn't
// already been queued.
compiler->AddWork(this);
macro_assembler->GoTo(&label_);
return DONE;
}
// Generate generic version of the node and bind the label for later use.
@ -2234,19 +2248,28 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
// We are being asked to make a non-generic version. Keep track of how many
// non-generic versions we generate so as not to overdo it.
trace_count_++;
if (compiler->optimize() && trace_count_ < kMaxCopiesCodeGenerated &&
compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
if (KeepRecursing(compiler) && compiler->optimize() &&
trace_count_ < kMaxCopiesCodeGenerated) {
return CONTINUE;
}
// If we get here code has been generated for this node too many times or
// recursion is too deep. Time to switch to a generic version. The code for
// generic versions above can handle deep recursion properly.
bool was_limiting = compiler->limiting_recursion();
compiler->set_limiting_recursion(true);
trace->Flush(compiler, this);
compiler->set_limiting_recursion(was_limiting);
return DONE;
}
bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
return !compiler->limiting_recursion() &&
compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
}
int ActionNode::EatsAtLeast(int still_to_find,
int budget,
bool not_at_start) {
@ -6025,7 +6048,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
RegExpCompiler compiler(isolate, zone, data->capture_count, ignore_case,
is_one_byte);
compiler.set_optimize(!TooMuchRegExpCode(pattern));
if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));
// Sample some characters from the middle of the string.
static const int kSampleSize = 128;

View File

@ -570,7 +570,7 @@ extern int kUninitializedRegExpNodePlaceHolder;
class RegExpNode: public ZoneObject {
public:
explicit RegExpNode(Zone* zone)
: replacement_(NULL), trace_count_(0), zone_(zone) {
: replacement_(NULL), on_work_list_(false), trace_count_(0), zone_(zone) {
bm_info_[0] = bm_info_[1] = NULL;
}
virtual ~RegExpNode();
@ -618,6 +618,7 @@ class RegExpNode: public ZoneObject {
// EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
// the number of nodes we are willing to look at in order to create this data.
static const int kRecursionBudget = 200;
bool KeepRecursing(RegExpCompiler* compiler);
virtual void FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
@ -658,6 +659,9 @@ class RegExpNode: public ZoneObject {
// the deferred actions in the current trace and generating a goto.
static const int kMaxCopiesCodeGenerated = 10;
bool on_work_list() { return on_work_list_; }
void set_on_work_list(bool value) { on_work_list_ = value; }
NodeInfo* info() { return &info_; }
BoyerMooreLookahead* bm_info(bool not_at_start) {
@ -679,6 +683,7 @@ class RegExpNode: public ZoneObject {
private:
static const int kFirstCharBudget = 10;
Label label_;
bool on_work_list_;
NodeInfo info_;
// This variable keeps track of how many times code has been generated for
// this node (in different traces). We don't keep track of where the

View File

@ -0,0 +1,63 @@
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Crankshaft changes the stack usage and messes up the binary search for the
// stack depth that causes a stack overflow. The issue only arises without
// regexp optimization, which can happen on pages that create a lot of regexps.
// Flags: --nocrankshaft --noregexp-optimization
// Should not crash with a stack overflow in the regexp compiler, even when the
// JS has used most of the stack.
function use_space_then_do_test(depth) {
try {
// The "+ depth" is to avoid the regexp compilation cache.
var regexp_src = repeat(".(.)", 12) + depth;
use_space(depth, regexp_src);
return true;
} catch (e) {
assertFalse(("" + e).indexOf("tack") == -1); // Check for [Ss]tack.
return false;
}
}
function use_space(n, regexp_src) {
if (--n == 0) {
do_test(regexp_src);
return;
}
use_space(n, regexp_src);
}
function repeat(str, n) {
var answer = "";
while (n-- != 0) {
answer += str;
}
return answer;
}
var subject = repeat("y", 200);
function do_test(regexp_src) {
var re = new RegExp(regexp_src);
re.test(subject);
}
function try_different_stack_limits() {
var lower = 100;
var higher = 100000;
while (lower < higher - 1) {
var average = Math.floor((lower + higher) / 2);
if (use_space_then_do_test(average)) {
lower = average;
} else {
higher = average;
}
}
for (var i = lower - 5; i < higher + 5; i++) {
use_space_then_do_test(i);
}
}
try_different_stack_limits();