[regexp] Use experimental engine if backtrack limit exceeded

We fall back from irregexp to the experimental engine if a backtrack
limit is exceeded and the experimental engine can handle the regexp.
The feature can be turned on with a boolean flag, and a uint-valued
flag controls the default backtrack limit.  For regexps that are
constructed with an explicit backtrack limit (API,
%NewRegExpWithBacktrackLimit), we choose the lower of the explicit and
default backtrack limits.
The default backtrack limit does not apply to regexps that can't be
handled by the experimental engine, and for such regexps an explicitly
specified backtrack limit is handled as before by returning null if we
exceed it.

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: I580df79bd847520985b6c2c2159bc427315c89d1
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2436341
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70500}
This commit is contained in:
Martin Bidlingmaier 2020-10-14 11:35:16 +02:00 committed by Commit Bot
parent a0af1c2bc2
commit d4febb6b46
35 changed files with 495 additions and 91 deletions

View File

@ -372,7 +372,8 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
ToDirectStringAssembler to_direct(state(), string);
TVARIABLE(HeapObject, var_result);
Label out(this), atom(this), runtime(this, Label::kDeferred);
Label out(this), atom(this), runtime(this, Label::kDeferred),
retry_experimental(this, Label::kDeferred);
// External constants.
TNode<ExternalReference> isolate_address =
@ -595,6 +596,10 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
GotoIf(IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpException)),
&if_exception);
GotoIf(IntPtrEqual(
int_result,
IntPtrConstant(RegExp::kInternalRegExpFallbackToExperimental)),
&retry_experimental);
CSA_ASSERT(this, IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpRetry)));
@ -672,6 +677,14 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
Unreachable();
}
BIND(&retry_experimental);
{
var_result =
CAST(CallRuntime(Runtime::kRegExpExperimentalOneshotExec, context,
regexp, string, last_index, match_info));
Goto(&out);
}
BIND(&runtime);
{
var_result = CAST(CallRuntime(Runtime::kRegExpExec, context, regexp, string,

View File

@ -1492,6 +1492,14 @@ DEFINE_BOOL(enable_experimental_regexp_engine, false,
DEFINE_BOOL(trace_experimental_regexp_engine, false,
"trace execution of experimental regexp engine")
DEFINE_BOOL(enable_experimental_regexp_engine_on_excessive_backtracks, false,
"fall back to a breadth-first regexp engine on excessive "
"backtracking")
DEFINE_UINT(regexp_backtracks_before_fallback, 10000,
"number of backtracks during regexp execution before fall back "
"to experimental engine if "
"enable_experimental_regexp_engine_on_excessive_backtracks is set")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
DEFINE_MAYBE_BOOL(testing_maybe_bool_flag, "testing_maybe_bool_flag")

View File

@ -127,6 +127,7 @@ RegExpMacroAssemblerARM::~RegExpMacroAssemblerARM() {
exit_label_.Unuse();
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
fallback_label_.Unuse();
}
@ -164,8 +165,13 @@ void RegExpMacroAssemblerARM::Backtrack() {
__ cmp(r0, Operand(backtrack_limit()));
__ b(ne, &next);
// Exceeded limits are treated as a failed match.
Fail();
// Backtrack limit exceeded.
if (can_fallback()) {
__ jmp(&fallback_label_);
} else {
// Can't fallback, so we treat it as a failed match.
Fail();
}
__ bind(&next);
}
@ -901,6 +907,12 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
__ jmp(&return_r0);
}
if (fallback_label_.is_linked()) {
__ bind(&fallback_label_);
__ mov(r0, Operand(FALLBACK_TO_EXPERIMENTAL));
__ jmp(&return_r0);
}
CodeDesc code_desc;
masm_->GetCode(isolate(), &code_desc);
Handle<Code> code =

View File

@ -203,6 +203,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM
Label exit_label_;
Label check_preempt_label_;
Label stack_overflow_label_;
Label fallback_label_;
};
} // namespace internal

View File

@ -142,6 +142,7 @@ RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
exit_label_.Unuse();
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
fallback_label_.Unuse();
}
int RegExpMacroAssemblerARM64::stack_limit_slack() {
@ -201,8 +202,13 @@ void RegExpMacroAssemblerARM64::Backtrack() {
__ Cmp(scratch, Operand(backtrack_limit()));
__ B(ne, &next);
// Exceeded limits are treated as a failed match.
Fail();
// Backtrack limit exceeded.
if (can_fallback()) {
__ B(&fallback_label_);
} else {
// Can't fallback, so we treat it as a failed match.
Fail();
}
__ bind(&next);
}
@ -1094,6 +1100,12 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
__ B(&return_w0);
}
if (fallback_label_.is_linked()) {
__ Bind(&fallback_label_);
__ Mov(w0, FALLBACK_TO_EXPERIMENTAL);
__ B(&return_w0);
}
CodeDesc code_desc;
masm_->GetCode(isolate(), &code_desc);
Handle<Code> code =

View File

@ -279,6 +279,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
Label exit_label_;
Label check_preempt_label_;
Label stack_overflow_label_;
Label fallback_label_;
};
} // namespace internal

View File

@ -177,7 +177,6 @@ class CanBeHandledVisitor final : private RegExpVisitor {
bool ExperimentalRegExpCompiler::CanBeHandled(RegExpTree* tree,
JSRegExp::Flags flags,
int capture_count) {
DCHECK(FLAG_enable_experimental_regexp_engine);
return CanBeHandledVisitor::Check(tree, flags, capture_count);
}

View File

@ -15,6 +15,8 @@ namespace internal {
bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags,
int capture_count) {
DCHECK(FLAG_enable_experimental_regexp_engine ||
FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count);
}
@ -33,7 +35,6 @@ void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,
bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
DCHECK(FLAG_enable_experimental_regexp_engine);
DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL);
#ifdef VERIFY_HEAP
re->JSRegExpVerify(isolate);
@ -43,6 +44,59 @@ bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
Smi::FromInt(JSRegExp::kUninitializedValue);
}
// Copies the raw bytes of `data` into a freshly allocated ByteArray and
// returns it. T must be a trivial type so that a bytewise copy is meaningful.
template <class T>
Handle<ByteArray> VectorToByteArray(Isolate* isolate, Vector<T> data) {
  STATIC_ASSERT(std::is_trivial<T>::value);

  const int size_in_bytes = sizeof(T) * data.length();
  Handle<ByteArray> result = isolate->factory()->NewByteArray(size_in_bytes);

  // The destination has been allocated; no further heap allocation is allowed
  // while we copy through its raw data pointer.
  DisallowHeapAllocation no_gc;
  MemCopy(result->GetDataStartAddress(), data.begin(), size_in_bytes);
  return result;
}
namespace {
// Bundles the outputs of CompileImpl for a single regexp pattern.
struct CompilationResult {
// Compiled RegExpInstruction sequence, stored as raw bytes in a ByteArray.
Handle<ByteArray> bytecode;
// Capture name map taken from the parser's RegExpCompileData.
Handle<FixedArray> capture_name_map;
};
// Parses the pattern of `regexp` and compiles it to bytecode for the
// experimental engine. The regexp object itself is left untouched. If parsing
// fails, a regexp exception is thrown and nullopt is returned.
base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
                                              Handle<JSRegExp> regexp) {
  Zone zone(isolate->allocator(), ZONE_NAME);

  Handle<String> pattern(regexp->Pattern(), isolate);
  const JSRegExp::Flags regexp_flags = regexp->GetFlags();

  // Parse the regexp source.
  RegExpCompileData compile_data;
  FlatStringReader pattern_reader(isolate, pattern);
  DCHECK(!isolate->has_pending_exception());

  if (!RegExpParser::ParseRegExp(isolate, &zone, &pattern_reader, regexp_flags,
                                 &compile_data)) {
    // The pattern already parsed successfully when the regexp was
    // initialized, so a failure at this point can only be a stack overflow.
    DCHECK_EQ(compile_data.error, RegExpError::kStackOverflow);
    USE(RegExp::ThrowRegExpException(isolate, regexp, pattern,
                                     compile_data.error));
    return base::nullopt;
  }

  // Compile the parsed tree into experimental-engine instructions.
  ZoneList<RegExpInstruction> instructions = ExperimentalRegExpCompiler::Compile(
      compile_data.tree, regexp_flags, &zone);

  CompilationResult compiled;
  compiled.bytecode = VectorToByteArray(isolate, instructions.ToVector());
  compiled.capture_name_map = compile_data.capture_name_map;
  return compiled;
}
} // namespace
bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL);
#ifdef VERIFY_HEAP
@ -54,46 +108,23 @@ bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
StdoutStream{} << "Compiling experimental regexp " << *source << std::endl;
}
Zone zone(isolate->allocator(), ZONE_NAME);
// Parse and compile the regexp source.
RegExpCompileData parse_result;
JSRegExp::Flags flags = re->GetFlags();
FlatStringReader reader(isolate, source);
DCHECK(!isolate->has_pending_exception());
bool parse_success =
RegExpParser::ParseRegExp(isolate, &zone, &reader, flags, &parse_result);
if (!parse_success) {
// The pattern was already parsed successfully during initialization, so
// the only way parsing can fail now is because of stack overflow.
CHECK_EQ(parse_result.error, RegExpError::kStackOverflow);
USE(RegExp::ThrowRegExpException(isolate, re, source, parse_result.error));
base::Optional<CompilationResult> compilation_result =
CompileImpl(isolate, re);
if (!compilation_result.has_value()) {
DCHECK(isolate->has_pending_exception());
return false;
}
ZoneList<RegExpInstruction> bytecode =
ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone);
if (FLAG_print_regexp_bytecode) {
StdoutStream{} << "Bytecode:" << std::endl;
StdoutStream{} << bytecode.ToVector() << std::endl;
}
int byte_length = sizeof(RegExpInstruction) * bytecode.length();
Handle<ByteArray> bytecode_byte_array =
isolate->factory()->NewByteArray(byte_length);
MemCopy(bytecode_byte_array->GetDataStartAddress(), bytecode.begin(),
byte_length);
re->SetDataAt(JSRegExp::kIrregexpLatin1BytecodeIndex, *bytecode_byte_array);
re->SetDataAt(JSRegExp::kIrregexpUC16BytecodeIndex, *bytecode_byte_array);
re->SetDataAt(JSRegExp::kIrregexpLatin1BytecodeIndex,
*compilation_result->bytecode);
re->SetDataAt(JSRegExp::kIrregexpUC16BytecodeIndex,
*compilation_result->bytecode);
Handle<Code> trampoline = BUILTIN_CODE(isolate, RegExpExperimentalTrampoline);
re->SetDataAt(JSRegExp::kIrregexpLatin1CodeIndex, *trampoline);
re->SetDataAt(JSRegExp::kIrregexpUC16CodeIndex, *trampoline);
re->SetCaptureNameMap(parse_result.capture_name_map);
re->SetCaptureNameMap(compilation_result->capture_name_map);
return true;
}
@ -106,27 +137,16 @@ Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
return Vector<RegExpInstruction>(inst_begin, inst_num);
}
// Returns the number of matches.
int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
RegExp::CallOrigin call_origin,
JSRegExp regexp, String subject,
int32_t* output_registers,
int32_t output_register_count,
int32_t subject_index) {
namespace {
int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin,
ByteArray bytecode, String subject, int capture_count,
int32_t* output_registers, int32_t output_register_count,
int32_t subject_index) {
DisallowHeapAllocation no_gc;
DCHECK(FLAG_enable_experimental_regexp_engine);
if (FLAG_trace_experimental_regexp_engine) {
String source = String::cast(regexp.DataAt(JSRegExp::kSourceIndex));
StdoutStream{} << "Executing experimental regexp " << source << std::endl;
}
ByteArray bytecode =
ByteArray::cast(regexp.DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex));
int register_count_per_match =
JSRegExp::RegistersForCaptureCount(regexp.CaptureCount());
JSRegExp::RegistersForCaptureCount(capture_count);
int32_t result;
do {
@ -140,13 +160,37 @@ int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
return result;
}
} // namespace
// Executes the bytecode already stored on `regexp` against `subject`,
// starting at `subject_index`. Returns the number of matches.
int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
                                    RegExp::CallOrigin call_origin,
                                    JSRegExp regexp, String subject,
                                    int32_t* output_registers,
                                    int32_t output_register_count,
                                    int32_t subject_index) {
  DCHECK(FLAG_enable_experimental_regexp_engine);
  DisallowHeapAllocation no_gc;

  if (FLAG_trace_experimental_regexp_engine) {
    String pattern_source = String::cast(regexp.DataAt(JSRegExp::kSourceIndex));
    StdoutStream{} << "Executing experimental regexp " << pattern_source
                   << std::endl;
  }

  // The experimental bytecode is read from the Latin1 bytecode slot.
  ByteArray compiled_code =
      ByteArray::cast(regexp.DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex));
  const int capture_count = regexp.CaptureCount();

  return ExecRawImpl(isolate, call_origin, compiled_code, subject,
                     capture_count, output_registers, output_register_count,
                     subject_index);
}
int32_t ExperimentalRegExp::MatchForCallFromJs(
Address subject, int32_t start_position, Address input_start,
Address input_end, int* output_registers, int32_t output_register_count,
Address backtrack_stack, RegExp::CallOrigin call_origin, Isolate* isolate,
Address regexp) {
DCHECK(FLAG_enable_experimental_regexp_engine);
DCHECK_NOT_NULL(isolate);
DCHECK_NOT_NULL(output_registers);
DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
@ -168,7 +212,6 @@ MaybeHandle<Object> ExperimentalRegExp::Exec(
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int subject_index, Handle<RegExpMatchInfo> last_match_info) {
DCHECK(FLAG_enable_experimental_regexp_engine);
DCHECK_EQ(regexp->TypeTag(), JSRegExp::EXPERIMENTAL);
#ifdef VERIFY_HEAP
regexp->JSRegExpVerify(isolate);
@ -212,5 +255,63 @@ MaybeHandle<Object> ExperimentalRegExp::Exec(
}
}
// Compiles `regexp` for the experimental engine and immediately executes the
// resulting bytecode against `subject`, starting at `subject_index`. The
// compiled bytecode is not stored on the regexp object.
//
// Returns the result of ExecRawImpl, or RegExp::kInternalRegExpException if
// compilation failed.
int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate, JSRegExp regexp,
                                           String subject,
                                           int32_t* output_registers,
                                           int32_t output_register_count,
                                           int32_t subject_index) {
  DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);

  if (FLAG_trace_experimental_regexp_engine) {
    StdoutStream{} << "Experimental execution (oneshot) of regexp "
                   << regexp.Pattern() << std::endl;
  }

  // CompileImpl allocates on the heap (it creates the bytecode ByteArray), so
  // the raw `regexp` and `subject` values must not be used after it runs.
  // Root both in handles up front and only dereference the handles afterwards.
  Handle<JSRegExp> regexp_handle(regexp, isolate);
  Handle<String> subject_handle(subject, isolate);

  base::Optional<CompilationResult> compilation_result =
      CompileImpl(isolate, regexp_handle);
  if (!compilation_result.has_value()) return RegExp::kInternalRegExpException;

  // From here on no allocation happens, so raw object values are safe to use.
  DisallowHeapAllocation no_gc;
  return ExecRawImpl(isolate, RegExp::kFromRuntime,
                     *compilation_result->bytecode, *subject_handle,
                     regexp_handle->CaptureCount(), output_registers,
                     output_register_count, subject_index);
}
// Compiles and runs `regexp` on `subject` with the experimental engine via
// OneshotExecRaw. On a match, updates `last_match_info` and returns it; on no
// match, returns null; on failure, returns an empty MaybeHandle with an
// exception pending on the isolate.
MaybeHandle<Object> ExperimentalRegExp::OneshotExec(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
    int subject_index, Handle<RegExpMatchInfo> last_match_info) {
  DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
  DCHECK_NE(regexp->TypeTag(), JSRegExp::NOT_COMPILED);

  const int capture_count = regexp->CaptureCount();
  const int register_count =
      JSRegExp::RegistersForCaptureCount(capture_count);

  // Use the isolate's static offsets vector when it is large enough;
  // otherwise allocate a temporary buffer that is released on return.
  std::unique_ptr<int32_t[]> owned_registers;
  int32_t* registers = nullptr;
  if (register_count > Isolate::kJSRegexpStaticOffsetsVectorSize) {
    registers = NewArray<int32_t>(register_count);
    owned_registers.reset(registers);
  } else {
    registers = isolate->jsregexp_static_offsets_vector();
  }

  const int num_matches = OneshotExecRaw(isolate, *regexp, *subject, registers,
                                         register_count, subject_index);

  // No match.
  if (num_matches == 0) return isolate->factory()->null_value();

  // Negative results signal a failure; the exception is already pending.
  if (num_matches < 0) {
    DCHECK(isolate->has_pending_exception());
    return MaybeHandle<Object>();
  }

  DCHECK_EQ(num_matches, 1);
  return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
                                  capture_count, registers);
}
} // namespace internal
} // namespace v8

View File

@ -44,6 +44,16 @@ class ExperimentalRegExp final : public AllStatic {
int32_t* output_registers,
int32_t output_register_count, int32_t subject_index);
// Compile and execute a regexp with the experimental engine, regardless of
// its type tag. The regexp itself is not changed (apart from lastIndex).
static MaybeHandle<Object> OneshotExec(
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int index, Handle<RegExpMatchInfo> last_match_info);
static int32_t OneshotExecRaw(Isolate* isolate, JSRegExp regexp,
String subject, int32_t* output_registers,
int32_t output_register_count,
int32_t subject_index);
static constexpr bool kSupportsUnicode = false;
};

View File

@ -116,6 +116,7 @@ RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() {
exit_label_.Unuse();
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
fallback_label_.Unuse();
}
@ -148,8 +149,13 @@ void RegExpMacroAssemblerIA32::Backtrack() {
__ cmp(Operand(ebp, kBacktrackCount), Immediate(backtrack_limit()));
__ j(not_equal, &next);
// Exceeded limits are treated as a failed match.
Fail();
// Backtrack limit exceeded.
if (can_fallback()) {
__ jmp(&fallback_label_);
} else {
// Can't fallback, so we treat it as a failed match.
Fail();
}
__ bind(&next);
}
@ -940,6 +946,12 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
__ jmp(&return_eax);
}
if (fallback_label_.is_linked()) {
__ bind(&fallback_label_);
__ mov(eax, FALLBACK_TO_EXPERIMENTAL);
__ jmp(&return_eax);
}
CodeDesc code_desc;
masm_->GetCode(masm_->isolate(), &code_desc);
Handle<Code> code =

View File

@ -192,6 +192,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32
Label exit_label_;
Label check_preempt_label_;
Label stack_overflow_label_;
Label fallback_label_;
};
} // namespace internal

View File

@ -129,6 +129,7 @@ RegExpMacroAssemblerMIPS::~RegExpMacroAssemblerMIPS() {
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
internal_failure_label_.Unuse();
fallback_label_.Unuse();
}
@ -165,8 +166,13 @@ void RegExpMacroAssemblerMIPS::Backtrack() {
__ Sw(a0, MemOperand(frame_pointer(), kBacktrackCount));
__ Branch(&next, ne, a0, Operand(backtrack_limit()));
// Exceeded limits are treated as a failed match.
Fail();
// Backtrack limit exceeded.
if (can_fallback()) {
__ jmp(&fallback_label_);
} else {
// Can't fallback, so we treat it as a failed match.
Fail();
}
__ bind(&next);
}
@ -910,6 +916,12 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ li(v0, Operand(EXCEPTION));
__ jmp(&return_v0);
}
if (fallback_label_.is_linked()) {
__ bind(&fallback_label_);
__ li(v0, Operand(FALLBACK_TO_EXPERIMENTAL));
__ jmp(&return_v0);
}
}
CodeDesc code_desc;

View File

@ -211,6 +211,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
Label check_preempt_label_;
Label stack_overflow_label_;
Label internal_failure_label_;
Label fallback_label_;
};
} // namespace internal

View File

@ -165,6 +165,7 @@ RegExpMacroAssemblerMIPS::~RegExpMacroAssemblerMIPS() {
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
internal_failure_label_.Unuse();
fallback_label_.Unuse();
}
@ -201,8 +202,13 @@ void RegExpMacroAssemblerMIPS::Backtrack() {
__ Sd(a0, MemOperand(frame_pointer(), kBacktrackCount));
__ Branch(&next, ne, a0, Operand(backtrack_limit()));
// Exceeded limits are treated as a failed match.
Fail();
// Backtrack limit exceeded.
if (can_fallback()) {
__ jmp(&fallback_label_);
} else {
// Can't fallback, so we treat it as a failed match.
Fail();
}
__ bind(&next);
}
@ -946,6 +952,12 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ li(v0, Operand(EXCEPTION));
__ jmp(&return_v0);
}
if (fallback_label_.is_linked()) {
__ bind(&fallback_label_);
__ li(v0, Operand(FALLBACK_TO_EXPERIMENTAL));
__ jmp(&return_v0);
}
}
CodeDesc code_desc;

View File

@ -216,6 +216,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerMIPS
Label check_preempt_label_;
Label stack_overflow_label_;
Label internal_failure_label_;
Label fallback_label_;
};
} // namespace internal

View File

@ -136,6 +136,7 @@ RegExpMacroAssemblerPPC::~RegExpMacroAssemblerPPC() {
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
internal_failure_label_.Unuse();
fallback_label_.Unuse();
}
@ -179,8 +180,13 @@ void RegExpMacroAssemblerPPC::Backtrack() {
__ cmpi(r3, Operand(backtrack_limit()));
__ bne(&next);
// Exceeded limits are treated as a failed match.
Fail();
// Backtrack limit exceeded.
if (can_fallback()) {
__ b(&fallback_label_);
} else {
// Can't fallback, so we treat it as a failed match.
Fail();
}
__ bind(&next);
}
@ -952,6 +958,12 @@ Handle<HeapObject> RegExpMacroAssemblerPPC::GetCode(Handle<String> source) {
__ li(r3, Operand(EXCEPTION));
__ b(&return_r3);
}
if (fallback_label_.is_linked()) {
__ bind(&fallback_label_);
__ li(r3, Operand(FALLBACK_TO_EXPERIMENTAL));
__ b(&return_r3);
}
}
CodeDesc code_desc;

View File

@ -197,6 +197,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerPPC
Label check_preempt_label_;
Label stack_overflow_label_;
Label internal_failure_label_;
Label fallback_label_;
};
// Set of non-volatile registers saved/restored by generated regexp code.

View File

@ -132,7 +132,11 @@ void RegExpBytecodeGenerator::PopCurrentPosition() { Emit(BC_POP_CP, 0); }
void RegExpBytecodeGenerator::PushCurrentPosition() { Emit(BC_PUSH_CP, 0); }
void RegExpBytecodeGenerator::Backtrack() { Emit(BC_POP_BT, 0); }
// Emits BC_POP_BT. Its operand is the result code to report once the
// backtrack limit is hit: a request to fall back to the experimental engine
// if that is possible for this regexp, and a plain match failure otherwise.
void RegExpBytecodeGenerator::Backtrack() {
  int result_on_limit = RegExp::RE_FAILURE;
  if (can_fallback()) {
    result_on_limit = RegExp::RE_FALLBACK_TO_EXPERIMENTAL;
  }
  Emit(BC_POP_BT, result_on_limit);
}
void RegExpBytecodeGenerator::GoTo(Label* l) {
if (advance_current_end_ == pc_) {
@ -368,7 +372,7 @@ void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT, 0);
Backtrack();
Handle<ByteArray> array;
if (FLAG_regexp_peephole_optimization) {

View File

@ -521,8 +521,8 @@ IrregexpInterpreter::Result RawMatch(
BYTECODE(POP_BT) {
STATIC_ASSERT(JSRegExp::kNoBacktrackLimit == 0);
if (++backtrack_count == backtrack_limit) {
// Exceeded limits are treated as a failed match.
return IrregexpInterpreter::FAILURE;
int return_code = LoadPacked24Signed(insn);
return static_cast<IrregexpInterpreter::Result>(return_code);
}
IrregexpInterpreter::Result return_code =

View File

@ -19,6 +19,7 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
SUCCESS = RegExp::kInternalRegExpSuccess,
EXCEPTION = RegExp::kInternalRegExpException,
RETRY = RegExp::kInternalRegExpRetry,
FALLBACK_TO_EXPERIMENTAL = RegExp::kInternalRegExpFallbackToExperimental,
};
// In case a StackOverflow occurs, a StackOverflowException is created and

View File

@ -315,7 +315,7 @@ int NativeRegExpMacroAssembler::Execute(
int result =
fn.Call(input.ptr(), start_offset, input_start, input_end, output,
output_size, stack_base, call_origin, isolate, regexp.ptr());
DCHECK(result >= RETRY);
DCHECK_GE(result, SMALLEST_REGEXP_RESULT);
if (result == EXCEPTION && !isolate->has_pending_exception()) {
// We detected a stack overflow (on the backtrack stack) in RegExp code,

View File

@ -183,10 +183,19 @@ class RegExpMacroAssembler {
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
bool slow_safe() { return slow_safe_compiler_; }
// Controls after how many backtracks irregexp should abort execution. If it
// can fall back to the experimental engine (see `set_can_fallback`), it will
// return the appropriate error code, otherwise it will return the number of
// matches found so far (perhaps none).
void set_backtrack_limit(uint32_t backtrack_limit) {
backtrack_limit_ = backtrack_limit;
}
// Set whether or not irregexp can fall back to the experimental engine on
// excessive backtracking. The number of backtracks considered excessive can
// be controlled with set_backtrack_limit.
void set_can_fallback(bool val) { can_fallback_ = val; }
enum GlobalMode {
NOT_GLOBAL,
GLOBAL_NO_ZERO_LENGTH_CHECK,
@ -211,9 +220,12 @@ class RegExpMacroAssembler {
}
uint32_t backtrack_limit() const { return backtrack_limit_; }
bool can_fallback() const { return can_fallback_; }
private:
bool slow_safe_compiler_;
uint32_t backtrack_limit_ = JSRegExp::kNoBacktrackLimit;
bool can_fallback_ = false;
GlobalMode global_mode_;
Isolate* isolate_;
Zone* zone_;
@ -228,16 +240,20 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
// RETRY: Something significant changed during execution, and the matching
// should be retried from scratch.
// EXCEPTION: Something failed during execution. If no exception has been
// thrown, it's an internal out-of-memory, and the caller should
// throw the exception.
// thrown, it's an internal out-of-memory, and the caller should
// throw the exception.
// FAILURE: Matching failed.
// SUCCESS: Matching succeeded, and the output array has been filled with
// capture positions.
// capture positions.
// FALLBACK_TO_EXPERIMENTAL: Execute the regexp on this subject using the
// experimental engine instead.
enum Result {
FAILURE = RegExp::kInternalRegExpFailure,
SUCCESS = RegExp::kInternalRegExpSuccess,
EXCEPTION = RegExp::kInternalRegExpException,
RETRY = RegExp::kInternalRegExpRetry,
FALLBACK_TO_EXPERIMENTAL = RegExp::kInternalRegExpFallbackToExperimental,
SMALLEST_REGEXP_RESULT = RegExp::kInternalRegExpSmallestResult,
};
NativeRegExpMacroAssembler(Isolate* isolate, Zone* zone);

View File

@ -88,7 +88,7 @@ class RegExpImpl final : public AllStatic {
static bool Compile(Isolate* isolate, Zone* zone, RegExpCompileData* input,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte,
uint32_t backtrack_limit);
uint32_t& backtrack_limit);
// For acting on the JSRegExp data FixedArray.
static int IrregexpMaxRegisterCount(FixedArray re);
@ -247,6 +247,14 @@ bool RegExp::EnsureFullyCompiled(Isolate* isolate, Handle<JSRegExp> re,
}
}
// static
// Forwards all arguments unchanged to ExperimentalRegExp::OneshotExec and
// returns its result.
MaybeHandle<Object> RegExp::ExperimentalOneshotExec(
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int index, Handle<RegExpMatchInfo> last_match_info) {
return ExperimentalRegExp::OneshotExec(isolate, regexp, subject, index,
last_match_info);
}
// static
MaybeHandle<Object> RegExp::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
@ -450,9 +458,10 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
compile_data.compilation_target = re->ShouldProduceBytecode()
? RegExpCompilationTarget::kBytecode
: RegExpCompilationTarget::kNative;
uint32_t backtrack_limit = re->BacktrackLimit();
const bool compilation_succeeded =
Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject,
is_one_byte, re->BacktrackLimit());
is_one_byte, backtrack_limit);
if (!compilation_succeeded) {
DCHECK(compile_data.error != RegExpError::kNone);
RegExp::ThrowRegExpException(isolate, re, compile_data.error);
@ -482,6 +491,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
if (compile_data.register_count > register_max) {
SetIrregexpMaxRegisterCount(*data, compile_data.register_count);
}
data->set(JSRegExp::kIrregexpBacktrackLimit, Smi::FromInt(backtrack_limit));
if (FLAG_trace_regexp_tier_up) {
PrintF("JSRegExp object %p %s size: %d\n",
@ -595,6 +605,7 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
case IrregexpInterpreter::SUCCESS:
case IrregexpInterpreter::EXCEPTION:
case IrregexpInterpreter::FAILURE:
case IrregexpInterpreter::FALLBACK_TO_EXPERIMENTAL:
return result;
case IrregexpInterpreter::RETRY:
// The string has changed representation, and we must restart the
@ -665,13 +676,16 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
int capture_count = regexp->CaptureCount();
return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
capture_count, output_registers);
}
if (res == RegExp::RE_EXCEPTION) {
} else if (res == RegExp::RE_FALLBACK_TO_EXPERIMENTAL) {
return ExperimentalRegExp::OneshotExec(isolate, regexp, subject,
previous_index, last_match_info);
} else if (res == RegExp::RE_EXCEPTION) {
DCHECK(isolate->has_pending_exception());
return MaybeHandle<Object>();
} else {
DCHECK(res == RegExp::RE_FAILURE);
return isolate->factory()->null_value();
}
DCHECK(res == RegExp::RE_FAILURE);
return isolate->factory()->null_value();
}
// static
@ -740,15 +754,15 @@ bool RegExp::CompileForTesting(Isolate* isolate, Zone* zone,
Handle<String> pattern,
Handle<String> sample_subject,
bool is_one_byte) {
uint32_t backtrack_limit = JSRegExp::kNoBacktrackLimit;
return RegExpImpl::Compile(isolate, zone, data, flags, pattern,
sample_subject, is_one_byte,
JSRegExp::kNoBacktrackLimit);
sample_subject, is_one_byte, backtrack_limit);
}
bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte,
uint32_t backtrack_limit) {
uint32_t& backtrack_limit) {
if (JSRegExp::RegistersForCaptureCount(data->capture_count) >
RegExpMacroAssembler::kMaxRegisterCount) {
data->error = RegExpError::kTooLarge;
@ -825,7 +839,21 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
}
macro_assembler->set_slow_safe(TooMuchRegExpCode(isolate, pattern));
macro_assembler->set_backtrack_limit(backtrack_limit);
if (FLAG_enable_experimental_regexp_engine_on_excessive_backtracks &&
ExperimentalRegExp::CanBeHandled(data->tree, flags,
data->capture_count)) {
if (backtrack_limit == JSRegExp::kNoBacktrackLimit) {
backtrack_limit = FLAG_regexp_backtracks_before_fallback;
} else {
backtrack_limit =
std::min(backtrack_limit, FLAG_regexp_backtracks_before_fallback);
}
macro_assembler->set_backtrack_limit(backtrack_limit);
macro_assembler->set_can_fallback(true);
} else {
macro_assembler->set_backtrack_limit(backtrack_limit);
macro_assembler->set_can_fallback(false);
}
// Inserted here, instead of in Assembler, because it depends on information
// in the AST that isn't replicated in the Node structure.
@ -1035,7 +1063,16 @@ int32_t* RegExpGlobalCache::FetchNext() {
}
}
if (num_matches_ <= 0) return nullptr;
// Fall back to experimental engine if needed and possible.
if (num_matches_ == RegExp::kInternalRegExpFallbackToExperimental) {
num_matches_ = ExperimentalRegExp::OneshotExecRaw(
isolate_, *regexp_, *subject_, register_array_, register_array_size_,
last_end_index);
}
if (num_matches_ <= 0) {
return nullptr;
}
current_match_index_ = 0;
return register_array_;
} else {

View File

@ -92,16 +92,25 @@ class RegExp final : public AllStatic {
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int index, Handle<RegExpMatchInfo> last_match_info);
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object>
ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info);
// Integral return values used throughout regexp code layers.
static constexpr int kInternalRegExpFailure = 0;
static constexpr int kInternalRegExpSuccess = 1;
static constexpr int kInternalRegExpException = -1;
static constexpr int kInternalRegExpRetry = -2;
static constexpr int kInternalRegExpFallbackToExperimental = -3;
static constexpr int kInternalRegExpSmallestResult = -3;
enum IrregexpResult : int32_t {
RE_FAILURE = kInternalRegExpFailure,
RE_SUCCESS = kInternalRegExpSuccess,
RE_EXCEPTION = kInternalRegExpException,
RE_RETRY = kInternalRegExpRetry,
RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental,
};
// Set last match info. If match is nullptr, then setting captures is

View File

@ -137,6 +137,7 @@ RegExpMacroAssemblerS390::~RegExpMacroAssemblerS390() {
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
internal_failure_label_.Unuse();
fallback_label_.Unuse();
}
int RegExpMacroAssemblerS390::stack_limit_slack() {
@ -174,8 +175,13 @@ void RegExpMacroAssemblerS390::Backtrack() {
__ CmpLogicalP(r2, Operand(backtrack_limit()));
__ bne(&next);
// Exceeded limits are treated as a failed match.
Fail();
// Backtrack limit exceeded.
if (can_fallback()) {
__ jmp(&fallback_label_);
} else {
// Can't fallback, so we treat it as a failed match.
Fail();
}
__ bind(&next);
}
@ -949,6 +955,12 @@ Handle<HeapObject> RegExpMacroAssemblerS390::GetCode(Handle<String> source) {
__ b(&return_r2);
}
if (fallback_label_.is_linked()) {
__ bind(&fallback_label_);
__ LoadImmP(r2, Operand(FALLBACK_TO_EXPERIMENTAL));
__ b(&return_r2);
}
CodeDesc code_desc;
masm_->GetCode(isolate(), &code_desc);
Handle<Code> code =

View File

@ -197,6 +197,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerS390
Label check_preempt_label_;
Label stack_overflow_label_;
Label internal_failure_label_;
Label fallback_label_;
};
// Set of non-volatile registers saved/restored by generated regexp code.

View File

@ -125,6 +125,7 @@ RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() {
exit_label_.Unuse();
check_preempt_label_.Unuse();
stack_overflow_label_.Unuse();
fallback_label_.Unuse();
}
@ -157,8 +158,13 @@ void RegExpMacroAssemblerX64::Backtrack() {
__ cmpq(Operand(rbp, kBacktrackCount), Immediate(backtrack_limit()));
__ j(not_equal, &next);
// Exceeded limits are treated as a failed match.
Fail();
// Backtrack limit exceeded.
if (can_fallback()) {
__ jmp(&fallback_label_);
} else {
// Can't fallback, so we treat it as a failed match.
Fail();
}
__ bind(&next);
}
@ -1000,6 +1006,12 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ jmp(&return_rax);
}
if (fallback_label_.is_linked()) {
__ bind(&fallback_label_);
__ Set(rax, FALLBACK_TO_EXPERIMENTAL);
__ jmp(&return_rax);
}
FixupCodeRelativePositions();
CodeDesc code_desc;

View File

@ -248,6 +248,7 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64
Label exit_label_;
Label check_preempt_label_;
Label stack_overflow_label_;
Label fallback_label_;
};
} // namespace internal

View File

@ -877,6 +877,23 @@ RUNTIME_FUNCTION(Runtime_RegExpExec) {
isolate, RegExp::Exec(isolate, regexp, subject, index, last_match_info));
}
// Runtime entry that checks its four arguments and forwards them to
// RegExp::ExperimentalOneshotExec.
//   args[0]: the JSRegExp.
//   args[1]: the subject String.
//   args[2]: an int32 index, CHECKed to lie in [0, subject->length()].
//   args[3]: the RegExpMatchInfo handed through as last_match_info.
// The outcome of RegExp::ExperimentalOneshotExec is returned via
// RETURN_RESULT_OR_FAILURE.
RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec) {
HandleScope scope(isolate);
DCHECK_EQ(4, args.length());
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
CONVERT_INT32_ARG_CHECKED(index, 2);
CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
// Due to the way the JS calls are constructed this must be less than the
// length of a string, i.e. it is always a Smi. We check anyway for security.
// Note that index == subject->length() passes the checks below.
CHECK_LE(0, index);
CHECK_GE(subject->length(), index);
// Record this call in the regexp_entry_runtime counter.
isolate->counters()->regexp_entry_runtime()->Increment();
RETURN_RESULT_OR_FAILURE(
isolate, RegExp::ExperimentalOneshotExec(isolate, regexp, subject, index,
last_match_info));
}
namespace {
class MatchInfoBackedMatch : public String::Match {

View File

@ -387,6 +387,7 @@ namespace internal {
#define FOR_EACH_INTRINSIC_REGEXP(F, I) \
I(IsRegExp, 1, 1) \
F(RegExpExec, 4, 1) \
F(RegExpExperimentalOneshotExec, 4, 1) \
F(RegExpExecMultiple, 4, 1) \
F(RegExpInitializeAndCompile, 3, 1) \
F(RegExpReplaceRT, 3, 1) \

View File

@ -21533,18 +21533,38 @@ class RegExpInterruptTest {
} // namespace
TEST(RegExpInterruptAndCollectAllGarbage) {
i::FLAG_always_compact = true; // Move all movable objects on GC.
// Move all movable objects on GC.
i::FLAG_always_compact = true;
// We want to be stuck in regexp execution, so no fallback to the linear-time
// engine.
// TODO(mbid,v8:10765): Find a way to test interrupt support of the
// experimental engine.
i::FLAG_enable_experimental_regexp_engine_on_excessive_backtracks = false;
RegExpInterruptTest test;
test.RunTest(RegExpInterruptTest::CollectAllGarbage);
}
TEST(RegExpInterruptAndMakeSubjectOneByteExternal) {
// We want to be stuck in regexp execution, so no fallback to the linear-time
// engine.
// TODO(mbid,v8:10765): Find a way to test interrupt support of the
// experimental engine.
i::FLAG_enable_experimental_regexp_engine_on_excessive_backtracks = false;
RegExpInterruptTest test;
test.RunTest(RegExpInterruptTest::MakeSubjectOneByteExternal);
}
TEST(RegExpInterruptAndMakeSubjectTwoByteExternal) {
// We want to be stuck in regexp execution, so no fallback to the linear-time
// engine.
// TODO(mbid,v8:10765): Find a way to test interrupt support of the
// experimental engine.
i::FLAG_enable_experimental_regexp_engine_on_excessive_backtracks = false;
RegExpInterruptTest test;
test.RunTest(RegExpInterruptTest::MakeSubjectTwoByteExternal);
}

View File

@ -872,6 +872,12 @@ class TerminatorSleeperThread : public v8::base::Thread {
TEST(TerminateRegExp) {
i::FLAG_allow_natives_syntax = true;
// We want to be stuck in regexp execution, so no fallback to the linear-time
// engine.
// TODO(mbid,v8:10765): Find a way to test interrupt support of the
// experimental engine.
i::FLAG_enable_experimental_regexp_engine_on_excessive_backtracks = false;
v8::Isolate* isolate = CcTest::isolate();
v8::HandleScope scope(isolate);
v8::Local<v8::ObjectTemplate> global = CreateGlobalTemplate(

View File

@ -3,6 +3,7 @@
// found in the LICENSE file.
// Flags: --allow-natives-syntax --no-enable-experimental-regexp-engine
// Flags: --no-enable-experimental-regexp-engine-on-excessive-backtracks
const kNoBacktrackLimit = 0; // To match JSRegExp::kNoBacktrackLimit.
const re0 = %NewRegExpWithBacktrackLimit("(\\d+)+x", "", kNoBacktrackLimit);

View File

@ -0,0 +1,20 @@
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax
// Flags: --no-enable-experimental-regexp-engine
// Flags: --enable-experimental-regexp-engine-on-excessive-backtracks
// Flags: --regexp-backtracks-before-fallback=1000000000
// This test is similar to regexp-fallback.js, but with a large
// --regexp-backtracks-before-fallback value.
//
// If the backtrack limit from --regexp-backtracks-before-fallback is larger
// than an explicit limit, then we should take the explicit limit.
let regexp = %NewRegExpWithBacktrackLimit(".+".repeat(100) + "x", "", 5000);
let subject = "a".repeat(100) + "x" + "a".repeat(99);
// Expected match: the leading run of 100 "a"s together with the "x".
let result = ["a".repeat(100) + "x"];
assertArrayEquals(result, regexp.exec(subject));
assertArrayEquals(result, regexp.exec(subject));

View File

@ -0,0 +1,37 @@
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax
// Flags: --no-enable-experimental-regexp-engine
// Flags: --enable-experimental-regexp-engine-on-excessive-backtracks
// Flags: --regexp-tier-up --regexp-tier-up-ticks 1
// We should report accurate results on patterns for which irregexp suffers
// from catastrophic backtracking.
let regexp = new RegExp("a+".repeat(100) + "x");
let match = "a".repeat(100) + "x";
let subject = match.repeat(3);
// First for the irregexp interpreter:
assertArrayEquals([match], regexp.exec(subject));
// Now for native irregexp:
assertArrayEquals([match], regexp.exec(subject));
// Now the same again with String.replace and a replacement function to
// exercise the RegExpGlobalCache.
// `subject` consists solely of three copies of `match`, so replacing every
// match with "" must leave the empty string.
regexp = new RegExp(regexp.source, "g");
assertEquals("", subject.replace(regexp, function () { return ""; }));
assertEquals("", subject.replace(regexp, function () { return ""; }));
// If an explicit backtrack limit is larger than the default, then we should
// take the default limit.
regexp = %NewRegExpWithBacktrackLimit(regexp.source, "", 1000000000)
assertArrayEquals([match], regexp.exec(subject));
assertArrayEquals([match], regexp.exec(subject));
// If the experimental engine can't handle a regexp with an explicit backtrack
// limit, we should abort and return null on excessive backtracking.
// NOTE(review): the appended lookahead "(?=a)" is presumably what makes the
// pattern unsupported by the experimental engine -- confirm.
regexp = %NewRegExpWithBacktrackLimit(regexp.source + "(?=a)", "", 100)
assertEquals(null, regexp.exec(subject));
assertEquals(null, regexp.exec(subject));