[regexp] Make the interpreter backtracking stack growable

The backtracking stack (which is actually a generic stack) used to be
statically sized. At 10k elements, it was fairly large, but still easy
to overflow on large subject strings. This CL changes it to a
std::vector-based implementation instead which grows on-demand.

Drive-by: Add braces to the BYTECODE cases to make clang-format
produce a nicer output.

Bug: v8:8776
Change-Id: If41a444fe3d05f6d5be1be019129788a86e6118b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1634914
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#61912}
This commit is contained in:
Jakob Gruber 2019-05-29 12:22:44 +02:00 committed by Commit Bot
parent cbc352983e
commit b114cb4c64
2 changed files with 84 additions and 94 deletions

View File

@ -93,16 +93,10 @@ static void TraceInterpreter(const byte* code_base,
}
}
#define BYTECODE(name) \
case BC_##name: \
TraceInterpreter(code_base, \
pc, \
static_cast<int>(backtrack_sp - backtrack_stack_base), \
current, \
current_char, \
BC_##name##_LENGTH, \
#name);
TraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
current_char, BC_##name##_LENGTH, #name);
#else
#define BYTECODE(name) \
case BC_##name:
@ -120,27 +114,35 @@ static int32_t Load16Aligned(const byte* pc) {
return *reinterpret_cast<const uint16_t *>(pc);
}
// A simple abstraction over the backtracking stack used by the interpreter.
// This backtracking stack does not grow automatically, but it ensures that the
// the memory held by the stack is released or remembered in a cache if the
// matching terminates.
//
// Despite the name 'backtracking' stack, it's actually used as a generic stack
// that stores both program counters (= offsets into the bytecode) and generic
// integer values.
class BacktrackStack {
public:
BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); }
BacktrackStack() = default;
~BacktrackStack() {
DeleteArray(data_);
void push(int v) { data_.emplace_back(v); }
int peek() const {
DCHECK(!data_.empty());
return data_.back();
}
int pop() {
int v = peek();
data_.pop_back();
return v;
}
int* data() const { return data_; }
int max_size() const { return kBacktrackStackSize; }
// The 'sp' is the index of the first empty element in the stack.
int sp() const { return static_cast<int>(data_.size()); }
void set_sp(int new_sp) {
DCHECK_LE(new_sp, sp());
data_.resize(new_sp);
}
private:
static const int kBacktrackStackSize = 10000;
int* data_;
std::vector<int> data_;
DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
};
@ -221,78 +223,67 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
const byte* pc = code_array->GetDataStartAddress();
const byte* code_base = pc;
// BacktrackStack ensures that the memory allocated for the backtracking stack
// is returned to the system or cached if there is no stack being cached at
// the moment.
BacktrackStack backtrack_stack;
int* backtrack_stack_base = backtrack_stack.data();
int* backtrack_sp = backtrack_stack_base;
int backtrack_stack_space = backtrack_stack.max_size();
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
PrintF("\n\nStart bytecode interpreter\n\n");
}
#endif
while (true) {
int32_t insn = Load32Aligned(pc);
const int32_t insn = Load32Aligned(pc);
switch (insn & BYTECODE_MASK) {
BYTECODE(BREAK)
UNREACHABLE();
BYTECODE(PUSH_CP)
if (--backtrack_stack_space < 0) {
return StackOverflow(isolate);
}
*backtrack_sp++ = current;
BYTECODE(BREAK) { UNREACHABLE(); }
BYTECODE(PUSH_CP) {
backtrack_stack.push(current);
pc += BC_PUSH_CP_LENGTH;
break;
BYTECODE(PUSH_BT)
if (--backtrack_stack_space < 0) {
return StackOverflow(isolate);
}
*backtrack_sp++ = Load32Aligned(pc + 4);
BYTECODE(PUSH_BT) {
backtrack_stack.push(Load32Aligned(pc + 4));
pc += BC_PUSH_BT_LENGTH;
break;
BYTECODE(PUSH_REGISTER)
if (--backtrack_stack_space < 0) {
return StackOverflow(isolate);
}
*backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
BYTECODE(PUSH_REGISTER) {
backtrack_stack.push(registers[insn >> BYTECODE_SHIFT]);
pc += BC_PUSH_REGISTER_LENGTH;
break;
BYTECODE(SET_REGISTER)
}
BYTECODE(SET_REGISTER) {
registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
pc += BC_SET_REGISTER_LENGTH;
break;
BYTECODE(ADVANCE_REGISTER)
}
BYTECODE(ADVANCE_REGISTER) {
registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
pc += BC_ADVANCE_REGISTER_LENGTH;
break;
BYTECODE(SET_REGISTER_TO_CP)
}
BYTECODE(SET_REGISTER_TO_CP) {
registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
pc += BC_SET_REGISTER_TO_CP_LENGTH;
break;
BYTECODE(SET_CP_TO_REGISTER)
}
BYTECODE(SET_CP_TO_REGISTER) {
current = registers[insn >> BYTECODE_SHIFT];
pc += BC_SET_CP_TO_REGISTER_LENGTH;
break;
BYTECODE(SET_REGISTER_TO_SP)
registers[insn >> BYTECODE_SHIFT] =
static_cast<int>(backtrack_sp - backtrack_stack_base);
}
BYTECODE(SET_REGISTER_TO_SP) {
registers[insn >> BYTECODE_SHIFT] = backtrack_stack.sp();
pc += BC_SET_REGISTER_TO_SP_LENGTH;
break;
BYTECODE(SET_SP_TO_REGISTER)
backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
backtrack_stack_space = backtrack_stack.max_size() -
static_cast<int>(backtrack_sp - backtrack_stack_base);
}
BYTECODE(SET_SP_TO_REGISTER) {
backtrack_stack.set_sp(registers[insn >> BYTECODE_SHIFT]);
pc += BC_SET_SP_TO_REGISTER_LENGTH;
break;
BYTECODE(POP_CP)
backtrack_stack_space++;
--backtrack_sp;
current = *backtrack_sp;
}
BYTECODE(POP_CP) {
current = backtrack_stack.pop();
pc += BC_POP_CP_LENGTH;
break;
// clang-format off
}
BYTECODE(POP_BT) {
IrregexpInterpreter::Result return_code = HandleInterrupts(
isolate, subject_string);
@ -301,41 +292,39 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
&code_base, &pc, &subject);
backtrack_stack_space++;
--backtrack_sp;
pc = code_base + *backtrack_sp;
pc = code_base + backtrack_stack.pop();
break;
}
BYTECODE(POP_REGISTER) // clang-format on
backtrack_stack_space++;
--backtrack_sp;
registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
BYTECODE(POP_REGISTER) {
registers[insn >> BYTECODE_SHIFT] = backtrack_stack.pop();
pc += BC_POP_REGISTER_LENGTH;
break;
BYTECODE(FAIL)
return IrregexpInterpreter::FAILURE;
BYTECODE(SUCCEED)
return IrregexpInterpreter::SUCCESS;
BYTECODE(ADVANCE_CP)
}
BYTECODE(FAIL) { return IrregexpInterpreter::FAILURE; }
BYTECODE(SUCCEED) { return IrregexpInterpreter::SUCCESS; }
BYTECODE(ADVANCE_CP) {
current += insn >> BYTECODE_SHIFT;
pc += BC_ADVANCE_CP_LENGTH;
break;
BYTECODE(GOTO)
}
BYTECODE(GOTO) {
pc = code_base + Load32Aligned(pc + 4);
break;
BYTECODE(ADVANCE_CP_AND_GOTO)
}
BYTECODE(ADVANCE_CP_AND_GOTO) {
current += insn >> BYTECODE_SHIFT;
pc = code_base + Load32Aligned(pc + 4);
break;
BYTECODE(CHECK_GREEDY)
if (current == backtrack_sp[-1]) {
backtrack_sp--;
backtrack_stack_space++;
}
BYTECODE(CHECK_GREEDY) {
if (current == backtrack_stack.peek()) {
backtrack_stack.pop();
pc = code_base + Load32Aligned(pc + 4);
} else {
pc += BC_CHECK_GREEDY_LENGTH;
}
break;
}
BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + (insn >> BYTECODE_SHIFT);
if (pos >= subject.length() || pos < 0) {
@ -533,28 +522,31 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
}
break;
}
BYTECODE(CHECK_REGISTER_LT)
BYTECODE(CHECK_REGISTER_LT) {
if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
pc = code_base + Load32Aligned(pc + 8);
} else {
pc += BC_CHECK_REGISTER_LT_LENGTH;
}
break;
BYTECODE(CHECK_REGISTER_GE)
}
BYTECODE(CHECK_REGISTER_GE) {
if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
pc = code_base + Load32Aligned(pc + 8);
} else {
pc += BC_CHECK_REGISTER_GE_LENGTH;
}
break;
BYTECODE(CHECK_REGISTER_EQ_POS)
}
BYTECODE(CHECK_REGISTER_EQ_POS) {
if (registers[insn >> BYTECODE_SHIFT] == current) {
pc = code_base + Load32Aligned(pc + 4);
} else {
pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
}
break;
BYTECODE(CHECK_NOT_REGS_EQUAL)
}
BYTECODE(CHECK_NOT_REGS_EQUAL) {
if (registers[insn >> BYTECODE_SHIFT] ==
registers[Load32Aligned(pc + 4)]) {
pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
@ -562,6 +554,7 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
pc = code_base + Load32Aligned(pc + 8);
}
break;
}
BYTECODE(CHECK_NOT_BACK_REF) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
@ -628,20 +621,22 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
break;
}
BYTECODE(CHECK_AT_START)
BYTECODE(CHECK_AT_START) {
if (current == 0) {
pc = code_base + Load32Aligned(pc + 4);
} else {
pc += BC_CHECK_AT_START_LENGTH;
}
break;
BYTECODE(CHECK_NOT_AT_START)
}
BYTECODE(CHECK_NOT_AT_START) {
if (current + (insn >> BYTECODE_SHIFT) == 0) {
pc += BC_CHECK_NOT_AT_START_LENGTH;
} else {
pc = code_base + Load32Aligned(pc + 4);
}
break;
}
BYTECODE(SET_CURRENT_POSITION_FROM_END) {
int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
if (subject.length() - current > by) {
@ -658,6 +653,8 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
}
}
#undef BYTECODE
} // namespace
// static

View File

@ -1201,13 +1201,6 @@
'built-ins/SharedArrayBuffer/length-is-too-large-throws': [SKIP],
}], # asan == True or msan == True or tsan == True
['variant == interpreted_regexp', {
# Call stack exceeded: https://crbug.com/v8/8678
'built-ins/RegExp/CharacterClassEscapes/character-class-non-digit-class-escape-plus-quantifier-flags-u': [SKIP],
'built-ins/RegExp/CharacterClassEscapes/character-class-non-whitespace-class-escape-plus-quantifier-flags-u': [SKIP],
'built-ins/RegExp/CharacterClassEscapes/character-class-non-word-class-escape-plus-quantifier-flags-u': [SKIP],
}], # variant == interpreted_regexp
##############################################################################
['variant == jitless', {
# https://crbug.com/v8/7777