[regexp] Specify signedness when accessing packed arguments

So far this is mainly a readability improvement to specify
expectations on the packed argument. In the future we should also
check signedness during bytecode generation.

Drive-by: Update DCHECK to allow signed args to
CHECK_CURRENT_POSITION.

Bug: chromium:1083450
Change-Id: I9376ec691b51eb251c972309ad65dd6c04eec3ae
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2207137
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67880}
This commit is contained in:
Jakob Gruber 2020-05-18 14:50:45 +02:00 committed by Commit Bot
parent 1637b617b9
commit 508569f5e7
4 changed files with 66 additions and 46 deletions

View File

@ -182,7 +182,7 @@ void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset,
int eats_at_least) {
DCHECK_GE(eats_at_least, characters);
if (eats_at_least > characters && check_bounds) {
DCHECK(is_uint24(cp_offset + eats_at_least));
DCHECK(is_int24(cp_offset + eats_at_least));
Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least);
EmitOrLink(on_failure);
check_bounds = false; // Load below doesn't need to check.

View File

@ -27,6 +27,7 @@ STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
// TODO(pthier): Argument offsets of bytecodes should be easily accessible by
// name or at least by position.
// TODO(jgruber): More precise types (e.g. int32/uint32 instead of value32).
#define BYTECODE_ITERATOR(V) \
V(BREAK, 0, 4) /* bc8 */ \
V(PUSH_CP, 1, 4) /* bc8 pad24 */ \

View File

@ -100,6 +100,18 @@ int32_t Load16AlignedSigned(const byte* pc) {
return *reinterpret_cast<const int16_t*>(pc);
}
// Helpers to access the packed argument. Takes the 32 bits containing the
// current bytecode, where the 8 LSB contain the bytecode and the rest contains
// a packed 24-bit argument.
// TODO(jgruber): Specify signed-ness in bytecode signature declarations, and
// police restrictions during bytecode generation.
int32_t LoadPacked24Signed(int32_t bytecode_and_packed_arg) {
return bytecode_and_packed_arg >> BYTECODE_SHIFT;
}
uint32_t LoadPacked24Unsigned(int32_t bytecode_and_packed_arg) {
return static_cast<uint32_t>(bytecode_and_packed_arg) >> BYTECODE_SHIFT;
}
// A simple abstraction over the backtracking stack used by the interpreter.
//
// Despite the name 'backtracking' stack, it's actually used as a generic stack
@ -455,39 +467,39 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(PUSH_REGISTER) {
ADVANCE(PUSH_REGISTER);
if (!backtrack_stack.push(registers[insn >> BYTECODE_SHIFT])) {
if (!backtrack_stack.push(registers[LoadPacked24Unsigned(insn)])) {
return MaybeThrowStackOverflow(isolate, call_origin);
}
DISPATCH();
}
BYTECODE(SET_REGISTER) {
ADVANCE(SET_REGISTER);
registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
registers[LoadPacked24Unsigned(insn)] = Load32Aligned(pc + 4);
DISPATCH();
}
BYTECODE(ADVANCE_REGISTER) {
ADVANCE(ADVANCE_REGISTER);
registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
registers[LoadPacked24Unsigned(insn)] += Load32Aligned(pc + 4);
DISPATCH();
}
BYTECODE(SET_REGISTER_TO_CP) {
ADVANCE(SET_REGISTER_TO_CP);
registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
registers[LoadPacked24Unsigned(insn)] = current + Load32Aligned(pc + 4);
DISPATCH();
}
BYTECODE(SET_CP_TO_REGISTER) {
ADVANCE(SET_CP_TO_REGISTER);
SET_CURRENT_POSITION(registers[insn >> BYTECODE_SHIFT]);
SET_CURRENT_POSITION(registers[LoadPacked24Unsigned(insn)]);
DISPATCH();
}
BYTECODE(SET_REGISTER_TO_SP) {
ADVANCE(SET_REGISTER_TO_SP);
registers[insn >> BYTECODE_SHIFT] = backtrack_stack.sp();
registers[LoadPacked24Unsigned(insn)] = backtrack_stack.sp();
DISPATCH();
}
BYTECODE(SET_SP_TO_REGISTER) {
ADVANCE(SET_SP_TO_REGISTER);
backtrack_stack.set_sp(registers[insn >> BYTECODE_SHIFT]);
backtrack_stack.set_sp(registers[LoadPacked24Unsigned(insn)]);
DISPATCH();
}
BYTECODE(POP_CP) {
@ -512,7 +524,7 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(POP_REGISTER) {
ADVANCE(POP_REGISTER);
registers[insn >> BYTECODE_SHIFT] = backtrack_stack.pop();
registers[LoadPacked24Unsigned(insn)] = backtrack_stack.pop();
DISPATCH();
}
BYTECODE(FAIL) {
@ -528,7 +540,7 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(ADVANCE_CP) {
ADVANCE(ADVANCE_CP);
ADVANCE_CURRENT_POSITION(insn >> BYTECODE_SHIFT);
ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
DISPATCH();
}
BYTECODE(GOTO) {
@ -537,7 +549,7 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(ADVANCE_CP_AND_GOTO) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
ADVANCE_CURRENT_POSITION(insn >> BYTECODE_SHIFT);
ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
DISPATCH();
}
BYTECODE(CHECK_GREEDY) {
@ -550,7 +562,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + (insn >> BYTECODE_SHIFT);
int pos = current + LoadPacked24Signed(insn);
if (pos >= subject.length() || pos < 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@ -561,12 +573,12 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
int pos = current + (insn >> BYTECODE_SHIFT);
int pos = current + LoadPacked24Signed(insn);
current_char = subject[pos];
DISPATCH();
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
int pos = current + (insn >> BYTECODE_SHIFT);
int pos = current + LoadPacked24Signed(insn);
if (pos + 2 > subject.length() || pos < 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@ -578,14 +590,14 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
int pos = current + (insn >> BYTECODE_SHIFT);
int pos = current + LoadPacked24Signed(insn);
Char next = subject[pos + 1];
current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
DISPATCH();
}
BYTECODE(LOAD_4_CURRENT_CHARS) {
DCHECK_EQ(1, sizeof(Char));
int pos = current + (insn >> BYTECODE_SHIFT);
int pos = current + LoadPacked24Signed(insn);
if (pos + 4 > subject.length() || pos < 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@ -601,7 +613,7 @@ IrregexpInterpreter::Result RawMatch(
BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
DCHECK_EQ(1, sizeof(Char));
int pos = current + (insn >> BYTECODE_SHIFT);
int pos = current + LoadPacked24Signed(insn);
Char next1 = subject[pos + 1];
Char next2 = subject[pos + 2];
Char next3 = subject[pos + 3];
@ -619,7 +631,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
uint32_t c = LoadPacked24Unsigned(insn);
if (c == current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@ -637,7 +649,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
uint32_t c = LoadPacked24Unsigned(insn);
if (c != current_char) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@ -655,7 +667,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(AND_CHECK_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
uint32_t c = LoadPacked24Unsigned(insn);
if (c == (current_char & Load32Aligned(pc + 4))) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
} else {
@ -673,7 +685,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(AND_CHECK_NOT_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
uint32_t c = LoadPacked24Unsigned(insn);
if (c != (current_char & Load32Aligned(pc + 4))) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
} else {
@ -682,7 +694,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
uint32_t c = (insn >> BYTECODE_SHIFT);
uint32_t c = LoadPacked24Unsigned(insn);
uint32_t minus = Load16Aligned(pc + 4);
uint32_t mask = Load16Aligned(pc + 6);
if (c != ((current_char - minus) & mask)) {
@ -721,7 +733,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_LT) {
uint32_t limit = (insn >> BYTECODE_SHIFT);
uint32_t limit = LoadPacked24Unsigned(insn);
if (current_char < limit) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@ -730,7 +742,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_GT) {
uint32_t limit = (insn >> BYTECODE_SHIFT);
uint32_t limit = LoadPacked24Unsigned(insn);
if (current_char > limit) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@ -739,7 +751,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_REGISTER_LT) {
if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
if (registers[LoadPacked24Unsigned(insn)] < Load32Aligned(pc + 4)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
} else {
ADVANCE(CHECK_REGISTER_LT);
@ -747,7 +759,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_REGISTER_GE) {
if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
if (registers[LoadPacked24Unsigned(insn)] >= Load32Aligned(pc + 4)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
} else {
ADVANCE(CHECK_REGISTER_GE);
@ -755,7 +767,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_REGISTER_EQ_POS) {
if (registers[insn >> BYTECODE_SHIFT] == current) {
if (registers[LoadPacked24Unsigned(insn)] == current) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
ADVANCE(CHECK_REGISTER_EQ_POS);
@ -763,7 +775,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_REGS_EQUAL) {
if (registers[insn >> BYTECODE_SHIFT] ==
if (registers[LoadPacked24Unsigned(insn)] ==
registers[Load32Aligned(pc + 4)]) {
ADVANCE(CHECK_NOT_REGS_EQUAL);
} else {
@ -772,8 +784,8 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
int from = registers[LoadPacked24Unsigned(insn)];
int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
CompareChars(&subject[from], &subject[current], len) != 0) {
@ -786,8 +798,8 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
int from = registers[LoadPacked24Unsigned(insn)];
int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
CompareChars(&subject[from], &subject[current - len], len) != 0) {
@ -803,8 +815,8 @@ IrregexpInterpreter::Result RawMatch(
UNREACHABLE(); // TODO(jgruber): Remove this unused bytecode.
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
int from = registers[LoadPacked24Unsigned(insn)];
int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
!BackRefMatchesNoCase(isolate, from, current, len, subject)) {
@ -820,8 +832,8 @@ IrregexpInterpreter::Result RawMatch(
UNREACHABLE(); // TODO(jgruber): Remove this unused bytecode.
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
int from = registers[LoadPacked24Unsigned(insn)];
int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
!BackRefMatchesNoCase(isolate, from, current - len, len, subject)) {
@ -834,7 +846,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_AT_START) {
if (current + (insn >> BYTECODE_SHIFT) == 0) {
if (current + LoadPacked24Signed(insn) == 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
ADVANCE(CHECK_AT_START);
@ -842,7 +854,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_NOT_AT_START) {
if (current + (insn >> BYTECODE_SHIFT) == 0) {
if (current + LoadPacked24Signed(insn) == 0) {
ADVANCE(CHECK_NOT_AT_START);
} else {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
@ -851,7 +863,7 @@ IrregexpInterpreter::Result RawMatch(
}
BYTECODE(SET_CURRENT_POSITION_FROM_END) {
ADVANCE(SET_CURRENT_POSITION_FROM_END);
int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
int by = LoadPacked24Unsigned(insn);
if (subject.length() - current > by) {
SET_CURRENT_POSITION(subject.length() - by);
current_char = subject[current - 1];
@ -859,7 +871,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(CHECK_CURRENT_POSITION) {
int pos = current + (insn >> BYTECODE_SHIFT);
int pos = current + LoadPacked24Signed(insn);
if (pos > subject.length() || pos < 0) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
@ -868,7 +880,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t load_offset = LoadPacked24Unsigned(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
uint32_t c = Load16Aligned(pc + 6);
while (static_cast<uintptr_t>(current + load_offset) <
@ -884,7 +896,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_AND) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t load_offset = LoadPacked24Unsigned(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
uint32_t mask = Load32Aligned(pc + 8);
@ -902,7 +914,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t load_offset = LoadPacked24Unsigned(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
int32_t maximum_offset = Load32Aligned(pc + 8);
@ -919,7 +931,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t load_offset = LoadPacked24Unsigned(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
const byte* table = pc + 8;
while (static_cast<uintptr_t>(current + load_offset) <
@ -935,7 +947,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t load_offset = LoadPacked24Unsigned(insn);
int32_t advance = Load16AlignedSigned(pc + 4);
uint16_t limit = Load16Aligned(pc + 6);
const byte* table = pc + 8;
@ -956,7 +968,7 @@ IrregexpInterpreter::Result RawMatch(
DISPATCH();
}
BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
int load_offset = (insn >> BYTECODE_SHIFT);
uint32_t load_offset = LoadPacked24Unsigned(insn);
int32_t advance = Load32Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 8);
uint16_t c2 = Load16Aligned(pc + 10);

View File

@ -0,0 +1,7 @@
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
const source = "(?<=(?=ab)(|)abc)"
const re = new RegExp(source);
assertNotNull(re.exec("abc"));