[regexp] Bytecode peephole optimization

Bytecodes used by the regular expression interpreter often occur in
specific sequences. The number of dispatches in the interpreter can be
reduced if those sequences are combined into a single bytecode.

This CL adds a peephole optimization pass for regexp bytecodes.
This pass checks the generated bytecode for pre-defined sequences that
can be merged into a single bytecode.

With the currently implemented bytecode sequences a speedup of 1.12x on
regex-dna and octane-regexp is achieved.

Bug: v8:9330
Change-Id: I827f93273a5848e5963c7e3329daeb898995d151
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1813743
Commit-Queue: Patrick Thier <pthier@google.com>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63992}
This commit is contained in:
Patrick Thier 2019-09-26 15:53:53 +02:00 committed by Commit Bot
parent 4ce267a832
commit 6612943010
13 changed files with 2031 additions and 70 deletions

View File

@ -2776,6 +2776,9 @@ v8_source_set("v8_base_without_compiler") {
"src/regexp/regexp-bytecode-generator-inl.h",
"src/regexp/regexp-bytecode-generator.cc",
"src/regexp/regexp-bytecode-generator.h",
"src/regexp/regexp-bytecode-peephole.cc",
"src/regexp/regexp-bytecode-peephole.h",
"src/regexp/regexp-bytecodes.cc",
"src/regexp/regexp-bytecodes.h",
"src/regexp/regexp-compiler-tonode.cc",
"src/regexp/regexp-compiler.cc",

View File

@ -1274,6 +1274,15 @@ DEFINE_BOOL(regexp_tier_up, true,
DEFINE_INT(regexp_tier_up_ticks, 1,
"set the number of executions for the regexp interpreter before "
"tiering-up to the compiler")
DEFINE_BOOL(regexp_peephole_optimization, true,
"enable peephole optimization for regexp bytecode")
DEFINE_BOOL(trace_regexp_peephole_optimization, false,
"trace regexp bytecode peephole optimization")
DEFINE_BOOL(trace_regexp_bytecodes, false, "trace regexp bytecode execution")
DEFINE_BOOL(trace_regexp_assembler, false,
"trace regexp macro assembler calls.")
DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
@ -1408,11 +1417,6 @@ DEFINE_BOOL(trace_isolates, false, "trace isolate state changes")
// Regexp
DEFINE_BOOL(regexp_possessive_quantifier, false,
"enable possessive quantifier syntax for testing")
DEFINE_BOOL(trace_regexp_bytecodes, false, "trace regexp bytecode execution")
DEFINE_BOOL(trace_regexp_assembler, false,
"trace regexp macro assembler calls.")
DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
// Debugger
DEFINE_BOOL(print_break_location, false, "print source location on debug break")

View File

@ -7,6 +7,7 @@
#include "src/ast/ast.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-bytecode-generator-inl.h"
#include "src/regexp/regexp-bytecode-peephole.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-macro-assembler.h"
@ -18,6 +19,7 @@ RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone)
buffer_(Vector<byte>::New(1024)),
pc_(0),
advance_current_end_(kInvalidPC),
jump_edges_(zone),
isolate_(isolate) {}
RegExpBytecodeGenerator::~RegExpBytecodeGenerator() {
@ -39,6 +41,7 @@ void RegExpBytecodeGenerator::Bind(Label* l) {
int fixup = pos;
pos = *reinterpret_cast<int32_t*>(buffer_.begin() + fixup);
*reinterpret_cast<uint32_t*>(buffer_.begin() + fixup) = pc_;
jump_edges_.emplace(fixup, pc_);
}
}
l->bind_to(pc_);
@ -46,16 +49,17 @@ void RegExpBytecodeGenerator::Bind(Label* l) {
void RegExpBytecodeGenerator::EmitOrLink(Label* l) {
if (l == nullptr) l = &backtrack_;
int pos = 0;
if (l->is_bound()) {
Emit32(l->pos());
pos = l->pos();
jump_edges_.emplace(pc_, pos);
} else {
int pos = 0;
if (l->is_linked()) {
pos = l->pos();
}
l->link_to(pc_);
Emit32(pos);
}
Emit32(pos);
}
void RegExpBytecodeGenerator::PopRegister(int register_index) {
@ -365,8 +369,16 @@ void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT, 0);
Handle<ByteArray> array = isolate_->factory()->NewByteArray(length());
Copy(array->GetDataStartAddress());
Handle<ByteArray> array;
if (FLAG_regexp_peephole_optimization) {
array = RegExpBytecodePeepholeOptimization::OptimizeBytecode(
isolate_, zone(), source, buffer_.begin(), length(), jump_edges_);
} else {
array = isolate_->factory()->NewByteArray(length());
Copy(array->GetDataStartAddress());
}
return array;
}

View File

@ -100,6 +100,12 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
int advance_current_offset_;
int advance_current_end_;
// Stores jump edges emitted for the bytecode (used by
// RegExpBytecodePeepholeOptimization).
// Key: jump source (offset in buffer_ where jump destination is stored).
// Value: jump destination (offset in buffer_ to jump to).
ZoneUnorderedMap<int, int> jump_edges_;
Isolate* isolate_;
static const int kInvalidPC = -1;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,31 @@
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
#define V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
#include "src/common/globals.h"
#include "src/zone/zone-containers.h"
namespace v8 {
namespace internal {
class ByteArray;
// Peephole optimization for regexp interpreter bytecode.
// Pre-defined bytecode sequences occurring in the bytecode generated by the
// RegExpBytecodeGenerator can be optimized into a single bytecode.
// Static-only entry point of the regexp bytecode peephole optimization pass.
// RegExpBytecodeGenerator::GetCode() calls it (when
// --regexp-peephole-optimization is enabled) instead of copying its buffer
// verbatim into a ByteArray.
class RegExpBytecodePeepholeOptimization : public AllStatic {
public:
// Performs peephole optimization on the given bytecode and returns the
// optimized bytecode.
//
// isolate:    isolate the caller is compiling for.
// zone:       zone supplied by the caller (the generator's zone).
// source:     the handle GetCode() received as its |source| argument.
//             NOTE(review): how it is consumed is not visible from this
//             header - presumably the pattern source, used for diagnostics;
//             confirm in regexp-bytecode-peephole.cc.
// bytecode:   start of the unoptimized bytecode buffer.
// length:     size of |bytecode| in bytes.
// jump_edges: jumps recorded while emitting |bytecode|. Key: offset at which
//             a jump destination is stored; value: offset that is jumped to.
static Handle<ByteArray> OptimizeBytecode(
Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode,
int length, const ZoneUnorderedMap<int, int>& jump_edges);
};
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_

View File

@ -0,0 +1,46 @@
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/regexp/regexp-bytecodes.h"
#include <cctype>
#include "src/utils/utils.h"
namespace v8 {
namespace internal {
void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc) {
PrintF("%s", RegExpBytecodeName(*pc));
// Args and the bytecode as hex.
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
PrintF(", %02x", pc[i]);
}
PrintF(" ");
// Args as ascii.
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
unsigned char b = pc[i];
PrintF("%c", std::isprint(b) ? b : '.');
}
PrintF("\n");
}
void RegExpBytecodeDisassemble(const byte* code_base, int length,
const char* pattern) {
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern);
ptrdiff_t offset = 0;
while (offset < length) {
const byte* const pc = code_base + offset;
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
RegExpBytecodeDisassembleSingle(code_base, pc);
offset += RegExpBytecodeLength(*pc);
}
}
} // namespace internal
} // namespace v8

View File

@ -6,6 +6,7 @@
#define V8_REGEXP_REGEXP_BYTECODES_H_
#include "src/base/macros.h"
#include "src/common/globals.h"
namespace v8 {
namespace internal {
@ -24,6 +25,8 @@ const unsigned int MAX_FIRST_ARG = 0x7fffffu;
const int BYTECODE_SHIFT = 8;
STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
// TODO(pthier): Argument offsets of bytecodes should be easily accessible by
// name or at least by position.
#define BYTECODE_ITERATOR(V) \
V(BREAK, 0, 4) /* bc8 */ \
V(PUSH_CP, 1, 4) /* bc8 pad24 */ \
@ -41,25 +44,61 @@ STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
V(FAIL, 13, 4) /* bc8 pad24 */ \
V(SUCCEED, 14, 4) /* bc8 pad24 */ \
V(ADVANCE_CP, 15, 4) /* bc8 offset24 */ \
V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \
/* Jump to another bytecode given its offset. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x10 (fixed) Bytecode */ \
/* 0x08 - 0x1F: 0x00 (unused) Padding */ \
/* 0x20 - 0x3F: Address of bytecode to jump to */ \
V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \
/* Check if offset is in range and load character at given offset. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x11 (fixed) Bytecode */ \
/* 0x08 - 0x1F: Offset from current position */ \
/* 0x20 - 0x3F: Address of bytecode when load is out of range */ \
V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32 */ \
/* Load character at given offset without range checks. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x12 (fixed) Bytecode */ \
/* 0x08 - 0x1F: Offset from current position */ \
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \
V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \
V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \
V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \
V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \
V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \
V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \
/* Check if current character is equal to a given character */ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x18 (fixed) Bytecode */ \
/* 0x08 - 0x0F: 0x00 (unused) Padding */ \
/* 0x10 - 0x1F: Character to check */ \
/* 0x20 - 0x3F: Address of bytecode when matched */ \
V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \
V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \
V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \
V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
/* Checks if the current character combined with mask (bitwise and) */ \
/* matches a character (e.g. used when two characters in a disjunction */ \
/* differ by only a single bit). */ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x1c (fixed) Bytecode */ \
/* 0x08 - 0x0F: 0x00 (unused) Padding */ \
/* 0x10 - 0x1F: Character to match against (after mask applied) */ \
/* 0x20 - 0x3F: Bitmask bitwise and combined with current character */ \
/* 0x40 - 0x5F: Address of bytecode when matched */ \
V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */ \
V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
V(CHECK_CHAR_NOT_IN_RANGE, 33, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
/* Checks if the current character matches any of the characters encoded */ \
/* in a bit table. Similar to/inspired by Boyer-Moore string search */ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x22 (fixed) Bytecode */ \
/* 0x08 - 0x1F: 0x00 (unused) Padding */ \
/* 0x20 - 0x3F: Address of bytecode when bit is set */ \
/* 0x40 - 0xBF: Bit table */ \
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
@ -74,10 +113,99 @@ STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \
/* Checks if the current position matches top of backtrack stack */ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x31 (fixed) Bytecode */ \
/* 0x08 - 0x1F: 0x00 (unused) Padding */ \
/* 0x20 - 0x3F: Address of bytecode when current matches tos */ \
V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \
V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
/* Advance character pointer by given offset and jump to another bytecode.*/ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x32 (fixed) Bytecode */ \
/* 0x08 - 0x1F: Number of characters to advance */ \
/* 0x20 - 0x3F: Address of bytecode to jump to */ \
V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \
V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */
/* Checks if current position + given offset is in range. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07: 0x34 (fixed) Bytecode */ \
/* 0x08 - 0x1F: Offset from current position */ \
/* 0x20 - 0x3F: Address of bytecode when position is out of range */ \
V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */ \
/* Combination of: */ \
/* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO */ \
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07 0x35 (fixed) Bytecode */ \
/* 0x08 - 0x1F Load character offset from current position */ \
/* 0x20 - 0x3F Number of characters to advance */ \
/* 0x40 - 0xBF Bit Table */ \
/* 0xC0 - 0xDF Address of bytecode when character is matched */ \
/* 0xE0 - 0xFF Address of bytecode when no match */ \
V(SKIP_UNTIL_BIT_IN_TABLE, 53, 32) \
/* Combination of: */ \
/* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR */ \
/* and ADVANCE_CP_AND_GOTO */ \
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07 0x36 (fixed) Bytecode */ \
/* 0x08 - 0x1F Load character offset from current position */ \
/* 0x20 - 0x2F Number of characters to advance */ \
/* 0x30 - 0x3F Character to match against (after mask applied) */ \
/* 0x40 - 0x5F: Bitmask bitwise and combined with current character */ \
/* 0x60 - 0x7F Minimum number of characters this pattern consumes */ \
/* 0x80 - 0x9F Address of bytecode when character is matched */ \
/* 0xA0 - 0xBF Address of bytecode when no match */ \
V(SKIP_UNTIL_CHAR_AND, 54, 24) \
/* Combination of: */ \
/* LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07 0x37 (fixed) Bytecode */ \
/* 0x08 - 0x1F Load character offset from current position */ \
/* 0x20 - 0x2F Number of characters to advance */ \
/* 0x30 - 0x3F Character to match */ \
/* 0x40 - 0x5F Address of bytecode when character is matched */ \
/* 0x60 - 0x7F Address of bytecode when no match */ \
V(SKIP_UNTIL_CHAR, 55, 16) \
/* Combination of: */ \
/* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, CHECK_CHAR */ \
/* and ADVANCE_CP_AND_GOTO */ \
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07 0x38 (fixed) Bytecode */ \
/* 0x08 - 0x1F Load character offset from current position */ \
/* 0x20 - 0x2F Number of characters to advance */ \
/* 0x30 - 0x3F Character to match */ \
/* 0x40 - 0x5F Minimum number of characters this pattern consumes */ \
/* 0x60 - 0x7F Address of bytecode when character is matched */ \
/* 0x80 - 0x9F Address of bytecode when no match */ \
V(SKIP_UNTIL_CHAR_POS_CHECKED, 56, 20) \
/* Combination of: */ \
/* LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07 0x39 (fixed) Bytecode */ \
/* 0x08 - 0x1F Load character offset from current position */ \
/* 0x20 - 0x3F Number of characters to advance */ \
/* 0x40 - 0x4F Character to match */ \
/* 0x50 - 0x5F Other Character to match */ \
/* 0x60 - 0x7F Address of bytecode when either character is matched */ \
/* 0x80 - 0x9F Address of bytecode when no match */ \
V(SKIP_UNTIL_CHAR_OR_CHAR, 57, 20) \
/* Combination of: */ \
/* LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO and */ \
/* ADVANCE_CP_AND_GOTO */ \
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
/* Bit Layout: */ \
/* 0x00 - 0x07 0x3A (fixed) Bytecode */ \
/* 0x08 - 0x1F Load character offset from current position */ \
/* 0x20 - 0x2F Number of characters to advance */ \
/* 0x30 - 0x3F Character to check if it is less than current char */ \
/* 0x40 - 0xBF Bit Table */ \
/* 0xC0 - 0xDF Address of bytecode when character is matched */ \
/* 0xE0 - 0xFF Address of bytecode when no match */ \
V(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, 58, 32)
#define COUNT(...) +1
static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT);
@ -87,7 +215,7 @@ static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT);
// contiguous, strictly increasing, and start at 0.
// TODO(jgruber): Do not explicitly assign values, instead generate them
// implicitly from the list order.
STATIC_ASSERT(kRegExpBytecodeCount == 53);
STATIC_ASSERT(kRegExpBytecodeCount == 59);
#define DECLARE_BYTECODES(name, code, length) \
static constexpr int BC_##name = code;
@ -114,6 +242,10 @@ inline const char* RegExpBytecodeName(int bytecode) {
return kRegExpBytecodeNames[bytecode];
}
void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc);
void RegExpBytecodeDisassemble(const byte* code_base, int length,
const char* pattern);
} // namespace internal
} // namespace v8

View File

@ -64,23 +64,6 @@ bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
return true;
}
void DisassembleSingleBytecode(const byte* code_base, const byte* pc) {
PrintF("%s", RegExpBytecodeName(*pc));
// Args and the bytecode as hex.
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
PrintF(", %02x", pc[i]);
}
PrintF(" ");
// Args as ascii.
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
unsigned char b = pc[i];
PrintF("%c", std::isprint(b) ? b : '.');
}
PrintF("\n");
}
#ifdef DEBUG
void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
int stack_depth, int current_position,
@ -95,7 +78,7 @@ void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
PrintF(format, pc - code_base, stack_depth, current_position, current_char,
printable ? current_char : '.');
DisassembleSingleBytecode(code_base, pc);
RegExpBytecodeDisassembleSingle(code_base, pc);
}
}
#endif // DEBUG
@ -257,6 +240,13 @@ IrregexpInterpreter::Result HandleInterrupts(
return IrregexpInterpreter::SUCCESS;
}
// Returns true if the bit selected by |current_char| is set in |table|.
// The character is first reduced with RegExpMacroAssembler::kTableMask; the
// reduced value divided by the bits-per-byte selects the table byte, and the
// low bits of the character select the bit inside that byte.
bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
  const uint32_t masked_char = current_char & RegExpMacroAssembler::kTableMask;
  const int table_byte = table[masked_char >> kBitsPerByteLog2];
  const int bit_index = current_char & (kBitsPerByte - 1);
  return ((table_byte >> bit_index) & 1) != 0;
}
// If computed gotos are supported by the compiler, we can get addresses to
// labels directly in C/C++. Every bytecode handler has its own label and we
// store the addresses in a dispatch table indexed by bytecode. To execute the
@ -281,7 +271,7 @@ IrregexpInterpreter::Result HandleInterrupts(
#define DISPATCH() \
pc = next_pc; \
insn = next_insn; \
break
goto switch_dispatch_continuation
#endif // V8_USE_COMPUTED_GOTO
// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
@ -331,19 +321,13 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
// Fill dispatch table from last defined bytecode up to the next power of two
// with BREAK (invalid operation).
// TODO(pthier): Find a way to fill up automatically (at compile time)
// 53 real bytecodes -> 11 fillers
// 59 real bytecodes -> 5 fillers
#define BYTECODE_FILLER_ITERATOR(V) \
V(BREAK) /* 1 */ \
V(BREAK) /* 2 */ \
V(BREAK) /* 3 */ \
V(BREAK) /* 4 */ \
V(BREAK) /* 5 */ \
V(BREAK) /* 6 */ \
V(BREAK) /* 7 */ \
V(BREAK) /* 8 */ \
V(BREAK) /* 9 */ \
V(BREAK) /* 10 */ \
V(BREAK) /* 11 */
V(BREAK) /* 5 */
#define COUNT(...) +1
static constexpr int kRegExpBytecodeFillerCount =
@ -652,10 +636,7 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
DISPATCH();
}
BYTECODE(CHECK_BIT_IN_TABLE) {
int mask = RegExpMacroAssembler::kTableMask;
byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
int bit = (current_char & (kBitsPerByte - 1));
if ((b & (1 << bit)) != 0) {
if (CheckBitInTable(current_char, pc + 8)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
} else {
ADVANCE(CHECK_BIT_IN_TABLE);
@ -834,6 +815,118 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
}
DISPATCH();
}
// Fused form of LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO (see the
// bit layout next to the bytecode definition). Scans the subject in steps of
// |advance| until the character at |current + load_offset| equals |c| or the
// load position leaves the subject.
BYTECODE(SKIP_UNTIL_CHAR) {
// The 24 bits following the opcode hold the load offset.
int load_offset = (insn >> BYTECODE_SHIFT);
// Two 16-bit operands: step width, then the character to look for.
uint32_t advance = Load16Aligned(pc + 4);
uint32_t c = Load16Aligned(pc + 6);
// Comparing as unsigned also rejects a negative load position, which wraps
// to a very large value.
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == current_char) {
// Match: continue at the "character is matched" address.
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
DISPATCH();
}
current += advance;
}
// Load position out of range: continue at the "no match" address.
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
// Fused form of CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED,
// AND_CHECK_CHAR and ADVANCE_CP_AND_GOTO (see the bit layout next to the
// bytecode definition). Scans in steps of |advance| until the loaded
// character, masked with |mask|, equals |c|.
BYTECODE(SKIP_UNTIL_CHAR_AND) {
// The 24 bits following the opcode hold the load offset.
int load_offset = (insn >> BYTECODE_SHIFT);
uint16_t advance = Load16Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
uint32_t mask = Load32Aligned(pc + 8);
// This operand is described in the bytecode layout as the minimum number of
// characters the pattern consumes; it bounds the scan below.
int32_t maximum_offset = Load32Aligned(pc + 12);
// Unlike SKIP_UNTIL_CHAR, the position check uses |maximum_offset| (with <=)
// rather than the load offset; the load itself is not range checked again.
while (static_cast<uintptr_t>(current + maximum_offset) <=
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == (current_char & mask)) {
// Match: continue at the "character is matched" address.
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
current += advance;
}
// Position check failed: continue at the "no match" address.
SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
DISPATCH();
}
// Fused form of CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED,
// CHECK_CHAR and ADVANCE_CP_AND_GOTO (see the bit layout next to the bytecode
// definition). Scans in steps of |advance| until the loaded character equals
// |c|.
BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
// The 24 bits following the opcode hold the load offset.
int load_offset = (insn >> BYTECODE_SHIFT);
uint16_t advance = Load16Aligned(pc + 4);
uint16_t c = Load16Aligned(pc + 6);
// This operand is described in the bytecode layout as the minimum number of
// characters the pattern consumes; it bounds the scan below.
int32_t maximum_offset = Load32Aligned(pc + 8);
// The position check uses |maximum_offset| (with <=); the load itself is not
// range checked again.
while (static_cast<uintptr_t>(current + maximum_offset) <=
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
if (c == current_char) {
// Match: continue at the "character is matched" address.
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
DISPATCH();
}
current += advance;
}
// Position check failed: continue at the "no match" address.
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
DISPATCH();
}
// Fused form of LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO
// (see the bit layout next to the bytecode definition). Scans the subject in
// steps of |advance| until the character at |current + load_offset| has its
// bit set in the embedded bit table, or the load position leaves the subject.
BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
  // The 24 bits following the opcode hold the load offset.
  int load_offset = (insn >> BYTECODE_SHIFT);
  // The advance count is a full 32-bit operand (bits 0x20 - 0x3F), so it has
  // to be read as such: a 16-bit load would truncate large values and would
  // pick up the wrong half of the field on big-endian hosts.
  uint32_t advance = Load32Aligned(pc + 4);
  // The bit table directly follows the advance operand.
  const byte* table = pc + 8;
  // Comparing as unsigned also rejects a negative load position, which wraps
  // to a very large value.
  while (static_cast<uintptr_t>(current + load_offset) <
         static_cast<uintptr_t>(subject.length())) {
    current_char = subject[current + load_offset];
    if (CheckBitInTable(current_char, table)) {
      // Match: continue at the "character is matched" address.
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
      DISPATCH();
    }
    current += advance;
  }
  // Load position out of range: continue at the "no match" address.
  SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
  DISPATCH();
}
// Fused form of LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO and
// ADVANCE_CP_AND_GOTO (see the bit layout next to the bytecode definition).
// Scans in steps of |advance| until the loaded character is either greater
// than |limit| or does NOT have its bit set in the embedded bit table.
BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
// The 24 bits following the opcode hold the load offset.
int load_offset = (insn >> BYTECODE_SHIFT);
// Two 16-bit operands: step width, then the upper character limit.
uint16_t advance = Load16Aligned(pc + 4);
uint16_t limit = Load16Aligned(pc + 6);
const byte* table = pc + 8;
// Comparing as unsigned also rejects a negative load position, which wraps
// to a very large value.
while (static_cast<uintptr_t>(current + load_offset) <
static_cast<uintptr_t>(subject.length())) {
current_char = subject[current + load_offset];
// Both exit conditions below continue at the same "matched" address.
if (current_char > limit) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
if (!CheckBitInTable(current_char, table)) {
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
DISPATCH();
}
current += advance;
}
// Load position out of range: continue at the "no match" address.
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
DISPATCH();
}
// Fused form of LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and
// ADVANCE_CP_AND_GOTO (see the bit layout next to the bytecode definition).
// Scans the subject in steps of |advance| until the character at
// |current + load_offset| equals |c| or |c2|, or the load position leaves the
// subject.
BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
  // The 24 bits following the opcode hold the load offset.
  int load_offset = (insn >> BYTECODE_SHIFT);
  // The advance count is a full 32-bit operand (bits 0x20 - 0x3F), so it has
  // to be read as such: a 16-bit load would truncate large values and would
  // pick up the wrong half of the field on big-endian hosts.
  uint32_t advance = Load32Aligned(pc + 4);
  // The two candidate characters follow as 16-bit operands.
  uint16_t c = Load16Aligned(pc + 8);
  uint16_t c2 = Load16Aligned(pc + 10);
  // Comparing as unsigned also rejects a negative load position, which wraps
  // to a very large value.
  while (static_cast<uintptr_t>(current + load_offset) <
         static_cast<uintptr_t>(subject.length())) {
    current_char = subject[current + load_offset];
    // The two if-statements below are split up intentionally, as combining
    // them seems to result in register allocation behaving quite
    // differently and slowing down the resulting code.
    if (c == current_char) {
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
      DISPATCH();
    }
    if (c2 == current_char) {
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
      DISPATCH();
    }
    current += advance;
  }
  // Load position out of range: continue at the "no match" address.
  SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
  DISPATCH();
}
#if V8_USE_COMPUTED_GOTO
// Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
// V8_USE_COMPUTED_GOTO here.
@ -841,6 +934,9 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
default:
UNREACHABLE();
}
// Label we jump to in DISPATCH(). There must be no instructions between the
// end of the switch, this label and the end of the loop.
switch_dispatch_continuation : {}
#endif // V8_USE_COMPUTED_GOTO
}
}
@ -855,25 +951,6 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
} // namespace
// static
void IrregexpInterpreter::Disassemble(ByteArray byte_array,
const std::string& pattern) {
DisallowHeapAllocation no_gc;
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str());
const byte* const code_base = byte_array.GetDataStartAddress();
const int byte_array_length = byte_array.length();
ptrdiff_t offset = 0;
while (offset < byte_array_length) {
const byte* const pc = code_base + offset;
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
DisassembleSingleBytecode(code_base, pc);
offset += RegExpBytecodeLength(*pc);
}
}
// static
IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,

View File

@ -46,8 +46,6 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
int registers_length, int start_position,
RegExp::CallOrigin call_origin);
static void Disassemble(ByteArray byte_array, const std::string& pattern);
private:
static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
int* registers, int registers_length, int start_position,

View File

@ -9,6 +9,7 @@
#include "src/heap/heap-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/regexp-bytecode-generator.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-compiler.h"
#include "src/regexp/regexp-dotprinter.h"
#include "src/regexp/regexp-interpreter.h"
@ -867,7 +868,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
data->compilation_target == RegExpCompilationTarget::kBytecode) {
Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate);
auto pattern_cstring = pattern->ToCString();
IrregexpInterpreter::Disassemble(*bytecode, pattern_cstring.get());
RegExpBytecodeDisassemble(bytecode->GetDataStartAddress(),
bytecode->length(), pattern_cstring.get());
}
}

View File

@ -38,6 +38,7 @@
#include "src/objects/js-regexp-inl.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-bytecode-generator.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-compiler.h"
#include "src/regexp/regexp-interpreter.h"
#include "src/regexp/regexp-macro-assembler-arch.h"
@ -1783,6 +1784,567 @@ TEST(UncachedExternalString) {
ExpectString("external.substring(1).match(re)[1]", "z");
}
// Test bytecode peephole optimization
// Emits a small hand-written program into |m| that the PeepholeNoChange test
// expects the peephole pass to leave untouched: it checks the characters
// 'f', 'o', 'o' at offsets 0, 1 and 2 and records the match in registers 0
// and 1.
void CreatePeepholeNoChangeBytecode(RegExpMacroAssembler* m) {
  Label on_fail;
  Label on_backtrack;

  m->PushBacktrack(&on_fail);
  m->CheckNotAtStart(0, nullptr);

  // Characters are checked back to front; only the first load is range
  // checked.
  m->LoadCurrentCharacter(2, nullptr);
  m->CheckNotCharacter('o', nullptr);
  m->LoadCurrentCharacter(1, nullptr, false);
  m->CheckNotCharacter('o', nullptr);
  m->LoadCurrentCharacter(0, nullptr, false);
  m->CheckNotCharacter('f', nullptr);

  m->WriteCurrentPositionToRegister(0, 0);
  m->WriteCurrentPositionToRegister(1, 3);
  m->AdvanceCurrentPosition(3);
  m->PushBacktrack(&on_backtrack);
  m->Succeed();

  m->Bind(&on_backtrack);
  m->Backtrack();

  m->Bind(&on_fail);
  m->Fail();
}
// Generates the same program twice, once with the peephole pass disabled and
// once with it enabled, and checks that the pass leaves the bytecode
// unchanged.
TEST(PeepholeNoChange) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  RegExpBytecodeGenerator orig(CcTest::i_isolate(), &zone);
  RegExpBytecodeGenerator opt(CcTest::i_isolate(), &zone);
  CreatePeepholeNoChangeBytecode(&orig);
  CreatePeepholeNoChangeBytecode(&opt);
  Handle<String> source = factory->NewStringFromStaticChars("^foo");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array = Handle<ByteArray>::cast(orig.GetCode(source));
  int length = array->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(opt.GetCode(source));

  // "No change" includes the size. Checking it first also guarantees that the
  // memcmp below never reads past the end of the optimized array.
  CHECK_EQ(length, array_optimized->length());

  // Only take raw data pointers once all allocations are done: GetCode()
  // allocates a new ByteArray, and a raw pointer obtained before that could
  // be left stale if the allocation triggers a GC that moves the array.
  byte* byte_array = array->GetDataStartAddress();
  byte* byte_array_optimized = array_optimized->GetDataStartAddress();
  CHECK_EQ(0, memcmp(byte_array, byte_array_optimized, length));
}
// Emits a loop into |m|: a range-checked character load, a check against 'x',
// then an advance by one character and a jump back to the loop entry. The
// PeepholeSkipUntilChar test expects this to be fused into SKIP_UNTIL_CHAR.
void CreatePeepholeSkipUntilCharBytecode(RegExpMacroAssembler* m) {
  Label loop_entry;
  m->Bind(&loop_entry);

  m->LoadCurrentCharacter(0, nullptr, true);
  m->CheckCharacter('x', nullptr);

  m->AdvanceCurrentPosition(1);
  m->GoTo(&loop_entry);
}
// Checks that the load/check-char/advance-and-goto loop is collapsed into a
// single SKIP_UNTIL_CHAR bytecode followed by the trailing POP_BT.
TEST(PeepholeSkipUntilChar) {
  Isolate* isolate = CcTest::i_isolate();
  Zone zone(isolate->allocator(), ZONE_NAME);
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  // Sizes of the sequence before and after the peephole pass.
  const int fused_length = RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR);
  const int length_expected = RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR) +
                              RegExpBytecodeLength(BC_CHECK_CHAR) +
                              RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
                              RegExpBytecodeLength(BC_POP_BT);
  const int length_optimized_expected =
      fused_length + RegExpBytecodeLength(BC_POP_BT);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilCharBytecode(&plain_generator);
  CreatePeepholeSkipUntilCharBytecode(&peephole_generator);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  // Reference bytecode, generated with the pass switched off.
  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int length = array->length();

  // Same program, generated with the pass switched on.
  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int length_optimized = array_optimized->length();

  CHECK_EQ(length, length_expected);
  CHECK_EQ(length_optimized, length_optimized_expected);

  CHECK_EQ(BC_SKIP_UNTIL_CHAR, array_optimized->get(0));
  CHECK_EQ(BC_POP_BT, array_optimized->get(fused_length));
}
// Emits a loop into |m|: a range-checked character load, a bit-table check
// against an all-zero table, then an advance by one character and a jump back
// to the loop entry. The PeepholeSkipUntilBitInTable test expects this to be
// fused into SKIP_UNTIL_BIT_IN_TABLE.
void CreatePeepholeSkipUntilBitInTableBytecode(RegExpMacroAssembler* m,
                                               Factory* factory) {
  // The table contents do not matter for the test, so every entry is cleared.
  Handle<ByteArray> bit_table = factory->NewByteArray(
      RegExpMacroAssembler::kTableSize, AllocationType::kOld);
  for (uint32_t entry = 0; entry < RegExpMacroAssembler::kTableSize; entry++) {
    bit_table->set(entry, 0);
  }

  Label loop_entry;
  m->Bind(&loop_entry);

  m->LoadCurrentCharacter(0, nullptr, true);
  m->CheckBitInTable(bit_table, nullptr);

  m->AdvanceCurrentPosition(1);
  m->GoTo(&loop_entry);
}
// Checks that the load/check-bit-in-table/advance-and-goto loop is collapsed
// into a single SKIP_UNTIL_BIT_IN_TABLE bytecode followed by the trailing
// POP_BT.
TEST(PeepholeSkipUntilBitInTable) {
  Isolate* isolate = CcTest::i_isolate();
  Zone zone(isolate->allocator(), ZONE_NAME);
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  // Sizes of the sequence before and after the peephole pass.
  const int fused_length = RegExpBytecodeLength(BC_SKIP_UNTIL_BIT_IN_TABLE);
  const int length_expected = RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR) +
                              RegExpBytecodeLength(BC_CHECK_BIT_IN_TABLE) +
                              RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
                              RegExpBytecodeLength(BC_POP_BT);
  const int length_optimized_expected =
      fused_length + RegExpBytecodeLength(BC_POP_BT);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilBitInTableBytecode(&plain_generator, factory);
  CreatePeepholeSkipUntilBitInTableBytecode(&peephole_generator, factory);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  // Reference bytecode, generated with the pass switched off.
  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int length = array->length();

  // Same program, generated with the pass switched on.
  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int length_optimized = array_optimized->length();

  CHECK_EQ(length, length_expected);
  CHECK_EQ(length_optimized, length_optimized_expected);

  CHECK_EQ(BC_SKIP_UNTIL_BIT_IN_TABLE, array_optimized->get(0));
  CHECK_EQ(BC_POP_BT, array_optimized->get(fused_length));
}
// Emits a loop into |m|: a character load with the extra (1, 2) arguments, a
// check against 'x', then an advance by one character and a jump back to the
// loop entry. The PeepholeSkipUntilCharPosChecked test expects the load to be
// emitted as CHECK_CURRENT_POSITION + LOAD_CURRENT_CHAR_UNCHECKED and the
// whole loop to be fused into SKIP_UNTIL_CHAR_POS_CHECKED.
void CreatePeepholeSkipUntilCharPosCheckedBytecode(RegExpMacroAssembler* m) {
  Label loop_entry;
  m->Bind(&loop_entry);

  m->LoadCurrentCharacter(0, nullptr, true, 1, 2);
  m->CheckCharacter('x', nullptr);

  m->AdvanceCurrentPosition(1);
  m->GoTo(&loop_entry);
}
// Checks that the position-check/unchecked-load/check-char/advance-and-goto
// loop is collapsed into a single SKIP_UNTIL_CHAR_POS_CHECKED bytecode
// followed by the trailing POP_BT.
TEST(PeepholeSkipUntilCharPosChecked) {
  Isolate* isolate = CcTest::i_isolate();
  Zone zone(isolate->allocator(), ZONE_NAME);
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  // Sizes of the sequence before and after the peephole pass.
  const int fused_length =
      RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_POS_CHECKED);
  const int length_expected =
      RegExpBytecodeLength(BC_CHECK_CURRENT_POSITION) +
      RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR_UNCHECKED) +
      RegExpBytecodeLength(BC_CHECK_CHAR) +
      RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
      RegExpBytecodeLength(BC_POP_BT);
  const int length_optimized_expected =
      fused_length + RegExpBytecodeLength(BC_POP_BT);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilCharPosCheckedBytecode(&plain_generator);
  CreatePeepholeSkipUntilCharPosCheckedBytecode(&peephole_generator);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  // Reference bytecode, generated with the pass switched off.
  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int length = array->length();

  // Same program, generated with the pass switched on.
  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int length_optimized = array_optimized->length();

  CHECK_EQ(length, length_expected);
  CHECK_EQ(length_optimized, length_optimized_expected);

  CHECK_EQ(BC_SKIP_UNTIL_CHAR_POS_CHECKED, array_optimized->get(0));
  CHECK_EQ(BC_POP_BT, array_optimized->get(fused_length));
}
// Emits a tight loop into |m|: load the current character, compare it with
// 'x' after masking with 0xFF, otherwise advance by one character and jump
// back to the loop head. The accompanying test expects the unoptimized form
// to be CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR
// and ADVANCE_CP_AND_GOTO.
void CreatePeepholeSkipUntilCharAndBytecode(RegExpMacroAssembler* m) {
  Label loop_head;
  m->Bind(&loop_head);
  m->LoadCurrentCharacter(0, nullptr, true, 1, 2);
  m->CheckCharacterAfterAnd('x', 0xFF, nullptr);
  m->AdvanceCurrentPosition(1);
  m->GoTo(&loop_head);
}
// Checks that the masked "scan for 'x'" loop is emitted as five bytecodes
// without the peephole pass and as BC_SKIP_UNTIL_CHAR_AND followed by
// BC_POP_BT with it.
TEST(PeepholeSkipUntilCharAnd) {
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate->allocator(), ZONE_NAME);
  HandleScope scope(isolate);

  // Two identical emissions: |orig| is finalized with the peephole flag off,
  // |opt| with the flag on.
  RegExpBytecodeGenerator orig(isolate, &zone);
  RegExpBytecodeGenerator opt(isolate, &zone);
  CreatePeepholeSkipUntilCharAndBytecode(&orig);
  CreatePeepholeSkipUntilCharAndBytecode(&opt);
  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code = Handle<ByteArray>::cast(orig.GetCode(source));
  int plain_length = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> fused_code = Handle<ByteArray>::cast(opt.GetCode(source));
  int fused_length = fused_code->length();

  int plain_length_expected =
      RegExpBytecodeLength(BC_CHECK_CURRENT_POSITION) +
      RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR_UNCHECKED) +
      RegExpBytecodeLength(BC_AND_CHECK_CHAR) +
      RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
      RegExpBytecodeLength(BC_POP_BT);
  int fused_length_expected = RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_AND) +
                              RegExpBytecodeLength(BC_POP_BT);
  CHECK_EQ(plain_length, plain_length_expected);
  CHECK_EQ(fused_length, fused_length_expected);

  // The fused bytecode sits at offset 0 and BC_POP_BT directly behind it.
  CHECK_EQ(BC_SKIP_UNTIL_CHAR_AND, fused_code->get(0));
  int pop_bt_offset = RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_AND);
  CHECK_EQ(BC_POP_BT, fused_code->get(pop_bt_offset));
}
// Emits a tight loop into |m|: load the current character, compare it with
// 'x' and then with 'y', otherwise advance by one character and jump back to
// the loop head. The accompanying test expects the unoptimized form to be
// LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO.
void CreatePeepholeSkipUntilCharOrCharBytecode(RegExpMacroAssembler* m) {
  Label loop_head;
  m->Bind(&loop_head);
  m->LoadCurrentCharacter(0, nullptr, true);
  m->CheckCharacter('x', nullptr);
  m->CheckCharacter('y', nullptr);
  m->AdvanceCurrentPosition(1);
  m->GoTo(&loop_head);
}
// Checks that the "scan for 'x' or 'y'" loop is emitted as five bytecodes
// without the peephole pass and as BC_SKIP_UNTIL_CHAR_OR_CHAR followed by
// BC_POP_BT with it.
TEST(PeepholeSkipUntilCharOrChar) {
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate->allocator(), ZONE_NAME);
  HandleScope scope(isolate);

  // Two identical emissions: |orig| is finalized with the peephole flag off,
  // |opt| with the flag on.
  RegExpBytecodeGenerator orig(isolate, &zone);
  RegExpBytecodeGenerator opt(isolate, &zone);
  CreatePeepholeSkipUntilCharOrCharBytecode(&orig);
  CreatePeepholeSkipUntilCharOrCharBytecode(&opt);
  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code = Handle<ByteArray>::cast(orig.GetCode(source));
  int plain_length = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> fused_code = Handle<ByteArray>::cast(opt.GetCode(source));
  int fused_length = fused_code->length();

  int plain_length_expected = RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR) +
                              RegExpBytecodeLength(BC_CHECK_CHAR) +
                              RegExpBytecodeLength(BC_CHECK_CHAR) +
                              RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
                              RegExpBytecodeLength(BC_POP_BT);
  int fused_length_expected =
      RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_OR_CHAR) +
      RegExpBytecodeLength(BC_POP_BT);
  CHECK_EQ(plain_length, plain_length_expected);
  CHECK_EQ(fused_length, fused_length_expected);

  // The fused bytecode sits at offset 0 and BC_POP_BT directly behind it.
  CHECK_EQ(BC_SKIP_UNTIL_CHAR_OR_CHAR, fused_code->get(0));
  int pop_bt_offset = RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_OR_CHAR);
  CHECK_EQ(BC_POP_BT, fused_code->get(pop_bt_offset));
}
// Emits a loop into |m| that loads the current character, checks whether it
// is greater than 'x', then checks it against an all-zero bit table; a table
// hit branches to the "advance by one and retry" tail, anything else jumps to
// the end. |factory| is used to allocate the table.
void CreatePeepholeSkipUntilGtOrNotBitInTableBytecode(RegExpMacroAssembler* m,
                                                      Factory* factory) {
  // Allocate the table and clear every entry.
  Handle<ByteArray> table = factory->NewByteArray(
      RegExpMacroAssembler::kTableSize, AllocationType::kOld);
  for (uint32_t entry = 0; entry < RegExpMacroAssembler::kTableSize; entry++) {
    table->set(entry, 0);
  }

  Label loop_head;
  Label done;
  Label step;
  m->Bind(&loop_head);
  m->LoadCurrentCharacter(0, nullptr, true);
  m->CheckCharacterGT('x', nullptr);
  m->CheckBitInTable(table, &step);
  m->GoTo(&done);
  m->Bind(&step);
  m->AdvanceCurrentPosition(1);
  m->GoTo(&loop_head);
  m->Bind(&done);
}
// Checks that the "greater-than or not-in-table" scan loop is emitted as six
// bytecodes without the peephole pass and as
// BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE followed by BC_POP_BT with it.
TEST(PeepholeSkipUntilGtOrNotBitInTable) {
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate->allocator(), ZONE_NAME);
  HandleScope scope(isolate);

  // Two identical emissions: |orig| is finalized with the peephole flag off,
  // |opt| with the flag on.
  RegExpBytecodeGenerator orig(isolate, &zone);
  RegExpBytecodeGenerator opt(isolate, &zone);
  CreatePeepholeSkipUntilGtOrNotBitInTableBytecode(&orig, factory);
  CreatePeepholeSkipUntilGtOrNotBitInTableBytecode(&opt, factory);
  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code = Handle<ByteArray>::cast(orig.GetCode(source));
  int plain_length = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> fused_code = Handle<ByteArray>::cast(opt.GetCode(source));
  int fused_length = fused_code->length();

  int plain_length_expected = RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR) +
                              RegExpBytecodeLength(BC_CHECK_GT) +
                              RegExpBytecodeLength(BC_CHECK_BIT_IN_TABLE) +
                              RegExpBytecodeLength(BC_GOTO) +
                              RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
                              RegExpBytecodeLength(BC_POP_BT);
  int fused_length_expected =
      RegExpBytecodeLength(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) +
      RegExpBytecodeLength(BC_POP_BT);
  CHECK_EQ(plain_length, plain_length_expected);
  CHECK_EQ(fused_length, fused_length_expected);

  // The fused bytecode sits at offset 0 and BC_POP_BT directly behind it.
  CHECK_EQ(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, fused_code->get(0));
  int pop_bt_offset =
      RegExpBytecodeLength(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE);
  CHECK_EQ(BC_POP_BT, fused_code->get(pop_bt_offset));
}
// Emits three sections into |m|:
//   1. a prologue bound to |dummy_before| whose three instructions jump to
//      |dummy_before|, |dummy_after| and |dummy_inside|;
//   2. a "scan for 'x' or 'y'" loop in which |dummy_inside| is bound between
//      the two character checks, i.e. in the middle of the loop body;
//   3. an epilogue bound to |dummy_after| repeating the three jumps.
// The caller owns the labels and can query their positions afterwards.
void CreatePeepholeLabelFixupsInsideBytecode(RegExpMacroAssembler* m,
                                             Label* dummy_before,
                                             Label* dummy_after,
                                             Label* dummy_inside) {
  Label scan_loop;

  // Prologue: one jump operand per dummy label.
  m->Bind(dummy_before);
  m->LoadCurrentCharacter(0, dummy_before);
  m->CheckCharacter('a', dummy_after);
  m->CheckCharacter('b', dummy_inside);

  // Scan loop with a jump target bound inside of it.
  m->Bind(&scan_loop);
  m->LoadCurrentCharacter(0, nullptr, true);
  m->CheckCharacter('x', nullptr);
  m->Bind(dummy_inside);
  m->CheckCharacter('y', nullptr);
  m->AdvanceCurrentPosition(1);
  m->GoTo(&scan_loop);

  // Epilogue: the same three jump operands again, after the loop.
  m->Bind(dummy_after);
  m->LoadCurrentCharacter(0, dummy_before);
  m->CheckCharacter('a', dummy_after);
  m->CheckCharacter('b', dummy_inside);
}
// Verifies that jump operands are fixed up correctly when a jump target
// (dummy_inside) lies in the middle of a sequence that the peephole pass
// replaces. The unoptimized code is checked first as a baseline; the
// optimized code is then checked against the baseline plus known shifts.
TEST(PeepholeLabelFixupsInside) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);
  RegExpBytecodeGenerator orig(CcTest::i_isolate(), &zone);
  RegExpBytecodeGenerator opt(CcTest::i_isolate(), &zone);
  {
    // The labels used while emitting into |opt| are never inspected; all
    // reference positions below come from the |orig| emission.
    Label dummy_before, dummy_after, dummy_inside;
    CreatePeepholeLabelFixupsInsideBytecode(&opt, &dummy_before, &dummy_after,
                                            &dummy_inside);
  }
  Label dummy_before, dummy_after, dummy_inside;
  CreatePeepholeLabelFixupsInsideBytecode(&orig, &dummy_before, &dummy_after,
                                          &dummy_inside);

  CHECK_EQ(0x00, dummy_before.pos());
  CHECK_EQ(0x28, dummy_inside.pos());
  CHECK_EQ(0x38, dummy_after.pos());

  constexpr int kLabelCount = 3;
  constexpr int kUsesPerLabel = 2;
  const Label* labels[kLabelCount] = {&dummy_before, &dummy_after,
                                      &dummy_inside};
  // Byte offsets (in the unoptimized code) of the jump operands that refer to
  // each label: one use before the loop and one after it.
  const int label_positions[kLabelCount][kUsesPerLabel] = {
      {0x04, 0x3C},  // dummy_before
      {0x0C, 0x44},  // dummy_after
      {0x14, 0x4C}   // dummy_inside
  };

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array = Handle<ByteArray>::cast(orig.GetCode(source));
  for (int label_idx = 0; label_idx < kLabelCount; label_idx++) {
    for (int pos_idx = 0; pos_idx < kUsesPerLabel; pos_idx++) {
      // Read the whole 32-bit operand, exactly as done for the optimized code
      // below. Reading a single byte would only see the lowest-addressed byte
      // of the operand, which makes the check depend on byte order.
      int jump_address = *reinterpret_cast<uint32_t*>(
          array->GetDataStartAddress() + label_positions[label_idx][pos_idx]);
      CHECK_EQ(labels[label_idx]->pos(), jump_address);
    }
  }

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(opt.GetCode(source));
  // Shift of each operand's own offset, indexed by use (before/after loop).
  const int pos_fixups[kUsesPerLabel] = {
      0,  // Position before optimization should be unchanged.
      4,  // Position after first replacement should be 4 (optimized size (20) -
          // original size (32) + preserve length (16)).
  };
  // Shift of each jump target, indexed like |labels|.
  const int target_fixups[kLabelCount] = {
      0,  // dummy_before should be unchanged
      4,  // dummy_after should be 4
      4   // dummy_inside should be 4
  };
  for (int label_idx = 0; label_idx < kLabelCount; label_idx++) {
    for (int pos_idx = 0; pos_idx < kUsesPerLabel; pos_idx++) {
      int label_pos = label_positions[label_idx][pos_idx] + pos_fixups[pos_idx];
      int jump_address = *reinterpret_cast<uint32_t*>(
          array_optimized->GetDataStartAddress() + label_pos);
      int expected_jump_address =
          labels[label_idx]->pos() + target_fixups[label_idx];
      CHECK_EQ(expected_jump_address, jump_address);
    }
  }
}
// Emits five sections into |m|:
//   1. a prologue bound to |dummy_before| with one jump operand for each of
//      the four dummy labels;
//   2. a first "scan for 'x' or 'y'" loop without any label inside;
//   3. a middle part bound to |dummy_between| repeating the four jumps;
//   4. a second scan loop in which |dummy_inside| is bound between the two
//      character checks;
//   5. an epilogue bound to |dummy_after| repeating the four jumps.
// The caller owns the labels and can query their positions afterwards.
void CreatePeepholeLabelFixupsComplexBytecode(RegExpMacroAssembler* m,
                                              Label* dummy_before,
                                              Label* dummy_between,
                                              Label* dummy_after,
                                              Label* dummy_inside) {
  Label first_loop;
  Label second_loop;

  // Prologue.
  m->Bind(dummy_before);
  m->LoadCurrentCharacter(0, dummy_before);
  m->CheckCharacter('a', dummy_between);
  m->CheckCharacter('b', dummy_after);
  m->CheckCharacter('c', dummy_inside);

  // First scan loop: no jump target inside.
  m->Bind(&first_loop);
  m->LoadCurrentCharacter(0, nullptr, true);
  m->CheckCharacter('x', nullptr);
  m->CheckCharacter('y', nullptr);
  m->AdvanceCurrentPosition(1);
  m->GoTo(&first_loop);

  // Middle part between the two loops.
  m->Bind(dummy_between);
  m->LoadCurrentCharacter(0, dummy_before);
  m->CheckCharacter('a', dummy_between);
  m->CheckCharacter('b', dummy_after);
  m->CheckCharacter('c', dummy_inside);

  // Second scan loop: |dummy_inside| is a jump target in its middle.
  m->Bind(&second_loop);
  m->LoadCurrentCharacter(0, nullptr, true);
  m->CheckCharacter('x', nullptr);
  m->Bind(dummy_inside);
  m->CheckCharacter('y', nullptr);
  m->AdvanceCurrentPosition(1);
  m->GoTo(&second_loop);

  // Epilogue.
  m->Bind(dummy_after);
  m->LoadCurrentCharacter(0, dummy_before);
  m->CheckCharacter('a', dummy_between);
  m->CheckCharacter('b', dummy_after);
  m->CheckCharacter('c', dummy_inside);
}
// Verifies jump-operand fixups across two replaced sequences, the second of
// which contains a jump target (dummy_inside) in its middle. The unoptimized
// code is checked first as a baseline; the optimized code is then checked
// against the baseline plus known shifts.
TEST(PeepholeLabelFixupsComplex) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);
  RegExpBytecodeGenerator orig(CcTest::i_isolate(), &zone);
  RegExpBytecodeGenerator opt(CcTest::i_isolate(), &zone);
  {
    // The labels used while emitting into |opt| are never inspected; all
    // reference positions below come from the |orig| emission.
    Label dummy_before, dummy_between, dummy_after, dummy_inside;
    CreatePeepholeLabelFixupsComplexBytecode(
        &opt, &dummy_before, &dummy_between, &dummy_after, &dummy_inside);
  }
  Label dummy_before, dummy_between, dummy_after, dummy_inside;
  CreatePeepholeLabelFixupsComplexBytecode(&orig, &dummy_before, &dummy_between,
                                           &dummy_after, &dummy_inside);

  CHECK_EQ(0x00, dummy_before.pos());
  CHECK_EQ(0x40, dummy_between.pos());
  CHECK_EQ(0x70, dummy_inside.pos());
  CHECK_EQ(0x80, dummy_after.pos());

  constexpr int kLabelCount = 4;
  constexpr int kUsesPerLabel = 3;
  const Label* labels[kLabelCount] = {&dummy_before, &dummy_between,
                                      &dummy_after, &dummy_inside};
  // Byte offsets (in the unoptimized code) of the jump operands that refer to
  // each label: one use before the first loop, one between the loops and one
  // after the second loop.
  const int label_positions[kLabelCount][kUsesPerLabel] = {
      {0x04, 0x44, 0x84},  // dummy_before
      {0x0C, 0x4C, 0x8C},  // dummy_between
      {0x14, 0x54, 0x94},  // dummy_after
      {0x1C, 0x5C, 0x9C}   // dummy_inside
  };

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array = Handle<ByteArray>::cast(orig.GetCode(source));
  for (int label_idx = 0; label_idx < kLabelCount; label_idx++) {
    for (int pos_idx = 0; pos_idx < kUsesPerLabel; pos_idx++) {
      // Read the whole 32-bit operand, exactly as done for the optimized code
      // below. Reading a single byte would only see the lowest-addressed byte
      // of the operand, which makes the check depend on byte order.
      int jump_address = *reinterpret_cast<uint32_t*>(
          array->GetDataStartAddress() + label_positions[label_idx][pos_idx]);
      CHECK_EQ(labels[label_idx]->pos(), jump_address);
    }
  }

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(opt.GetCode(source));
  // Shift of each operand's own offset, indexed by use.
  const int pos_fixups[kUsesPerLabel] = {
      0,    // Position before optimization should be unchanged.
      -12,  // Position after first replacement should be -12 (optimized size
            // (20) - original size (32)).
      -8    // Position after second replacement should be -8 (-12 from first
            // optimization -12 from second optimization + 16 preserved
            // bytecodes).
  };
  // Shift of each jump target, indexed like |labels|.
  const int target_fixups[kLabelCount] = {
      0,    // dummy_before should be unchanged
      -12,  // dummy_between should be -12
      -8,   // dummy_after should be -8
      -8    // dummy_inside should be -8
  };
  for (int label_idx = 0; label_idx < kLabelCount; label_idx++) {
    for (int pos_idx = 0; pos_idx < kUsesPerLabel; pos_idx++) {
      int label_pos = label_positions[label_idx][pos_idx] + pos_fixups[pos_idx];
      int jump_address = *reinterpret_cast<uint32_t*>(
          array_optimized->GetDataStartAddress() + label_pos);
      int expected_jump_address =
          labels[label_idx]->pos() + target_fixups[label_idx];
      CHECK_EQ(expected_jump_address, jump_address);
    }
  }
}
#undef CHECK_PARSE_ERROR
#undef CHECK_SIMPLE
#undef CHECK_MIN_MAX

67
tools/regexp-sequences.py Executable file
View File

@ -0,0 +1,67 @@
#!/usr/bin/env python
# Copyright 2019 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""
python %prog trace-file
Parses output generated by v8 with flag --trace-regexp-bytecodes and generates
a list of the most common sequences.
"""
from __future__ import print_function
import sys
import re
import collections
def parse(file, seqlen):
  """Counts bytecode sequences in a regexp bytecode trace.

  Args:
    file: Path of the trace file to read.
    seqlen: Maximum sequence length to track.

  Returns:
    A tuple (bc_cnt, total). bc_cnt is a list of seqlen dicts; bc_cnt[i] maps
    a ' --> '-joined run of up to i+1 consecutive bytecode names to the number
    of times it was seen. total is the number of trace lines that matched.
  """
  # example:
  # pc = 00, sp = 0, curpos = 0, curchar = 0000000a ..., bc = PUSH_BT, 02, 00, 00, 00, e8, 00, 00, 00 .......
  # The groups around the character use (?:...): they are delimiters only
  # ('.' or parentheses) and are not meant to capture or to accept a ':'.
  rx = re.compile(r'pc = (?P<pc>[0-9a-f]+), sp = (?P<sp>\d+), '
                  r'curpos = (?P<curpos>\d+), curchar = (?P<char_hex>[0-9a-f]+) '
                  r'(?:\.|\()(?P<char>\.|\w)(?:\.|\)), bc = (?P<bc>\w+), .*')

  def new_windows():
    # One sliding window per tracked length; window i keeps the last i+1
    # bytecodes.
    return [collections.deque(maxlen=i + 1) for i in range(seqlen)]

  total = 0
  bc_cnt = [{} for _ in range(seqlen)]
  # Create the windows up front so that a trace lacking the
  # "Start bytecode interpreter" header is still processed.
  last = new_windows()
  with open(file) as f:
    for line in f:
      line = line.strip()
      if line.startswith("Start bytecode interpreter"):
        # A new interpreter run begins: sequences must not span runs.
        last = new_windows()
      match = rx.search(line)
      if match:
        total += 1
        bc = match.group('bc')
        for i in range(seqlen):
          last[i].append(bc)
          key = ' --> '.join(last[i])
          bc_cnt[i][key] = bc_cnt[i].get(key, 0) + 1
  return bc_cnt, total
def print_most_common(d, seqlen, total):
  """Prints the sequences in d that account for at least 1 % of total.

  Args:
    d: Dict mapping a sequence description to its occurrence count.
    seqlen: Unused; kept so existing callers keep working.
    total: Number of traced bytecodes the percentages are relative to.
  """
  sorted_d = sorted(d.items(), key=lambda kv: kv[1], reverse=True)
  for (k, v) in sorted_d:
    # Force float division so that Python 2 does not truncate the percentage
    # to an integer; the result is then the same on Python 2 and 3.
    percent = v * 100.0 / total
    if percent < 1.0:
      # Entries are sorted by count, so everything that follows is rarer.
      return
    print("{}: {} ({} %)".format(k, v, percent))
def main(argv):
  """Prints the most common bytecode sequences of length 1 to 7.

  Args:
    argv: Command line; argv[1] is the path of the trace file.
  """
  if len(argv) < 2:
    # Report the missing argument instead of failing with an IndexError.
    prog = argv[0] if argv else "regexp-sequences.py"
    print("usage: python {} trace-file".format(prog), file=sys.stderr)
    sys.exit(1)
  max_seq = 7
  bc_cnt, total = parse(argv[1], max_seq)
  for i in range(max_seq):
    print()
    print("Most common of length {}".format(i + 1))
    print()
    print_most_common(bc_cnt[i], i, total)


if __name__ == '__main__':
  main(sys.argv)