Reland "[regexp] Bytecode peephole optimization"
This is a reland of 6612943010
Fixed: Unaligned reads, unspecified evaluation order.
Original change's description:
> [regexp] Bytecode peephole optimization
>
> Bytecodes used by the regular expression interpreter often occur in
> specific sequences. The number of dispatches in the interpreter can be
> reduced if those sequences are combined into a single bytecode.
>
> This CL adds a peephole optimization pass for regexp bytecodes.
> This pass checks the generated bytecode for pre-defined sequences that
> can be merged into a single bytecode.
>
> With the currently implemented bytecode sequences a speedup of 1.12x on
> regex-dna and octane-regexp is achieved.
>
> Bug: v8:9330
> Change-Id: I827f93273a5848e5963c7e3329daeb898995d151
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1813743
> Commit-Queue: Patrick Thier <pthier@google.com>
> Reviewed-by: Peter Marshall <petermarshall@chromium.org>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#63992}
Cq-Include-Trybots: luci.v8.try:v8_linux64_ubsan_rel_ng
Cq-Include-Trybots: luci.v8.try:v8_linux_gcc_rel
Bug: v8:9330,chromium:1008502,chromium:1008631
Change-Id: Ib9fc395b6809aa1debdb54d9fba5b7f09a235e5b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1828917
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64064}
This commit is contained in:
parent
14ffd21dd9
commit
282a74c7f0
3
BUILD.gn
3
BUILD.gn
@ -2778,6 +2778,9 @@ v8_source_set("v8_base_without_compiler") {
|
||||
"src/regexp/regexp-bytecode-generator-inl.h",
|
||||
"src/regexp/regexp-bytecode-generator.cc",
|
||||
"src/regexp/regexp-bytecode-generator.h",
|
||||
"src/regexp/regexp-bytecode-peephole.cc",
|
||||
"src/regexp/regexp-bytecode-peephole.h",
|
||||
"src/regexp/regexp-bytecodes.cc",
|
||||
"src/regexp/regexp-bytecodes.h",
|
||||
"src/regexp/regexp-compiler-tonode.cc",
|
||||
"src/regexp/regexp-compiler.cc",
|
||||
|
@ -1274,6 +1274,15 @@ DEFINE_BOOL(regexp_tier_up, true,
|
||||
DEFINE_INT(regexp_tier_up_ticks, 1,
|
||||
"set the number of executions for the regexp interpreter before "
|
||||
"tiering-up to the compiler")
|
||||
DEFINE_BOOL(regexp_peephole_optimization, true,
|
||||
"enable peephole optimization for regexp bytecode")
|
||||
DEFINE_BOOL(trace_regexp_peephole_optimization, false,
|
||||
"trace regexp bytecode peephole optimization")
|
||||
DEFINE_BOOL(trace_regexp_bytecodes, false, "trace regexp bytecode execution")
|
||||
DEFINE_BOOL(trace_regexp_assembler, false,
|
||||
"trace regexp macro assembler calls.")
|
||||
DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
|
||||
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
|
||||
|
||||
// Testing flags test/cctest/test-{flags,api,serialization}.cc
|
||||
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
|
||||
@ -1408,11 +1417,6 @@ DEFINE_BOOL(trace_isolates, false, "trace isolate state changes")
|
||||
// Regexp
|
||||
DEFINE_BOOL(regexp_possessive_quantifier, false,
|
||||
"enable possessive quantifier syntax for testing")
|
||||
DEFINE_BOOL(trace_regexp_bytecodes, false, "trace regexp bytecode execution")
|
||||
DEFINE_BOOL(trace_regexp_assembler, false,
|
||||
"trace regexp macro assembler calls.")
|
||||
DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
|
||||
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
|
||||
|
||||
// Debugger
|
||||
DEFINE_BOOL(print_break_location, false, "print source location on debug break")
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "src/ast/ast.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "src/regexp/regexp-bytecode-generator-inl.h"
|
||||
#include "src/regexp/regexp-bytecode-peephole.h"
|
||||
#include "src/regexp/regexp-bytecodes.h"
|
||||
#include "src/regexp/regexp-macro-assembler.h"
|
||||
|
||||
@ -18,6 +19,7 @@ RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone)
|
||||
buffer_(Vector<byte>::New(1024)),
|
||||
pc_(0),
|
||||
advance_current_end_(kInvalidPC),
|
||||
jump_edges_(zone),
|
||||
isolate_(isolate) {}
|
||||
|
||||
RegExpBytecodeGenerator::~RegExpBytecodeGenerator() {
|
||||
@ -39,6 +41,7 @@ void RegExpBytecodeGenerator::Bind(Label* l) {
|
||||
int fixup = pos;
|
||||
pos = *reinterpret_cast<int32_t*>(buffer_.begin() + fixup);
|
||||
*reinterpret_cast<uint32_t*>(buffer_.begin() + fixup) = pc_;
|
||||
jump_edges_.emplace(fixup, pc_);
|
||||
}
|
||||
}
|
||||
l->bind_to(pc_);
|
||||
@ -46,16 +49,17 @@ void RegExpBytecodeGenerator::Bind(Label* l) {
|
||||
|
||||
void RegExpBytecodeGenerator::EmitOrLink(Label* l) {
|
||||
if (l == nullptr) l = &backtrack_;
|
||||
int pos = 0;
|
||||
if (l->is_bound()) {
|
||||
Emit32(l->pos());
|
||||
pos = l->pos();
|
||||
jump_edges_.emplace(pc_, pos);
|
||||
} else {
|
||||
int pos = 0;
|
||||
if (l->is_linked()) {
|
||||
pos = l->pos();
|
||||
}
|
||||
l->link_to(pc_);
|
||||
Emit32(pos);
|
||||
}
|
||||
Emit32(pos);
|
||||
}
|
||||
|
||||
void RegExpBytecodeGenerator::PopRegister(int register_index) {
|
||||
@ -365,8 +369,16 @@ void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
|
||||
Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
|
||||
Bind(&backtrack_);
|
||||
Emit(BC_POP_BT, 0);
|
||||
Handle<ByteArray> array = isolate_->factory()->NewByteArray(length());
|
||||
Copy(array->GetDataStartAddress());
|
||||
|
||||
Handle<ByteArray> array;
|
||||
if (FLAG_regexp_peephole_optimization) {
|
||||
array = RegExpBytecodePeepholeOptimization::OptimizeBytecode(
|
||||
isolate_, zone(), source, buffer_.begin(), length(), jump_edges_);
|
||||
} else {
|
||||
array = isolate_->factory()->NewByteArray(length());
|
||||
Copy(array->GetDataStartAddress());
|
||||
}
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
|
@ -100,6 +100,12 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
|
||||
int advance_current_offset_;
|
||||
int advance_current_end_;
|
||||
|
||||
// Stores jump edges emitted for the bytecode (used by
|
||||
// RegExpBytecodePeepholeOptimization).
|
||||
// Key: jump source (offset in buffer_ where jump destination is stored).
|
||||
// Value: jump destination (offset in buffer_ to jump to).
|
||||
ZoneUnorderedMap<int, int> jump_edges_;
|
||||
|
||||
Isolate* isolate_;
|
||||
|
||||
static const int kInvalidPC = -1;
|
||||
|
1034
src/regexp/regexp-bytecode-peephole.cc
Normal file
1034
src/regexp/regexp-bytecode-peephole.cc
Normal file
File diff suppressed because it is too large
Load Diff
31
src/regexp/regexp-bytecode-peephole.h
Normal file
31
src/regexp/regexp-bytecode-peephole.h
Normal file
@ -0,0 +1,31 @@
|
||||
// Copyright 2019 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
|
||||
#define V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
|
||||
|
||||
#include "src/common/globals.h"
|
||||
#include "src/zone/zone-containers.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
class ByteArray;
|
||||
|
||||
// Peephole optimization for regexp interpreter bytecode.
|
||||
// Pre-defined bytecode sequences occurring in the bytecode generated by the
|
||||
// RegExpBytecodeGenerator can be optimized into a single bytecode.
|
||||
class RegExpBytecodePeepholeOptimization : public AllStatic {
|
||||
public:
|
||||
// Performs peephole optimization on the given bytecode and returns the
|
||||
// optimized bytecode.
// |bytecode| / |length|: start and size in bytes of the unoptimized
// bytecode (RegExpBytecodeGenerator::GetCode passes its buffer and
// length()).
// |jump_edges|: maps a jump source (offset in the bytecode where a jump
// destination is stored) to that jump's destination offset.
// |source|: the regexp pattern the bytecode was generated for.
// NOTE(review): presumably |source| is only used for tracing
// (--trace-regexp-peephole-optimization); the implementation is not
// visible here -- confirm.
|
||||
static Handle<ByteArray> OptimizeBytecode(
|
||||
Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode,
|
||||
int length, const ZoneUnorderedMap<int, int>& jump_edges);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#endif // V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
|
46
src/regexp/regexp-bytecodes.cc
Normal file
46
src/regexp/regexp-bytecodes.cc
Normal file
@ -0,0 +1,46 @@
|
||||
// Copyright 2019 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/regexp/regexp-bytecodes.h"
|
||||
|
||||
#include <cctype>
|
||||
|
||||
#include "src/utils/utils.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// Prints one bytecode instruction located at |pc|: first its name, then
// every byte of the instruction (opcode included) in hex, then the argument
// bytes rendered as characters, with non-printable bytes shown as '.'.
// |code_base| is not read by this function.
void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc) {
  PrintF("%s", RegExpBytecodeName(*pc));

  // The opcode byte determines how many bytes the instruction occupies.
  const int instruction_length = RegExpBytecodeLength(*pc);

  // Hex dump of the whole instruction, starting at the opcode byte.
  for (int index = 0; index < instruction_length; ++index) {
    PrintF(", %02x", pc[index]);
  }
  PrintF(" ");

  // Character dump of the arguments only (the opcode byte is skipped).
  for (int index = 1; index < instruction_length; ++index) {
    const unsigned char argument_byte = pc[index];
    PrintF("%c", std::isprint(argument_byte) ? argument_byte : '.');
  }
  PrintF("\n");
}
|
||||
|
||||
void RegExpBytecodeDisassemble(const byte* code_base, int length,
|
||||
const char* pattern) {
|
||||
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern);
|
||||
|
||||
ptrdiff_t offset = 0;
|
||||
|
||||
while (offset < length) {
|
||||
const byte* const pc = code_base + offset;
|
||||
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
|
||||
RegExpBytecodeDisassembleSingle(code_base, pc);
|
||||
offset += RegExpBytecodeLength(*pc);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
@ -6,6 +6,7 @@
|
||||
#define V8_REGEXP_REGEXP_BYTECODES_H_
|
||||
|
||||
#include "src/base/macros.h"
|
||||
#include "src/common/globals.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
@ -24,6 +25,8 @@ const unsigned int MAX_FIRST_ARG = 0x7fffffu;
|
||||
const int BYTECODE_SHIFT = 8;
|
||||
STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
|
||||
|
||||
// TODO(pthier): Argument offsets of bytecodes should be easily accessible by
|
||||
// name or at least by position.
|
||||
#define BYTECODE_ITERATOR(V) \
|
||||
V(BREAK, 0, 4) /* bc8 */ \
|
||||
V(PUSH_CP, 1, 4) /* bc8 pad24 */ \
|
||||
@ -41,25 +44,61 @@ STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
|
||||
V(FAIL, 13, 4) /* bc8 pad24 */ \
|
||||
V(SUCCEED, 14, 4) /* bc8 pad24 */ \
|
||||
V(ADVANCE_CP, 15, 4) /* bc8 offset24 */ \
|
||||
V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \
|
||||
/* Jump to another bytecode given its offset. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x10 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F: 0x00 (unused) Padding */ \
|
||||
/* 0x20 - 0x3F: Address of bytecode to jump to */ \
|
||||
V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \
|
||||
/* Check if offset is in range and load character at given offset. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x11 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F: Offset from current position */ \
|
||||
/* 0x20 - 0x3F: Address of bytecode when load is out of range */ \
|
||||
V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32 */ \
|
||||
/* Load character at given offset without range checks. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x12 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F: Offset from current position */ \
|
||||
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \
|
||||
V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \
|
||||
V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \
|
||||
V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \
|
||||
V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \
|
||||
V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \
|
||||
V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \
|
||||
/* Check if current character is equal to a given character */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x18 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x0F: 0x00 (unused) Padding */ \
|
||||
/* 0x10 - 0x1F: Character to check */ \
|
||||
/* 0x20 - 0x3F: Address of bytecode when matched */ \
|
||||
V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \
|
||||
V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \
|
||||
V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \
|
||||
V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
|
||||
V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
|
||||
/* Checks if the current character combined with mask (bitwise and) */ \
|
||||
/* matches a character (e.g. used when two characters in a disjunction */ \
|
||||
/* differ by only a single bit). */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x1c (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x0F: 0x00 (unused) Padding */ \
|
||||
/* 0x10 - 0x1F: Character to match against (after mask applied) */ \
|
||||
/* 0x20 - 0x3F: Bitmask bitwise and combined with current character */ \
|
||||
/* 0x40 - 0x5F: Address of bytecode when matched */ \
|
||||
V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
|
||||
V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
|
||||
V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
|
||||
V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */ \
|
||||
V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
|
||||
V(CHECK_CHAR_NOT_IN_RANGE, 33, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
|
||||
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
|
||||
/* Checks if the current character matches any of the characters encoded */ \
|
||||
/* in a bit table. Similar to/inspired by Boyer-Moore string search */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x22 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F: 0x00 (unused) Padding */ \
|
||||
/* 0x20 - 0x3F: Address of bytecode when bit is set */ \
|
||||
/* 0x40 - 0xBF: Bit table */ \
|
||||
V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
|
||||
V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
|
||||
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
|
||||
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
@ -74,10 +113,99 @@ STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
|
||||
V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \
|
||||
V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \
|
||||
V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \
|
||||
/* Checks if the current position matches top of backtrack stack */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x31 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F: 0x00 (unused) Padding */ \
|
||||
/* 0x20 - 0x3F: Address of bytecode when current matches tos */ \
|
||||
V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \
|
||||
V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
|
||||
/* Advance character pointer by given offset and jump to another bytecode.*/ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x32 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F: Number of characters to advance */ \
|
||||
/* 0x20 - 0x3F: Address of bytecode to jump to */ \
|
||||
V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
|
||||
V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \
|
||||
V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */
|
||||
/* Checks if current position + given offset is in range. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07: 0x34 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F: Offset from current position */ \
|
||||
/* 0x20 - 0x3F: Address of bytecode when position is out of range */ \
|
||||
V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */ \
|
||||
/* Combination of: */ \
|
||||
/* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO */ \
|
||||
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07 0x35 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F Load character offset from current position */ \
|
||||
/* 0x20 - 0x3F Number of characters to advance */ \
|
||||
/* 0x40 - 0xBF Bit Table */ \
|
||||
/* 0xC0 - 0xDF Address of bytecode when character is matched */ \
|
||||
/* 0xE0 - 0xFF Address of bytecode when no match */ \
|
||||
V(SKIP_UNTIL_BIT_IN_TABLE, 53, 32) \
|
||||
/* Combination of: */ \
|
||||
/* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR */ \
|
||||
/* and ADVANCE_CP_AND_GOTO */ \
|
||||
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07 0x36 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F Load character offset from current position */ \
|
||||
/* 0x20 - 0x2F Number of characters to advance */ \
|
||||
/* 0x30 - 0x3F Character to match against (after mask applied) */ \
|
||||
/* 0x40 - 0x5F: Bitmask bitwise and combined with current character */ \
|
||||
/* 0x60 - 0x7F Minimum number of characters this pattern consumes */ \
|
||||
/* 0x80 - 0x9F Address of bytecode when character is matched */ \
|
||||
/* 0xA0 - 0xBF Address of bytecode when no match */ \
|
||||
V(SKIP_UNTIL_CHAR_AND, 54, 24) \
|
||||
/* Combination of: */ \
|
||||
/* LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \
|
||||
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07 0x37 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F Load character offset from current position */ \
|
||||
/* 0x20 - 0x2F Number of characters to advance */ \
|
||||
/* 0x30 - 0x3F Character to match */ \
|
||||
/* 0x40 - 0x5F Address of bytecode when character is matched */ \
|
||||
/* 0x60 - 0x7F Address of bytecode when no match */ \
|
||||
V(SKIP_UNTIL_CHAR, 55, 16) \
|
||||
/* Combination of: */ \
|
||||
/* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, CHECK_CHAR */ \
|
||||
/* and ADVANCE_CP_AND_GOTO */ \
|
||||
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07 0x38 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F Load character offset from current position */ \
|
||||
/* 0x20 - 0x2F Number of characters to advance */ \
|
||||
/* 0x30 - 0x3F Character to match */ \
|
||||
/* 0x40 - 0x5F Minimum number of characters this pattern consumes */ \
|
||||
/* 0x60 - 0x7F Address of bytecode when character is matched */ \
|
||||
/* 0x80 - 0x9F Address of bytecode when no match */ \
|
||||
V(SKIP_UNTIL_CHAR_POS_CHECKED, 56, 20) \
|
||||
/* Combination of: */ \
|
||||
/* LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \
|
||||
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07 0x39 (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F Load character offset from current position */ \
|
||||
/* 0x20 - 0x3F Number of characters to advance */ \
|
||||
/* 0x40 - 0x4F Character to match */ \
|
||||
/* 0x50 - 0x5F Other Character to match */ \
|
||||
/* 0x60 - 0x7F Address of bytecode when either character is matched */ \
|
||||
/* 0x80 - 0x9F Address of bytecode when no match */ \
|
||||
V(SKIP_UNTIL_CHAR_OR_CHAR, 57, 20) \
|
||||
/* Combination of: */ \
|
||||
/* LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO and */ \
|
||||
/* ADVANCE_CP_AND_GOTO */ \
|
||||
/* Emitted by RegExpBytecodePeepholeOptimization. */ \
|
||||
/* Bit Layout: */ \
|
||||
/* 0x00 - 0x07 0x3A (fixed) Bytecode */ \
|
||||
/* 0x08 - 0x1F Load character offset from current position */ \
|
||||
/* 0x20 - 0x2F Number of characters to advance */ \
|
||||
/* 0x30 - 0x3F Character to check if it is less than current char */ \
|
||||
/* 0x40 - 0xBF Bit Table */ \
|
||||
/* 0xC0 - 0xDF Address of bytecode when character is matched */ \
|
||||
/* 0xE0 - 0xFF Address of bytecode when no match */ \
|
||||
V(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, 58, 32)
|
||||
|
||||
#define COUNT(...) +1
|
||||
static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT);
|
||||
@ -87,7 +215,7 @@ static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT);
|
||||
// contiguous, strictly increasing, and start at 0.
|
||||
// TODO(jgruber): Do not explicitly assign values, instead generate them
|
||||
// implicitly from the list order.
|
||||
STATIC_ASSERT(kRegExpBytecodeCount == 53);
|
||||
STATIC_ASSERT(kRegExpBytecodeCount == 59);
|
||||
|
||||
#define DECLARE_BYTECODES(name, code, length) \
|
||||
static constexpr int BC_##name = code;
|
||||
@ -114,6 +242,10 @@ inline const char* RegExpBytecodeName(int bytecode) {
|
||||
return kRegExpBytecodeNames[bytecode];
|
||||
}
|
||||
|
||||
void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc);
|
||||
void RegExpBytecodeDisassemble(const byte* code_base, int length,
|
||||
const char* pattern);
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
|
@ -64,23 +64,6 @@ bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
|
||||
return true;
|
||||
}
|
||||
|
||||
void DisassembleSingleBytecode(const byte* code_base, const byte* pc) {
|
||||
PrintF("%s", RegExpBytecodeName(*pc));
|
||||
|
||||
// Args and the bytecode as hex.
|
||||
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
|
||||
PrintF(", %02x", pc[i]);
|
||||
}
|
||||
PrintF(" ");
|
||||
|
||||
// Args as ascii.
|
||||
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
|
||||
unsigned char b = pc[i];
|
||||
PrintF("%c", std::isprint(b) ? b : '.');
|
||||
}
|
||||
PrintF("\n");
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
|
||||
int stack_depth, int current_position,
|
||||
@ -95,7 +78,7 @@ void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
|
||||
PrintF(format, pc - code_base, stack_depth, current_position, current_char,
|
||||
printable ? current_char : '.');
|
||||
|
||||
DisassembleSingleBytecode(code_base, pc);
|
||||
RegExpBytecodeDisassembleSingle(code_base, pc);
|
||||
}
|
||||
}
|
||||
#endif // DEBUG
|
||||
@ -257,6 +240,13 @@ IrregexpInterpreter::Result HandleInterrupts(
|
||||
return IrregexpInterpreter::SUCCESS;
|
||||
}
|
||||
|
||||
bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
|
||||
int mask = RegExpMacroAssembler::kTableMask;
|
||||
int b = table[(current_char & mask) >> kBitsPerByteLog2];
|
||||
int bit = (current_char & (kBitsPerByte - 1));
|
||||
return (b & (1 << bit)) != 0;
|
||||
}
|
||||
|
||||
// If computed gotos are supported by the compiler, we can get addresses to
|
||||
// labels directly in C/C++. Every bytecode handler has its own label and we
|
||||
// store the addresses in a dispatch table indexed by bytecode. To execute the
|
||||
@ -281,7 +271,7 @@ IrregexpInterpreter::Result HandleInterrupts(
|
||||
#define DISPATCH() \
|
||||
pc = next_pc; \
|
||||
insn = next_insn; \
|
||||
break
|
||||
goto switch_dispatch_continuation
|
||||
#endif // V8_USE_COMPUTED_GOTO
|
||||
|
||||
// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
|
||||
@ -331,19 +321,13 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
|
||||
// Fill dispatch table from last defined bytecode up to the next power of two
|
||||
// with BREAK (invalid operation).
|
||||
// TODO(pthier): Find a way to fill up automatically (at compile time)
|
||||
// 53 real bytecodes -> 11 fillers
|
||||
// 59 real bytecodes -> 5 fillers
|
||||
#define BYTECODE_FILLER_ITERATOR(V) \
|
||||
V(BREAK) /* 1 */ \
|
||||
V(BREAK) /* 2 */ \
|
||||
V(BREAK) /* 3 */ \
|
||||
V(BREAK) /* 4 */ \
|
||||
V(BREAK) /* 5 */ \
|
||||
V(BREAK) /* 6 */ \
|
||||
V(BREAK) /* 7 */ \
|
||||
V(BREAK) /* 8 */ \
|
||||
V(BREAK) /* 9 */ \
|
||||
V(BREAK) /* 10 */ \
|
||||
V(BREAK) /* 11 */
|
||||
V(BREAK) /* 5 */
|
||||
|
||||
#define COUNT(...) +1
|
||||
static constexpr int kRegExpBytecodeFillerCount =
|
||||
@ -652,10 +636,7 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
|
||||
DISPATCH();
|
||||
}
|
||||
BYTECODE(CHECK_BIT_IN_TABLE) {
|
||||
int mask = RegExpMacroAssembler::kTableMask;
|
||||
byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
|
||||
int bit = (current_char & (kBitsPerByte - 1));
|
||||
if ((b & (1 << bit)) != 0) {
|
||||
if (CheckBitInTable(current_char, pc + 8)) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
|
||||
} else {
|
||||
ADVANCE(CHECK_BIT_IN_TABLE);
|
||||
@ -834,6 +815,118 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
|
||||
}
|
||||
DISPATCH();
|
||||
}
|
||||
BYTECODE(SKIP_UNTIL_CHAR) {
|
||||
int load_offset = (insn >> BYTECODE_SHIFT);
|
||||
uint32_t advance = Load16Aligned(pc + 4);
|
||||
uint32_t c = Load16Aligned(pc + 6);
|
||||
while (static_cast<uintptr_t>(current + load_offset) <
|
||||
static_cast<uintptr_t>(subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
if (c == current_char) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
|
||||
DISPATCH();
|
||||
}
|
||||
current += advance;
|
||||
}
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
|
||||
DISPATCH();
|
||||
}
|
||||
BYTECODE(SKIP_UNTIL_CHAR_AND) {
|
||||
int load_offset = (insn >> BYTECODE_SHIFT);
|
||||
uint16_t advance = Load16Aligned(pc + 4);
|
||||
uint16_t c = Load16Aligned(pc + 6);
|
||||
uint32_t mask = Load32Aligned(pc + 8);
|
||||
int32_t maximum_offset = Load32Aligned(pc + 12);
|
||||
while (static_cast<uintptr_t>(current + maximum_offset) <=
|
||||
static_cast<uintptr_t>(subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
if (c == (current_char & mask)) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
|
||||
DISPATCH();
|
||||
}
|
||||
current += advance;
|
||||
}
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
|
||||
DISPATCH();
|
||||
}
|
||||
BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
|
||||
int load_offset = (insn >> BYTECODE_SHIFT);
|
||||
uint16_t advance = Load16Aligned(pc + 4);
|
||||
uint16_t c = Load16Aligned(pc + 6);
|
||||
int32_t maximum_offset = Load32Aligned(pc + 8);
|
||||
while (static_cast<uintptr_t>(current + maximum_offset) <=
|
||||
static_cast<uintptr_t>(subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
if (c == current_char) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
|
||||
DISPATCH();
|
||||
}
|
||||
current += advance;
|
||||
}
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
|
||||
DISPATCH();
|
||||
}
|
||||
BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
|
||||
int load_offset = (insn >> BYTECODE_SHIFT);
|
||||
uint32_t advance = Load16Aligned(pc + 4);
|
||||
const byte* table = pc + 8;
|
||||
while (static_cast<uintptr_t>(current + load_offset) <
|
||||
static_cast<uintptr_t>(subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
if (CheckBitInTable(current_char, table)) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
|
||||
DISPATCH();
|
||||
}
|
||||
current += advance;
|
||||
}
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
|
||||
DISPATCH();
|
||||
}
|
||||
BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
|
||||
int load_offset = (insn >> BYTECODE_SHIFT);
|
||||
uint16_t advance = Load16Aligned(pc + 4);
|
||||
uint16_t limit = Load16Aligned(pc + 6);
|
||||
const byte* table = pc + 8;
|
||||
while (static_cast<uintptr_t>(current + load_offset) <
|
||||
static_cast<uintptr_t>(subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
if (current_char > limit) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
|
||||
DISPATCH();
|
||||
}
|
||||
if (!CheckBitInTable(current_char, table)) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
|
||||
DISPATCH();
|
||||
}
|
||||
current += advance;
|
||||
}
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
|
||||
DISPATCH();
|
||||
}
|
||||
BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
|
||||
int load_offset = (insn >> BYTECODE_SHIFT);
|
||||
uint32_t advance = Load32Aligned(pc + 4);
|
||||
uint16_t c = Load16Aligned(pc + 8);
|
||||
uint16_t c2 = Load16Aligned(pc + 10);
|
||||
while (static_cast<uintptr_t>(current + load_offset) <
|
||||
static_cast<uintptr_t>(subject.length())) {
|
||||
current_char = subject[current + load_offset];
|
||||
// The two if-statements below are split up intentionally, as combining
|
||||
// them seems to result in register allocation behaving quite
|
||||
// differently and slowing down the resulting code.
|
||||
if (c == current_char) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
|
||||
DISPATCH();
|
||||
}
|
||||
if (c2 == current_char) {
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
|
||||
DISPATCH();
|
||||
}
|
||||
current += advance;
|
||||
}
|
||||
SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
|
||||
DISPATCH();
|
||||
}
|
||||
#if V8_USE_COMPUTED_GOTO
|
||||
// Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
|
||||
// V8_USE_COMPUTED_GOTO here.
|
||||
@ -841,6 +934,9 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
// Label we jump to in DISPATCH(). There must be no instructions between the
|
||||
// end of the switch, this label and the end of the loop.
|
||||
switch_dispatch_continuation : {}
|
||||
#endif // V8_USE_COMPUTED_GOTO
|
||||
}
|
||||
}
|
||||
@ -855,25 +951,6 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
|
||||
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
void IrregexpInterpreter::Disassemble(ByteArray byte_array,
|
||||
const std::string& pattern) {
|
||||
DisallowHeapAllocation no_gc;
|
||||
|
||||
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str());
|
||||
|
||||
const byte* const code_base = byte_array.GetDataStartAddress();
|
||||
const int byte_array_length = byte_array.length();
|
||||
ptrdiff_t offset = 0;
|
||||
|
||||
while (offset < byte_array_length) {
|
||||
const byte* const pc = code_base + offset;
|
||||
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
|
||||
DisassembleSingleBytecode(code_base, pc);
|
||||
offset += RegExpBytecodeLength(*pc);
|
||||
}
|
||||
}
|
||||
|
||||
// static
|
||||
IrregexpInterpreter::Result IrregexpInterpreter::Match(
|
||||
Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
|
||||
|
@ -46,8 +46,6 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
|
||||
int registers_length, int start_position,
|
||||
RegExp::CallOrigin call_origin);
|
||||
|
||||
static void Disassemble(ByteArray byte_array, const std::string& pattern);
|
||||
|
||||
private:
|
||||
static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
|
||||
int* registers, int registers_length, int start_position,
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "src/heap/heap-inl.h"
|
||||
#include "src/objects/js-regexp-inl.h"
|
||||
#include "src/regexp/regexp-bytecode-generator.h"
|
||||
#include "src/regexp/regexp-bytecodes.h"
|
||||
#include "src/regexp/regexp-compiler.h"
|
||||
#include "src/regexp/regexp-dotprinter.h"
|
||||
#include "src/regexp/regexp-interpreter.h"
|
||||
@ -881,7 +882,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
|
||||
data->compilation_target == RegExpCompilationTarget::kBytecode) {
|
||||
Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate);
|
||||
auto pattern_cstring = pattern->ToCString();
|
||||
IrregexpInterpreter::Disassemble(*bytecode, pattern_cstring.get());
|
||||
RegExpBytecodeDisassemble(bytecode->GetDataStartAddress(),
|
||||
bytecode->length(), pattern_cstring.get());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include "src/objects/js-regexp-inl.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "src/regexp/regexp-bytecode-generator.h"
|
||||
#include "src/regexp/regexp-bytecodes.h"
|
||||
#include "src/regexp/regexp-compiler.h"
|
||||
#include "src/regexp/regexp-interpreter.h"
|
||||
#include "src/regexp/regexp-macro-assembler-arch.h"
|
||||
@ -1783,6 +1784,567 @@ TEST(UncachedExternalString) {
|
||||
ExpectString("external.substring(1).match(re)[1]", "z");
|
||||
}
|
||||
|
||||
// Test bytecode peephole optimization
|
||||
|
||||
// Emits the bytecode sequence used by TEST(PeepholeNoChange): a start-anchored
// match of the three characters 'f', 'o', 'o' followed by success, with
// separate backtrack and fail targets. The test expects the peephole pass to
// leave this sequence byte-for-byte untouched.
void CreatePeepholeNoChangeBytecode(RegExpMacroAssembler* m) {
  RegExpMacroAssembler& masm = *m;
  Label on_fail;
  Label on_backtrack;

  masm.PushBacktrack(&on_fail);
  masm.CheckNotAtStart(0, nullptr);

  // Characters are checked back to front; only the first load is emitted
  // without the explicit `false` third argument.
  masm.LoadCurrentCharacter(2, nullptr);
  masm.CheckNotCharacter('o', nullptr);
  masm.LoadCurrentCharacter(1, nullptr, false);
  masm.CheckNotCharacter('o', nullptr);
  masm.LoadCurrentCharacter(0, nullptr, false);
  masm.CheckNotCharacter('f', nullptr);

  // Record the capture boundaries, consume the match and succeed.
  masm.WriteCurrentPositionToRegister(0, 0);
  masm.WriteCurrentPositionToRegister(1, 3);
  masm.AdvanceCurrentPosition(3);
  masm.PushBacktrack(&on_backtrack);
  masm.Succeed();

  masm.Bind(&on_backtrack);
  masm.Backtrack();

  masm.Bind(&on_fail);
  masm.Fail();
}
|
||||
|
||||
// Verifies that bytecode containing no optimizable sequence is emitted
// identically with and without the peephole optimization pass.
TEST(PeepholeNoChange) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  RegExpBytecodeGenerator orig(CcTest::i_isolate(), &zone);
  RegExpBytecodeGenerator opt(CcTest::i_isolate(), &zone);

  CreatePeepholeNoChangeBytecode(&orig);
  CreatePeepholeNoChangeBytecode(&opt);

  Handle<String> source = factory->NewStringFromStaticChars("^foo");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array = Handle<ByteArray>::cast(orig.GetCode(source));

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(opt.GetCode(source));

  // "No change" implies equal sizes. Checking this first also guarantees that
  // the memcmp below never reads past the end of the optimized array.
  const int length = array->length();
  CHECK_EQ(length, array_optimized->length());

  // Take the raw data pointers only after both arrays have been allocated, so
  // no heap allocation can happen while an untracked pointer into a heap
  // object is live.
  byte* byte_array = array->GetDataStartAddress();
  byte* byte_array_optimized = array_optimized->GetDataStartAddress();

  CHECK_EQ(0, memcmp(byte_array, byte_array_optimized, length));
}
|
||||
|
||||
// Emits a four-instruction loop: load the current character, check it against
// 'x', advance by one, jump back to the loop head. TEST(PeepholeSkipUntilChar)
// expects the peephole pass to fold this into BC_SKIP_UNTIL_CHAR.
void CreatePeepholeSkipUntilCharBytecode(RegExpMacroAssembler* m) {
  RegExpMacroAssembler& masm = *m;
  Label loop_head;

  masm.Bind(&loop_head);
  masm.LoadCurrentCharacter(0, nullptr, true);
  masm.CheckCharacter('x', nullptr);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&loop_head);
}
|
||||
|
||||
// Checks that the load/check-char/advance/goto loop is replaced by a single
// BC_SKIP_UNTIL_CHAR when the peephole pass is enabled.
TEST(PeepholeSkipUntilChar) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* const isolate = CcTest::i_isolate();
  Factory* const factory = isolate->factory();
  HandleScope handle_scope(isolate);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilCharBytecode(&plain_generator);
  CreatePeepholeSkipUntilCharBytecode(&peephole_generator);

  // Expected layouts. Both variants are expected to end in a BC_POP_BT.
  const int plain_size_expected =
      RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR) +
      RegExpBytecodeLength(BC_CHECK_CHAR) +
      RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
      RegExpBytecodeLength(BC_POP_BT);
  const int peephole_size_expected =
      RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR) +
      RegExpBytecodeLength(BC_POP_BT);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int plain_size = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> peephole_code =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int peephole_size = peephole_code->length();

  CHECK_EQ(plain_size, plain_size_expected);
  CHECK_EQ(peephole_size, peephole_size_expected);

  // The combined bytecode comes first, directly followed by BC_POP_BT.
  CHECK_EQ(BC_SKIP_UNTIL_CHAR, peephole_code->get(0));
  const int pop_bt_offset = RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR);
  CHECK_EQ(BC_POP_BT, peephole_code->get(pop_bt_offset));
}
|
||||
|
||||
// Emits a loop of load / CheckBitInTable / advance-by-one / goto, using an
// all-zero bit table allocated from |factory|.
// TEST(PeepholeSkipUntilBitInTable) expects this loop to be folded into
// BC_SKIP_UNTIL_BIT_IN_TABLE.
void CreatePeepholeSkipUntilBitInTableBytecode(RegExpMacroAssembler* m,
                                               Factory* factory) {
  // The table contents do not matter for the test; zero every entry.
  Handle<ByteArray> zeroed_table = factory->NewByteArray(
      RegExpMacroAssembler::kTableSize, AllocationType::kOld);
  for (uint32_t entry = 0; entry < RegExpMacroAssembler::kTableSize; ++entry) {
    zeroed_table->set(entry, 0);
  }

  RegExpMacroAssembler& masm = *m;
  Label loop_head;
  masm.Bind(&loop_head);
  masm.LoadCurrentCharacter(0, nullptr, true);
  masm.CheckBitInTable(zeroed_table, nullptr);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&loop_head);
}
|
||||
|
||||
// Checks that the load/check-bit-in-table/advance/goto loop is replaced by a
// single BC_SKIP_UNTIL_BIT_IN_TABLE when the peephole pass is enabled.
TEST(PeepholeSkipUntilBitInTable) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* const isolate = CcTest::i_isolate();
  Factory* const factory = isolate->factory();
  HandleScope handle_scope(isolate);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilBitInTableBytecode(&plain_generator, factory);
  CreatePeepholeSkipUntilBitInTableBytecode(&peephole_generator, factory);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  // Generate once without and once with the optimization.
  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int plain_size = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> peephole_code =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int peephole_size = peephole_code->length();

  // Expected layouts. Both variants are expected to end in a BC_POP_BT.
  const int plain_size_expected =
      RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR) +
      RegExpBytecodeLength(BC_CHECK_BIT_IN_TABLE) +
      RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
      RegExpBytecodeLength(BC_POP_BT);
  const int peephole_size_expected =
      RegExpBytecodeLength(BC_SKIP_UNTIL_BIT_IN_TABLE) +
      RegExpBytecodeLength(BC_POP_BT);

  CHECK_EQ(plain_size, plain_size_expected);
  CHECK_EQ(peephole_size, peephole_size_expected);

  // The combined bytecode comes first, directly followed by BC_POP_BT.
  CHECK_EQ(BC_SKIP_UNTIL_BIT_IN_TABLE, peephole_code->get(0));
  const int pop_bt_offset = RegExpBytecodeLength(BC_SKIP_UNTIL_BIT_IN_TABLE);
  CHECK_EQ(BC_POP_BT, peephole_code->get(pop_bt_offset));
}
|
||||
|
||||
// Emits a load / check-char('x') / advance-by-one / goto loop where the load
// is issued with the extra arguments (1, 2). According to the expectations in
// TEST(PeepholeSkipUntilCharPosChecked) this yields a position check plus an
// unchecked load, which the peephole pass folds into
// BC_SKIP_UNTIL_CHAR_POS_CHECKED.
void CreatePeepholeSkipUntilCharPosCheckedBytecode(RegExpMacroAssembler* m) {
  RegExpMacroAssembler& masm = *m;
  Label loop_head;

  masm.Bind(&loop_head);
  masm.LoadCurrentCharacter(0, nullptr, true, 1, 2);
  masm.CheckCharacter('x', nullptr);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&loop_head);
}
|
||||
|
||||
// Checks that the position-checked variant of the skip-until-char loop is
// replaced by a single BC_SKIP_UNTIL_CHAR_POS_CHECKED when the peephole pass
// is enabled.
TEST(PeepholeSkipUntilCharPosChecked) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* const isolate = CcTest::i_isolate();
  Factory* const factory = isolate->factory();
  HandleScope handle_scope(isolate);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilCharPosCheckedBytecode(&plain_generator);
  CreatePeepholeSkipUntilCharPosCheckedBytecode(&peephole_generator);

  // Expected layouts. Both variants are expected to end in a BC_POP_BT.
  const int plain_size_expected =
      RegExpBytecodeLength(BC_CHECK_CURRENT_POSITION) +
      RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR_UNCHECKED) +
      RegExpBytecodeLength(BC_CHECK_CHAR) +
      RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
      RegExpBytecodeLength(BC_POP_BT);
  const int peephole_size_expected =
      RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_POS_CHECKED) +
      RegExpBytecodeLength(BC_POP_BT);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int plain_size = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> peephole_code =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int peephole_size = peephole_code->length();

  CHECK_EQ(plain_size, plain_size_expected);
  CHECK_EQ(peephole_size, peephole_size_expected);

  // The combined bytecode comes first, directly followed by BC_POP_BT.
  CHECK_EQ(BC_SKIP_UNTIL_CHAR_POS_CHECKED, peephole_code->get(0));
  const int pop_bt_offset =
      RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_POS_CHECKED);
  CHECK_EQ(BC_POP_BT, peephole_code->get(pop_bt_offset));
}
|
||||
|
||||
// Emits a load / CheckCharacterAfterAnd('x', 0xFF) / advance-by-one / goto
// loop, with the load issued with the extra arguments (1, 2).
// TEST(PeepholeSkipUntilCharAnd) expects the peephole pass to fold this into
// BC_SKIP_UNTIL_CHAR_AND.
void CreatePeepholeSkipUntilCharAndBytecode(RegExpMacroAssembler* m) {
  RegExpMacroAssembler& masm = *m;
  Label loop_head;

  masm.Bind(&loop_head);
  masm.LoadCurrentCharacter(0, nullptr, true, 1, 2);
  masm.CheckCharacterAfterAnd('x', 0xFF, nullptr);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&loop_head);
}
|
||||
|
||||
// Checks that the masked-compare skip loop is replaced by a single
// BC_SKIP_UNTIL_CHAR_AND when the peephole pass is enabled.
TEST(PeepholeSkipUntilCharAnd) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* const isolate = CcTest::i_isolate();
  Factory* const factory = isolate->factory();
  HandleScope handle_scope(isolate);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilCharAndBytecode(&plain_generator);
  CreatePeepholeSkipUntilCharAndBytecode(&peephole_generator);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  // Generate once without and once with the optimization.
  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int plain_size = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> peephole_code =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int peephole_size = peephole_code->length();

  // Expected layouts. Both variants are expected to end in a BC_POP_BT.
  const int plain_size_expected =
      RegExpBytecodeLength(BC_CHECK_CURRENT_POSITION) +
      RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR_UNCHECKED) +
      RegExpBytecodeLength(BC_AND_CHECK_CHAR) +
      RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
      RegExpBytecodeLength(BC_POP_BT);
  const int peephole_size_expected =
      RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_AND) +
      RegExpBytecodeLength(BC_POP_BT);

  CHECK_EQ(plain_size, plain_size_expected);
  CHECK_EQ(peephole_size, peephole_size_expected);

  // The combined bytecode comes first, directly followed by BC_POP_BT.
  CHECK_EQ(BC_SKIP_UNTIL_CHAR_AND, peephole_code->get(0));
  const int pop_bt_offset = RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_AND);
  CHECK_EQ(BC_POP_BT, peephole_code->get(pop_bt_offset));
}
|
||||
|
||||
// Emits a loop of load / check-char('x') / check-char('y') / advance-by-one /
// goto. TEST(PeepholeSkipUntilCharOrChar) expects the peephole pass to fold
// this into BC_SKIP_UNTIL_CHAR_OR_CHAR.
void CreatePeepholeSkipUntilCharOrCharBytecode(RegExpMacroAssembler* m) {
  RegExpMacroAssembler& masm = *m;
  Label loop_head;

  masm.Bind(&loop_head);
  masm.LoadCurrentCharacter(0, nullptr, true);
  masm.CheckCharacter('x', nullptr);
  masm.CheckCharacter('y', nullptr);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&loop_head);
}
|
||||
|
||||
// Checks that the two-character skip loop is replaced by a single
// BC_SKIP_UNTIL_CHAR_OR_CHAR when the peephole pass is enabled.
TEST(PeepholeSkipUntilCharOrChar) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* const isolate = CcTest::i_isolate();
  Factory* const factory = isolate->factory();
  HandleScope handle_scope(isolate);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilCharOrCharBytecode(&plain_generator);
  CreatePeepholeSkipUntilCharOrCharBytecode(&peephole_generator);

  // Expected layouts. Both variants are expected to end in a BC_POP_BT.
  const int plain_size_expected =
      RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR) +
      RegExpBytecodeLength(BC_CHECK_CHAR) +
      RegExpBytecodeLength(BC_CHECK_CHAR) +
      RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
      RegExpBytecodeLength(BC_POP_BT);
  const int peephole_size_expected =
      RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_OR_CHAR) +
      RegExpBytecodeLength(BC_POP_BT);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int plain_size = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> peephole_code =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int peephole_size = peephole_code->length();

  CHECK_EQ(plain_size, plain_size_expected);
  CHECK_EQ(peephole_size, peephole_size_expected);

  // The combined bytecode comes first, directly followed by BC_POP_BT.
  CHECK_EQ(BC_SKIP_UNTIL_CHAR_OR_CHAR, peephole_code->get(0));
  const int pop_bt_offset = RegExpBytecodeLength(BC_SKIP_UNTIL_CHAR_OR_CHAR);
  CHECK_EQ(BC_POP_BT, peephole_code->get(pop_bt_offset));
}
|
||||
|
||||
// Emits a loop of load / CheckCharacterGT('x') / CheckBitInTable, where a
// table hit jumps to an advance-by-one-and-repeat tail and a miss jumps past
// the loop. The bit table is all zeroes and allocated from |factory|.
// TEST(PeepholeSkipUntilGtOrNotBitInTable) expects the peephole pass to fold
// this into BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE.
void CreatePeepholeSkipUntilGtOrNotBitInTableBytecode(RegExpMacroAssembler* m,
                                                      Factory* factory) {
  // The table contents do not matter for the test; zero every entry.
  Handle<ByteArray> zeroed_table = factory->NewByteArray(
      RegExpMacroAssembler::kTableSize, AllocationType::kOld);
  for (uint32_t entry = 0; entry < RegExpMacroAssembler::kTableSize; ++entry) {
    zeroed_table->set(entry, 0);
  }

  RegExpMacroAssembler& masm = *m;
  Label loop_head, loop_exit, next_char;

  masm.Bind(&loop_head);
  masm.LoadCurrentCharacter(0, nullptr, true);
  masm.CheckCharacterGT('x', nullptr);
  masm.CheckBitInTable(zeroed_table, &next_char);
  masm.GoTo(&loop_exit);

  masm.Bind(&next_char);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&loop_head);

  masm.Bind(&loop_exit);
}
|
||||
|
||||
// Checks that the greater-than / bit-table skip loop is replaced by a single
// BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE when the peephole pass is enabled.
TEST(PeepholeSkipUntilGtOrNotBitInTable) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* const isolate = CcTest::i_isolate();
  Factory* const factory = isolate->factory();
  HandleScope handle_scope(isolate);

  RegExpBytecodeGenerator plain_generator(isolate, &zone);
  RegExpBytecodeGenerator peephole_generator(isolate, &zone);
  CreatePeepholeSkipUntilGtOrNotBitInTableBytecode(&plain_generator, factory);
  CreatePeepholeSkipUntilGtOrNotBitInTableBytecode(&peephole_generator,
                                                   factory);

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  // Generate once without and once with the optimization.
  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> plain_code =
      Handle<ByteArray>::cast(plain_generator.GetCode(source));
  const int plain_size = plain_code->length();

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> peephole_code =
      Handle<ByteArray>::cast(peephole_generator.GetCode(source));
  const int peephole_size = peephole_code->length();

  // Expected layouts. Both variants are expected to end in a BC_POP_BT.
  const int plain_size_expected =
      RegExpBytecodeLength(BC_LOAD_CURRENT_CHAR) +
      RegExpBytecodeLength(BC_CHECK_GT) +
      RegExpBytecodeLength(BC_CHECK_BIT_IN_TABLE) +
      RegExpBytecodeLength(BC_GOTO) +
      RegExpBytecodeLength(BC_ADVANCE_CP_AND_GOTO) +
      RegExpBytecodeLength(BC_POP_BT);
  const int peephole_size_expected =
      RegExpBytecodeLength(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) +
      RegExpBytecodeLength(BC_POP_BT);

  CHECK_EQ(plain_size, plain_size_expected);
  CHECK_EQ(peephole_size, peephole_size_expected);

  // The combined bytecode comes first, directly followed by BC_POP_BT.
  CHECK_EQ(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, peephole_code->get(0));
  const int pop_bt_offset =
      RegExpBytecodeLength(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE);
  CHECK_EQ(BC_POP_BT, peephole_code->get(pop_bt_offset));
}
|
||||
|
||||
// Emits bytecode for TEST(PeepholeLabelFixupsInside): a group of jumps to all
// three caller-provided labels, then a skip loop with |dummy_inside| bound in
// the middle of it, then the same group of jumps again. |dummy_before| is
// bound ahead of the first group and |dummy_after| right behind the loop.
void CreatePeepholeLabelFixupsInsideBytecode(RegExpMacroAssembler* m,
                                             Label* dummy_before,
                                             Label* dummy_after,
                                             Label* dummy_inside) {
  RegExpMacroAssembler& masm = *m;
  Label skip_loop;

  // First group of jumps, placed in front of the loop.
  masm.Bind(dummy_before);
  masm.LoadCurrentCharacter(0, dummy_before);
  masm.CheckCharacter('a', dummy_after);
  masm.CheckCharacter('b', dummy_inside);

  // The skip loop; |dummy_inside| targets its second character check.
  masm.Bind(&skip_loop);
  masm.LoadCurrentCharacter(0, nullptr, true);
  masm.CheckCharacter('x', nullptr);
  masm.Bind(dummy_inside);
  masm.CheckCharacter('y', nullptr);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&skip_loop);

  // Second group of jumps, placed behind the loop.
  masm.Bind(dummy_after);
  masm.LoadCurrentCharacter(0, dummy_before);
  masm.CheckCharacter('a', dummy_after);
  masm.CheckCharacter('b', dummy_inside);
}
|
||||
|
||||
// Verifies that jump operands are rewritten correctly when the peephole pass
// replaces a sequence that contains a jump target inside of it.
TEST(PeepholeLabelFixupsInside) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  RegExpBytecodeGenerator orig(CcTest::i_isolate(), &zone);
  RegExpBytecodeGenerator opt(CcTest::i_isolate(), &zone);

  {
    // The labels used for the optimized generator are not inspected; only the
    // ones bound against the unoptimized generator below are.
    Label dummy_before, dummy_after, dummy_inside;
    CreatePeepholeLabelFixupsInsideBytecode(&opt, &dummy_before, &dummy_after,
                                            &dummy_inside);
  }
  Label dummy_before, dummy_after, dummy_inside;
  CreatePeepholeLabelFixupsInsideBytecode(&orig, &dummy_before, &dummy_after,
                                          &dummy_inside);

  CHECK_EQ(0x00, dummy_before.pos());
  CHECK_EQ(0x28, dummy_inside.pos());
  CHECK_EQ(0x38, dummy_after.pos());

  constexpr int kLabelCount = 3;
  constexpr int kUsesPerLabel = 2;

  const Label* labels[kLabelCount] = {&dummy_before, &dummy_after,
                                      &dummy_inside};
  // Offsets of the jump operands referring to each label: one use in front of
  // the loop and one behind it. Rows are in the same order as |labels|.
  const int label_positions[kLabelCount][kUsesPerLabel] = {
      {0x04, 0x3C},  // dummy_before
      {0x0C, 0x44},  // dummy_after
      {0x14, 0x4C}   // dummy_inside
  };

  // Reads the 32-bit jump operand stored at |offset|. memcpy is used so that
  // the read neither depends on the alignment of the data nor compares only
  // one byte of the operand.
  auto read_jump_target = [](Handle<ByteArray> code, int offset) {
    uint32_t target;
    memcpy(&target, code->GetDataStartAddress() + offset, sizeof(target));
    return static_cast<int>(target);
  };

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array = Handle<ByteArray>::cast(orig.GetCode(source));

  for (int label_idx = 0; label_idx < kLabelCount; label_idx++) {
    for (int pos_idx = 0; pos_idx < kUsesPerLabel; pos_idx++) {
      CHECK_EQ(labels[label_idx]->pos(),
               read_jump_target(array, label_positions[label_idx][pos_idx]));
    }
  }

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(opt.GetCode(source));

  // How far a jump operand moves, indexed by use (column of label_positions).
  const int pos_fixups[kUsesPerLabel] = {
      0,  // Uses in front of the replaced sequence do not move.
      4,  // Uses behind the replacement move by 4: optimized size (20) -
          // original size (32) + preserved length (16).
  };
  // How far a jump target moves, indexed like |labels|.
  const int target_fixups[kLabelCount] = {
      0,  // dummy_before is unchanged.
      4,  // dummy_after moves by 4.
      4   // dummy_inside moves by 4.
  };

  for (int label_idx = 0; label_idx < kLabelCount; label_idx++) {
    for (int pos_idx = 0; pos_idx < kUsesPerLabel; pos_idx++) {
      int label_pos = label_positions[label_idx][pos_idx] + pos_fixups[pos_idx];
      int jump_address = read_jump_target(array_optimized, label_pos);
      int expected_jump_address =
          labels[label_idx]->pos() + target_fixups[label_idx];
      CHECK_EQ(expected_jump_address, jump_address);
    }
  }
}
|
||||
|
||||
// Emits bytecode for TEST(PeepholeLabelFixupsComplex): three groups of jumps
// to all four caller-provided labels, separated by two skip loops.
// |dummy_before| is bound ahead of the first group, |dummy_between| between
// the two loops, |dummy_inside| in the middle of the second loop and
// |dummy_after| behind it.
void CreatePeepholeLabelFixupsComplexBytecode(RegExpMacroAssembler* m,
                                              Label* dummy_before,
                                              Label* dummy_between,
                                              Label* dummy_after,
                                              Label* dummy_inside) {
  RegExpMacroAssembler& masm = *m;
  Label first_loop, second_loop;

  // First group of jumps.
  masm.Bind(dummy_before);
  masm.LoadCurrentCharacter(0, dummy_before);
  masm.CheckCharacter('a', dummy_between);
  masm.CheckCharacter('b', dummy_after);
  masm.CheckCharacter('c', dummy_inside);

  // First skip loop; no label is bound inside of it.
  masm.Bind(&first_loop);
  masm.LoadCurrentCharacter(0, nullptr, true);
  masm.CheckCharacter('x', nullptr);
  masm.CheckCharacter('y', nullptr);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&first_loop);

  // Second group of jumps.
  masm.Bind(dummy_between);
  masm.LoadCurrentCharacter(0, dummy_before);
  masm.CheckCharacter('a', dummy_between);
  masm.CheckCharacter('b', dummy_after);
  masm.CheckCharacter('c', dummy_inside);

  // Second skip loop; |dummy_inside| targets its second character check.
  masm.Bind(&second_loop);
  masm.LoadCurrentCharacter(0, nullptr, true);
  masm.CheckCharacter('x', nullptr);
  masm.Bind(dummy_inside);
  masm.CheckCharacter('y', nullptr);
  masm.AdvanceCurrentPosition(1);
  masm.GoTo(&second_loop);

  // Third group of jumps.
  masm.Bind(dummy_after);
  masm.LoadCurrentCharacter(0, dummy_before);
  masm.CheckCharacter('a', dummy_between);
  masm.CheckCharacter('b', dummy_after);
  masm.CheckCharacter('c', dummy_inside);
}
|
||||
|
||||
// Verifies that jump operands are rewritten correctly when the peephole pass
// performs two replacements, one of which contains a jump target.
TEST(PeepholeLabelFixupsComplex) {
  Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  RegExpBytecodeGenerator orig(CcTest::i_isolate(), &zone);
  RegExpBytecodeGenerator opt(CcTest::i_isolate(), &zone);

  {
    // The labels used for the optimized generator are not inspected; only the
    // ones bound against the unoptimized generator below are.
    Label dummy_before, dummy_between, dummy_after, dummy_inside;
    CreatePeepholeLabelFixupsComplexBytecode(
        &opt, &dummy_before, &dummy_between, &dummy_after, &dummy_inside);
  }
  Label dummy_before, dummy_between, dummy_after, dummy_inside;
  CreatePeepholeLabelFixupsComplexBytecode(&orig, &dummy_before, &dummy_between,
                                           &dummy_after, &dummy_inside);

  CHECK_EQ(0x00, dummy_before.pos());
  CHECK_EQ(0x40, dummy_between.pos());
  CHECK_EQ(0x70, dummy_inside.pos());
  CHECK_EQ(0x80, dummy_after.pos());

  constexpr int kLabelCount = 4;
  constexpr int kUsesPerLabel = 3;

  const Label* labels[kLabelCount] = {&dummy_before, &dummy_between,
                                      &dummy_after, &dummy_inside};
  // Offsets of the jump operands referring to each label: one use per group
  // of jumps. Rows are in the same order as |labels|.
  const int label_positions[kLabelCount][kUsesPerLabel] = {
      {0x04, 0x44, 0x84},  // dummy_before
      {0x0C, 0x4C, 0x8C},  // dummy_between
      {0x14, 0x54, 0x94},  // dummy_after
      {0x1C, 0x5C, 0x9C}   // dummy_inside
  };

  // Reads the 32-bit jump operand stored at |offset|. memcpy is used so that
  // the read neither depends on the alignment of the data nor compares only
  // one byte of the operand.
  auto read_jump_target = [](Handle<ByteArray> code, int offset) {
    uint32_t target;
    memcpy(&target, code->GetDataStartAddress() + offset, sizeof(target));
    return static_cast<int>(target);
  };

  Handle<String> source = factory->NewStringFromStaticChars("dummy");

  i::FLAG_regexp_peephole_optimization = false;
  Handle<ByteArray> array = Handle<ByteArray>::cast(orig.GetCode(source));

  for (int label_idx = 0; label_idx < kLabelCount; label_idx++) {
    for (int pos_idx = 0; pos_idx < kUsesPerLabel; pos_idx++) {
      CHECK_EQ(labels[label_idx]->pos(),
               read_jump_target(array, label_positions[label_idx][pos_idx]));
    }
  }

  i::FLAG_regexp_peephole_optimization = true;
  Handle<ByteArray> array_optimized =
      Handle<ByteArray>::cast(opt.GetCode(source));

  // How far a jump operand moves, indexed by use (column of label_positions).
  const int pos_fixups[kUsesPerLabel] = {
      0,    // Uses in front of the first replacement do not move.
      -12,  // Uses behind the first replacement move by -12: optimized size
            // (20) - original size (32).
      -8    // Uses behind the second replacement move by -8: -12 from the
            // first replacement, -12 from the second replacement, +16 for the
            // preserved bytecodes.
  };
  // How far a jump target moves, indexed like |labels|.
  const int target_fixups[kLabelCount] = {
      0,    // dummy_before is unchanged.
      -12,  // dummy_between moves by -12.
      -8,   // dummy_after moves by -8.
      -8    // dummy_inside moves by -8.
  };

  for (int label_idx = 0; label_idx < kLabelCount; label_idx++) {
    for (int pos_idx = 0; pos_idx < kUsesPerLabel; pos_idx++) {
      int label_pos = label_positions[label_idx][pos_idx] + pos_fixups[pos_idx];
      int jump_address = read_jump_target(array_optimized, label_pos);
      int expected_jump_address =
          labels[label_idx]->pos() + target_fixups[label_idx];
      CHECK_EQ(expected_jump_address, jump_address);
    }
  }
}
|
||||
|
||||
#undef CHECK_PARSE_ERROR
|
||||
#undef CHECK_SIMPLE
|
||||
#undef CHECK_MIN_MAX
|
||||
|
67
tools/regexp-sequences.py
Executable file
67
tools/regexp-sequences.py
Executable file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2019 the V8 project authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
"""
|
||||
python %prog trace-file
|
||||
|
||||
Parses output generated by v8 with flag --trace-regexp-bytecodes and generates
|
||||
a list of the most common sequences.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import sys
|
||||
import re
|
||||
import collections
|
||||
|
||||
def parse(file, seqlen):
  """Counts bytecode sequences in a --trace-regexp-bytecodes trace.

  Args:
    file: Path of the trace file to read.
    seqlen: Longest sequence length to track.

  Returns:
    A tuple (bc_cnt, total). bc_cnt[i] maps a ' --> '-joined sequence of up to
    i + 1 consecutive bytecode names to the number of times it was seen. total
    is the number of trace lines that matched.
  """
  # example:
  # pc = 00, sp = 0, curpos = 0, curchar = 0000000a ..., bc = PUSH_BT, 02, 00, 00, 00, e8, 00, 00, 00 .......
  rx = re.compile(r'pc = (?P<pc>[0-9a-f]+), sp = (?P<sp>\d+), '
                  r'curpos = (?P<curpos>\d+), curchar = (?P<char_hex>[0-9a-f]+) '
                  r'(?:\.|\()(?P<char>\.|\w)(?:\.|\)), bc = (?P<bc>\w+), .*')
  total = 0
  bc_cnt = [{} for _ in range(seqlen)]
  # One sliding window per sequence length. They exist from the start so that
  # a trace line appearing before the first interpreter header is counted
  # instead of crashing.
  last = [collections.deque(maxlen=i + 1) for i in range(seqlen)]
  with open(file) as f:
    for line in f:
      line = line.strip()
      if line.startswith("Start bytecode interpreter"):
        # A new interpreter run starts; sequences must not span two runs.
        for window in last:
          window.clear()

      match = rx.search(line)
      if match:
        total += 1
        bc = match.group('bc')
        for window, counts in zip(last, bc_cnt):
          window.append(bc)
          key = ' --> '.join(window)
          counts[key] = counts.get(key, 0) + 1

  return bc_cnt, total
|
||||
|
||||
def print_most_common(d, seqlen, total):
  """Prints the entries of d by descending count.

  Each line shows the key, its count and its share of total in percent.
  Printing stops at the first entry whose share is below one percent.
  seqlen is accepted but not used.
  """
  ranked = sorted(d.items(), key=lambda kv: kv[1], reverse=True)
  for sequence, count in ranked:
    share = count*100/total
    if share < 1.0:
      # Everything after this entry is at most as frequent.
      return
    print("{}: {} ({} %)".format(sequence, count, share))
|
||||
|
||||
def main(argv):
  """Prints the most common bytecode sequences of the trace in argv[1].

  Sequences of length 1 up to 7 are reported. Returns 0 on success and 1 when
  no trace file was given.
  """
  if len(argv) < 2:
    # Fail with a usage hint instead of an IndexError.
    print("usage: regexp-sequences.py trace-file")
    return 1

  max_seq = 7
  bc_cnt, total = parse(argv[1], max_seq)
  for i in range(max_seq):
    print()
    print("Most common of length {}".format(i+1))
    print()
    print_most_common(bc_cnt[i], i, total)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))
|
Loading…
Reference in New Issue
Block a user