[regexp] Add dedicated flags for printing regexp code and bytecode
Printing regexp code used to be behind the generic --print-code flag, but there was no way to distinguish irregexp-generated code from other generated code; and printing regexp bytecode was not supported at all (the --trace-regexp-bytecodes flag *did* exist, but prints the execution trace at runtime and not the generated bytecode sequence). This CL adds two new flags: --print-regexp-code and --print-regexp-bytecode. Regexp code is no longer printed as part of --print-code. Example output for --print-regexp-bytecode: generated bytecode for regexp pattern: .(?<!^.) 0x1ddcc614cbd0 0 PUSH_BT, 02, 00, 00, 00, c0, 00, 00, 00 ....... 0x1ddcc614cbd8 8 LOAD_CURRENT_CHAR, 11, 00, 00, 00, b0, 00, 00, 00 ....... 0x1ddcc614cbe0 10 CHECK_CHAR, 18, 0a, 00, 00, b0, 00, 00, 00 ....... 0x1ddcc614cbe8 18 CHECK_CHAR, 18, 0d, 00, 00, b0, 00, 00, 00 ....... 0x1ddcc614cbf0 20 PUSH_CP, 01, 00, 00, 00 ... Bug: chromium:996391 Change-Id: I731defbd7cf9ed29753a39bb1d7205dc136ca950 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1773249 Commit-Queue: Jakob Gruber <jgruber@chromium.org> Auto-Submit: Jakob Gruber <jgruber@chromium.org> Reviewed-by: Peter Marshall <petermarshall@chromium.org> Cr-Commit-Position: refs/heads/master@{#63442}
This commit is contained in:
parent
e39c701963
commit
eebb18d33e
@ -1514,7 +1514,6 @@ DEFINE_BOOL(trace_elements_transitions, false, "trace elements transitions")
|
||||
DEFINE_BOOL(trace_creation_allocation_sites, false,
|
||||
"trace the creation of allocation sites")
|
||||
|
||||
// codegen-ia32.cc / codegen-arm.cc
|
||||
DEFINE_BOOL(print_code, false, "print generated code")
|
||||
DEFINE_BOOL(print_opt_code, false, "print optimized code")
|
||||
DEFINE_STRING(print_opt_code_filter, "*", "filter for printing optimized code")
|
||||
@ -1522,6 +1521,8 @@ DEFINE_BOOL(print_code_verbose, false, "print more information for code")
|
||||
DEFINE_BOOL(print_builtin_code, false, "print generated code for builtins")
|
||||
DEFINE_STRING(print_builtin_code_filter, "*",
|
||||
"filter for printing builtin code")
|
||||
DEFINE_BOOL(print_regexp_code, false, "print generated regexp code")
|
||||
DEFINE_BOOL(print_regexp_bytecode, false, "print generated regexp bytecode")
|
||||
DEFINE_BOOL(print_builtin_size, false, "print code size for builtins")
|
||||
|
||||
#ifdef ENABLE_DISASSEMBLER
|
||||
@ -1538,6 +1539,7 @@ DEFINE_IMPLICATION(print_all_code, print_code)
|
||||
DEFINE_IMPLICATION(print_all_code, print_opt_code)
|
||||
DEFINE_IMPLICATION(print_all_code, print_code_verbose)
|
||||
DEFINE_IMPLICATION(print_all_code, print_builtin_code)
|
||||
DEFINE_IMPLICATION(print_all_code, print_regexp_code)
|
||||
DEFINE_IMPLICATION(print_all_code, code_comments)
|
||||
#endif
|
||||
|
||||
|
@ -5,6 +5,8 @@
|
||||
#ifndef V8_REGEXP_REGEXP_BYTECODES_H_
|
||||
#define V8_REGEXP_REGEXP_BYTECODES_H_
|
||||
|
||||
#include "src/base/macros.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
@ -70,14 +72,40 @@ const int BYTECODE_SHIFT = 8;
|
||||
V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \
|
||||
V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */
|
||||
|
||||
#define DECLARE_BYTECODES(name, code, length) static const int BC_##name = code;
|
||||
#define COUNT(...) +1
|
||||
static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT);
|
||||
#undef COUNT
|
||||
|
||||
// Just making sure we assigned values above properly. They should be
|
||||
// contiguous, strictly increasing, and start at 0.
|
||||
// TODO(jgruber): Do not explicitly assign values, instead generate them
|
||||
// implicitly from the list order.
|
||||
STATIC_ASSERT(kRegExpBytecodeCount == 53);
|
||||
|
||||
#define DECLARE_BYTECODES(name, code, length) \
|
||||
static constexpr int BC_##name = code;
|
||||
BYTECODE_ITERATOR(DECLARE_BYTECODES)
|
||||
#undef DECLARE_BYTECODES
|
||||
|
||||
#define DECLARE_BYTECODE_LENGTH(name, code, length) \
|
||||
static const int BC_##name##_LENGTH = length;
|
||||
static constexpr int kRegExpBytecodeLengths[] = {
|
||||
#define DECLARE_BYTECODE_LENGTH(name, code, length) length,
|
||||
BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
|
||||
#undef DECLARE_BYTECODE_LENGTH
|
||||
};
|
||||
|
||||
// Returns the length entry for |bytecode| from kRegExpBytecodeLengths, the
// table built from the `length` column of BYTECODE_ITERATOR.
// |bytecode| is used directly as the table index; no range check is done
// here, so the caller must pass a value in [0, kRegExpBytecodeCount).
inline constexpr int RegExpBytecodeLength(int bytecode) {
|
||||
return kRegExpBytecodeLengths[bytecode];
|
||||
}
|
||||
|
||||
static const char* const kRegExpBytecodeNames[] = {
|
||||
#define DECLARE_BYTECODE_NAME(name, ...) #name,
|
||||
BYTECODE_ITERATOR(DECLARE_BYTECODE_NAME)
|
||||
#undef DECLARE_BYTECODE_NAME
|
||||
};
|
||||
|
||||
// Returns the stringified name for |bytecode| from kRegExpBytecodeNames, the
// table built by applying `#name` to every BYTECODE_ITERATOR entry.
// |bytecode| is used directly as the table index; no range check is done
// here, so the caller must pass a value in [0, kRegExpBytecodeCount).
inline const char* RegExpBytecodeName(int bytecode) {
|
||||
return kRegExpBytecodeNames[bytecode];
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
@ -5,13 +5,11 @@
|
||||
#include "src/regexp/regexp-compiler.h"
|
||||
|
||||
#include "src/base/safe_conversions.h"
|
||||
#include "src/diagnostics/code-tracer.h"
|
||||
#include "src/execution/isolate.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "src/regexp/regexp-macro-assembler-arch.h"
|
||||
#include "src/regexp/regexp-macro-assembler-tracer.h"
|
||||
#include "src/strings/unicode-inl.h"
|
||||
#include "src/utils/ostreams.h"
|
||||
#include "src/zone/zone-list-inl.h"
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
@ -273,13 +271,7 @@ RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
|
||||
Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
|
||||
isolate->IncreaseTotalRegexpCodeGenerated(code->Size());
|
||||
work_list_ = nullptr;
|
||||
#ifdef ENABLE_DISASSEMBLER
|
||||
if (FLAG_print_code && code->IsCode()) {
|
||||
CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
|
||||
OFStream os(trace_scope.file());
|
||||
Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
if (FLAG_trace_regexp_assembler) {
|
||||
delete macro_assembler_;
|
||||
|
@ -30,9 +30,10 @@
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
|
||||
int len, Vector<const uc16> subject,
|
||||
bool unicode) {
|
||||
namespace {
|
||||
|
||||
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
|
||||
Vector<const uc16> subject, bool unicode) {
|
||||
Address offset_a =
|
||||
reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
|
||||
Address offset_b =
|
||||
@ -42,9 +43,8 @@ static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
|
||||
offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
|
||||
}
|
||||
|
||||
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
|
||||
int len, Vector<const uint8_t> subject,
|
||||
bool unicode) {
|
||||
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
|
||||
Vector<const uint8_t> subject, bool unicode) {
|
||||
// For Latin1 characters the unicode flag makes no difference.
|
||||
for (int i = 0; i < len; i++) {
|
||||
unsigned int old_char = subject[from++];
|
||||
@ -63,42 +63,48 @@ static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
|
||||
return true;
|
||||
}
|
||||
|
||||
// Prints one line describing the bytecode at |pc|: its name, every byte of
// the instruction as two-digit hex, and then the argument bytes as
// characters. The byte at |pc| is treated as the bytecode value and is used
// to look up both the name and the instruction length.
// NOTE(review): |code_base| is not read anywhere in this body.
void DisassembleSingleBytecode(const byte* code_base, const byte* pc) {
|
||||
PrintF("%s", RegExpBytecodeName(*pc));
|
||||
|
||||
// Args and the bytecode as hex.
// Starts at index 0, so the bytecode byte itself is included in the dump.
|
||||
for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
|
||||
PrintF(", %02x", pc[i]);
|
||||
}
|
||||
PrintF(" ");
|
||||
|
||||
// Args as ascii.
// Starts at index 1 to skip the bytecode byte; bytes for which
// std::isprint is false are shown as '.'.
|
||||
for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
|
||||
unsigned char b = pc[i];
|
||||
PrintF("%c", std::isprint(b) ? b : '.');
|
||||
}
|
||||
PrintF("\n");
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
static void TraceInterpreter(const byte* code_base, const byte* pc,
|
||||
void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
|
||||
int stack_depth, int current_position,
|
||||
uint32_t current_char, int bytecode_length,
|
||||
const char* bytecode_name) {
|
||||
if (FLAG_trace_regexp_bytecodes) {
|
||||
bool printable = (current_char < 127 && current_char >= 32);
|
||||
const bool printable = std::isprint(current_char);
|
||||
const char* format =
|
||||
printable
|
||||
? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s"
|
||||
: "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
|
||||
? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = "
|
||||
: "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = ";
|
||||
PrintF(format, pc - code_base, stack_depth, current_position, current_char,
|
||||
printable ? current_char : '.', bytecode_name);
|
||||
for (int i = 0; i < bytecode_length; i++) {
|
||||
printf(", %02x", pc[i]);
|
||||
}
|
||||
printf(" ");
|
||||
for (int i = 1; i < bytecode_length; i++) {
|
||||
unsigned char b = pc[i];
|
||||
if (b < 127 && b >= 32) {
|
||||
printf("%c", b);
|
||||
} else {
|
||||
printf(".");
|
||||
}
|
||||
}
|
||||
printf("\n");
|
||||
printable ? current_char : '.');
|
||||
|
||||
DisassembleSingleBytecode(code_base, pc);
|
||||
}
|
||||
}
|
||||
#endif // DEBUG
|
||||
|
||||
static int32_t Load32Aligned(const byte* pc) {
|
||||
int32_t Load32Aligned(const byte* pc) {
|
||||
DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
|
||||
return *reinterpret_cast<const int32_t*>(pc);
|
||||
}
|
||||
|
||||
static int32_t Load16Aligned(const byte* pc) {
|
||||
int32_t Load16Aligned(const byte* pc) {
|
||||
DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
|
||||
return *reinterpret_cast<const uint16_t*>(pc);
|
||||
}
|
||||
@ -140,8 +146,6 @@ class BacktrackStack {
|
||||
DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
|
||||
};
|
||||
|
||||
namespace {
|
||||
|
||||
IrregexpInterpreter::Result StackOverflow(Isolate* isolate,
|
||||
RegExp::CallOrigin call_origin) {
|
||||
CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
|
||||
@ -269,7 +273,7 @@ IrregexpInterpreter::Result HandleInterrupts(
|
||||
// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can
|
||||
// potentially be executed unconditionally, reducing memory stall.
|
||||
#define ADVANCE(name) \
|
||||
next_pc = pc + BC_##name##_LENGTH; \
|
||||
next_pc = pc + RegExpBytecodeLength(BC_##name); \
|
||||
DECODE()
|
||||
#define SET_PC_FROM_OFFSET(offset) \
|
||||
next_pc = code_base + offset; \
|
||||
@ -278,8 +282,8 @@ IrregexpInterpreter::Result HandleInterrupts(
|
||||
#ifdef DEBUG
|
||||
#define BYTECODE(name) \
|
||||
BC_LABEL(name) \
|
||||
TraceInterpreter(code_base, pc, backtrack_stack.sp(), current, current_char, \
|
||||
BC_##name##_LENGTH, #name);
|
||||
MaybeTraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
|
||||
current_char, RegExpBytecodeLength(BC_##name), #name);
|
||||
#else
|
||||
#define BYTECODE(name) BC_LABEL(name)
|
||||
#endif // DEBUG
|
||||
@ -779,6 +783,25 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
|
||||
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
// Prints the bytecode sequence stored in |byte_array|, preceded by a header
// line containing |pattern|. Each following line shows the instruction's
// address, its offset from the start of the array, and the output of
// DisassembleSingleBytecode for that instruction.
void IrregexpInterpreter::Disassemble(ByteArray byte_array,
|
||||
const std::string& pattern) {
|
||||
// NOTE(review): presumably needed because code_base below is a raw pointer
// into byte_array's storage — confirm.
DisallowHeapAllocation no_gc;
|
||||
|
||||
PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern.c_str());
|
||||
|
||||
const byte* const code_base = byte_array.GetDataStartAddress();
|
||||
const int byte_array_length = byte_array.length();
|
||||
ptrdiff_t offset = 0;
|
||||
|
||||
// Walk the array one instruction at a time; the step size comes from the
// length table entry for the byte found at the current position.
while (offset < byte_array_length) {
|
||||
const byte* const pc = code_base + offset;
|
||||
PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
|
||||
DisassembleSingleBytecode(code_base, pc);
|
||||
offset += RegExpBytecodeLength(*pc);
|
||||
}
|
||||
}
|
||||
|
||||
// static
|
||||
IrregexpInterpreter::Result IrregexpInterpreter::Match(
|
||||
Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
|
||||
|
@ -41,6 +41,8 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
|
||||
int registers_length, int start_position,
|
||||
RegExp::CallOrigin call_origin);
|
||||
|
||||
static void Disassemble(ByteArray byte_array, const std::string& pattern);
|
||||
|
||||
private:
|
||||
static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
|
||||
int* registers, int registers_length, int start_position,
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include "src/regexp/regexp.h"
|
||||
|
||||
#include "src/codegen/compilation-cache.h"
|
||||
#include "src/diagnostics/code-tracer.h"
|
||||
#include "src/heap/heap-inl.h"
|
||||
#include "src/objects/js-regexp-inl.h"
|
||||
#include "src/regexp/regexp-bytecode-generator.h"
|
||||
@ -14,6 +15,7 @@
|
||||
#include "src/regexp/regexp-macro-assembler-arch.h"
|
||||
#include "src/regexp/regexp-parser.h"
|
||||
#include "src/strings/string-search.h"
|
||||
#include "src/utils/ostreams.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
@ -572,14 +574,15 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
|
||||
|
||||
subject = String::Flatten(isolate, subject);
|
||||
|
||||
// Prepare space for the return values.
|
||||
#ifdef DEBUG
|
||||
if (FLAG_regexp_interpret_all && FLAG_trace_regexp_bytecodes) {
|
||||
if (FLAG_trace_regexp_bytecodes && regexp->ShouldProduceBytecode()) {
|
||||
String pattern = regexp->Pattern();
|
||||
PrintF("\n\nRegexp match: /%s/\n\n", pattern.ToCString().get());
|
||||
PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
|
||||
}
|
||||
#endif
|
||||
|
||||
// Prepare space for the return values.
|
||||
int required_registers = RegExp::IrregexpPrepare(isolate, regexp, subject);
|
||||
if (required_registers < 0) {
|
||||
// Compiling failed with an exception.
|
||||
@ -830,6 +833,26 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
|
||||
RegExpCompiler::CompilationResult result = compiler.Assemble(
|
||||
isolate, macro_assembler.get(), node, data->capture_count, pattern);
|
||||
|
||||
// Code / bytecode printing.
|
||||
{
|
||||
#ifdef ENABLE_DISASSEMBLER
|
||||
if (FLAG_print_regexp_code &&
|
||||
data->compilation_target == RegExpCompilationTarget::kNative) {
|
||||
CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
|
||||
OFStream os(trace_scope.file());
|
||||
Handle<Code> c(Code::cast(result.code), isolate);
|
||||
auto pattern_cstring = pattern->ToCString();
|
||||
c->Disassemble(pattern_cstring.get(), os);
|
||||
}
|
||||
#endif
|
||||
if (FLAG_print_regexp_bytecode &&
|
||||
data->compilation_target == RegExpCompilationTarget::kBytecode) {
|
||||
Handle<ByteArray> bytecode(ByteArray::cast(result.code), isolate);
|
||||
auto pattern_cstring = pattern->ToCString();
|
||||
IrregexpInterpreter::Disassemble(*bytecode, pattern_cstring.get());
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAG_correctness_fuzzer_suppressions &&
|
||||
strncmp(result.error_message, "Stack overflow", 15) == 0) {
|
||||
FATAL("Aborting on stack overflow");
|
||||
|
Loading…
Reference in New Issue
Block a user