[tools][wasm] wami: full-module annotated hex dump

This adds a bunch of tracing hooks to the module decoder and uses
them to support "annotated hexdump" output for full modules in wami:

$ out/x64.release/wami my_module.wasm --full-hexdump

Change-Id: I5821d940b5ec236df9708eecd0124172d8893ffd
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3754741
Reviewed-by: Manos Koukoutos <manoskouk@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81791}
This commit is contained in:
Jakob Kummerow 2022-07-18 20:36:30 +02:00 committed by V8 LUCI CQ
parent 4cd8ebdc06
commit d4a4d69cca
17 changed files with 691 additions and 113 deletions

View File

@ -52,7 +52,7 @@ void Histogram::AddSample(int sample) {
}
}
V8_EXPORT_PRIVATE void* Histogram::CreateHistogram() const {
void* Histogram::CreateHistogram() const {
return counters_->CreateHistogram(name_, min_, max_, num_buckets_);
}

View File

@ -155,7 +155,7 @@ class StatsCounter {
class Histogram {
public:
// Add a single sample to this histogram.
void AddSample(int sample);
V8_EXPORT_PRIVATE void AddSample(int sample);
// Returns true if this histogram is enabled.
bool Enabled() { return histogram_ != nullptr; }

View File

@ -27,7 +27,7 @@ namespace wasm {
// which is used in module-instantiate. We merge two distinct functionalities
// in one class to reduce the number of WasmFullDecoder instantiations, and thus
// V8 binary code size.
class ConstantExpressionInterface {
class V8_EXPORT_PRIVATE ConstantExpressionInterface {
public:
static constexpr Decoder::ValidateFlag validate = Decoder::kFullValidation;
static constexpr DecodingMode decoding_mode = kConstantExpression;

View File

@ -183,17 +183,26 @@ class Decoder {
// Reads a 8-bit unsigned integer (byte) and advances {pc_}.
uint8_t consume_u8(const char* name = "uint8_t") {
return consume_little_endian<uint8_t>(name);
return consume_little_endian<uint8_t, kTrace>(name);
}
// Traced variant of {consume_u8}: hands the one-byte range at {pc_} and its
// label {name} to {tracer}, then reads the byte and advances {pc_}.
// Note that the tracer is notified before {consume_little_endian} performs its
// {checkAvailable} bounds check.
template <class Tracer>
uint8_t consume_u8(const char* name, Tracer& tracer) {
  tracer.Bytes(pc_, sizeof(uint8_t));
  tracer.Description(name);
  // {kNoTrace} skips the TRACE_IF output of the shared reader; the tracer
  // calls above take its place.
  const uint8_t value = consume_little_endian<uint8_t, kNoTrace>(name);
  return value;
}
// Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
uint16_t consume_u16(const char* name = "uint16_t") {
return consume_little_endian<uint16_t>(name);
return consume_little_endian<uint16_t, kTrace>(name);
}
// Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
uint32_t consume_u32(const char* name = "uint32_t") {
return consume_little_endian<uint32_t>(name);
// Traced 32-bit little-endian read: hands the four-byte range at {pc_} and its
// label {name} to {tracer}, then reads the value and advances {pc_}.
// Note that the tracer is notified before {consume_little_endian} performs its
// {checkAvailable} bounds check.
template <class Tracer>
uint32_t consume_u32(const char* name, Tracer& tracer) {
  tracer.Bytes(pc_, sizeof(uint32_t));
  tracer.Description(name);
  // {kNoTrace} skips the TRACE_IF output of the shared reader; the tracer
  // calls above take its place.
  const uint32_t value = consume_little_endian<uint32_t, kNoTrace>(name);
  return value;
}
// Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}.
@ -204,6 +213,16 @@ class Decoder {
pc_ += length;
return result;
}
// Traced variant of {consume_u32v}: decodes an unsigned 32-bit LEB128 value at
// {pc_}, hands the bytes it occupies and the label {name} to {tracer}, and
// then advances {pc_} past the encoded value.
template <class Tracer>
uint32_t consume_u32v(const char* name, Tracer& tracer) {
  uint32_t leb_size = 0;
  const uint32_t value =
      read_leb<uint32_t, kFullValidation, kNoTrace>(pc_, &leb_size, name);
  // Report the range only after decoding, because the encoded length is not
  // known beforehand.
  tracer.Bytes(pc_, leb_size);
  tracer.Description(name);
  pc_ += leb_size;
  return value;
}
// Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}.
int32_t consume_i32v(const char* name = "var_int32") {
@ -215,10 +234,13 @@ class Decoder {
}
// Reads a LEB128 variable-length unsigned 64-bit integer and advances {pc_}.
uint64_t consume_u64v(const char* name = "var_uint64") {
template <class Tracer>
uint64_t consume_u64v(const char* name, Tracer& tracer) {
uint32_t length = 0;
uint64_t result =
read_leb<uint64_t, kFullValidation, kTrace>(pc_, &length, name);
read_leb<uint64_t, kFullValidation, kNoTrace>(pc_, &length, name);
tracer.Bytes(pc_, length);
tracer.Description(name);
pc_ += length;
return result;
}
@ -242,6 +264,12 @@ class Decoder {
pc_ = end_;
}
}
// Traced variant of {consume_bytes}: hands the {size}-byte range starting at
// {pc_} and its label {name} to {tracer}, then delegates the actual skipping
// to the untraced overload.
// NOTE(review): the tracer receives the range before the untraced overload
// runs, so any bounds handling in that overload happens afterwards; confirm
// that tracers tolerate a range extending past {end_}.
template <class Tracer>
void consume_bytes(uint32_t size, const char* name, Tracer& tracer) {
tracer.Bytes(pc_, size);
tracer.Description(name);
// The label has already been given to the tracer, so no name is passed on.
consume_bytes(size, nullptr);
}
// Check that at least {size} bytes exist between {pc_} and {end_}.
bool checkAvailable(uint32_t size) {
@ -397,9 +425,9 @@ class Decoder {
return base::ReadLittleEndianValue<IntType>(reinterpret_cast<Address>(pc));
}
template <typename IntType>
template <typename IntType, TraceFlag trace>
IntType consume_little_endian(const char* name) {
TRACE(" +%u %-20s: ", pc_offset(), name);
TRACE_IF(trace, " +%u %-20s: ", pc_offset(), name);
if (!checkAvailable(sizeof(IntType))) {
traceOffEnd();
pc_ = end_;
@ -407,7 +435,7 @@ class Decoder {
}
IntType val = read_little_endian<IntType, kNoValidation>(pc_, name);
traceByteRange(pc_, pc_ + sizeof(IntType));
TRACE("= %d\n", val);
TRACE_IF(trace, "= %d\n", val);
pc_ += sizeof(IntType);
return val;
}

View File

@ -293,9 +293,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
}
}
HeapType consume_heap_type(Decoder* decoder, const WasmModule* module,
const WasmFeatures& enabled);
// Read a value type starting at address {pc} using {decoder}.
// No bytes are consumed.
// The length of the read value type is written in {length}.

View File

@ -16,17 +16,6 @@ namespace v8 {
namespace internal {
namespace wasm {
namespace value_type_reader {
HeapType consume_heap_type(Decoder* decoder, const WasmModule* module,
const WasmFeatures& enabled) {
uint32_t length;
HeapType result = value_type_reader::read_heap_type<Decoder::kFullValidation>(
decoder, decoder->pc(), &length, module, enabled);
decoder->consume_bytes(length, "heap type");
return result;
}
} // namespace value_type_reader
bool DecodeLocalDecls(const WasmFeatures& enabled, BodyLocalDecls* decls,
const WasmModule* module, const byte* start,
const byte* end) {

View File

@ -40,6 +40,26 @@ class NoTracer {
void StartOffset(uint32_t offset) {}
void ElementOffset(uint32_t offset) {}
void DataOffset(uint32_t offset) {}
// Hooks for annotated hex dumps. Every hook below has an empty body, so
// decoding with this tracer emits nothing for them.
// Receives a run of {count} raw bytes starting at {start}; decoder code calls
// this for the bytes it is about to consume or has just decoded.
void Bytes(const byte* start, uint32_t count) {}
// Description overloads: callers pass a label string, a character range with
// an explicit length, a decoded number, a value type, a heap type, or a
// function signature.
void Description(const char* desc) {}
void Description(const char* desc, size_t length) {}
void Description(uint32_t number) {}
void Description(ValueType type) {}
void Description(HeapType type) {}
void Description(const FunctionSig* sig) {}
// Line-break hooks. Judging by the names, these end the current output line
// unconditionally, only when it is full, or only when it is non-empty
// (presumably; the real behavior lives in non-trivial tracers not shown here).
void NextLine() {}
void NextLineIfFull() {}
void NextLineIfNonEmpty() {}
// Called with the byte range [start, end) of a constant initializer expression
// and the type that expression is expected to produce.
void InitializerExpression(const byte* start, const byte* end,
ValueType expected_type) {}
// Called with a function and the start of its body bytes.
void FunctionBody(const WasmFunction* func, const byte* start) {}
// Called with the index of the function currently being described.
void FunctionName(uint32_t func_index) {}
// Called with the byte range [start, end) of the name section and the offset
// of {start}.
void NameSection(const byte* start, const byte* end, uint32_t offset) {}
};
constexpr char kNameString[] = "name";
@ -74,14 +94,23 @@ inline bool validate_utf8(Decoder* decoder, WireBytesRef string) {
// Reads a length-prefixed string, checking that it is within bounds. Returns
// the offset of the string, and the length as an out parameter.
template <class Tracer>
inline WireBytesRef consume_string(Decoder* decoder,
unibrow::Utf8Variant grammar,
const char* name) {
uint32_t length = decoder->consume_u32v("string length");
const char* name, Tracer& tracer) {
tracer.Description(name);
uint32_t length = decoder->consume_u32v(" length:", tracer);
tracer.Description(length);
tracer.NextLine();
uint32_t offset = decoder->pc_offset();
const byte* string_start = decoder->pc();
// Consume bytes before validation to guarantee that the string is not oob.
if (length > 0) {
tracer.Bytes(decoder->pc(), length);
tracer.Description(name);
tracer.Description(": ");
tracer.Description(reinterpret_cast<const char*>(decoder->pc()), length);
tracer.NextLine();
decoder->consume_bytes(length, name);
if (decoder->ok()) {
switch (grammar) {
@ -103,12 +132,23 @@ inline WireBytesRef consume_string(Decoder* decoder,
return {offset, decoder->failed() ? 0 : length};
}
inline WireBytesRef consume_utf8_string(Decoder* decoder, const char* name) {
return consume_string(decoder, unibrow::Utf8Variant::kUtf8, name);
// Convenience overload for callers that do not trace: forwards to the traced
// {consume_string} with a throwaway {NoTracer}.
inline WireBytesRef consume_string(Decoder* decoder,
                                   unibrow::Utf8Variant grammar,
                                   const char* name) {
  NoTracer discard_trace;
  WireBytesRef ref = consume_string(decoder, grammar, name, discard_trace);
  return ref;
}
inline SectionCode IdentifyUnknownSectionInternal(Decoder* decoder) {
WireBytesRef string = consume_utf8_string(decoder, "section name");
// Reads a length-prefixed string via {consume_string}, validating it with the
// {kUtf8} grammar and reporting it to {tracer}.
template <class Tracer>
inline WireBytesRef consume_utf8_string(Decoder* decoder, const char* name,
                                        Tracer& tracer) {
  constexpr unibrow::Utf8Variant kGrammar = unibrow::Utf8Variant::kUtf8;
  return consume_string(decoder, kGrammar, name, tracer);
}
template <class Tracer>
inline SectionCode IdentifyUnknownSectionInternal(Decoder* decoder,
Tracer& tracer) {
WireBytesRef string = consume_utf8_string(decoder, "section name", tracer);
if (decoder->failed()) {
return kUnknownSectionCode;
}
@ -143,10 +183,12 @@ inline SectionCode IdentifyUnknownSectionInternal(Decoder* decoder) {
// An iterator over the sections in a wasm binary module.
// Automatically skips all unknown sections.
template <class Tracer>
class WasmSectionIterator {
public:
explicit WasmSectionIterator(Decoder* decoder)
explicit WasmSectionIterator(Decoder* decoder, Tracer& tracer)
: decoder_(decoder),
tracer_(tracer),
section_code_(kUnknownSectionCode),
section_start_(decoder->pc()),
section_end_(decoder->pc()) {
@ -195,6 +237,7 @@ class WasmSectionIterator {
private:
Decoder* decoder_;
Tracer& tracer_;
SectionCode section_code_;
const byte* section_start_;
const byte* payload_start_;
@ -208,9 +251,14 @@ class WasmSectionIterator {
return;
}
section_start_ = decoder_->pc();
uint8_t section_code = decoder_->consume_u8("section code");
tracer_.NextLine(); // Empty line before next section.
uint8_t section_code = decoder_->consume_u8("section kind: ", tracer_);
tracer_.Description(SectionName(static_cast<SectionCode>(section_code)));
tracer_.NextLine();
// Read and check the section size.
uint32_t section_length = decoder_->consume_u32v("section length");
uint32_t section_length = decoder_->consume_u32v("section length", tracer_);
tracer_.Description(section_length);
tracer_.NextLine();
payload_start_ = decoder_->pc();
if (decoder_->checkAvailable(section_length)) {
@ -229,7 +277,7 @@ class WasmSectionIterator {
// beyond the end of the section.
const byte* module_end = decoder_->end();
decoder_->set_end(section_end_);
section_code = IdentifyUnknownSectionInternal(decoder_);
section_code = IdentifyUnknownSectionInternal(decoder_, tracer_);
if (decoder_->ok()) decoder_->set_end(module_end);
// As a side effect, the above function will forward the decoder to after
// the identifier string.
@ -242,9 +290,9 @@ class WasmSectionIterator {
: static_cast<SectionCode>(section_code);
if (section_code_ == kUnknownSectionCode && section_end_ > decoder_->pc()) {
// skip to the end of the unknown section.
// Skip to the end of the unknown section.
uint32_t remaining = static_cast<uint32_t>(section_end_ - decoder_->pc());
decoder_->consume_bytes(remaining, "section payload");
decoder_->consume_bytes(remaining, "section payload", tracer_);
}
}
};
@ -320,7 +368,8 @@ class ModuleDecoderTemplate : public Decoder {
Reset(bytes, offset);
const byte* pos = pc_;
uint32_t magic_word = consume_u32("wasm magic");
uint32_t magic_word = consume_u32("wasm magic", tracer_);
tracer_.NextLine();
#define BYTES(x) (x & 0xFF), (x >> 8) & 0xFF, (x >> 16) & 0xFF, (x >> 24) & 0xFF
if (magic_word != kWasmMagic) {
errorf(pos,
@ -331,7 +380,8 @@ class ModuleDecoderTemplate : public Decoder {
pos = pc_;
{
uint32_t magic_version = consume_u32("wasm version");
uint32_t magic_version = consume_u32("wasm version", tracer_);
tracer_.NextLine();
if (magic_version != kWasmVersion) {
errorf(pos,
"expected version %02x %02x %02x %02x, "
@ -537,9 +587,24 @@ class ModuleDecoderTemplate : public Decoder {
}
}
// Maps a type-definition kind byte to a short printable name for the
// annotated hex dump. Any byte that is not one of the six kinds listed here
// yields "unknown".
static constexpr const char* TypeKindName(uint8_t kind) {
  switch (kind) {
    case kWasmFunctionTypeCode:
      return "func";
    case kWasmStructTypeCode:
      return "struct";
    case kWasmArrayTypeCode:
      return "array";
    case kWasmFunctionNominalCode:
      return "function-nominal";
    case kWasmStructNominalCode:
      return "struct-nominal";
    case kWasmArrayNominalCode:
      return "array-nominal";
    default:
      break;
  }
  return "unknown";
}
TypeDefinition consume_base_type_definition() {
DCHECK(enabled_features_.has_gc());
uint8_t kind = consume_u8("type kind");
uint8_t kind = consume_u8(" kind: ", tracer_);
tracer_.Description(TypeKindName(kind));
switch (kind) {
case kWasmFunctionTypeCode: {
const FunctionSig* sig = consume_sig(module_->signature_zone.get());
@ -556,10 +621,12 @@ class ModuleDecoderTemplate : public Decoder {
case kWasmFunctionNominalCode:
case kWasmArrayNominalCode:
case kWasmStructNominalCode:
tracer_.NextLine();
errorf(pc() - 1,
"mixing nominal and isorecursive types is not allowed");
return {};
default:
tracer_.NextLine();
errorf(pc() - 1, "unknown type form: %d", kind);
return {};
}
@ -577,7 +644,8 @@ class ModuleDecoderTemplate : public Decoder {
TypeDefinition consume_nominal_type_definition() {
DCHECK(enabled_features_.has_gc());
size_t num_types = module_->types.size();
uint8_t kind = consume_u8("type kind");
uint8_t kind = consume_u8(" kind: ", tracer_);
tracer_.Description(TypeKindName(kind));
switch (kind) {
case kWasmFunctionNominalCode: {
const FunctionSig* sig = consume_sig(module_->signature_zone.get());
@ -623,10 +691,12 @@ class ModuleDecoderTemplate : public Decoder {
case kWasmStructTypeCode:
case kWasmSubtypeCode:
case kWasmRecursiveTypeGroupCode:
tracer_.NextLine();
errorf(pc() - 1,
"mixing nominal and isorecursive types is not allowed");
return {};
default:
tracer_.NextLine();
errorf(pc() - 1, "unknown type form: %d", kind);
return {};
}
@ -636,12 +706,17 @@ class ModuleDecoderTemplate : public Decoder {
DCHECK(enabled_features_.has_gc());
uint8_t kind = read_u8<Decoder::kFullValidation>(pc(), "type kind");
if (kind == kWasmSubtypeCode) {
consume_bytes(1, "subtype definition");
consume_bytes(1, " subtype, ", tracer_);
constexpr uint32_t kMaximumSupertypes = 1;
uint32_t supertype_count =
consume_count("supertype count", kMaximumSupertypes);
uint32_t supertype =
supertype_count == 1 ? consume_u32v("supertype") : kNoSuperType;
uint32_t supertype = supertype_count == 1
? consume_u32v("supertype", tracer_)
: kNoSuperType;
if (supertype_count == 1) {
tracer_.Description(supertype);
tracer_.NextLine();
}
if (!check_supertype(supertype)) return {};
TypeDefinition type = consume_base_type_definition();
type.supertype = supertype;
@ -662,10 +737,14 @@ class ModuleDecoderTemplate : public Decoder {
TRACE("DecodeSignature[%d] module+%d\n", i,
static_cast<int>(pc_ - start_));
uint8_t opcode = read_u8<kFullValidation>(pc(), "signature definition");
tracer_.Bytes(pc_, 1);
tracer_.TypeOffset(pc_offset());
tracer_.Description(" kind: ");
tracer_.Description(TypeKindName(opcode));
tracer_.NextLine();
switch (opcode) {
case kWasmFunctionTypeCode: {
tracer_.TypeOffset(pc_offset());
consume_bytes(1);
consume_bytes(1, "function");
const FunctionSig* sig = consume_sig(module_->signature_zone.get());
if (!ok()) break;
module_->add_signature(sig, kNoSuperType);
@ -721,18 +800,19 @@ class ModuleDecoderTemplate : public Decoder {
static_cast<int>(pc_ - start_));
uint8_t kind = read_u8<Decoder::kFullValidation>(pc(), "type kind");
if (kind == kWasmRecursiveTypeGroupCode) {
consume_bytes(1, "rec. group definition");
consume_bytes(1, "rec. group definition", tracer_);
tracer_.NextLine();
uint32_t group_size =
consume_count("recursive group size", kV8MaxWasmTypes);
if (module_->types.size() + group_size > kV8MaxWasmTypes) {
errorf(pc(), "Type definition count exeeds maximum %zu",
errorf(pc(), "Type definition count exceeds maximum %zu",
kV8MaxWasmTypes);
return;
}
// Reserve space for the current recursive group, so we are
// allowed to reference its elements.
module_->types.reserve(module_->types.size() + group_size);
for (uint32_t i = 0; i < group_size; i++) {
for (uint32_t j = 0; j < group_size; j++) {
tracer_.TypeOffset(pc_offset());
TypeDefinition type = consume_subtype_definition();
if (ok()) module_->add_type(type);
@ -796,10 +876,11 @@ class ModuleDecoderTemplate : public Decoder {
});
WasmImport* import = &module_->import_table.back();
const byte* pos = pc_;
import->module_name = consume_utf8_string(this, "module name");
import->field_name = consume_utf8_string(this, "field name");
import->module_name = consume_utf8_string(this, "module name", tracer_);
import->field_name = consume_utf8_string(this, "field name", tracer_);
import->kind =
static_cast<ImportExportKindCode>(consume_u8("import kind"));
static_cast<ImportExportKindCode>(consume_u8("kind: ", tracer_));
tracer_.Description(ExternalKindName(import->kind));
switch (import->kind) {
case kExternalFunction: {
// ===== Imported function ===========================================
@ -861,6 +942,7 @@ class ModuleDecoderTemplate : public Decoder {
if (global->mutability) {
module_->num_imported_mutable_globals++;
}
tracer_.NextLine();
break;
}
case kExternalTag: {
@ -907,6 +989,7 @@ class ModuleDecoderTemplate : public Decoder {
false, // exported
false}); // declared
WasmFunction* function = &module_->functions.back();
tracer_.FunctionName(module_->num_imported_functions + i);
function->sig_index = consume_sig_index(module_.get(), &function->sig);
if (!ok()) return;
}
@ -979,6 +1062,9 @@ class ModuleDecoderTemplate : public Decoder {
for (uint32_t i = 0; ok() && i < export_table_count; ++i) {
TRACE("DecodeExportTable[%d] module+%d\n", i,
static_cast<int>(pc_ - start_));
tracer_.Description("export #");
tracer_.Description(i);
tracer_.NextLine();
module_->export_table.push_back({
{0, 0}, // name
@ -987,15 +1073,17 @@ class ModuleDecoderTemplate : public Decoder {
});
WasmExport* exp = &module_->export_table.back();
exp->name = consume_utf8_string(this, "field name");
exp->name = consume_utf8_string(this, "field name", tracer_);
const byte* pos = pc();
exp->kind = static_cast<ImportExportKindCode>(consume_u8("export kind"));
exp->kind =
static_cast<ImportExportKindCode>(consume_u8("kind: ", tracer_));
tracer_.Description(ExternalKindName(exp->kind));
tracer_.Description(" ");
switch (exp->kind) {
case kExternalFunction: {
WasmFunction* func = nullptr;
exp->index =
consume_func_index(module_.get(), &func, "export function index");
exp->index = consume_func_index(module_.get(), &func);
if (failed()) break;
DCHECK_NOT_NULL(func);
@ -1013,7 +1101,7 @@ class ModuleDecoderTemplate : public Decoder {
break;
}
case kExternalMemory: {
uint32_t index = consume_u32v("memory index");
uint32_t index = consume_u32v("memory index", tracer_);
// TODO(titzer): This should become more regular
// once we support multiple memories.
if (!module_->has_memory || index != 0) {
@ -1043,6 +1131,7 @@ class ModuleDecoderTemplate : public Decoder {
errorf(pos, "invalid export kind 0x%02x", exp->kind);
break;
}
tracer_.NextLine();
}
// Check for duplicate exports (except for asm.js).
if (ok() && origin_ == kWasmOrigin && module_->export_table.size() > 1) {
@ -1079,8 +1168,8 @@ class ModuleDecoderTemplate : public Decoder {
tracer_.StartOffset(pc_offset());
WasmFunction* func;
const byte* pos = pc_;
module_->start_function_index =
consume_func_index(module_.get(), &func, "start function index");
module_->start_function_index = consume_func_index(module_.get(), &func);
tracer_.NextLine();
if (func &&
(func->sig->parameter_count() > 0 || func->sig->return_count() > 0)) {
error(pos, "invalid start function: non-zero parameter or return count");
@ -1088,12 +1177,13 @@ class ModuleDecoderTemplate : public Decoder {
}
void DecodeElementSection() {
uint32_t element_count =
consume_count("element count", FLAG_wasm_max_table_size);
uint32_t segment_count =
consume_count("segment count", FLAG_wasm_max_table_size);
for (uint32_t i = 0; i < element_count; ++i) {
for (uint32_t i = 0; i < segment_count; ++i) {
tracer_.ElementOffset(pc_offset());
WasmElemSegment segment = consume_element_segment_header();
tracer_.NextLineIfNonEmpty();
if (failed()) return;
DCHECK_NE(segment.type, kWasmBottom);
@ -1118,15 +1208,22 @@ class ModuleDecoderTemplate : public Decoder {
// function compilation.
CalculateGlobalOffsets(module_.get());
uint32_t code_section_start = pc_offset();
uint32_t functions_count = consume_u32v("functions count");
uint32_t functions_count = consume_u32v("functions count", tracer_);
tracer_.Description(functions_count);
tracer_.NextLine();
CheckFunctionsCount(functions_count, code_section_start);
auto inst_traces_it = this->inst_traces_.begin();
std::vector<std::pair<uint32_t, uint32_t>> inst_traces;
for (uint32_t i = 0; ok() && i < functions_count; ++i) {
tracer_.Description("function #");
tracer_.FunctionName(module_->num_imported_functions + i);
tracer_.NextLine();
const byte* pos = pc();
uint32_t size = consume_u32v("body size");
uint32_t size = consume_u32v("body size", tracer_);
tracer_.Description(size);
tracer_.NextLine();
if (size > kV8MaxWasmFunctionSize) {
errorf(pos, "size %u > maximum function size %zu", size,
kV8MaxWasmFunctionSize);
@ -1183,6 +1280,7 @@ class ModuleDecoderTemplate : public Decoder {
WasmFunction* function =
&module_->functions[index + module_->num_imported_functions];
function->code = {offset, length};
tracer_.FunctionBody(function, pc_ - (pc_offset() - offset));
if (verify_functions) {
ModuleWireBytes bytes(module_start_, module_end_);
VerifyFunctionBody(module_->signature_zone->allocator(),
@ -1230,7 +1328,9 @@ class ModuleDecoderTemplate : public Decoder {
}
}
uint32_t source_length = consume_u32v("source size");
uint32_t source_length = consume_u32v("source size", tracer_);
tracer_.Description(source_length);
tracer_.NextLine();
uint32_t source_offset = pc_offset();
if (is_active) {
@ -1241,6 +1341,9 @@ class ModuleDecoderTemplate : public Decoder {
WasmDataSegment* segment = &module_->data_segments.back();
tracer_.Bytes(pc_, source_length);
tracer_.Description("segment data");
tracer_.NextLine();
consume_bytes(source_length, "segment data");
if (failed()) break;
@ -1249,6 +1352,8 @@ class ModuleDecoderTemplate : public Decoder {
}
void DecodeNameSection() {
tracer_.NameSection(pc_, end_,
buffer_offset_ + static_cast<uint32_t>(pc_ - start_));
// TODO(titzer): find a way to report name errors as warnings.
// Ignore all but the first occurrence of name section.
if (!has_seen_unordered_section(kNameSectionCode)) {
@ -1268,9 +1373,11 @@ class ModuleDecoderTemplate : public Decoder {
// Decode module name, ignore the rest.
// Function and local names will be decoded when needed.
NoTracer tracing_already_done;
if (name_type == NameSectionKindCode::kModuleCode) {
WireBytesRef name = consume_string(
&inner, unibrow::Utf8Variant::kLossyUtf8, "module name");
WireBytesRef name =
consume_string(&inner, unibrow::Utf8Variant::kLossyUtf8,
"module name", tracing_already_done);
if (inner.ok() && validate_utf8(&inner, name)) {
module_->name = name;
}
@ -1285,7 +1392,8 @@ class ModuleDecoderTemplate : public Decoder {
void DecodeSourceMappingURLSection() {
Decoder inner(start_, pc_, end_, buffer_offset_);
WireBytesRef url = wasm::consume_utf8_string(&inner, "module name");
WireBytesRef url =
wasm::consume_utf8_string(&inner, "module name", tracer_);
if (inner.ok() &&
module_->debug_symbols.type != WasmDebugSymbols::Type::SourceMap) {
module_->debug_symbols = {WasmDebugSymbols::Type::SourceMap, url};
@ -1297,7 +1405,7 @@ class ModuleDecoderTemplate : public Decoder {
void DecodeExternalDebugInfoSection() {
Decoder inner(start_, pc_, end_, buffer_offset_);
WireBytesRef url =
wasm::consume_utf8_string(&inner, "external symbol file");
wasm::consume_utf8_string(&inner, "external symbol file", tracer_);
// If there is an explicit source map, prefer it over DWARF info.
if (inner.ok() &&
module_->debug_symbols.type != WasmDebugSymbols::Type::SourceMap) {
@ -1547,6 +1655,7 @@ class ModuleDecoderTemplate : public Decoder {
// Decodes the data count section: reads the declared number of data segments
// (capped by {consume_count} at {kV8MaxWasmDataSegments}) and stores it in the
// module.
void DecodeDataCountSection() {
  const uint32_t declared_segments =
      consume_count("data segments count", kV8MaxWasmDataSegments);
  module_->num_declared_data_segments = declared_segments;
  // {consume_count} can leave the dump line open (it does so for a count of
  // 1); make sure it is terminated.
  tracer_.NextLineIfNonEmpty();
}
void DecodeTagSection() {
@ -1575,7 +1684,7 @@ class ModuleDecoderTemplate : public Decoder {
static_cast<int>(pc_ - start_));
// TODO(12868): Throw if the string's utf-16 length > String::kMaxLength.
WireBytesRef pos = wasm::consume_string(this, unibrow::Utf8Variant::kWtf8,
"string literal");
"string literal", tracer_);
module_->stringref_literals.emplace_back(pos);
}
}
@ -1634,7 +1743,7 @@ class ModuleDecoderTemplate : public Decoder {
offset += 8;
Decoder decoder(start_ + offset, end_, offset);
WasmSectionIterator section_iter(&decoder);
WasmSectionIterator section_iter(&decoder, tracer_);
while (ok()) {
// Shift the offset by the section header length
@ -1824,6 +1933,7 @@ class ModuleDecoderTemplate : public Decoder {
uint32_t consume_sig_index(WasmModule* module, const FunctionSig** sig) {
const byte* pos = pc_;
uint32_t sig_index = consume_u32v("signature index");
tracer_.Bytes(pos, static_cast<uint32_t>(pc_ - pos));
if (!module->has_signature(sig_index)) {
errorf(pos, "signature index %u out of bounds (%d signatures)", sig_index,
static_cast<int>(module->types.size()));
@ -1831,6 +1941,8 @@ class ModuleDecoderTemplate : public Decoder {
return 0;
}
*sig = module->signature(sig_index);
tracer_.Description(*sig);
tracer_.NextLine();
return sig_index;
}
@ -1847,7 +1959,13 @@ class ModuleDecoderTemplate : public Decoder {
uint32_t consume_count(const char* name, size_t maximum) {
const byte* p = pc_;
uint32_t count = consume_u32v(name);
uint32_t count = consume_u32v(name, tracer_);
tracer_.Description(count);
if (count == 1) {
tracer_.Description(": ");
} else {
tracer_.NextLine();
}
if (count > maximum) {
errorf(p, "%s of %u exceeds internal limit of %zu", name, count, maximum);
return static_cast<uint32_t>(maximum);
@ -1855,29 +1973,29 @@ class ModuleDecoderTemplate : public Decoder {
return count;
}
uint32_t consume_func_index(WasmModule* module, WasmFunction** func,
const char* name) {
return consume_index(name, &module->functions, func);
uint32_t consume_func_index(WasmModule* module, WasmFunction** func) {
return consume_index("function", &module->functions, func);
}
uint32_t consume_global_index(WasmModule* module, WasmGlobal** global) {
return consume_index("global index", &module->globals, global);
return consume_index("global", &module->globals, global);
}
uint32_t consume_table_index(WasmModule* module, WasmTable** table) {
return consume_index("table index", &module->tables, table);
return consume_index("table", &module->tables, table);
}
uint32_t consume_tag_index(WasmModule* module, WasmTag** tag) {
return consume_index("tag index", &module->tags, tag);
return consume_index("tag", &module->tags, tag);
}
template <typename T>
uint32_t consume_index(const char* name, std::vector<T>* vector, T** ptr) {
const byte* pos = pc_;
uint32_t index = consume_u32v(name);
uint32_t index = consume_u32v("index:", tracer_);
tracer_.Description(index);
if (index >= vector->size()) {
errorf(pos, "%s %u out of bounds (%d entr%s)", name, index,
errorf(pos, "%s index %u out of bounds (%d entr%s)", name, index,
static_cast<int>(vector->size()),
vector->size() == 1 ? "y" : "ies");
*ptr = nullptr;
@ -1888,7 +2006,10 @@ class ModuleDecoderTemplate : public Decoder {
}
uint8_t validate_table_flags(const char* name) {
tracer_.Bytes(pc_, 1);
uint8_t flags = consume_u8("table limits flags");
tracer_.Description(flags == kNoMaximum ? " no maximum" : " with maximum");
tracer_.NextLine();
static_assert(kNoMaximum < kWithMaximum);
if (V8_UNLIKELY(flags > kWithMaximum)) {
errorf(pc() - 1, "invalid %s limits flags", name);
@ -1897,6 +2018,7 @@ class ModuleDecoderTemplate : public Decoder {
}
uint8_t validate_memory_flags(bool* has_shared_memory, bool* is_memory64) {
tracer_.Bytes(pc_, 1);
uint8_t flags = consume_u8("memory limits flags");
*has_shared_memory = false;
switch (flags) {
@ -1933,6 +2055,10 @@ class ModuleDecoderTemplate : public Decoder {
errorf(pc() - 1, "invalid memory limits flags 0x%x", flags);
break;
}
if (*has_shared_memory) tracer_.Description(" shared");
if (*is_memory64) tracer_.Description(" mem64");
tracer_.Description((flags & 1) ? " with maximum" : " no maximum");
tracer_.NextLine();
return flags;
}
@ -1945,8 +2071,8 @@ class ModuleDecoderTemplate : public Decoder {
// integer. All V8 limits are still within uint32_t range though.
const bool is_memory64 =
flags == kMemory64NoMaximum || flags == kMemory64WithMaximum;
uint64_t initial_64 = is_memory64 ? consume_u64v("initial size")
: consume_u32v("initial size");
uint64_t initial_64 = is_memory64 ? consume_u64v("initial size", tracer_)
: consume_u32v("initial size", tracer_);
if (initial_64 > max_initial) {
errorf(pos,
"initial %s size (%" PRIu64
@ -1954,11 +2080,13 @@ class ModuleDecoderTemplate : public Decoder {
name, initial_64, units, max_initial);
}
*initial = static_cast<uint32_t>(initial_64);
tracer_.Description(*initial);
tracer_.NextLine();
if (flags & 1) {
*has_max = true;
pos = pc();
uint64_t maximum_64 = is_memory64 ? consume_u64v("maximum size")
: consume_u32v("maximum size");
uint64_t maximum_64 = is_memory64 ? consume_u64v("maximum size", tracer_)
: consume_u32v("maximum size", tracer_);
if (maximum_64 > max_maximum) {
errorf(pos,
"maximum %s size (%" PRIu64
@ -1971,6 +2099,8 @@ class ModuleDecoderTemplate : public Decoder {
name, maximum_64, units, *initial, units);
}
*maximum = static_cast<uint32_t>(maximum_64);
tracer_.Description(*maximum);
tracer_.NextLine();
} else {
*has_max = false;
*maximum = max_initial;
@ -2000,6 +2130,7 @@ class ModuleDecoderTemplate : public Decoder {
return {}; \
}
tracer_.NextLineIfNonEmpty();
// To avoid initializing a {WasmFullDecoder} for the most common
// expressions, we replicate their decoding and validation here. The
// manually handled cases correspond to {ConstantExpression}'s kinds.
@ -2017,6 +2148,7 @@ class ModuleDecoderTemplate : public Decoder {
if (V8_UNLIKELY(failed())) return {};
if (V8_LIKELY(lookahead(1 + length, kExprEnd))) {
TYPE_CHECK(kWasmI32)
tracer_.InitializerExpression(pc_, pc_ + length + 2, kWasmI32);
consume_bytes(length + 2);
return ConstantExpression::I32Const(value);
}
@ -2037,6 +2169,7 @@ class ModuleDecoderTemplate : public Decoder {
: kWasmFuncRef;
TYPE_CHECK(type)
module_->functions[index].declared = true;
tracer_.InitializerExpression(pc_, pc_ + length + 2, type);
consume_bytes(length + 2);
return ConstantExpression::RefFunc(index);
}
@ -2048,6 +2181,8 @@ class ModuleDecoderTemplate : public Decoder {
if (V8_UNLIKELY(failed())) return {};
if (V8_LIKELY(lookahead(1 + length, kExprEnd))) {
TYPE_CHECK(ValueType::RefNull(type))
tracer_.InitializerExpression(pc_, pc_ + length + 2,
ValueType::RefNull(type));
consume_bytes(length + 2);
return ConstantExpression::RefNull(type.representation());
}
@ -2070,6 +2205,7 @@ class ModuleDecoderTemplate : public Decoder {
decoder.DecodeFunctionBody();
tracer_.InitializerExpression(pc_, decoder.end(), expected);
this->pc_ = decoder.end();
if (decoder.failed()) {
@ -2088,7 +2224,11 @@ class ModuleDecoderTemplate : public Decoder {
// Read a mutability flag
bool consume_mutability() {
tracer_.Bytes(pc_, 1);
byte val = consume_u8("mutability");
tracer_.Description(val == 0 ? " immutable"
: val == 1 ? " mutable"
: " invalid");
if (val > 1) error(pc_ - 1, "invalid mutability");
return val != 0;
}
@ -2096,25 +2236,32 @@ class ModuleDecoderTemplate : public Decoder {
ValueType consume_value_type() {
uint32_t type_length;
ValueType result = value_type_reader::read_value_type<kFullValidation>(
this, this->pc(), &type_length, module_.get(),
this, pc_, &type_length, module_.get(),
origin_ == kWasmOrigin ? enabled_features_ : WasmFeatures::None());
tracer_.Bytes(pc_, type_length);
tracer_.Description(result);
consume_bytes(type_length, "value type");
return result;
}
HeapType consume_super_type() {
return value_type_reader::consume_heap_type(this, module_.get(),
enabled_features_);
uint32_t type_length;
HeapType result = value_type_reader::read_heap_type<kFullValidation>(
this, pc_, &type_length, module_.get(), enabled_features_);
tracer_.Bytes(pc_, type_length);
tracer_.Description(result);
consume_bytes(type_length, "heap type");
return result;
}
ValueType consume_storage_type() {
uint8_t opcode = read_u8<kFullValidation>(this->pc());
switch (opcode) {
case kI8Code:
consume_bytes(1, "i8");
consume_bytes(1, " i8", tracer_);
return kWasmI8;
case kI16Code:
consume_bytes(1, "i16");
consume_bytes(1, " i16", tracer_);
return kWasmI16;
default:
// It is not a packed type, so it has to be a value type.
@ -2133,6 +2280,7 @@ class ModuleDecoderTemplate : public Decoder {
}
const FunctionSig* consume_sig(Zone* zone) {
tracer_.NextLine();
// Parse parameter types.
uint32_t param_count =
consume_count("param count", kV8MaxWasmFunctionParams);
@ -2140,16 +2288,20 @@ class ModuleDecoderTemplate : public Decoder {
std::vector<ValueType> params;
for (uint32_t i = 0; ok() && i < param_count; ++i) {
params.push_back(consume_value_type());
tracer_.NextLineIfFull();
}
std::vector<ValueType> returns;
tracer_.NextLineIfNonEmpty();
// Parse return types.
std::vector<ValueType> returns;
uint32_t return_count =
consume_count("return count", kV8MaxWasmFunctionReturns);
if (failed()) return nullptr;
for (uint32_t i = 0; ok() && i < return_count; ++i) {
returns.push_back(consume_value_type());
tracer_.NextLineIfFull();
}
tracer_.NextLineIfNonEmpty();
if (failed()) return nullptr;
// FunctionSig stores the return types first.
@ -2162,13 +2314,15 @@ class ModuleDecoderTemplate : public Decoder {
}
const StructType* consume_struct(Zone* zone) {
uint32_t field_count = consume_count("field count", kV8MaxWasmStructFields);
uint32_t field_count =
consume_count(", field count", kV8MaxWasmStructFields);
if (failed()) return nullptr;
ValueType* fields = zone->NewArray<ValueType>(field_count);
bool* mutabilities = zone->NewArray<bool>(field_count);
for (uint32_t i = 0; ok() && i < field_count; ++i) {
fields[i] = consume_storage_type();
mutabilities[i] = consume_mutability();
tracer_.NextLine();
}
if (failed()) return nullptr;
uint32_t* offsets = zone->NewArray<uint32_t>(field_count);
@ -2178,6 +2332,7 @@ class ModuleDecoderTemplate : public Decoder {
// Decodes an array type definition: a storage type followed by a mutability
// flag. Ends the tracer's current output line once both have been read.
// Returns nullptr if decoding failed, otherwise an ArrayType allocated in
// {zone}.
const ArrayType* consume_array(Zone* zone) {
  ValueType elem = consume_storage_type();
  bool is_mutable = consume_mutability();
  tracer_.NextLine();
  return failed() ? nullptr : zone->New<ArrayType>(elem, is_mutable);
}
@ -2186,6 +2341,7 @@ class ModuleDecoderTemplate : public Decoder {
uint32_t consume_exception_attribute() {
const byte* pos = pc_;
uint32_t attribute = consume_u32v("exception attribute");
tracer_.Bytes(pos, static_cast<uint32_t>(pc_ - pos));
if (attribute != kExceptionAttribute) {
errorf(pos, "exception attribute %u not supported", attribute);
return 0;
@ -2210,7 +2366,7 @@ class ModuleDecoderTemplate : public Decoder {
kHasTableIndexOrIsDeclarativeMask |
kExpressionsAsElementsMask;
uint32_t flag = consume_u32v("flag");
uint32_t flag = consume_u32v("flag: ", tracer_);
if ((flag & kFullMask) != flag) {
errorf(pos, "illegal flag value %u. Must be between 0 and 7", flag);
return {};
@ -2222,6 +2378,11 @@ class ModuleDecoderTemplate : public Decoder {
: WasmElemSegment::kStatusPassive
: WasmElemSegment::kStatusActive;
const bool is_active = status == WasmElemSegment::kStatusActive;
// clang-format off
tracer_.Description(status == WasmElemSegment::kStatusActive ? "active" :
status == WasmElemSegment::kStatusPassive ? "passive," :
"declarative,");
// clang-format on
WasmElemSegment::ElementType element_type =
flag & kExpressionsAsElementsMask
@ -2230,7 +2391,11 @@ class ModuleDecoderTemplate : public Decoder {
const bool has_table_index =
is_active && (flag & kHasTableIndexOrIsDeclarativeMask);
uint32_t table_index = has_table_index ? consume_u32v("table index") : 0;
uint32_t table_index = 0;
if (has_table_index) {
table_index = consume_u32v(", table index", tracer_);
tracer_.Description(table_index);
}
if (is_active && table_index >= module_->tables.size()) {
errorf(pos, "out of bounds%s table index %u",
has_table_index ? " implicit" : "", table_index);
@ -2241,6 +2406,8 @@ class ModuleDecoderTemplate : public Decoder {
ConstantExpression offset;
if (is_active) {
tracer_.Description(", offset:");
tracer_.NextLine();
offset = consume_init_expr(module_.get(), kWasmI32);
// Failed to parse offset initializer, return early.
if (failed()) return {};
@ -2251,6 +2418,7 @@ class ModuleDecoderTemplate : public Decoder {
is_active && !(flag & kHasTableIndexOrIsDeclarativeMask);
ValueType type;
if (element_type == WasmElemSegment::kExpressionElements) {
if (!backwards_compatible_mode) tracer_.Description(" element type:");
type =
backwards_compatible_mode ? kWasmFuncRef : consume_reference_type();
if (is_active && !IsSubtypeOf(type, table_type, this->module_.get())) {
@ -2264,7 +2432,7 @@ class ModuleDecoderTemplate : public Decoder {
if (!backwards_compatible_mode) {
// We have to check that there is an element kind of type Function. All
// other element kinds are not valid yet.
uint8_t val = consume_u8("element kind");
uint8_t val = consume_u8(" element type: function", tracer_);
if (static_cast<ImportExportKindCode>(val) != kExternalFunction) {
errorf(pos, "illegal element kind 0x%x. Must be 0x%x", val,
kExternalFunction);
@ -2302,7 +2470,13 @@ class ModuleDecoderTemplate : public Decoder {
void consume_data_segment_header(bool* is_active, uint32_t* index,
ConstantExpression* offset) {
const byte* pos = pc();
uint32_t flag = consume_u32v("flag");
uint32_t flag = consume_u32v("flag: ", tracer_);
tracer_.Description(flag == SegmentFlags::kActiveNoIndex ? "active no index"
: flag == SegmentFlags::kPassive ? "passive"
: flag == SegmentFlags::kActiveWithIndex
? "active with index"
: "unknown");
tracer_.NextLine();
// Some flag values are only valid for specific proposals.
if (flag != SegmentFlags::kActiveNoIndex &&
@ -2326,7 +2500,8 @@ class ModuleDecoderTemplate : public Decoder {
}
if (flag == SegmentFlags::kActiveWithIndex) {
*is_active = true;
*index = consume_u32v("memory index");
*index = consume_u32v("memory index", tracer_);
tracer_.Description(*index);
*offset = consume_init_expr(module_.get(), expected_type);
}
}
@ -2334,8 +2509,8 @@ class ModuleDecoderTemplate : public Decoder {
uint32_t consume_element_func_index(ValueType expected) {
WasmFunction* func = nullptr;
const byte* initial_pc = pc();
uint32_t index =
consume_func_index(module_.get(), &func, "element function index");
uint32_t index = consume_func_index(module_.get(), &func);
tracer_.NextLine();
if (failed()) return index;
DCHECK_NOT_NULL(func);
DCHECK_EQ(index, func->func_index);

View File

@ -200,7 +200,8 @@ size_t ModuleDecoder::IdentifyUnknownSection(ModuleDecoder* decoder,
SectionCode* result) {
if (!decoder->ok()) return 0;
decoder->impl_->Reset(bytes, offset);
*result = IdentifyUnknownSectionInternal(decoder->impl_.get());
NoTracer no_tracer;
*result = IdentifyUnknownSectionInternal(decoder->impl_.get(), no_tracer);
return decoder->impl_->pc() - bytes.begin();
}
@ -337,7 +338,8 @@ bool FindNameSection(Decoder* decoder) {
static constexpr int kModuleHeaderSize = 8;
decoder->consume_bytes(kModuleHeaderSize, "module header");
WasmSectionIterator section_iter(decoder);
NoTracer no_tracer;
WasmSectionIterator section_iter(decoder, no_tracer);
while (decoder->ok() && section_iter.more() &&
section_iter.section_code() != kNameSectionCode) {

View File

@ -55,6 +55,10 @@ class StringBuilder {
const char* start() const { return start_; }
const char* cursor() const { return cursor_; }
size_t length() const { return static_cast<size_t>(cursor_ - start_); }
// Discards everything written since {start_}: the cursor moves back to
// {start_} and the discarded bytes are returned to the remaining budget.
void rewind_to_start() {
  // length() is derived from {cursor_}, so sample it before moving the cursor.
  const size_t discarded = length();
  cursor_ = start_;
  remaining_bytes_ += discarded;
}
protected:
enum OnGrowth : bool { kKeepOldChunks, kReplacePreviousChunk };
@ -62,10 +66,6 @@ class StringBuilder {
// Useful for subclasses that divide the text into ranges, e.g. lines.
explicit StringBuilder(OnGrowth on_growth) : on_growth_(on_growth) {}
void start_here() { start_ = cursor_; }
void rewind_to_start() {
remaining_bytes_ += length();
cursor_ = start_;
}
private:
void Grow() {

View File

@ -580,6 +580,23 @@ class OffsetsProvider {
void DataOffset(uint32_t offset) { data_offsets_.push_back(offset); }
// Unused by this tracer:
// Every hook below is deliberately a no-op. This class only records offsets
// (see e.g. DataOffset() above), so it has nothing to do when it is told
// about raw bytes, descriptions, line breaks, initializer expressions,
// function bodies, function names or the name section.
// NOTE(review): presumably these mirror the hooks the module decoder invokes
// on its tracer, so the set has to stay in sync with the decoder -- confirm.
void Bytes(const byte* start, uint32_t count) {}
void Description(const char* desc) {}
void Description(const char* desc, size_t length) {}
void Description(uint32_t number) {}
void Description(ValueType type) {}
void Description(HeapType type) {}
void Description(const FunctionSig* sig) {}
void NextLine() {}
void NextLineIfFull() {}
void NextLineIfNonEmpty() {}
void InitializerExpression(const byte* start, const byte* end,
ValueType expected_type) {}
void FunctionBody(const WasmFunction* func, const byte* start) {}
void FunctionName(uint32_t func_index) {}
void NameSection(const byte* start, const byte* end, uint32_t offset) {}
#define GETTER(name) \
uint32_t name##_offset(uint32_t index) { \
if (!enabled_) return 0; \

View File

@ -85,7 +85,7 @@ inline uint64_t max_mem_bytes() {
return uint64_t{max_mem_pages()} * kWasmPageSize;
}
uint32_t max_table_init_entries();
V8_EXPORT_PRIVATE uint32_t max_table_init_entries();
size_t max_module_size();
} // namespace wasm

View File

@ -713,7 +713,8 @@ struct WasmFunctionName {
const WasmName name_;
};
std::ostream& operator<<(std::ostream& os, const WasmFunctionName& name);
V8_EXPORT_PRIVATE std::ostream& operator<<(std::ostream& os,
const WasmFunctionName& name);
V8_EXPORT_PRIVATE bool IsWasmCodegenAllowed(Isolate* isolate,
Handle<Context> context);

View File

@ -82,7 +82,7 @@ function assertConversionError(bytes, imports, msg) {
]).end().toBuffer(), f_error('invalid local index: 0 @+24'));
assertCompileError(
builder().addStart(0).toBuffer(),
'start function index 0 out of bounds (0 entries) @+10');
'function index 0 out of bounds (0 entries) @+10');
})();
function import_error(index, module, func, msg) {

View File

@ -46,7 +46,7 @@ assertThrows(() => {instantiate(kSig_i_v, [kExprI32Const, 0]);});
assertThrows(
() => builder.instantiate(), WebAssembly.CompileError,
'WebAssembly.Module(): ' +
'start function index 1 out of bounds (1 entry) @+20');
'function index 1 out of bounds (1 entry) @+20');
})();

View File

@ -3344,7 +3344,7 @@ TEST_F(WasmModuleVerifyTest, DeclarativeElementSegmentWithInvalidIndex) {
U32V_1(1)), // func index
// code ------------------------------------------------------------------
ONE_EMPTY_BODY};
EXPECT_FAILURE_WITH_MSG(data, "element function index 1 out of bounds");
EXPECT_FAILURE_WITH_MSG(data, "function index 1 out of bounds");
}
TEST_F(WasmModuleVerifyTest, DataCountSectionCorrectPlacement) {

View File

@ -28,6 +28,12 @@ struct MockStreamingResult {
MockStreamingResult() = default;
};
// Do-nothing tracer for this test. It is passed to the tracer-taking
// Decoder::consume_u32() overload and ignores every notification.
class NoTracer {
 public:
  // Ignores the reported byte range.
  void Bytes(const byte* /*start*/, uint32_t /*count*/) {}
  // Ignores the reported description text.
  void Description(const char* /*desc*/) {}
};
class MockStreamingProcessor : public StreamingProcessor {
public:
explicit MockStreamingProcessor(MockStreamingResult* result)
@ -36,12 +42,13 @@ class MockStreamingProcessor : public StreamingProcessor {
bool ProcessModuleHeader(base::Vector<const uint8_t> bytes,
uint32_t offset) override {
Decoder decoder(bytes.begin(), bytes.end());
uint32_t magic_word = decoder.consume_u32("wasm magic");
NoTracer no_tracer;
uint32_t magic_word = decoder.consume_u32("wasm magic", no_tracer);
if (decoder.failed() || magic_word != kWasmMagic) {
result_->error = WasmError(0, "expected wasm magic");
return false;
}
uint32_t magic_version = decoder.consume_u32("wasm version");
uint32_t magic_version = decoder.consume_u32("wasm version", no_tracer);
if (decoder.failed() || magic_version != kWasmVersion) {
result_->error = WasmError(4, "expected wasm version");
return false;

View File

@ -38,6 +38,9 @@ int PrintHelp(char** argv) {
<< " --single-hexdump FUNC_INDEX\n"
<< " Dump function FUNC_INDEX in annotated hex format\n"
<< " --full-hexdump\n"
<< " Dump full module in annotated hex format\n"
<< "The module name must be a file name.\n";
return 1;
}
@ -189,6 +192,345 @@ class ExtendedFunctionDis : public FunctionBodyDisassembler {
}
};
// A variant of ModuleDisassembler that produces "annotated hex dump" format,
// e.g.:
// 0x01, 0x70, 0x00, // table count 1: funcref no maximum
//
// The class acts as the tracer of its own ModuleDecoderTemplate
// instantiation: PrintModule() runs the decoder over the wire bytes, and the
// decoder calls back into the hooks below, which append hex bytes to {out_}
// and collect the matching comment text in {description_} until the line is
// finished by one of the NextLine*() hooks.
class HexDumpModuleDis {
 public:
  using DumpingModuleDecoder = ModuleDecoderTemplate<HexDumpModuleDis>;

  // {out} receives the dump. {module} and {names} are used to decide which
  // index comments to print and to resolve names; {wire_bytes} is the module
  // to decode; {allocator} backs both the internal zone and the decoder.
  HexDumpModuleDis(MultiLineStringBuilder& out, const WasmModule* module,
                   NamesProvider* names, const ModuleWireBytes wire_bytes,
                   AccountingAllocator* allocator)
      : out_(out),
        module_(module),
        names_(names),
        wire_bytes_(wire_bytes),
        allocator_(allocator),
        zone_(allocator, "disassembler") {
    // Imported tables, globals and tags advance the respective counters, so
    // the "#N" comments for entries defined in the module start after them.
    for (const WasmImport& import : module->import_table) {
      switch (import.kind) {
        // clang-format off
        case kExternalFunction:                       break;
        case kExternalTable:    next_table_index_++;  break;
        case kExternalMemory:                         break;
        case kExternalGlobal:   next_global_index_++; break;
        case kExternalTag:      next_tag_index_++;    break;
          // clang-format on
      }
    }
  }

  // Public entrypoint.
  // Decodes the whole module with this object as tracer and warns on stderr
  // if the hooks did not account for every byte of the input.
  void PrintModule() {
    constexpr bool verify_functions = false;
    DumpingModuleDecoder decoder(WasmFeatures::All(), wire_bytes_.start(),
                                 wire_bytes_.end(), kWasmOrigin, *this);
    // InitializerExpression() and FunctionBody() need the decoder to compute
    // offsets, so publish it for the duration of the decode.
    decoder_ = &decoder;
    decoder.DecodeModule(nullptr, allocator_, verify_functions);
    // {decoder} is a local and dies when this function returns; do not leave
    // a dangling pointer behind in the member.
    decoder_ = nullptr;
    if (total_bytes_ != wire_bytes_.length()) {
      std::cerr << "WARNING: OUTPUT INCOMPLETE. Disassembled " << total_bytes_
                << " out of " << wire_bytes_.length() << " bytes.\n";
      // TODO(jkummerow): Would it be helpful to DCHECK here?
    }
  }

  // Tracer hooks.

  // Reports {count} raw bytes starting at {start}. Short runs are printed
  // right away on the current line; runs longer than kMaxBytesPerLine are
  // only remembered in {queue_} and printed by the next NextLine().
  void Bytes(const byte* start, uint32_t count) {
    if (count > kMaxBytesPerLine) {
      // Only one long run can be pending at a time.
      DCHECK_EQ(queue_, nullptr);
      queue_ = start;
      queue_length_ = count;
      total_bytes_ += count;
      return;
    }
    // First bytes on a fresh line get the line prefix.
    if (line_bytes_ == 0) out_ << " ";
    PrintHexBytes(out_, count, start);
    line_bytes_ += count;
    total_bytes_ += count;
  }

  // The Description() overloads append to the comment of the current line.
  // The numeric and type overloads insert a separating space when the
  // comment is not empty; the string overloads append verbatim.
  void Description(const char* desc) { description_ << desc; }
  void Description(const char* desc, size_t length) {
    description_.write(desc, length);
  }
  void Description(uint32_t number) {
    if (description_.length() != 0) description_ << " ";
    description_ << number;
  }
  void Description(ValueType type) {
    if (description_.length() != 0) description_ << " ";
    names_->PrintValueType(description_, type);
  }
  void Description(HeapType type) {
    if (description_.length() != 0) description_ << " ";
    names_->PrintHeapType(description_, type);
  }
  void Description(const FunctionSig* sig) {
    PrintSignatureOneLine(description_, sig, 0 /* ignored */, names_, false);
  }

  // Appends the function index followed by the function's name.
  void FunctionName(uint32_t func_index) {
    description_ << func_index << " ";
    names_->PrintFunctionName(description_, func_index,
                              NamesProvider::kDevTools);
  }

  // Finishes the line if a long run is pending or the line already holds at
  // least kPadBytes bytes.
  void NextLineIfFull() {
    if (queue_ || line_bytes_ >= kPadBytes) NextLine();
  }
  // Finishes the line if a long run is pending or any byte was printed on it.
  void NextLineIfNonEmpty() {
    if (queue_ || line_bytes_ > 0) NextLine();
  }
  // Finishes the current output line: flushes a pending long byte run (if
  // any), writes the collected description as a trailing comment, and resets
  // the per-line state.
  void NextLine() {
    if (queue_) {
      // Print queued hex bytes first, unless there have also been unqueued
      // bytes.
      if (line_bytes_ > 0) {
        // Keep the queued bytes together on the next line.
        // The unqueued bytes get the description; pad them to kPadBytes.
        for (; line_bytes_ < kPadBytes; line_bytes_++) {
          out_ << " ";
        }
        out_ << " // ";
        out_.write(description_.start(), description_.length());
        out_.NextLine(kDontCareAboutOffsets);
      }
      // Emit full lines while more than kMaxBytesPerLine bytes remain.
      while (queue_length_ > kMaxBytesPerLine) {
        out_ << " ";
        PrintHexBytes(out_, kMaxBytesPerLine, queue_);
        out_.NextLine(kDontCareAboutOffsets);
        queue_length_ -= kMaxBytesPerLine;
        queue_ += kMaxBytesPerLine;
      }
      // Emit the remainder (at most kMaxBytesPerLine bytes).
      if (queue_length_ > 0) {
        out_ << " ";
        PrintHexBytes(out_, queue_length_, queue_);
      }
      // If the description was not already written above, attach it to the
      // end of the queued run: on a line of its own when the last line is
      // longer than kPadBytes, otherwise padded on that same line.
      if (line_bytes_ == 0) {
        if (queue_length_ > kPadBytes) {
          out_.NextLine(kDontCareAboutOffsets);
          out_ << " // ";
        } else {
          for (uint32_t i = queue_length_; i < kPadBytes; i++) {
            out_ << " ";
          }
          out_ << " // ";
        }
        out_.write(description_.start(), description_.length());
      }
      queue_ = nullptr;
    } else {
      // No queued bytes; just write the accumulated description.
      if (description_.length() != 0) {
        if (line_bytes_ == 0) out_ << " ";
        for (; line_bytes_ < kPadBytes; line_bytes_++) {
          out_ << " ";
        }
        out_ << " // ";
        out_.write(description_.start(), description_.length());
      }
    }
    out_.NextLine(kDontCareAboutOffsets);
    line_bytes_ = 0;
    description_.rewind_to_start();
  }

  // We don't care about offsets, but we can use these hooks to provide
  // helpful indexing comments in long lists.
  // Most of them only annotate when the module has more than three entries
  // of the respective kind; imports and globals are always annotated.
  void TypeOffset(uint32_t offset) {
    if (module_->types.size() > 3) {
      description_ << "type #" << next_type_index_ << " ";
      names_->PrintTypeName(description_, next_type_index_);
      next_type_index_++;
    }
  }
  void ImportOffset(uint32_t offset) {
    description_ << "import #" << next_import_index_++;
    NextLine();
  }
  void TableOffset(uint32_t offset) {
    if (module_->tables.size() > 3) {
      description_ << "table #" << next_table_index_++;
    }
  }
  void MemoryOffset(uint32_t offset) {}
  void TagOffset(uint32_t offset) {
    if (module_->tags.size() > 3) {
      description_ << "tag #" << next_tag_index_++ << ":";
    }
  }
  void GlobalOffset(uint32_t offset) {
    description_ << "global #" << next_global_index_++ << ":";
  }
  void StartOffset(uint32_t offset) {}
  void ElementOffset(uint32_t offset) {
    if (module_->elem_segments.size() > 3) {
      description_ << "segment #" << next_segment_index_++;
      NextLine();
    }
  }
  void DataOffset(uint32_t offset) {
    if (module_->data_segments.size() > 3) {
      description_ << "data segment #" << next_data_segment_index_++;
      NextLine();
    }
  }

  // The following two hooks give us an opportunity to call the hex-dumping
  // function body disassembler for initializers and functions.
  // Both must only be called while PrintModule() is running, because they
  // read {decoder_}.
  void InitializerExpression(const byte* start, const byte* end,
                             ValueType expected_type) {
    WasmFeatures detected;
    // The expression is disassembled as a body returning {expected_type}.
    auto sig = FixedSizeSignature<ValueType>::Returns(expected_type);
    uint32_t offset = decoder_->pc_offset();
    ExtendedFunctionDis d(&zone_, module_, 0, &detected, &sig, start, end,
                          offset, names_);
    d.HexdumpConstantExpression(out_);
    total_bytes_ += static_cast<size_t>(end - start);
  }

  void FunctionBody(const WasmFunction* func, const byte* start) {
    const byte* end = start + func->code.length();
    WasmFeatures detected;
    uint32_t offset = static_cast<uint32_t>(start - decoder_->start());
    ExtendedFunctionDis d(&zone_, module_, func->func_index, &detected,
                          func->sig, start, end, offset, names_);
    d.HexDump(out_, FunctionBodyDisassembler::kSkipHeader);
    total_bytes_ += func->code.length();
  }

  // We have to do extra work for the name section here, because the regular
  // decoder mostly just skips over it.
  // Walks the subsections in [start, end): each has a type byte and a payload
  // length, followed by a payload whose layout depends on the type.
  void NameSection(const byte* start, const byte* end, uint32_t offset) {
    Decoder decoder(start, end, offset);
    while (decoder.ok() && decoder.more()) {
      uint8_t name_type = decoder.consume_u8("name type: ", *this);
      Description(NameTypeName(name_type));
      NextLine();
      uint32_t payload_length = decoder.consume_u32v("payload length:", *this);
      Description(payload_length);
      NextLine();
      // Stop if the declared payload does not fit into the section.
      if (!decoder.checkAvailable(payload_length)) break;
      switch (name_type) {
        case kModuleCode:
          consume_string(&decoder, unibrow::Utf8Variant::kLossyUtf8,
                         "module name", *this);
          break;
        // Flat maps: index -> name.
        case kFunctionCode:
        case kTypeCode:
        case kTableCode:
        case kMemoryCode:
        case kGlobalCode:
        case kElementSegmentCode:
        case kDataSegmentCode:
        case kTagCode:
          DumpNameMap(decoder);
          break;
        // Two-level maps: outer index -> (inner index -> name).
        case kLocalCode:
        case kLabelCode:
        case kFieldCode:
          DumpIndirectNameMap(decoder);
          break;
        // Unknown subsection: dump the payload as uninterpreted bytes.
        default:
          Bytes(decoder.pc(), payload_length);
          NextLine();
          decoder.consume_bytes(payload_length);
          break;
      }
    }
  }

  // TODO(jkummerow): Consider using an OnFirstError() override to offer
  // help when decoding fails.

 private:
  // Offset argument passed to every out_.NextLine() call in this class.
  static constexpr uint32_t kDontCareAboutOffsets = 0;
  // Byte runs longer than this are queued and split across several lines.
  static constexpr uint32_t kMaxBytesPerLine = 8;
  // Lines with fewer bytes are padded up to this many before the comment.
  static constexpr uint32_t kPadBytes = 4;

  // Appends {num_bytes} bytes starting at {start} to {out} in hex form,
  // reserving six characters of output per byte.
  void PrintHexBytes(StringBuilder& out, uint32_t num_bytes,
                     const byte* start) {
    char* ptr = out.allocate(num_bytes * 6);
    PrintHexBytesCore(ptr, num_bytes, start);
  }

  // Dumps a name map: a count followed by that many (index, name) pairs.
  // Stops early once {decoder} reports an error.
  void DumpNameMap(Decoder& decoder) {
    uint32_t count = decoder.consume_u32v("names count", *this);
    Description(count);
    NextLine();
    for (uint32_t i = 0; i < count; i++) {
      uint32_t index = decoder.consume_u32v("index", *this);
      Description(index);
      Description(" ");
      consume_string(&decoder, unibrow::Utf8Variant::kLossyUtf8, "name", *this);
      if (!decoder.ok()) break;
    }
  }

  // Dumps an indirect name map: an outer count followed by that many
  // (outer index, inner count, inner (index, name) pairs) groups. Stops early
  // once {decoder} reports an error.
  void DumpIndirectNameMap(Decoder& decoder) {
    uint32_t outer_count = decoder.consume_u32v("outer count", *this);
    Description(outer_count);
    NextLine();
    for (uint32_t i = 0; i < outer_count; i++) {
      uint32_t outer_index = decoder.consume_u32v("outer index", *this);
      Description(outer_index);
      uint32_t inner_count = decoder.consume_u32v(" inner count", *this);
      Description(inner_count);
      NextLine();
      for (uint32_t j = 0; j < inner_count; j++) {
        uint32_t inner_index = decoder.consume_u32v("inner index", *this);
        Description(inner_index);
        Description(" ");
        consume_string(&decoder, unibrow::Utf8Variant::kLossyUtf8, "name",
                       *this);
        if (!decoder.ok()) break;
      }
      if (!decoder.ok()) break;
    }
  }

  // Maps a name subsection type code to the label printed in the dump.
  static constexpr const char* NameTypeName(uint8_t name_type) {
    switch (name_type) {
      // clang-format off
      case kModuleCode:         return "module";
      case kFunctionCode:       return "function";
      case kTypeCode:           return "type";
      case kTableCode:          return "table";
      case kMemoryCode:         return "memory";
      case kGlobalCode:         return "global";
      case kElementSegmentCode: return "element segment";
      case kDataSegmentCode:    return "data segment";
      case kTagCode:            return "tag";
      case kLocalCode:          return "local";
      case kLabelCode:          return "label";
      case kFieldCode:          return "field";
      default:                  return "unknown";
        // clang-format on
    }
  }

  MultiLineStringBuilder& out_;
  const WasmModule* module_;
  NamesProvider* names_;
  const ModuleWireBytes wire_bytes_;
  AccountingAllocator* allocator_;
  Zone zone_;

  // Comment text collected for the line currently being built.
  StringBuilder description_;
  // Pending long byte run (see Bytes()); nullptr when nothing is queued.
  const byte* queue_{nullptr};
  uint32_t queue_length_{0};
  // Number of bytes already printed on the current line.
  uint32_t line_bytes_{0};
  // Running total of bytes accounted for, checked by PrintModule().
  size_t total_bytes_{0};
  // Only non-null while PrintModule() is running.
  DumpingModuleDecoder* decoder_{nullptr};

  // Counters behind the "#N" index comments.
  uint32_t next_type_index_{0};
  uint32_t next_import_index_{0};
  uint32_t next_table_index_{0};
  uint32_t next_global_index_{0};
  uint32_t next_tag_index_{0};
  uint32_t next_segment_index_{0};
  uint32_t next_data_segment_index_{0};
};
////////////////////////////////////////////////////////////////////////////////
class FormatConverter {
@ -257,7 +599,9 @@ class FormatConverter {
// 18 = kMinNameLength + strlen(" section: ").
std::cout << std::setw(18) << std::left << "Module size: ";
std::cout << std::setw(digits) << std::right << module_size << " bytes\n";
for (WasmSectionIterator it(&decoder); it.more(); it.advance(true)) {
NoTracer no_tracer;
for (WasmSectionIterator it(&decoder, no_tracer); it.more();
it.advance(true)) {
const char* name = SectionName(it.section_code());
size_t name_len = strlen(name);
std::cout << SectionName(it.section_code()) << " section: ";
@ -315,6 +659,12 @@ class FormatConverter {
md.PrintModule({0, 2});
}
// Writes an annotated hex dump of the whole loaded module into {out}.
// Must only be called on a converter whose input was loaded successfully.
void HexdumpForModule(MultiLineStringBuilder& out) {
  DCHECK(ok_);
  HexDumpModuleDis dumper(out, module(), names(), wire_bytes_, &allocator_);
  dumper.PrintModule();
}
private:
byte* start() { return raw_bytes_.data(); }
byte* end() { return start() + raw_bytes_.size(); }
@ -343,6 +693,7 @@ enum class Action {
kListFunctions,
kSectionStats,
kFullWat,
kFullHexdump,
kSingleWat,
kSingleHexdump,
};
@ -387,6 +738,14 @@ void WatForModule(const Options& options) {
sb.DumpToStdout();
}
void HexdumpForModule(const Options& options) {
FormatConverter fc(options.filename);
if (!fc.ok()) return;
MultiLineStringBuilder sb;
fc.HexdumpForModule(sb);
sb.DumpToStdout();
}
bool ParseInt(char* s, int* out) {
char* end;
if (s[0] == '\0') return false;
@ -411,6 +770,8 @@ int ParseOptions(int argc, char** argv, Options* options) {
options->action = Action::kSectionStats;
} else if (strcmp(argv[i], "--full-wat") == 0) {
options->action = Action::kFullWat;
} else if (strcmp(argv[i], "--full-hexdump") == 0) {
options->action = Action::kFullHexdump;
} else if (strcmp(argv[i], "--single-wat") == 0) {
options->action = Action::kSingleWat;
if (i == argc - 1 || !ParseInt(argv[++i], &options->func_index)) {
@ -456,6 +817,7 @@ int main(int argc, char** argv) {
case Action::kSingleWat: WatForFunction(options); break;
case Action::kSingleHexdump: HexdumpForFunction(options); break;
case Action::kFullWat: WatForModule(options); break;
case Action::kFullHexdump: HexdumpForModule(options); break;
case Action::kUnset: UNREACHABLE();
// clang-format on
}