[asm.js] Refactor parser identifier memory management.

This unifies the memory management of identifier strings passed between
the scanner, parser and module builder. The following scheme is used:
 - The scanner does not create copies of identifier strings itself, it
   exposes a reference to the current identifier. This reference becomes
   invalid as soon as the scanner advanced.
 - The parser preserves a single copy of each identifier that is stored
   in any data structure. That copy is allocated in the zone, lifetime
   is coupled to that of the zone.
 - The module builder can use all such identifiers by reference, as long
   as its lifetime is also coupled to the same zone.

Note that the module builder still creates redundant copies for some
identifiers (in order to maintain backwards compatibility with the old
AST-based parser). This can be fixed once the "old validator" has been
removed.

R=clemensh@chromium.org
BUG=v8:6127

Change-Id: I8611d162e87730045a6061d08c3fe841daae8a7d
Reviewed-on: https://chromium-review.googlesource.com/484439
Commit-Queue: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Clemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#44836}
This commit is contained in:
Michael Starzinger 2017-04-25 11:59:09 +02:00 committed by Commit Bot
parent 0b804e385e
commit c2abd807e4
4 changed files with 37 additions and 49 deletions

View File

@ -235,21 +235,16 @@ uint32_t AsmJsParser::VarIndex(VarInfo* info) {
return info->index + static_cast<uint32_t>(global_imports_.size()); return info->index + static_cast<uint32_t>(global_imports_.size());
} }
void AsmJsParser::AddGlobalImport(std::string name, AsmType* type, void AsmJsParser::AddGlobalImport(Vector<const char> name, AsmType* type,
ValueType vtype, bool mutable_variable, ValueType vtype, bool mutable_variable,
VarInfo* info) { VarInfo* info) {
// TODO(bradnelson): Refactor memory management here.
// AsmModuleBuilder should really own import names.
char* name_data = zone()->NewArray<char>(name.size());
memcpy(name_data, name.data(), name.size());
// Allocate a separate variable for the import. // Allocate a separate variable for the import.
// TODO(mstarzinger): Consider using the imported global directly instead of // TODO(mstarzinger): Consider using the imported global directly instead of
// allocating a separate global variable for immutable (i.e. const) imports. // allocating a separate global variable for immutable (i.e. const) imports.
DeclareGlobal(info, mutable_variable, type, vtype); DeclareGlobal(info, mutable_variable, type, vtype);
// Record the need to initialize the global from the import. // Record the need to initialize the global from the import.
global_imports_.push_back({name_data, name.size(), vtype, info}); global_imports_.push_back({name, vtype, info});
} }
void AsmJsParser::DeclareGlobal(VarInfo* info, bool mutable_variable, void AsmJsParser::DeclareGlobal(VarInfo* info, bool mutable_variable,
@ -276,6 +271,13 @@ uint32_t AsmJsParser::TempVariable(int index) {
return function_temp_locals_offset_ + index; return function_temp_locals_offset_ + index;
} }
Vector<const char> AsmJsParser::CopyCurrentIdentifierString() {
const std::string& str = scanner_.GetIdentifierString();
char* buffer = zone()->NewArray<char>(str.size());
str.copy(buffer, str.size());
return Vector<const char>(buffer, static_cast<int>(str.size()));
}
void AsmJsParser::SkipSemicolon() { void AsmJsParser::SkipSemicolon() {
if (Check(';')) { if (Check(';')) {
// Had a semicolon. // Had a semicolon.
@ -363,13 +365,9 @@ void AsmJsParser::ValidateModule() {
// Add start function to init things. // Add start function to init things.
WasmFunctionBuilder* start = module_builder_->AddFunction(); WasmFunctionBuilder* start = module_builder_->AddFunction();
module_builder_->MarkStartFunction(start); module_builder_->MarkStartFunction(start);
for (auto global_import : global_imports_) { for (auto& global_import : global_imports_) {
// TODO(bradnelson): Reuse parse buffer memory / make wasm-module-builder
// managed the memory for the import name (currently have to keep our
// own memory for it).
uint32_t import_index = module_builder_->AddGlobalImport( uint32_t import_index = module_builder_->AddGlobalImport(
global_import.import_name, global_import.import_name.start(), global_import.import_name.length(),
static_cast<int>(global_import.import_name_size),
global_import.value_type); global_import.value_type);
start->EmitWithVarInt(kExprGetGlobal, import_index); start->EmitWithVarInt(kExprGetGlobal, import_index);
start->EmitWithVarInt(kExprSetGlobal, VarIndex(global_import.var_info)); start->EmitWithVarInt(kExprSetGlobal, VarIndex(global_import.var_info));
@ -533,31 +531,24 @@ bool AsmJsParser::ValidateModuleVarImport(VarInfo* info,
if (Check('+')) { if (Check('+')) {
EXPECT_TOKENf(foreign_name_); EXPECT_TOKENf(foreign_name_);
EXPECT_TOKENf('.'); EXPECT_TOKENf('.');
AddGlobalImport(scanner_.GetIdentifierString(), AsmType::Double(), kWasmF64, Vector<const char> name = CopyCurrentIdentifierString();
mutable_variable, info); AddGlobalImport(name, AsmType::Double(), kWasmF64, mutable_variable, info);
scanner_.Next(); scanner_.Next();
return true; return true;
} else if (Check(foreign_name_)) { } else if (Check(foreign_name_)) {
EXPECT_TOKENf('.'); EXPECT_TOKENf('.');
std::string import_name = scanner_.GetIdentifierString(); Vector<const char> name = CopyCurrentIdentifierString();
scanner_.Next(); scanner_.Next();
if (Check('|')) { if (Check('|')) {
if (!CheckForZero()) { if (!CheckForZero()) {
FAILf("Expected |0 type annotation for foreign integer import"); FAILf("Expected |0 type annotation for foreign integer import");
} }
AddGlobalImport(import_name, AsmType::Int(), kWasmI32, mutable_variable, AddGlobalImport(name, AsmType::Int(), kWasmI32, mutable_variable, info);
info);
return true; return true;
} }
info->kind = VarKind::kImportedFunction; info->kind = VarKind::kImportedFunction;
info->import = info->import = new (zone()->New(sizeof(FunctionImportInfo)))
new (zone()->New(sizeof(FunctionImportInfo))) FunctionImportInfo( FunctionImportInfo({name, WasmModuleBuilder::SignatureMap(zone())});
{nullptr, 0, WasmModuleBuilder::SignatureMap(zone())});
// TODO(bradnelson): Refactor memory management here.
// AsmModuleBuilder should really own import names.
info->import->function_name = zone()->NewArray<char>(import_name.size());
memcpy(info->import->function_name, import_name.data(), import_name.size());
info->import->function_name_size = import_name.size();
return true; return true;
} }
return false; return false;
@ -629,7 +620,7 @@ void AsmJsParser::ValidateExport() {
// clang format on // clang format on
if (Check('{')) { if (Check('{')) {
for (;;) { for (;;) {
std::string name = scanner_.GetIdentifierString(); Vector<const char> name = CopyCurrentIdentifierString();
if (!scanner_.IsGlobal() && !scanner_.IsLocal()) { if (!scanner_.IsGlobal() && !scanner_.IsLocal()) {
FAIL("Illegal export name"); FAIL("Illegal export name");
} }
@ -642,8 +633,7 @@ void AsmJsParser::ValidateExport() {
if (info->kind != VarKind::kFunction) { if (info->kind != VarKind::kFunction) {
FAIL("Expected function"); FAIL("Expected function");
} }
info->function_builder->ExportAs( info->function_builder->ExportAs(name);
{name.c_str(), static_cast<int>(name.size())});
if (Check(',')) { if (Check(',')) {
if (!Peek('}')) { if (!Peek('}')) {
continue; continue;
@ -727,7 +717,7 @@ void AsmJsParser::ValidateFunction() {
FAIL("Expected function name"); FAIL("Expected function name");
} }
std::string function_name_raw = scanner_.GetIdentifierString(); Vector<const char> function_name_str = CopyCurrentIdentifierString();
AsmJsScanner::token_t function_name = Consume(); AsmJsScanner::token_t function_name = Consume();
VarInfo* function_info = GetVarInfo(function_name); VarInfo* function_info = GetVarInfo(function_name);
if (function_info->kind == VarKind::kUnused) { if (function_info->kind == VarKind::kUnused) {
@ -741,12 +731,7 @@ void AsmJsParser::ValidateFunction() {
} }
function_info->function_defined = true; function_info->function_defined = true;
// TODO(bradnelson): Cleanup memory management here. function_info->function_builder->SetName(function_name_str);
// WasmModuleBuilder should own these.
char* function_name_chr = zone()->NewArray<char>(function_name_raw.size());
memcpy(function_name_chr, function_name_raw.data(), function_name_raw.size());
function_info->function_builder->SetName(
{function_name_chr, static_cast<int>(function_name_raw.size())});
current_function_builder_ = function_info->function_builder; current_function_builder_ = function_info->function_builder;
return_type_ = nullptr; return_type_ = nullptr;
@ -2099,7 +2084,6 @@ AsmType* AsmJsParser::ValidateCall() {
} else if (t->IsA(AsmType::Double())) { } else if (t->IsA(AsmType::Double())) {
param_types.push_back(AsmType::Double()); param_types.push_back(AsmType::Double());
} else { } else {
std::string a = t->Name();
FAILn("Bad function argument type"); FAILn("Bad function argument type");
} }
if (!Peek(')')) { if (!Peek(')')) {
@ -2170,9 +2154,8 @@ AsmType* AsmJsParser::ValidateCall() {
index = it->second; index = it->second;
} else { } else {
index = module_builder_->AddImport( index = module_builder_->AddImport(
function_info->import->function_name, function_info->import->function_name.start(),
static_cast<uint32_t>(function_info->import->function_name_size), function_info->import->function_name.length(), sig);
sig);
function_info->import->cache[sig] = index; function_info->import->cache[sig] = index;
} }
current_function_builder_->AddAsmWasmOffset(call_pos, to_number_pos); current_function_builder_->AddAsmWasmOffset(call_pos, to_number_pos);

View File

@ -57,8 +57,7 @@ class AsmJsParser {
// clang-format on // clang-format on
struct FunctionImportInfo { struct FunctionImportInfo {
char* function_name; Vector<const char> function_name;
size_t function_name_size;
WasmModuleBuilder::SignatureMap cache; WasmModuleBuilder::SignatureMap cache;
}; };
@ -76,8 +75,7 @@ class AsmJsParser {
}; };
struct GlobalImport { struct GlobalImport {
char* import_name; Vector<const char> import_name;
size_t import_name_size;
ValueType value_type; ValueType value_type;
VarInfo* var_info; VarInfo* var_info;
}; };
@ -156,8 +154,8 @@ class AsmJsParser {
// statements it's attached to. // statements it's attached to.
AsmJsScanner::token_t pending_label_; AsmJsScanner::token_t pending_label_;
// Global imports. // Global imports. The list of imported variables that are copied during
// NOTE: Holds the strings referenced in wasm-module-builder for imports. // module instantiation into a corresponding global variable.
ZoneLinkedList<GlobalImport> global_imports_; ZoneLinkedList<GlobalImport> global_imports_;
Zone* zone() { return zone_; } Zone* zone() { return zone_; }
@ -228,13 +226,15 @@ class AsmJsParser {
ValueType vtype, ValueType vtype,
const WasmInitExpr& init = WasmInitExpr()); const WasmInitExpr& init = WasmInitExpr());
void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type); void DeclareStdlibFunc(VarInfo* info, VarKind kind, AsmType* type);
void AddGlobalImport(Vector<const char> name, AsmType* type, ValueType vtype,
bool mutable_variable, VarInfo* info);
// Allocates a temporary local variable. The given {index} is absolute within // Allocates a temporary local variable. The given {index} is absolute within
// the function body, consider using {TemporaryVariableScope} when nesting. // the function body, consider using {TemporaryVariableScope} when nesting.
uint32_t TempVariable(int index); uint32_t TempVariable(int index);
void AddGlobalImport(std::string name, AsmType* type, ValueType vtype, // Preserves a copy of the scanner's current identifier string in the zone.
bool mutable_variable, VarInfo* info); Vector<const char> CopyCurrentIdentifierString();
// Use to set up block stack layers (including synthetic ones for if-else). // Use to set up block stack layers (including synthetic ones for if-else).
// Begin/Loop/End below are implemented with these plus code generation. // Begin/Loop/End below are implemented with these plus code generation.

View File

@ -43,12 +43,15 @@ class V8_EXPORT_PRIVATE AsmJsScanner {
void Next(); void Next();
// Back up by one token. // Back up by one token.
void Rewind(); void Rewind();
// Get raw string for current identifier.
// Get raw string for current identifier. Note that the returned string will
// become invalid when the scanner advances, create a copy to preserve it.
const std::string& GetIdentifierString() const { const std::string& GetIdentifierString() const {
// Identifier strings don't work after a rewind. // Identifier strings don't work after a rewind.
DCHECK(!rewind_); DCHECK(!rewind_);
return identifier_string_; return identifier_string_;
} }
// Check if we just passed a newline. // Check if we just passed a newline.
bool IsPrecededByNewline() const { bool IsPrecededByNewline() const {
// Newline tracking doesn't work if you back up. // Newline tracking doesn't work if you back up.

View File

@ -225,6 +225,7 @@ class V8_EXPORT_PRIVATE WasmModuleBuilder : public ZoneObject {
explicit WasmModuleBuilder(Zone* zone); explicit WasmModuleBuilder(Zone* zone);
// Building methods. // Building methods.
// TODO(mstarzinger): Refactor to take Vector<const char> instead.
uint32_t AddImport(const char* name, int name_length, FunctionSig* sig); uint32_t AddImport(const char* name, int name_length, FunctionSig* sig);
void SetImportName(uint32_t index, const char* name, int name_length) { void SetImportName(uint32_t index, const char* name, int name_length) {
function_imports_[index].name = name; function_imports_[index].name = name;
@ -233,6 +234,7 @@ class V8_EXPORT_PRIVATE WasmModuleBuilder : public ZoneObject {
WasmFunctionBuilder* AddFunction(FunctionSig* sig = nullptr); WasmFunctionBuilder* AddFunction(FunctionSig* sig = nullptr);
uint32_t AddGlobal(ValueType type, bool exported, bool mutability = true, uint32_t AddGlobal(ValueType type, bool exported, bool mutability = true,
const WasmInitExpr& init = WasmInitExpr()); const WasmInitExpr& init = WasmInitExpr());
// TODO(mstarzinger): Refactor to take Vector<const char> instead.
uint32_t AddGlobalImport(const char* name, int name_length, ValueType type); uint32_t AddGlobalImport(const char* name, int name_length, ValueType type);
void AddDataSegment(const byte* data, uint32_t size, uint32_t dest); void AddDataSegment(const byte* data, uint32_t size, uint32_t dest);
uint32_t AddSignature(FunctionSig* sig); uint32_t AddSignature(FunctionSig* sig);