[wasm] Use a consistent hash for wire bytes

We use a hash for different things:
- dumping wasm module to file,
- generating the script name, and
- computing prefix hash / wire byte hash for caching.

Two of them were using the StringHasher, one used base::hash_range.
For experimental PGO support, we will also need a hash value, so unify
this to use the same hash everywhere.
Since the result of base::hash_range is platform-dependent, use the
StringHasher everywhere.

R=thibaudm@chromium.org

Bug: v8:13209
Change-Id: Iae8c2385264ecedd4daea16d7f9221bc94650eef
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3855310
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82766}
This commit is contained in:
Clemens Backes 2022-08-29 10:34:58 +02:00 committed by V8 LUCI CQ
parent 5c152a0f7b
commit 918b46afb4
6 changed files with 24 additions and 31 deletions

View File

@ -2813,7 +2813,7 @@ bool AsyncStreamingProcessor::ProcessModuleHeader(
FinishAsyncCompileJobWithError(decoder_.FinishDecoding(false).error());
return false;
}
prefix_hash_ = NativeModuleCache::WireBytesHash(bytes);
prefix_hash_ = GetWireBytesHash(bytes);
return true;
}
@ -2830,8 +2830,7 @@ bool AsyncStreamingProcessor::ProcessSection(SectionCode section_code,
}
if (before_code_section_) {
// Combine section hashes until code section.
prefix_hash_ = base::hash_combine(prefix_hash_,
NativeModuleCache::WireBytesHash(bytes));
prefix_hash_ = base::hash_combine(prefix_hash_, GetWireBytesHash(bytes));
}
if (section_code == SectionCode::kUnknownSectionCode) {
size_t bytes_consumed = ModuleDecoder::IdentifyUnknownSection(

View File

@ -341,10 +341,11 @@ class ModuleDecoderTemplate : public Decoder {
path += base::OS::DirectorySeparator();
}
}
// File are named `HASH.{ok,failed}.wasm`.
size_t hash = base::hash_range(module_bytes.begin(), module_bytes.end());
// Files are named `<hash>.{ok,failed}.wasm`.
// Limit the hash to 8 characters (32 bits).
uint32_t hash = static_cast<uint32_t>(GetWireBytesHash(module_bytes));
base::EmbeddedVector<char, 32> buf;
SNPrintF(buf, "%016zx.%s.wasm", hash, ok() ? "ok" : "failed");
SNPrintF(buf, "%08x.%s.wasm", hash, ok() ? "ok" : "failed");
path += buf.begin();
size_t rv = 0;
if (FILE* file = base::OS::FOpen(path.c_str(), "wb")) {

View File

@ -18,7 +18,6 @@
#include "src/objects/heap-number.h"
#include "src/objects/managed-inl.h"
#include "src/objects/objects-inl.h"
#include "src/strings/string-hasher-inl.h"
#include "src/utils/ostreams.h"
#include "src/wasm/function-compiler.h"
#include "src/wasm/module-compiler.h"
@ -285,20 +284,13 @@ void NativeModuleCache::Erase(NativeModule* native_module) {
cache_cv_.NotifyAll();
}
// static
// Returns a hash of {bytes}. The bytes are reinterpreted as a char sequence
// and passed, together with their length, to
// {StringHasher::HashSequentialString} using {kZeroHashSeed} as the seed.
size_t NativeModuleCache::WireBytesHash(base::Vector<const uint8_t> bytes) {
return StringHasher::HashSequentialString(
reinterpret_cast<const char*>(bytes.begin()), bytes.length(),
kZeroHashSeed);
}
// static
size_t NativeModuleCache::PrefixHash(base::Vector<const uint8_t> wire_bytes) {
// Compute the hash as a combined hash of the sections up to the code section
// header, to mirror the way streaming compilation does it.
Decoder decoder(wire_bytes.begin(), wire_bytes.end());
decoder.consume_bytes(8, "module header");
size_t hash = NativeModuleCache::WireBytesHash(wire_bytes.SubVector(0, 8));
size_t hash = GetWireBytesHash(wire_bytes.SubVector(0, 8));
SectionCode section_id = SectionCode::kUnknownSectionCode;
while (decoder.ok() && decoder.more()) {
section_id = static_cast<SectionCode>(decoder.consume_u8());
@ -309,8 +301,8 @@ size_t NativeModuleCache::PrefixHash(base::Vector<const uint8_t> wire_bytes) {
}
const uint8_t* payload_start = decoder.pc();
decoder.consume_bytes(section_size, "section payload");
size_t section_hash = NativeModuleCache::WireBytesHash(
base::Vector<const uint8_t>(payload_start, section_size));
size_t section_hash =
GetWireBytesHash(base::VectorOf(payload_start, section_size));
hash = base::hash_combine(hash, section_hash);
}
return hash;
@ -791,10 +783,8 @@ Handle<Script> CreateWasmScript(Isolate* isolate,
->NewStringFromUtf8(source_url, AllocationType::kOld)
.ToHandleChecked();
} else {
int hash = StringHasher::HashSequentialString(
reinterpret_cast<const char*>(wire_bytes.begin()), wire_bytes.length(),
kZeroHashSeed);
// Limit the printed hash to 8 characters.
uint32_t hash = static_cast<uint32_t>(GetWireBytesHash(wire_bytes));
base::EmbeddedVector<char, 32> buffer;
if (module->name.is_empty()) {
// Build the URL in the form "wasm://wasm/<hash>".

View File

@ -109,8 +109,6 @@ class NativeModuleCache {
bool empty() { return map_.empty(); }
static size_t WireBytesHash(base::Vector<const uint8_t> bytes);
// Hash the wire bytes up to the code section header. Used as a heuristic to
// avoid streaming compilation of modules that are likely already in the
// cache. See {GetStreamingCompilationOwnership}. Assumes that the bytes have

View File

@ -682,4 +682,10 @@ int JumpTableOffset(const WasmModule* module, int func_index) {
declared_function_index(module, func_index));
}
// Returns a hash of {wire_bytes}. The bytes are reinterpreted as a char
// sequence and passed, together with their length, to
// {StringHasher::HashSequentialString} using {kZeroHashSeed} as the seed.
// The StringHasher is used rather than {base::hash_range} because the result
// of the latter is platform-dependent.
size_t GetWireBytesHash(base::Vector<const uint8_t> wire_bytes) {
return StringHasher::HashSequentialString(
reinterpret_cast<const char*>(wire_bytes.begin()), wire_bytes.length(),
kZeroHashSeed);
}
} // namespace v8::internal::wasm

View File

@ -26,13 +26,11 @@
#include "src/wasm/wasm-init-expr.h"
#include "src/wasm/wasm-limits.h"
namespace v8 {
namespace internal {
namespace v8::internal {
class WasmModuleObject;
}
namespace wasm {
namespace v8::internal::wasm {
using WasmName = base::Vector<const char>;
@ -810,8 +808,9 @@ class TruncatedUserString {
size_t PrintSignature(base::Vector<char> buffer, const wasm::FunctionSig*,
char delimiter = ':');
} // namespace wasm
} // namespace internal
} // namespace v8
// Returns a hash of the given wire bytes. This is the single hash function
// used for wire bytes: for naming dumped module files, for generating the
// script name, and for the prefix hash / wire bytes hash used for caching.
V8_EXPORT_PRIVATE size_t
GetWireBytesHash(base::Vector<const uint8_t> wire_bytes);
} // namespace v8::internal::wasm
#endif // V8_WASM_WASM_MODULE_H_