[stringrefs] Fold wtf8 policy into instruction set

Instead of having a single instruction such as `string.new_wtf8` take an
immediate that selects which UTF-8 flavor (UTF-8, WTF-8, or lossy UTF-8) to
use, define one instruction per flavor.

See https://github.com/WebAssembly/stringref/pull/46.

Bug: v8:12868
Change-Id: I2e9f2735c557b2352b6e75314037e473710d87a9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3892695
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Andy Wingo <wingo@igalia.com>
Cr-Commit-Position: refs/heads/main@{#83170}
This commit is contained in:
Andy Wingo 2022-09-13 16:52:44 +02:00 committed by V8 LUCI CQ
parent 6946d1dedb
commit 8366df73c3
16 changed files with 433 additions and 451 deletions

View File

@ -808,20 +808,20 @@ transitioning javascript builtin ExperimentalWasmConvertStringToArray(
} }
builtin WasmStringNewWtf8( builtin WasmStringNewWtf8(
offset: uint32, size: uint32, memory: Smi, policy: Smi): String { offset: uint32, size: uint32, memory: Smi, utf8Variant: Smi): String {
const instance = LoadInstanceFromFrame(); const instance = LoadInstanceFromFrame();
tail runtime::WasmStringNewWtf8( tail runtime::WasmStringNewWtf8(
LoadContextFromInstance(instance), instance, memory, policy, LoadContextFromInstance(instance), instance, memory, utf8Variant,
WasmUint32ToNumber(offset), WasmUint32ToNumber(size)); WasmUint32ToNumber(offset), WasmUint32ToNumber(size));
} }
builtin WasmStringNewWtf8Array( builtin WasmStringNewWtf8Array(
start: uint32, end: uint32, array: WasmArray, policy: Smi): String { start: uint32, end: uint32, array: WasmArray, utf8Variant: Smi): String {
const context = LoadContextFromFrame(); const context = LoadContextFromFrame();
try { try {
if (array.length < end) goto OffsetOutOfRange; if (array.length < end) goto OffsetOutOfRange;
if (end < start) goto OffsetOutOfRange; if (end < start) goto OffsetOutOfRange;
tail runtime::WasmStringNewWtf8Array( tail runtime::WasmStringNewWtf8Array(
context, policy, array, SmiFromUint32(start), SmiFromUint32(end)); context, utf8Variant, array, SmiFromUint32(start), SmiFromUint32(end));
} label OffsetOutOfRange deferred { } label OffsetOutOfRange deferred {
const error = MessageTemplate::kWasmTrapArrayOutOfBounds; const error = MessageTemplate::kWasmTrapArrayOutOfBounds;
runtime::ThrowWasmError(context, SmiConstant(error)); runtime::ThrowWasmError(context, SmiConstant(error));
@ -863,18 +863,18 @@ builtin WasmStringMeasureWtf8(string: String): int32 {
return Signed(ChangeNumberToUint32(result)); return Signed(ChangeNumberToUint32(result));
} }
builtin WasmStringEncodeWtf8( builtin WasmStringEncodeWtf8(
string: String, offset: uint32, memory: Smi, policy: Smi): uint32 { string: String, offset: uint32, memory: Smi, utf8Variant: Smi): uint32 {
const instance = LoadInstanceFromFrame(); const instance = LoadInstanceFromFrame();
const result = runtime::WasmStringEncodeWtf8( const result = runtime::WasmStringEncodeWtf8(
LoadContextFromInstance(instance), instance, memory, policy, string, LoadContextFromInstance(instance), instance, memory, utf8Variant, string,
WasmUint32ToNumber(offset)); WasmUint32ToNumber(offset));
return ChangeNumberToUint32(result); return ChangeNumberToUint32(result);
} }
builtin WasmStringEncodeWtf8Array( builtin WasmStringEncodeWtf8Array(
string: String, array: WasmArray, start: uint32, policy: Smi): uint32 { string: String, array: WasmArray, start: uint32, utf8Variant: Smi): uint32 {
const instance = LoadInstanceFromFrame(); const instance = LoadInstanceFromFrame();
const result = runtime::WasmStringEncodeWtf8Array( const result = runtime::WasmStringEncodeWtf8Array(
LoadContextFromInstance(instance), policy, string, array, LoadContextFromInstance(instance), utf8Variant, string, array,
WasmUint32ToNumber(start)); WasmUint32ToNumber(start));
return ChangeNumberToUint32(result); return ChangeNumberToUint32(result);
} }
@ -985,7 +985,7 @@ struct NewPositionAndBytesWritten {
} }
builtin WasmStringViewWtf8Encode( builtin WasmStringViewWtf8Encode(
addr: uint32, pos: uint32, bytes: uint32, view: ByteArray, memory: Smi, addr: uint32, pos: uint32, bytes: uint32, view: ByteArray, memory: Smi,
policy: Smi): NewPositionAndBytesWritten { utf8Variant: Smi): NewPositionAndBytesWritten {
const start = WasmStringViewWtf8Advance(view, pos, 0); const start = WasmStringViewWtf8Advance(view, pos, 0);
const end = WasmStringViewWtf8Advance(view, start, bytes); const end = WasmStringViewWtf8Advance(view, start, bytes);
const instance = LoadInstanceFromFrame(); const instance = LoadInstanceFromFrame();
@ -999,7 +999,7 @@ builtin WasmStringViewWtf8Encode(
// Always call out to run-time, to catch invalid addr. // Always call out to run-time, to catch invalid addr.
runtime::WasmStringViewWtf8Encode( runtime::WasmStringViewWtf8Encode(
context, instance, policy, view, WasmUint32ToNumber(addr), context, instance, utf8Variant, view, WasmUint32ToNumber(addr),
WasmUint32ToNumber(start), WasmUint32ToNumber(end)); WasmUint32ToNumber(start), WasmUint32ToNumber(end));
return NewPositionAndBytesWritten{ return NewPositionAndBytesWritten{

View File

@ -5732,19 +5732,19 @@ void WasmGraphBuilder::ArrayCopy(Node* dst_array, Node* dst_index,
} }
Node* WasmGraphBuilder::StringNewWtf8(uint32_t memory, Node* WasmGraphBuilder::StringNewWtf8(uint32_t memory,
wasm::StringRefWtf8Policy policy, unibrow::Utf8Variant variant,
Node* offset, Node* size) { Node* offset, Node* size) {
return gasm_->CallBuiltin(Builtin::kWasmStringNewWtf8, Operator::kNoDeopt, return gasm_->CallBuiltin(Builtin::kWasmStringNewWtf8, Operator::kNoDeopt,
offset, size, gasm_->SmiConstant(memory), offset, size, gasm_->SmiConstant(memory),
gasm_->SmiConstant(static_cast<int32_t>(policy))); gasm_->SmiConstant(static_cast<int32_t>(variant)));
} }
Node* WasmGraphBuilder::StringNewWtf8Array(wasm::StringRefWtf8Policy policy, Node* WasmGraphBuilder::StringNewWtf8Array(unibrow::Utf8Variant variant,
Node* array, Node* start, Node* array, Node* start,
Node* end) { Node* end) {
return gasm_->CallBuiltin(Builtin::kWasmStringNewWtf8Array, return gasm_->CallBuiltin(Builtin::kWasmStringNewWtf8Array,
Operator::kNoDeopt, start, end, array, Operator::kNoDeopt, start, end, array,
gasm_->SmiConstant(static_cast<int32_t>(policy))); gasm_->SmiConstant(static_cast<int32_t>(variant)));
} }
Node* WasmGraphBuilder::StringNewWtf16(uint32_t memory, Node* offset, Node* WasmGraphBuilder::StringNewWtf16(uint32_t memory, Node* offset,
@ -5794,7 +5794,7 @@ Node* WasmGraphBuilder::StringMeasureWtf16(Node* string,
} }
Node* WasmGraphBuilder::StringEncodeWtf8(uint32_t memory, Node* WasmGraphBuilder::StringEncodeWtf8(uint32_t memory,
wasm::StringRefWtf8Policy policy, unibrow::Utf8Variant variant,
Node* string, CheckForNull null_check, Node* string, CheckForNull null_check,
Node* offset, Node* offset,
wasm::WasmCodePosition position) { wasm::WasmCodePosition position) {
@ -5803,13 +5803,13 @@ Node* WasmGraphBuilder::StringEncodeWtf8(uint32_t memory,
} }
return gasm_->CallBuiltin(Builtin::kWasmStringEncodeWtf8, Operator::kNoDeopt, return gasm_->CallBuiltin(Builtin::kWasmStringEncodeWtf8, Operator::kNoDeopt,
string, offset, gasm_->SmiConstant(memory), string, offset, gasm_->SmiConstant(memory),
gasm_->SmiConstant(policy)); gasm_->SmiConstant(static_cast<int32_t>(variant)));
} }
Node* WasmGraphBuilder::StringEncodeWtf8Array( Node* WasmGraphBuilder::StringEncodeWtf8Array(
wasm::StringRefWtf8Policy policy, Node* string, unibrow::Utf8Variant variant, Node* string, CheckForNull string_null_check,
CheckForNull string_null_check, Node* array, CheckForNull array_null_check, Node* array, CheckForNull array_null_check, Node* start,
Node* start, wasm::WasmCodePosition position) { wasm::WasmCodePosition position) {
if (string_null_check == kWithNullCheck) { if (string_null_check == kWithNullCheck) {
string = AssertNotNull(string, position); string = AssertNotNull(string, position);
} }
@ -5818,7 +5818,7 @@ Node* WasmGraphBuilder::StringEncodeWtf8Array(
} }
return gasm_->CallBuiltin(Builtin::kWasmStringEncodeWtf8Array, return gasm_->CallBuiltin(Builtin::kWasmStringEncodeWtf8Array,
Operator::kNoDeopt, string, array, start, Operator::kNoDeopt, string, array, start,
gasm_->SmiConstant(policy)); gasm_->SmiConstant(static_cast<int32_t>(variant)));
} }
Node* WasmGraphBuilder::StringEncodeWtf16(uint32_t memory, Node* string, Node* WasmGraphBuilder::StringEncodeWtf16(uint32_t memory, Node* string,
@ -5900,15 +5900,16 @@ Node* WasmGraphBuilder::StringViewWtf8Advance(Node* view,
} }
void WasmGraphBuilder::StringViewWtf8Encode( void WasmGraphBuilder::StringViewWtf8Encode(
uint32_t memory, wasm::StringRefWtf8Policy policy, Node* view, uint32_t memory, unibrow::Utf8Variant variant, Node* view,
CheckForNull null_check, Node* addr, Node* pos, Node* bytes, CheckForNull null_check, Node* addr, Node* pos, Node* bytes,
Node** next_pos, Node** bytes_written, wasm::WasmCodePosition position) { Node** next_pos, Node** bytes_written, wasm::WasmCodePosition position) {
if (null_check == kWithNullCheck) { if (null_check == kWithNullCheck) {
view = AssertNotNull(view, position); view = AssertNotNull(view, position);
} }
Node* pair = gasm_->CallBuiltin( Node* pair =
Builtin::kWasmStringViewWtf8Encode, Operator::kNoDeopt, addr, pos, bytes, gasm_->CallBuiltin(Builtin::kWasmStringViewWtf8Encode, Operator::kNoDeopt,
view, gasm_->SmiConstant(memory), gasm_->SmiConstant(policy)); addr, pos, bytes, view, gasm_->SmiConstant(memory),
gasm_->SmiConstant(static_cast<int32_t>(variant)));
*next_pos = gasm_->Projection(0, pair); *next_pos = gasm_->Projection(0, pair);
*bytes_written = gasm_->Projection(1, pair); *bytes_written = gasm_->Projection(1, pair);
} }

View File

@ -516,9 +516,9 @@ class WasmGraphBuilder {
void BrOnI31(Node* object, Node* rtt, WasmTypeCheckConfig config, void BrOnI31(Node* object, Node* rtt, WasmTypeCheckConfig config,
Node** match_control, Node** match_effect, Node** match_control, Node** match_effect,
Node** no_match_control, Node** no_match_effect); Node** no_match_control, Node** no_match_effect);
Node* StringNewWtf8(uint32_t memory, wasm::StringRefWtf8Policy policy, Node* StringNewWtf8(uint32_t memory, unibrow::Utf8Variant variant,
Node* offset, Node* size); Node* offset, Node* size);
Node* StringNewWtf8Array(wasm::StringRefWtf8Policy policy, Node* array, Node* StringNewWtf8Array(unibrow::Utf8Variant variant, Node* array,
Node* start, Node* end); Node* start, Node* end);
Node* StringNewWtf16(uint32_t memory, Node* offset, Node* size); Node* StringNewWtf16(uint32_t memory, Node* offset, Node* size);
Node* StringNewWtf16Array(Node* array, Node* start, Node* end); Node* StringNewWtf16Array(Node* array, Node* start, Node* end);
@ -529,10 +529,10 @@ class WasmGraphBuilder {
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
Node* StringMeasureWtf16(Node* string, CheckForNull null_check, Node* StringMeasureWtf16(Node* string, CheckForNull null_check,
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
Node* StringEncodeWtf8(uint32_t memory, wasm::StringRefWtf8Policy policy, Node* StringEncodeWtf8(uint32_t memory, unibrow::Utf8Variant variant,
Node* string, CheckForNull null_check, Node* offset, Node* string, CheckForNull null_check, Node* offset,
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
Node* StringEncodeWtf8Array(wasm::StringRefWtf8Policy policy, Node* string, Node* StringEncodeWtf8Array(unibrow::Utf8Variant variant, Node* string,
CheckForNull string_null_check, Node* array, CheckForNull string_null_check, Node* array,
CheckForNull array_null_check, Node* start, CheckForNull array_null_check, Node* start,
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
@ -553,7 +553,7 @@ class WasmGraphBuilder {
wasm::WasmCodePosition position); wasm::WasmCodePosition position);
Node* StringViewWtf8Advance(Node* view, CheckForNull null_check, Node* pos, Node* StringViewWtf8Advance(Node* view, CheckForNull null_check, Node* pos,
Node* bytes, wasm::WasmCodePosition position); Node* bytes, wasm::WasmCodePosition position);
void StringViewWtf8Encode(uint32_t memory, wasm::StringRefWtf8Policy policy, void StringViewWtf8Encode(uint32_t memory, unibrow::Utf8Variant variant,
Node* view, CheckForNull null_check, Node* addr, Node* view, CheckForNull null_check, Node* addr,
Node* pos, Node* bytes, Node** next_pos, Node* pos, Node* bytes, Node** next_pos,
Node** bytes_written, Node** bytes_written,

View File

@ -854,20 +854,6 @@ RUNTIME_FUNCTION(Runtime_WasmCreateResumePromise) {
return *result; return *result;
} }
namespace {
unibrow::Utf8Variant Utf8VariantFromWtf8Policy(
wasm::StringRefWtf8Policy policy) {
switch (policy) {
case wasm::kWtf8PolicyReject:
return unibrow::Utf8Variant::kUtf8;
case wasm::kWtf8PolicyAccept:
return unibrow::Utf8Variant::kWtf8;
case wasm::kWtf8PolicyReplace:
return unibrow::Utf8Variant::kLossyUtf8;
}
}
} // namespace
// Returns the new string if the operation succeeds. Otherwise throws an // Returns the new string if the operation succeeds. Otherwise throws an
// exception and returns an empty result. // exception and returns an empty result.
RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8) { RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8) {
@ -876,16 +862,16 @@ RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8) {
HandleScope scope(isolate); HandleScope scope(isolate);
WasmInstanceObject instance = WasmInstanceObject::cast(args[0]); WasmInstanceObject instance = WasmInstanceObject::cast(args[0]);
uint32_t memory = args.positive_smi_value_at(1); uint32_t memory = args.positive_smi_value_at(1);
uint32_t policy_value = args.positive_smi_value_at(2); uint32_t utf8_variant_value = args.positive_smi_value_at(2);
uint32_t offset = NumberToUint32(args[3]); uint32_t offset = NumberToUint32(args[3]);
uint32_t size = NumberToUint32(args[4]); uint32_t size = NumberToUint32(args[4]);
DCHECK_EQ(memory, 0); DCHECK_EQ(memory, 0);
USE(memory); USE(memory);
DCHECK(policy_value <= wasm::kLastWtf8Policy); DCHECK(utf8_variant_value <=
static_cast<uint32_t>(unibrow::Utf8Variant::kLastUtf8Variant));
auto policy = static_cast<wasm::StringRefWtf8Policy>(policy_value); auto utf8_variant = static_cast<unibrow::Utf8Variant>(utf8_variant_value);
auto utf8_variant = Utf8VariantFromWtf8Policy(policy);
uint64_t mem_size = instance.memory_size(); uint64_t mem_size = instance.memory_size();
if (!base::IsInBounds<uint64_t>(offset, size, mem_size)) { if (!base::IsInBounds<uint64_t>(offset, size, mem_size)) {
@ -902,14 +888,14 @@ RUNTIME_FUNCTION(Runtime_WasmStringNewWtf8Array) {
ClearThreadInWasmScope flag_scope(isolate); ClearThreadInWasmScope flag_scope(isolate);
DCHECK_EQ(4, args.length()); DCHECK_EQ(4, args.length());
HandleScope scope(isolate); HandleScope scope(isolate);
uint32_t policy_value = args.positive_smi_value_at(0); uint32_t utf8_variant_value = args.positive_smi_value_at(0);
Handle<WasmArray> array(WasmArray::cast(args[1]), isolate); Handle<WasmArray> array(WasmArray::cast(args[1]), isolate);
uint32_t start = NumberToUint32(args[2]); uint32_t start = NumberToUint32(args[2]);
uint32_t end = NumberToUint32(args[3]); uint32_t end = NumberToUint32(args[3]);
DCHECK(policy_value <= wasm::kLastWtf8Policy); DCHECK(utf8_variant_value <=
auto policy = static_cast<wasm::StringRefWtf8Policy>(policy_value); static_cast<uint32_t>(unibrow::Utf8Variant::kLastUtf8Variant));
auto utf8_variant = Utf8VariantFromWtf8Policy(policy); auto utf8_variant = static_cast<unibrow::Utf8Variant>(utf8_variant_value);
RETURN_RESULT_OR_FAILURE(isolate, isolate->factory()->NewStringFromUtf8( RETURN_RESULT_OR_FAILURE(isolate, isolate->factory()->NewStringFromUtf8(
array, start, end, utf8_variant)); array, start, end, utf8_variant));
@ -1023,7 +1009,7 @@ bool HasUnpairedSurrogate(base::Vector<const base::uc16> wtf16) {
// TODO(12868): Consider unifying with api.cc:String::WriteUtf8. // TODO(12868): Consider unifying with api.cc:String::WriteUtf8.
template <typename T> template <typename T>
int EncodeWtf8(base::Vector<char> bytes, size_t offset, int EncodeWtf8(base::Vector<char> bytes, size_t offset,
base::Vector<const T> wtf16, wasm::StringRefWtf8Policy policy, base::Vector<const T> wtf16, unibrow::Utf8Variant variant,
MessageTemplate* message, MessageTemplate out_of_bounds) { MessageTemplate* message, MessageTemplate out_of_bounds) {
// The first check is a quick estimate to decide whether the second check // The first check is a quick estimate to decide whether the second check
// is worth the computation. // is worth the computation.
@ -1034,16 +1020,16 @@ int EncodeWtf8(base::Vector<char> bytes, size_t offset,
} }
bool replace_invalid = false; bool replace_invalid = false;
switch (policy) { switch (variant) {
case wasm::kWtf8PolicyAccept: case unibrow::Utf8Variant::kWtf8:
break; break;
case wasm::kWtf8PolicyReject: case unibrow::Utf8Variant::kUtf8:
if (HasUnpairedSurrogate(wtf16)) { if (HasUnpairedSurrogate(wtf16)) {
*message = MessageTemplate::kWasmTrapStringIsolatedSurrogate; *message = MessageTemplate::kWasmTrapStringIsolatedSurrogate;
return -1; return -1;
} }
break; break;
case wasm::kWtf8PolicyReplace: case unibrow::Utf8Variant::kLossyUtf8:
replace_invalid = true; replace_invalid = true;
break; break;
default: default:
@ -1061,7 +1047,7 @@ int EncodeWtf8(base::Vector<char> bytes, size_t offset,
return static_cast<int>(dst - dst_start); return static_cast<int>(dst - dst_start);
} }
template <typename GetWritableBytes> template <typename GetWritableBytes>
Object EncodeWtf8(Isolate* isolate, wasm::StringRefWtf8Policy policy, Object EncodeWtf8(Isolate* isolate, unibrow::Utf8Variant variant,
Handle<String> string, GetWritableBytes get_writable_bytes, Handle<String> string, GetWritableBytes get_writable_bytes,
size_t offset, MessageTemplate out_of_bounds_message) { size_t offset, MessageTemplate out_of_bounds_message) {
string = String::Flatten(isolate, string); string = String::Flatten(isolate, string);
@ -1072,9 +1058,9 @@ Object EncodeWtf8(Isolate* isolate, wasm::StringRefWtf8Policy policy,
String::FlatContent content = string->GetFlatContent(no_gc); String::FlatContent content = string->GetFlatContent(no_gc);
base::Vector<char> dst = get_writable_bytes(no_gc); base::Vector<char> dst = get_writable_bytes(no_gc);
written = content.IsOneByte() written = content.IsOneByte()
? EncodeWtf8(dst, offset, content.ToOneByteVector(), policy, ? EncodeWtf8(dst, offset, content.ToOneByteVector(), variant,
&message, out_of_bounds_message) &message, out_of_bounds_message)
: EncodeWtf8(dst, offset, content.ToUC16Vector(), policy, : EncodeWtf8(dst, offset, content.ToUC16Vector(), variant,
&message, out_of_bounds_message); &message, out_of_bounds_message);
} }
if (written < 0) { if (written < 0) {
@ -1128,21 +1114,22 @@ RUNTIME_FUNCTION(Runtime_WasmStringEncodeWtf8) {
HandleScope scope(isolate); HandleScope scope(isolate);
WasmInstanceObject instance = WasmInstanceObject::cast(args[0]); WasmInstanceObject instance = WasmInstanceObject::cast(args[0]);
uint32_t memory = args.positive_smi_value_at(1); uint32_t memory = args.positive_smi_value_at(1);
uint32_t policy_value = args.positive_smi_value_at(2); uint32_t utf8_variant_value = args.positive_smi_value_at(2);
Handle<String> string(String::cast(args[3]), isolate); Handle<String> string(String::cast(args[3]), isolate);
uint32_t offset = NumberToUint32(args[4]); uint32_t offset = NumberToUint32(args[4]);
DCHECK_EQ(memory, 0); DCHECK_EQ(memory, 0);
USE(memory); USE(memory);
DCHECK(policy_value <= wasm::kLastWtf8Policy); DCHECK(utf8_variant_value <=
static_cast<uint32_t>(unibrow::Utf8Variant::kLastUtf8Variant));
char* memory_start = reinterpret_cast<char*>(instance.memory_start()); char* memory_start = reinterpret_cast<char*>(instance.memory_start());
auto policy = static_cast<wasm::StringRefWtf8Policy>(policy_value); auto utf8_variant = static_cast<unibrow::Utf8Variant>(utf8_variant_value);
auto get_writable_bytes = auto get_writable_bytes =
[&](const DisallowGarbageCollection&) -> base::Vector<char> { [&](const DisallowGarbageCollection&) -> base::Vector<char> {
return {memory_start, instance.memory_size()}; return {memory_start, instance.memory_size()};
}; };
return EncodeWtf8(isolate, policy, string, get_writable_bytes, offset, return EncodeWtf8(isolate, utf8_variant, string, get_writable_bytes, offset,
MessageTemplate::kWasmTrapMemOutOfBounds); MessageTemplate::kWasmTrapMemOutOfBounds);
} }
@ -1150,18 +1137,19 @@ RUNTIME_FUNCTION(Runtime_WasmStringEncodeWtf8Array) {
ClearThreadInWasmScope flag_scope(isolate); ClearThreadInWasmScope flag_scope(isolate);
DCHECK_EQ(4, args.length()); DCHECK_EQ(4, args.length());
HandleScope scope(isolate); HandleScope scope(isolate);
uint32_t policy_value = args.positive_smi_value_at(0); uint32_t utf8_variant_value = args.positive_smi_value_at(0);
Handle<String> string(String::cast(args[1]), isolate); Handle<String> string(String::cast(args[1]), isolate);
Handle<WasmArray> array(WasmArray::cast(args[2]), isolate); Handle<WasmArray> array(WasmArray::cast(args[2]), isolate);
uint32_t start = NumberToUint32(args[3]); uint32_t start = NumberToUint32(args[3]);
DCHECK(policy_value <= wasm::kLastWtf8Policy); DCHECK(utf8_variant_value <=
auto policy = static_cast<wasm::StringRefWtf8Policy>(policy_value); static_cast<uint32_t>(unibrow::Utf8Variant::kLastUtf8Variant));
auto utf8_variant = static_cast<unibrow::Utf8Variant>(utf8_variant_value);
auto get_writable_bytes = auto get_writable_bytes =
[&](const DisallowGarbageCollection&) -> base::Vector<char> { [&](const DisallowGarbageCollection&) -> base::Vector<char> {
return {reinterpret_cast<char*>(array->ElementAddress(0)), array->length()}; return {reinterpret_cast<char*>(array->ElementAddress(0)), array->length()};
}; };
return EncodeWtf8(isolate, policy, string, get_writable_bytes, start, return EncodeWtf8(isolate, utf8_variant, string, get_writable_bytes, start,
MessageTemplate::kWasmTrapArrayOutOfBounds); MessageTemplate::kWasmTrapArrayOutOfBounds);
} }
@ -1215,13 +1203,13 @@ RUNTIME_FUNCTION(Runtime_WasmStringAsWtf8) {
int wtf8_length = MeasureWtf8(isolate, string); int wtf8_length = MeasureWtf8(isolate, string);
Handle<ByteArray> array = isolate->factory()->NewByteArray(wtf8_length); Handle<ByteArray> array = isolate->factory()->NewByteArray(wtf8_length);
wasm::StringRefWtf8Policy policy = wasm::kWtf8PolicyAccept; auto utf8_variant = unibrow::Utf8Variant::kWtf8;
auto get_writable_bytes = auto get_writable_bytes =
[&](const DisallowGarbageCollection&) -> base::Vector<char> { [&](const DisallowGarbageCollection&) -> base::Vector<char> {
return {reinterpret_cast<char*>(array->GetDataStartAddress()), return {reinterpret_cast<char*>(array->GetDataStartAddress()),
static_cast<size_t>(wtf8_length)}; static_cast<size_t>(wtf8_length)};
}; };
EncodeWtf8(isolate, policy, string, get_writable_bytes, 0, EncodeWtf8(isolate, utf8_variant, string, get_writable_bytes, 0,
MessageTemplate::kWasmTrapArrayOutOfBounds); MessageTemplate::kWasmTrapArrayOutOfBounds);
return *array; return *array;
} }
@ -1231,17 +1219,18 @@ RUNTIME_FUNCTION(Runtime_WasmStringViewWtf8Encode) {
DCHECK_EQ(6, args.length()); DCHECK_EQ(6, args.length());
HandleScope scope(isolate); HandleScope scope(isolate);
WasmInstanceObject instance = WasmInstanceObject::cast(args[0]); WasmInstanceObject instance = WasmInstanceObject::cast(args[0]);
uint32_t policy_value = args.positive_smi_value_at(1); uint32_t utf8_variant_value = args.positive_smi_value_at(1);
Handle<ByteArray> array(ByteArray::cast(args[2]), isolate); Handle<ByteArray> array(ByteArray::cast(args[2]), isolate);
uint32_t addr = NumberToUint32(args[3]); uint32_t addr = NumberToUint32(args[3]);
uint32_t start = NumberToUint32(args[4]); uint32_t start = NumberToUint32(args[4]);
uint32_t end = NumberToUint32(args[5]); uint32_t end = NumberToUint32(args[5]);
DCHECK(policy_value <= wasm::kLastWtf8Policy); DCHECK(utf8_variant_value <=
static_cast<uint32_t>(unibrow::Utf8Variant::kLastUtf8Variant));
DCHECK_LE(start, end); DCHECK_LE(start, end);
DCHECK(base::IsInBounds<size_t>(start, end - start, array->length())); DCHECK(base::IsInBounds<size_t>(start, end - start, array->length()));
auto policy = static_cast<wasm::StringRefWtf8Policy>(policy_value); auto utf8_variant = static_cast<unibrow::Utf8Variant>(utf8_variant_value);
size_t length = end - start; size_t length = end - start;
if (!base::IsInBounds<size_t>(addr, length, instance.memory_size())) { if (!base::IsInBounds<size_t>(addr, length, instance.memory_size())) {
@ -1254,9 +1243,9 @@ RUNTIME_FUNCTION(Runtime_WasmStringViewWtf8Encode) {
byte* dst = memory_start + addr; byte* dst = memory_start + addr;
std::vector<size_t> surrogates; std::vector<size_t> surrogates;
if (policy != wasm::kWtf8PolicyAccept) { if (utf8_variant != unibrow::Utf8Variant::kWtf8) {
unibrow::Wtf8::ScanForSurrogates({src, length}, &surrogates); unibrow::Wtf8::ScanForSurrogates({src, length}, &surrogates);
if (policy == wasm::kWtf8PolicyReject && !surrogates.empty()) { if (utf8_variant == unibrow::Utf8Variant::kUtf8 && !surrogates.empty()) {
return ThrowWasmError(isolate, return ThrowWasmError(isolate,
MessageTemplate::kWasmTrapStringIsolatedSurrogate); MessageTemplate::kWasmTrapStringIsolatedSurrogate);
} }
@ -1266,7 +1255,7 @@ RUNTIME_FUNCTION(Runtime_WasmStringViewWtf8Encode) {
for (size_t surrogate : surrogates) { for (size_t surrogate : surrogates) {
DCHECK_LT(surrogate, length); DCHECK_LT(surrogate, length);
DCHECK_EQ(policy, wasm::kWtf8PolicyReplace); DCHECK_EQ(utf8_variant, unibrow::Utf8Variant::kLossyUtf8);
unibrow::Utf8::Encode(reinterpret_cast<char*>(dst + surrogate), unibrow::Utf8::Encode(reinterpret_cast<char*>(dst + surrogate),
unibrow::Utf8::kBadChar, 0, false); unibrow::Utf8::kBadChar, 0, false);
} }

View File

@ -154,16 +154,17 @@ class Latin1 {
}; };
enum class Utf8Variant : uint8_t { enum class Utf8Variant : uint8_t {
kLossyUtf8, // Lossy UTF-8: Any byte sequence can be decoded without
// error, replacing invalid UTF-8 with the replacement
// character (U+FFFD). Any sequence of codepoints can be
// encoded without error, replacing surrogates with U+FFFD.
#if V8_ENABLE_WEBASSEMBLY #if V8_ENABLE_WEBASSEMBLY
kUtf8, // UTF-8. Decoding an invalid byte sequence or encoding a kUtf8, // UTF-8. Decoding an invalid byte sequence or encoding a
// surrogate codepoint signals an error. // surrogate codepoint signals an error.
kWtf8, // WTF-8: like UTF-8, but allows isolated (but not paired) kWtf8, // WTF-8: like UTF-8, but allows isolated (but not paired)
// surrogate codepoints to be encoded and decoded. // surrogate codepoints to be encoded and decoded.
#endif #endif
kLossyUtf8, // Lossy UTF-8: Any byte sequence can be decoded without
// error, replacing invalid UTF-8 with the replacement
// character (U+FFFD). Any sequence of codepoints can be
// encoded without error, replacing surrogates with U+FFFD.
kLastUtf8Variant = kLossyUtf8
}; };
class V8_EXPORT_PRIVATE Utf8 { class V8_EXPORT_PRIVATE Utf8 {

View File

@ -6262,19 +6262,20 @@ class LiftoffCompiler {
} }
void StringNewWtf8(FullDecoder* decoder, void StringNewWtf8(FullDecoder* decoder,
const EncodeWtf8Immediate<validate>& imm, const MemoryIndexImmediate<validate>& imm,
const Value& offset, const Value& size, Value* result) { const unibrow::Utf8Variant variant, const Value& offset,
const Value& size, Value* result) {
LiftoffRegList pinned; LiftoffRegList pinned;
LiftoffRegister memory_reg = LiftoffRegister memory_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(memory_reg, imm.memory.index); LoadSmi(memory_reg, imm.index);
LiftoffAssembler::VarState memory_var(kSmiKind, memory_reg, 0); LiftoffAssembler::VarState memory_var(kSmiKind, memory_reg, 0);
LiftoffRegister policy_reg = LiftoffRegister variant_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(policy_reg, static_cast<int32_t>(imm.policy.value)); LoadSmi(variant_reg, static_cast<int32_t>(variant));
LiftoffAssembler::VarState policy_var(kSmiKind, policy_reg, 0); LiftoffAssembler::VarState variant_var(kSmiKind, variant_reg, 0);
CallRuntimeStub( CallRuntimeStub(
WasmCode::kWasmStringNewWtf8, WasmCode::kWasmStringNewWtf8,
@ -6283,7 +6284,7 @@ class LiftoffCompiler {
__ cache_state()->stack_state.end()[-2], // offset __ cache_state()->stack_state.end()[-2], // offset
__ cache_state()->stack_state.end()[-1], // size __ cache_state()->stack_state.end()[-1], // size
memory_var, memory_var,
policy_var, variant_var,
}, },
decoder->position()); decoder->position());
__ cache_state()->stack_state.pop_back(2); __ cache_state()->stack_state.pop_back(2);
@ -6294,7 +6295,7 @@ class LiftoffCompiler {
} }
void StringNewWtf8Array(FullDecoder* decoder, void StringNewWtf8Array(FullDecoder* decoder,
const Wtf8PolicyImmediate<validate>& imm, const unibrow::Utf8Variant variant,
const Value& array, const Value& start, const Value& array, const Value& start,
const Value& end, Value* result) { const Value& end, Value* result) {
LiftoffRegList pinned; LiftoffRegList pinned;
@ -6304,10 +6305,10 @@ class LiftoffCompiler {
MaybeEmitNullCheck(decoder, array_reg.gp(), pinned, array.type); MaybeEmitNullCheck(decoder, array_reg.gp(), pinned, array.type);
LiftoffAssembler::VarState array_var(kRef, array_reg, 0); LiftoffAssembler::VarState array_var(kRef, array_reg, 0);
LiftoffRegister policy_reg = LiftoffRegister variant_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(policy_reg, static_cast<int32_t>(imm.value)); LoadSmi(variant_reg, static_cast<int32_t>(variant));
LiftoffAssembler::VarState policy_var(kSmiKind, policy_reg, 0); LiftoffAssembler::VarState variant_var(kSmiKind, variant_reg, 0);
CallRuntimeStub(WasmCode::kWasmStringNewWtf8Array, CallRuntimeStub(WasmCode::kWasmStringNewWtf8Array,
MakeSig::Returns(kRef).Params(kI32, kI32, kRef, kSmiKind), MakeSig::Returns(kRef).Params(kI32, kI32, kRef, kSmiKind),
@ -6315,7 +6316,7 @@ class LiftoffCompiler {
__ cache_state()->stack_state.end()[-2], // start __ cache_state()->stack_state.end()[-2], // start
__ cache_state()->stack_state.end()[-1], // end __ cache_state()->stack_state.end()[-1], // end
array_var, array_var,
policy_var, variant_var,
}, },
decoder->position()); decoder->position());
__ cache_state()->stack_state.pop_back(3); __ cache_state()->stack_state.pop_back(3);
@ -6395,20 +6396,20 @@ class LiftoffCompiler {
} }
void StringMeasureWtf8(FullDecoder* decoder, void StringMeasureWtf8(FullDecoder* decoder,
const Wtf8PolicyImmediate<validate>& imm, const unibrow::Utf8Variant variant, const Value& str,
const Value& str, Value* result) { Value* result) {
LiftoffRegList pinned; LiftoffRegList pinned;
LiftoffRegister string_reg = pinned.set(__ PopToRegister(pinned)); LiftoffRegister string_reg = pinned.set(__ PopToRegister(pinned));
MaybeEmitNullCheck(decoder, string_reg.gp(), pinned, str.type); MaybeEmitNullCheck(decoder, string_reg.gp(), pinned, str.type);
LiftoffAssembler::VarState string_var(kRef, string_reg, 0); LiftoffAssembler::VarState string_var(kRef, string_reg, 0);
WasmCode::RuntimeStubId stub_id; WasmCode::RuntimeStubId stub_id;
switch (imm.value) { switch (variant) {
case kWtf8PolicyReject: case unibrow::Utf8Variant::kUtf8:
stub_id = WasmCode::kWasmStringMeasureUtf8; stub_id = WasmCode::kWasmStringMeasureUtf8;
break; break;
case kWtf8PolicyAccept: case unibrow::Utf8Variant::kLossyUtf8:
case kWtf8PolicyReplace: case unibrow::Utf8Variant::kWtf8:
stub_id = WasmCode::kWasmStringMeasureWtf8; stub_id = WasmCode::kWasmStringMeasureWtf8;
break; break;
} }
@ -6436,8 +6437,9 @@ class LiftoffCompiler {
} }
void StringEncodeWtf8(FullDecoder* decoder, void StringEncodeWtf8(FullDecoder* decoder,
const EncodeWtf8Immediate<validate>& imm, const MemoryIndexImmediate<validate>& imm,
const Value& str, const Value& offset, Value* result) { const unibrow::Utf8Variant variant, const Value& str,
const Value& offset, Value* result) {
LiftoffRegList pinned; LiftoffRegList pinned;
LiftoffAssembler::VarState& offset_var = LiftoffAssembler::VarState& offset_var =
@ -6450,13 +6452,13 @@ class LiftoffCompiler {
LiftoffRegister memory_reg = LiftoffRegister memory_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(memory_reg, imm.memory.index); LoadSmi(memory_reg, imm.index);
LiftoffAssembler::VarState memory_var(kSmiKind, memory_reg, 0); LiftoffAssembler::VarState memory_var(kSmiKind, memory_reg, 0);
LiftoffRegister policy_reg = LiftoffRegister variant_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(policy_reg, static_cast<int32_t>(imm.policy.value)); LoadSmi(variant_reg, static_cast<int32_t>(variant));
LiftoffAssembler::VarState policy_var(kSmiKind, policy_reg, 0); LiftoffAssembler::VarState variant_var(kSmiKind, variant_reg, 0);
CallRuntimeStub( CallRuntimeStub(
WasmCode::kWasmStringEncodeWtf8, WasmCode::kWasmStringEncodeWtf8,
@ -6465,7 +6467,7 @@ class LiftoffCompiler {
string_var, string_var,
offset_var, offset_var,
memory_var, memory_var,
policy_var, variant_var,
}, },
decoder->position()); decoder->position());
__ DropValues(2); __ DropValues(2);
@ -6476,7 +6478,7 @@ class LiftoffCompiler {
} }
void StringEncodeWtf8Array(FullDecoder* decoder, void StringEncodeWtf8Array(FullDecoder* decoder,
const Wtf8PolicyImmediate<validate>& imm, const unibrow::Utf8Variant variant,
const Value& str, const Value& array, const Value& str, const Value& array,
const Value& start, Value* result) { const Value& start, Value* result) {
LiftoffRegList pinned; LiftoffRegList pinned;
@ -6494,10 +6496,10 @@ class LiftoffCompiler {
LiftoffAssembler::VarState& start_var = LiftoffAssembler::VarState& start_var =
__ cache_state()->stack_state.end()[-1]; __ cache_state()->stack_state.end()[-1];
LiftoffRegister policy_reg = LiftoffRegister variant_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(policy_reg, static_cast<int32_t>(imm.value)); LoadSmi(variant_reg, static_cast<int32_t>(variant));
LiftoffAssembler::VarState policy_var(kSmiKind, policy_reg, 0); LiftoffAssembler::VarState variant_var(kSmiKind, variant_reg, 0);
CallRuntimeStub(WasmCode::kWasmStringEncodeWtf8Array, CallRuntimeStub(WasmCode::kWasmStringEncodeWtf8Array,
MakeSig::Returns(kI32).Params(kRef, kRef, kI32, kSmiKind), MakeSig::Returns(kI32).Params(kRef, kRef, kI32, kSmiKind),
@ -6505,7 +6507,7 @@ class LiftoffCompiler {
string_var, string_var,
array_var, array_var,
start_var, start_var,
policy_var, variant_var,
}, },
decoder->position()); decoder->position());
__ DropValues(3); __ DropValues(3);
@ -6737,7 +6739,8 @@ class LiftoffCompiler {
} }
void StringViewWtf8Encode(FullDecoder* decoder, void StringViewWtf8Encode(FullDecoder* decoder,
const EncodeWtf8Immediate<validate>& imm, const MemoryIndexImmediate<validate>& imm,
const unibrow::Utf8Variant variant,
const Value& view, const Value& addr, const Value& view, const Value& addr,
const Value& pos, const Value& bytes, const Value& pos, const Value& bytes,
Value* next_pos, Value* bytes_written) { Value* next_pos, Value* bytes_written) {
@ -6757,13 +6760,13 @@ class LiftoffCompiler {
LiftoffRegister memory_reg = LiftoffRegister memory_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(memory_reg, imm.memory.index); LoadSmi(memory_reg, imm.index);
LiftoffAssembler::VarState memory_var(kSmiKind, memory_reg, 0); LiftoffAssembler::VarState memory_var(kSmiKind, memory_reg, 0);
LiftoffRegister policy_reg = LiftoffRegister variant_reg =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LoadSmi(policy_reg, static_cast<int32_t>(imm.policy.value)); LoadSmi(variant_reg, static_cast<int32_t>(variant));
LiftoffAssembler::VarState policy_var(kSmiKind, policy_reg, 0); LiftoffAssembler::VarState variant_var(kSmiKind, variant_reg, 0);
CallRuntimeStub(WasmCode::kWasmStringViewWtf8Encode, CallRuntimeStub(WasmCode::kWasmStringViewWtf8Encode,
MakeSig::Returns(kI32, kI32) MakeSig::Returns(kI32, kI32)
@ -6774,7 +6777,7 @@ class LiftoffCompiler {
bytes_var, bytes_var,
view_var, view_var,
memory_var, memory_var,
policy_var, variant_var,
}, },
decoder->position()); decoder->position());
__ DropValues(4); __ DropValues(4);

View File

@ -17,6 +17,7 @@
#include "src/base/small-vector.h" #include "src/base/small-vector.h"
#include "src/base/strings.h" #include "src/base/strings.h"
#include "src/base/v8-fallthrough.h" #include "src/base/v8-fallthrough.h"
#include "src/strings/unicode.h"
#include "src/utils/bit-vector.h" #include "src/utils/bit-vector.h"
#include "src/wasm/decoder.h" #include "src/wasm/decoder.h"
#include "src/wasm/function-body-decoder.h" #include "src/wasm/function-body-decoder.h"
@ -788,33 +789,6 @@ struct StringConstImmediate {
} }
}; };
template <Decoder::ValidateFlag validate>
struct Wtf8PolicyImmediate {
StringRefWtf8Policy value;
const uint32_t length = 1;
Wtf8PolicyImmediate(Decoder* decoder, const byte* pc) {
uint8_t u8 = decoder->read_u8<validate>(pc, "wtf8 policy");
if (!VALIDATE(u8 <= kLastWtf8Policy)) {
DecodeError<validate>(
decoder, pc, "expected wtf8 policy 0, 1, or 2, but found %u", u8);
}
value = static_cast<StringRefWtf8Policy>(u8);
}
};
template <Decoder::ValidateFlag validate>
struct EncodeWtf8Immediate {
MemoryIndexImmediate<validate> memory;
Wtf8PolicyImmediate<validate> policy;
uint32_t length;
EncodeWtf8Immediate(Decoder* decoder, const byte* pc)
: memory(decoder, pc),
policy(decoder, pc + memory.length),
length(memory.length + policy.length) {}
};
template <Decoder::ValidateFlag validate> template <Decoder::ValidateFlag validate>
struct PcForErrors { struct PcForErrors {
explicit PcForErrors(const byte* /* pc */) {} explicit PcForErrors(const byte* /* pc */) {}
@ -1112,20 +1086,22 @@ struct ControlBase : public PcForErrors<validate> {
uint32_t br_depth) \ uint32_t br_depth) \
F(BrOnNonArray, const Value& object, Value* value_on_fallthrough, \ F(BrOnNonArray, const Value& object, Value* value_on_fallthrough, \
uint32_t br_depth) \ uint32_t br_depth) \
F(StringNewWtf8, const EncodeWtf8Immediate<validate>& imm, \ F(StringNewWtf8, const MemoryIndexImmediate<validate>& memory, \
const Value& offset, const Value& size, Value* result) \ const unibrow::Utf8Variant variant, const Value& offset, \
F(StringNewWtf8Array, const Wtf8PolicyImmediate<validate>& imm, \ const Value& size, Value* result) \
F(StringNewWtf8Array, const unibrow::Utf8Variant variant, \
const Value& array, const Value& start, const Value& end, Value* result) \ const Value& array, const Value& start, const Value& end, Value* result) \
F(StringNewWtf16, const MemoryIndexImmediate<validate>& imm, \ F(StringNewWtf16, const MemoryIndexImmediate<validate>& memory, \
const Value& offset, const Value& size, Value* result) \ const Value& offset, const Value& size, Value* result) \
F(StringNewWtf16Array, const Value& array, const Value& start, \ F(StringNewWtf16Array, const Value& array, const Value& start, \
const Value& end, Value* result) \ const Value& end, Value* result) \
F(StringMeasureWtf8, const Wtf8PolicyImmediate<validate>& imm, \ F(StringMeasureWtf8, const unibrow::Utf8Variant variant, const Value& str, \
const Value& str, Value* result) \ Value* result) \
F(StringMeasureWtf16, const Value& str, Value* result) \ F(StringMeasureWtf16, const Value& str, Value* result) \
F(StringEncodeWtf8, const EncodeWtf8Immediate<validate>& memory, \ F(StringEncodeWtf8, const MemoryIndexImmediate<validate>& memory, \
const Value& str, const Value& address, Value* result) \ const unibrow::Utf8Variant variant, const Value& str, \
F(StringEncodeWtf8Array, const Wtf8PolicyImmediate<validate>& imm, \ const Value& address, Value* result) \
F(StringEncodeWtf8Array, const unibrow::Utf8Variant variant, \
const Value& str, const Value& array, const Value& start, Value* result) \ const Value& str, const Value& array, const Value& start, Value* result) \
F(StringEncodeWtf16, const MemoryIndexImmediate<validate>& memory, \ F(StringEncodeWtf16, const MemoryIndexImmediate<validate>& memory, \
const Value& str, const Value& address, Value* result) \ const Value& str, const Value& address, Value* result) \
@ -1137,9 +1113,10 @@ struct ControlBase : public PcForErrors<validate> {
F(StringAsWtf8, const Value& str, Value* result) \ F(StringAsWtf8, const Value& str, Value* result) \
F(StringViewWtf8Advance, const Value& view, const Value& pos, \ F(StringViewWtf8Advance, const Value& view, const Value& pos, \
const Value& bytes, Value* result) \ const Value& bytes, Value* result) \
F(StringViewWtf8Encode, const EncodeWtf8Immediate<validate>& memory, \ F(StringViewWtf8Encode, const MemoryIndexImmediate<validate>& memory, \
const Value& view, const Value& addr, const Value& pos, \ const unibrow::Utf8Variant variant, const Value& view, const Value& addr, \
const Value& bytes, Value* next_pos, Value* bytes_written) \ const Value& pos, const Value& bytes, Value* next_pos, \
Value* bytes_written) \
F(StringViewWtf8Slice, const Value& view, const Value& start, \ F(StringViewWtf8Slice, const Value& view, const Value& start, \
const Value& end, Value* result) \ const Value& end, Value* result) \
F(StringAsWtf16, const Value& str, Value* result) \ F(StringAsWtf16, const Value& str, Value* result) \
@ -1577,10 +1554,6 @@ class WasmDecoder : public Decoder {
return true; return true;
} }
bool Validate(const byte* pc, EncodeWtf8Immediate<validate>& imm) {
return Validate(pc, imm.memory);
}
bool Validate(const byte* pc, StringConstImmediate<validate>& imm) { bool Validate(const byte* pc, StringConstImmediate<validate>& imm) {
if (!VALIDATE(imm.index < module_->stringref_literals.size())) { if (!VALIDATE(imm.index < module_->stringref_literals.size())) {
DecodeError(pc, "Invalid string literal index: %u", imm.index); DecodeError(pc, "Invalid string literal index: %u", imm.index);
@ -1667,7 +1640,6 @@ class WasmDecoder : public Decoder {
void SimdLane(SimdLaneImmediate<validate>& imm) {} void SimdLane(SimdLaneImmediate<validate>& imm) {}
void Field(FieldImmediate<validate>& imm) {} void Field(FieldImmediate<validate>& imm) {}
void Length(IndexImmediate<validate>& imm) {} void Length(IndexImmediate<validate>& imm) {}
void Wtf8Policy(Wtf8PolicyImmediate<validate>& imm) {}
void TagIndex(TagIndexImmediate<validate>& imm) {} void TagIndex(TagIndexImmediate<validate>& imm) {}
void FunctionIndex(IndexImmediate<validate>& imm) {} void FunctionIndex(IndexImmediate<validate>& imm) {}
@ -2083,6 +2055,15 @@ class WasmDecoder : public Decoder {
case kExprExternExternalize: case kExprExternExternalize:
case kExprArrayLen: case kExprArrayLen:
return length; return length;
case kExprStringNewUtf8:
case kExprStringNewLossyUtf8:
case kExprStringNewWtf8:
case kExprStringEncodeUtf8:
case kExprStringEncodeLossyUtf8:
case kExprStringEncodeWtf8:
case kExprStringViewWtf8EncodeUtf8:
case kExprStringViewWtf8EncodeLossyUtf8:
case kExprStringViewWtf8EncodeWtf8:
case kExprStringNewWtf16: case kExprStringNewWtf16:
case kExprStringEncodeWtf16: case kExprStringEncodeWtf16:
case kExprStringViewWtf16Encode: { case kExprStringViewWtf16Encode: {
@ -2090,26 +2071,19 @@ class WasmDecoder : public Decoder {
if (io) io->MemoryIndex(imm); if (io) io->MemoryIndex(imm);
return length + imm.length; return length + imm.length;
} }
case kExprStringNewWtf8:
case kExprStringEncodeWtf8:
case kExprStringViewWtf8Encode: {
EncodeWtf8Immediate<validate> imm(decoder, pc + length);
if (io) io->MemoryIndex(imm.memory);
if (io) io->Wtf8Policy(imm.policy);
return length + imm.length;
}
case kExprStringConst: { case kExprStringConst: {
StringConstImmediate<validate> imm(decoder, pc + length); StringConstImmediate<validate> imm(decoder, pc + length);
if (io) io->StringConst(imm); if (io) io->StringConst(imm);
return length + imm.length; return length + imm.length;
} }
case kExprStringMeasureUtf8:
case kExprStringMeasureWtf8:
case kExprStringNewUtf8Array:
case kExprStringNewLossyUtf8Array:
case kExprStringNewWtf8Array: case kExprStringNewWtf8Array:
case kExprStringEncodeUtf8Array:
case kExprStringEncodeLossyUtf8Array:
case kExprStringEncodeWtf8Array: case kExprStringEncodeWtf8Array:
case kExprStringMeasureWtf8: {
Wtf8PolicyImmediate<validate> imm(decoder, pc + length);
if (io) io->Wtf8Policy(imm);
return length + imm.length;
}
case kExprStringMeasureWtf16: case kExprStringMeasureWtf16:
case kExprStringConcat: case kExprStringConcat:
case kExprStringEq: case kExprStringEq:
@ -2314,6 +2288,7 @@ class WasmDecoder : public Decoder {
} }
case kExprStringConst: case kExprStringConst:
return { 0, 1 }; return { 0, 1 };
case kExprStringMeasureUtf8:
case kExprStringMeasureWtf8: case kExprStringMeasureWtf8:
case kExprStringMeasureWtf16: case kExprStringMeasureWtf16:
case kExprStringIsUSVSequence: case kExprStringIsUSVSequence:
@ -2323,6 +2298,8 @@ class WasmDecoder : public Decoder {
case kExprStringViewWtf16Length: case kExprStringViewWtf16Length:
case kExprStringViewIterNext: case kExprStringViewIterNext:
return { 1, 1 }; return { 1, 1 };
case kExprStringNewUtf8:
case kExprStringNewLossyUtf8:
case kExprStringNewWtf8: case kExprStringNewWtf8:
case kExprStringNewWtf16: case kExprStringNewWtf16:
case kExprStringConcat: case kExprStringConcat:
@ -2332,9 +2309,15 @@ class WasmDecoder : public Decoder {
case kExprStringViewIterRewind: case kExprStringViewIterRewind:
case kExprStringViewIterSlice: case kExprStringViewIterSlice:
return { 2, 1 }; return { 2, 1 };
case kExprStringNewUtf8Array:
case kExprStringNewLossyUtf8Array:
case kExprStringNewWtf8Array: case kExprStringNewWtf8Array:
case kExprStringNewWtf16Array: case kExprStringNewWtf16Array:
case kExprStringEncodeUtf8:
case kExprStringEncodeLossyUtf8:
case kExprStringEncodeWtf8: case kExprStringEncodeWtf8:
case kExprStringEncodeUtf8Array:
case kExprStringEncodeLossyUtf8Array:
case kExprStringEncodeWtf8Array: case kExprStringEncodeWtf8Array:
case kExprStringEncodeWtf16: case kExprStringEncodeWtf16:
case kExprStringEncodeWtf16Array: case kExprStringEncodeWtf16Array:
@ -2344,7 +2327,9 @@ class WasmDecoder : public Decoder {
return { 3, 1 }; return { 3, 1 };
case kExprStringViewWtf16Encode: case kExprStringViewWtf16Encode:
return { 4, 1 }; return { 4, 1 };
case kExprStringViewWtf8Encode: case kExprStringViewWtf8EncodeUtf8:
case kExprStringViewWtf8EncodeLossyUtf8:
case kExprStringViewWtf8EncodeWtf8:
return { 4, 2 }; return { 4, 2 };
default: default:
UNREACHABLE(); UNREACHABLE();
@ -3692,7 +3677,7 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
WasmOpcode full_opcode = this->template read_prefixed_opcode<validate>( WasmOpcode full_opcode = this->template read_prefixed_opcode<validate>(
this->pc_, &opcode_length, "gc index"); this->pc_, &opcode_length, "gc index");
trace_msg->AppendOpcode(full_opcode); trace_msg->AppendOpcode(full_opcode);
if (full_opcode >= kExprStringNewWtf8) { if (full_opcode >= kExprStringNewUtf8) {
CHECK_PROTOTYPE_OPCODE(stringref); CHECK_PROTOTYPE_OPCODE(stringref);
return DecodeStringRefOpcode(full_opcode, opcode_length); return DecodeStringRefOpcode(full_opcode, opcode_length);
} else { } else {
@ -5146,22 +5131,108 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
enum class WasmArrayAccess { kRead, kWrite }; enum class WasmArrayAccess { kRead, kWrite };
int DecodeStringNewWtf8(unibrow::Utf8Variant variant,
uint32_t opcode_length) {
NON_CONST_ONLY
MemoryIndexImmediate<validate> memory(this, this->pc_ + opcode_length);
if (!this->Validate(this->pc_ + opcode_length, memory)) return 0;
ValueType addr_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32;
Value offset = Peek(1, 0, addr_type);
Value size = Peek(0, 1, kWasmI32);
Value result = CreateValue(ValueType::Ref(HeapType::kString));
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringNewWtf8, memory, variant, offset,
size, &result);
Drop(2);
Push(result);
return opcode_length + memory.length;
}
int DecodeStringMeasureWtf8(unibrow::Utf8Variant variant,
uint32_t opcode_length) {
NON_CONST_ONLY
Value str = Peek(0, 0, kWasmStringRef);
Value result = CreateValue(kWasmI32);
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringMeasureWtf8, variant, str,
&result);
Drop(str);
Push(result);
return opcode_length;
}
int DecodeStringEncodeWtf8(unibrow::Utf8Variant variant,
uint32_t opcode_length) {
NON_CONST_ONLY
MemoryIndexImmediate<validate> memory(this, this->pc_ + opcode_length);
if (!this->Validate(this->pc_ + opcode_length, memory)) return 0;
ValueType addr_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32;
Value str = Peek(1, 0, kWasmStringRef);
Value addr = Peek(0, 1, addr_type);
Value result = CreateValue(kWasmI32);
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringEncodeWtf8, memory, variant, str,
addr, &result);
Drop(2);
Push(result);
return opcode_length + memory.length;
}
int DecodeStringViewWtf8Encode(unibrow::Utf8Variant variant,
uint32_t opcode_length) {
NON_CONST_ONLY
MemoryIndexImmediate<validate> memory(this, this->pc_ + opcode_length);
if (!this->Validate(this->pc_ + opcode_length, memory)) return 0;
ValueType addr_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32;
Value view = Peek(3, 0, kWasmStringViewWtf8);
Value addr = Peek(2, 1, addr_type);
Value pos = Peek(1, 2, kWasmI32);
Value bytes = Peek(0, 3, kWasmI32);
Value next_pos = CreateValue(kWasmI32);
Value bytes_out = CreateValue(kWasmI32);
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringViewWtf8Encode, memory, variant,
view, addr, pos, bytes, &next_pos,
&bytes_out);
Drop(4);
Push(next_pos);
Push(bytes_out);
return opcode_length + memory.length;
}
int DecodeStringNewWtf8Array(unibrow::Utf8Variant variant,
uint32_t opcode_length) {
NON_CONST_ONLY
Value array = PeekPackedArray(2, 0, kWasmI8, WasmArrayAccess::kRead);
Value start = Peek(1, 1, kWasmI32);
Value end = Peek(0, 2, kWasmI32);
Value result = CreateValue(ValueType::Ref(HeapType::kString));
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringNewWtf8Array, variant, array,
start, end, &result);
Drop(3);
Push(result);
return opcode_length;
}
int DecodeStringEncodeWtf8Array(unibrow::Utf8Variant variant,
uint32_t opcode_length) {
NON_CONST_ONLY
Value str = Peek(2, 0, kWasmStringRef);
Value array = PeekPackedArray(1, 1, kWasmI8, WasmArrayAccess::kWrite);
Value start = Peek(0, 2, kWasmI32);
Value result = CreateValue(kWasmI32);
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringEncodeWtf8Array, variant, str,
array, start, &result);
Drop(3);
Push(result);
return opcode_length;
}
int DecodeStringRefOpcode(WasmOpcode opcode, uint32_t opcode_length) { int DecodeStringRefOpcode(WasmOpcode opcode, uint32_t opcode_length) {
switch (opcode) { switch (opcode) {
case kExprStringNewWtf8: { case kExprStringNewUtf8:
NON_CONST_ONLY return DecodeStringNewWtf8(unibrow::Utf8Variant::kUtf8, opcode_length);
EncodeWtf8Immediate<validate> imm(this, this->pc_ + opcode_length); case kExprStringNewLossyUtf8:
if (!this->Validate(this->pc_ + opcode_length, imm)) return 0; return DecodeStringNewWtf8(unibrow::Utf8Variant::kLossyUtf8,
ValueType addr_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32; opcode_length);
Value offset = Peek(1, 0, addr_type); case kExprStringNewWtf8:
Value size = Peek(0, 1, kWasmI32); return DecodeStringNewWtf8(unibrow::Utf8Variant::kWtf8, opcode_length);
Value result = CreateValue(ValueType::Ref(HeapType::kString));
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringNewWtf8, imm, offset, size,
&result);
Drop(2);
Push(result);
return opcode_length + imm.length;
}
case kExprStringNewWtf16: { case kExprStringNewWtf16: {
NON_CONST_ONLY NON_CONST_ONLY
MemoryIndexImmediate<validate> imm(this, this->pc_ + opcode_length); MemoryIndexImmediate<validate> imm(this, this->pc_ + opcode_length);
@ -5184,17 +5255,12 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
Push(result); Push(result);
return opcode_length + imm.length; return opcode_length + imm.length;
} }
case kExprStringMeasureWtf8: { case kExprStringMeasureUtf8:
NON_CONST_ONLY return DecodeStringMeasureWtf8(unibrow::Utf8Variant::kUtf8,
Wtf8PolicyImmediate<validate> imm(this, this->pc_ + opcode_length); opcode_length);
Value str = Peek(0, 0, kWasmStringRef); case kExprStringMeasureWtf8:
Value result = CreateValue(kWasmI32); return DecodeStringMeasureWtf8(unibrow::Utf8Variant::kWtf8,
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringMeasureWtf8, imm, str, opcode_length);
&result);
Drop(str);
Push(result);
return opcode_length + imm.length;
}
case kExprStringMeasureWtf16: { case kExprStringMeasureWtf16: {
NON_CONST_ONLY NON_CONST_ONLY
Value str = Peek(0, 0, kWasmStringRef); Value str = Peek(0, 0, kWasmStringRef);
@ -5204,20 +5270,15 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
Push(result); Push(result);
return opcode_length; return opcode_length;
} }
case kExprStringEncodeWtf8: { case kExprStringEncodeUtf8:
NON_CONST_ONLY return DecodeStringEncodeWtf8(unibrow::Utf8Variant::kUtf8,
EncodeWtf8Immediate<validate> imm(this, this->pc_ + opcode_length); opcode_length);
if (!this->Validate(this->pc_ + opcode_length, imm)) return 0; case kExprStringEncodeLossyUtf8:
ValueType addr_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32; return DecodeStringEncodeWtf8(unibrow::Utf8Variant::kLossyUtf8,
Value str = Peek(1, 0, kWasmStringRef); opcode_length);
Value addr = Peek(0, 1, addr_type); case kExprStringEncodeWtf8:
Value result = CreateValue(kWasmI32); return DecodeStringEncodeWtf8(unibrow::Utf8Variant::kWtf8,
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringEncodeWtf8, imm, str, addr, opcode_length);
&result);
Drop(2);
Push(result);
return opcode_length + imm.length;
}
case kExprStringEncodeWtf16: { case kExprStringEncodeWtf16: {
NON_CONST_ONLY NON_CONST_ONLY
MemoryIndexImmediate<validate> imm(this, this->pc_ + opcode_length); MemoryIndexImmediate<validate> imm(this, this->pc_ + opcode_length);
@ -5282,25 +5343,15 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
Push(result); Push(result);
return opcode_length; return opcode_length;
} }
case kExprStringViewWtf8Encode: { case kExprStringViewWtf8EncodeUtf8:
NON_CONST_ONLY return DecodeStringViewWtf8Encode(unibrow::Utf8Variant::kUtf8,
EncodeWtf8Immediate<validate> imm(this, this->pc_ + opcode_length); opcode_length);
if (!this->Validate(this->pc_ + opcode_length, imm)) return 0; case kExprStringViewWtf8EncodeLossyUtf8:
ValueType addr_type = this->module_->is_memory64 ? kWasmI64 : kWasmI32; return DecodeStringViewWtf8Encode(unibrow::Utf8Variant::kLossyUtf8,
Value view = Peek(3, 0, kWasmStringViewWtf8); opcode_length);
Value addr = Peek(2, 1, addr_type); case kExprStringViewWtf8EncodeWtf8:
Value pos = Peek(1, 2, kWasmI32); return DecodeStringViewWtf8Encode(unibrow::Utf8Variant::kWtf8,
Value bytes = Peek(0, 3, kWasmI32); opcode_length);
Value next_pos = CreateValue(kWasmI32);
Value bytes_out = CreateValue(kWasmI32);
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringViewWtf8Encode, imm, view,
addr, pos, bytes, &next_pos,
&bytes_out);
Drop(4);
Push(next_pos);
Push(bytes_out);
return opcode_length + imm.length;
}
case kExprStringViewWtf8Slice: { case kExprStringViewWtf8Slice: {
NON_CONST_ONLY NON_CONST_ONLY
Value view = Peek(2, 0, kWasmStringViewWtf8); Value view = Peek(2, 0, kWasmStringViewWtf8);
@ -5370,7 +5421,6 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
Push(result); Push(result);
return opcode_length; return opcode_length;
} }
case kExprStringAsIter: { case kExprStringAsIter: {
NON_CONST_ONLY NON_CONST_ONLY
Value str = Peek(0, 0, kWasmStringRef); Value str = Peek(0, 0, kWasmStringRef);
@ -5422,20 +5472,18 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
Push(result); Push(result);
return opcode_length; return opcode_length;
} }
case kExprStringNewWtf8Array: { case kExprStringNewUtf8Array:
CHECK_PROTOTYPE_OPCODE(gc); CHECK_PROTOTYPE_OPCODE(gc);
NON_CONST_ONLY return DecodeStringNewWtf8Array(unibrow::Utf8Variant::kUtf8,
Wtf8PolicyImmediate<validate> imm(this, this->pc_ + opcode_length); opcode_length);
Value array = PeekPackedArray(2, 0, kWasmI8, WasmArrayAccess::kRead); case kExprStringNewLossyUtf8Array:
Value start = Peek(1, 1, kWasmI32); CHECK_PROTOTYPE_OPCODE(gc);
Value end = Peek(0, 2, kWasmI32); return DecodeStringNewWtf8Array(unibrow::Utf8Variant::kLossyUtf8,
Value result = CreateValue(ValueType::Ref(HeapType::kString)); opcode_length);
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringNewWtf8Array, imm, array, case kExprStringNewWtf8Array:
start, end, &result); CHECK_PROTOTYPE_OPCODE(gc);
Drop(3); return DecodeStringNewWtf8Array(unibrow::Utf8Variant::kWtf8,
Push(result); opcode_length);
return opcode_length + imm.length;
}
case kExprStringNewWtf16Array: { case kExprStringNewWtf16Array: {
CHECK_PROTOTYPE_OPCODE(gc); CHECK_PROTOTYPE_OPCODE(gc);
NON_CONST_ONLY NON_CONST_ONLY
@ -5449,20 +5497,18 @@ class WasmFullDecoder : public WasmDecoder<validate, decoding_mode> {
Push(result); Push(result);
return opcode_length; return opcode_length;
} }
case kExprStringEncodeWtf8Array: { case kExprStringEncodeUtf8Array:
CHECK_PROTOTYPE_OPCODE(gc); CHECK_PROTOTYPE_OPCODE(gc);
NON_CONST_ONLY return DecodeStringEncodeWtf8Array(unibrow::Utf8Variant::kUtf8,
Wtf8PolicyImmediate<validate> imm(this, this->pc_ + opcode_length); opcode_length);
Value str = Peek(2, 0, kWasmStringRef); case kExprStringEncodeLossyUtf8Array:
Value array = PeekPackedArray(1, 1, kWasmI8, WasmArrayAccess::kWrite); CHECK_PROTOTYPE_OPCODE(gc);
Value start = Peek(0, 2, kWasmI32); return DecodeStringEncodeWtf8Array(unibrow::Utf8Variant::kLossyUtf8,
Value result = CreateValue(kWasmI32); opcode_length);
CALL_INTERFACE_IF_OK_AND_REACHABLE(StringEncodeWtf8Array, imm, str, case kExprStringEncodeWtf8Array:
array, start, &result); CHECK_PROTOTYPE_OPCODE(gc);
Drop(3); return DecodeStringEncodeWtf8Array(unibrow::Utf8Variant::kWtf8,
Push(result); opcode_length);
return opcode_length + imm.length;
}
case kExprStringEncodeWtf16Array: { case kExprStringEncodeWtf16Array: {
CHECK_PROTOTYPE_OPCODE(gc); CHECK_PROTOTYPE_OPCODE(gc);
NON_CONST_ONLY NON_CONST_ONLY

View File

@ -1371,18 +1371,18 @@ class WasmGraphBuildingInterface {
} }
void StringNewWtf8(FullDecoder* decoder, void StringNewWtf8(FullDecoder* decoder,
const EncodeWtf8Immediate<validate>& imm, const MemoryIndexImmediate<validate>& memory,
const Value& offset, const Value& size, Value* result) { const unibrow::Utf8Variant variant, const Value& offset,
SetAndTypeNode(result, const Value& size, Value* result) {
builder_->StringNewWtf8(imm.memory.index, imm.policy.value, SetAndTypeNode(result, builder_->StringNewWtf8(memory.index, variant,
offset.node, size.node)); offset.node, size.node));
} }
void StringNewWtf8Array(FullDecoder* decoder, void StringNewWtf8Array(FullDecoder* decoder,
const Wtf8PolicyImmediate<validate>& imm, const unibrow::Utf8Variant variant,
const Value& array, const Value& start, const Value& array, const Value& start,
const Value& end, Value* result) { const Value& end, Value* result) {
SetAndTypeNode(result, builder_->StringNewWtf8Array(imm.value, array.node, SetAndTypeNode(result, builder_->StringNewWtf8Array(variant, array.node,
start.node, end.node)); start.node, end.node));
} }
@ -1406,15 +1406,15 @@ class WasmGraphBuildingInterface {
} }
void StringMeasureWtf8(FullDecoder* decoder, void StringMeasureWtf8(FullDecoder* decoder,
const Wtf8PolicyImmediate<validate>& imm, const unibrow::Utf8Variant variant, const Value& str,
const Value& str, Value* result) { Value* result) {
switch (imm.value) { switch (variant) {
case kWtf8PolicyReject: case unibrow::Utf8Variant::kUtf8:
result->node = builder_->StringMeasureUtf8( result->node = builder_->StringMeasureUtf8(
str.node, NullCheckFor(str.type), decoder->position()); str.node, NullCheckFor(str.type), decoder->position());
break; break;
case kWtf8PolicyAccept: case unibrow::Utf8Variant::kLossyUtf8:
case kWtf8PolicyReplace: case unibrow::Utf8Variant::kWtf8:
result->node = builder_->StringMeasureWtf8( result->node = builder_->StringMeasureWtf8(
str.node, NullCheckFor(str.type), decoder->position()); str.node, NullCheckFor(str.type), decoder->position());
break; break;
@ -1428,19 +1428,20 @@ class WasmGraphBuildingInterface {
} }
void StringEncodeWtf8(FullDecoder* decoder, void StringEncodeWtf8(FullDecoder* decoder,
const EncodeWtf8Immediate<validate>& imm, const MemoryIndexImmediate<validate>& memory,
const Value& str, const Value& offset, Value* result) { const unibrow::Utf8Variant variant, const Value& str,
result->node = builder_->StringEncodeWtf8( const Value& offset, Value* result) {
imm.memory.index, imm.policy.value, str.node, NullCheckFor(str.type), result->node = builder_->StringEncodeWtf8(memory.index, variant, str.node,
offset.node, decoder->position()); NullCheckFor(str.type),
offset.node, decoder->position());
} }
void StringEncodeWtf8Array(FullDecoder* decoder, void StringEncodeWtf8Array(FullDecoder* decoder,
const Wtf8PolicyImmediate<validate>& imm, const unibrow::Utf8Variant variant,
const Value& str, const Value& array, const Value& str, const Value& array,
const Value& start, Value* result) { const Value& start, Value* result) {
result->node = builder_->StringEncodeWtf8Array( result->node = builder_->StringEncodeWtf8Array(
imm.value, str.node, NullCheckFor(str.type), array.node, variant, str.node, NullCheckFor(str.type), array.node,
NullCheckFor(array.type), start.node, decoder->position()); NullCheckFor(array.type), start.node, decoder->position());
} }
@ -1495,14 +1496,15 @@ class WasmGraphBuildingInterface {
} }
void StringViewWtf8Encode(FullDecoder* decoder, void StringViewWtf8Encode(FullDecoder* decoder,
const EncodeWtf8Immediate<validate>& imm, const MemoryIndexImmediate<validate>& memory,
const unibrow::Utf8Variant variant,
const Value& view, const Value& addr, const Value& view, const Value& addr,
const Value& pos, const Value& bytes, const Value& pos, const Value& bytes,
Value* next_pos, Value* bytes_written) { Value* next_pos, Value* bytes_written) {
builder_->StringViewWtf8Encode( builder_->StringViewWtf8Encode(memory.index, variant, view.node,
imm.memory.index, imm.policy.value, view.node, NullCheckFor(view.type), NullCheckFor(view.type), addr.node, pos.node,
addr.node, pos.node, bytes.node, &next_pos->node, &bytes_written->node, bytes.node, &next_pos->node,
decoder->position()); &bytes_written->node, decoder->position());
} }
void StringViewWtf8Slice(FullDecoder* decoder, const Value& view, void StringViewWtf8Slice(FullDecoder* decoder, const Value& view,

View File

@ -147,16 +147,6 @@ enum NameSectionKindCode : uint8_t {
kTagCode = 11, kTagCode = 11,
}; };
// What to do when treating a stringref as WTF-8 and we see an isolated
// surrogate.
enum StringRefWtf8Policy : uint8_t {
kWtf8PolicyReject = 0, // Strict UTF-8; no isolated surrogates allowed.
kWtf8PolicyAccept = 1, // Follow WTF-8 encoding of isolates surrogates.
kWtf8PolicyReplace = 2, // Replace isolated surrogates and decoding errors
// with U+FFFD.
kLastWtf8Policy = kWtf8PolicyReplace
};
constexpr size_t kWasmPageSize = 0x10000; constexpr size_t kWasmPageSize = 0x10000;
constexpr uint32_t kWasmPageSizeLog2 = 16; constexpr uint32_t kWasmPageSizeLog2 = 16;
static_assert(kWasmPageSize == size_t{1} << kWasmPageSizeLog2, "consistency"); static_assert(kWasmPageSize == size_t{1} << kWasmPageSizeLog2, "consistency");

View File

@ -375,13 +375,6 @@ class ImmediatesPrinter {
out_ << " " << imm.index; // -- out_ << " " << imm.index; // --
} }
void Wtf8Policy(Wtf8PolicyImmediate<validate>& imm) {
out_ << (imm.value == kWtf8PolicyReject ? " reject"
: imm.value == kWtf8PolicyAccept ? " accept"
: imm.value == kWtf8PolicyReplace ? " replace"
: " unknown-policy");
}
void TagIndex(TagIndexImmediate<validate>& imm) { void TagIndex(TagIndexImmediate<validate>& imm) {
out_ << " "; out_ << " ";
names()->PrintTagName(out_, imm.index); names()->PrintTagName(out_, imm.index);

View File

@ -726,20 +726,28 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(BrOnNonArray, 0xfb67, _, "br_on_non_array") \ V(BrOnNonArray, 0xfb67, _, "br_on_non_array") \
V(ExternInternalize, 0xfb70, _, "extern.internalize") \ V(ExternInternalize, 0xfb70, _, "extern.internalize") \
V(ExternExternalize, 0xfb71, _, "extern.externalize") \ V(ExternExternalize, 0xfb71, _, "extern.externalize") \
V(StringNewWtf8, 0xfb80, _, "string.new_wtf8") \ V(StringNewUtf8, 0xfb80, _, "string.new_utf8") \
V(StringNewWtf16, 0xfb81, _, "string.new_wtf16") \ V(StringNewWtf16, 0xfb81, _, "string.new_wtf16") \
V(StringConst, 0xfb82, _, "string.const") \ V(StringConst, 0xfb82, _, "string.const") \
V(StringMeasureUtf8, 0xfb83, _, "string.measure_utf8") \
V(StringMeasureWtf8, 0xfb84, _, "string.measure_wtf8") \ V(StringMeasureWtf8, 0xfb84, _, "string.measure_wtf8") \
V(StringMeasureWtf16, 0xfb85, _, "string.measure_wtf16") \ V(StringMeasureWtf16, 0xfb85, _, "string.measure_wtf16") \
V(StringEncodeWtf8, 0xfb86, _, "string.encode_wtf8") \ V(StringEncodeUtf8, 0xfb86, _, "string.encode_utf8") \
V(StringEncodeWtf16, 0xfb87, _, "string.encode_wtf16") \ V(StringEncodeWtf16, 0xfb87, _, "string.encode_wtf16") \
V(StringConcat, 0xfb88, _, "string.concat") \ V(StringConcat, 0xfb88, _, "string.concat") \
V(StringEq, 0xfb89, _, "string.eq") \ V(StringEq, 0xfb89, _, "string.eq") \
V(StringIsUSVSequence, 0xfb8a, _, "string.is_usv_sequence") \ V(StringIsUSVSequence, 0xfb8a, _, "string.is_usv_sequence") \
V(StringNewLossyUtf8, 0xfb8b, _, "string.new_lossy_utf8") \
V(StringNewWtf8, 0xfb8c, _, "string.new_wtf8") \
V(StringEncodeLossyUtf8, 0xfb8d, _, "string.encode_lossy_utf8") \
V(StringEncodeWtf8, 0xfb8e, _, "string.encode_wtf8") \
V(StringAsWtf8, 0xfb90, _, "string.as_wtf8") \ V(StringAsWtf8, 0xfb90, _, "string.as_wtf8") \
V(StringViewWtf8Advance, 0xfb91, _, "stringview_wtf8.advance") \ V(StringViewWtf8Advance, 0xfb91, _, "stringview_wtf8.advance") \
V(StringViewWtf8Encode, 0xfb92, _, "stringview_wtf8.encode") \ V(StringViewWtf8EncodeUtf8, 0xfb92, _, "stringview_wtf8.encode_utf8") \
V(StringViewWtf8Slice, 0xfb93, _, "stringview_wtf8.slice") \ V(StringViewWtf8Slice, 0xfb93, _, "stringview_wtf8.slice") \
V(StringViewWtf8EncodeLossyUtf8, 0xfb94, _, \
"stringview_wtf8.encode_lossy_utf8") \
V(StringViewWtf8EncodeWtf8, 0xfb95, _, "stringview_wtf8.encode_wtf8") \
V(StringAsWtf16, 0xfb98, _, "string.as_wtf16") \ V(StringAsWtf16, 0xfb98, _, "string.as_wtf16") \
V(StringViewWtf16Length, 0xfb99, _, "stringview_wtf16.length") \ V(StringViewWtf16Length, 0xfb99, _, "stringview_wtf16.length") \
V(StringViewWtf16GetCodeUnit, 0xfb9a, _, "stringview_wtf16.get_codeunit") \ V(StringViewWtf16GetCodeUnit, 0xfb9a, _, "stringview_wtf16.get_codeunit") \
@ -750,10 +758,14 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(StringViewIterAdvance, 0xfba2, _, "stringview_iter.advance") \ V(StringViewIterAdvance, 0xfba2, _, "stringview_iter.advance") \
V(StringViewIterRewind, 0xfba3, _, "stringview_iter.rewind") \ V(StringViewIterRewind, 0xfba3, _, "stringview_iter.rewind") \
V(StringViewIterSlice, 0xfba4, _, "stringview_iter.slice") \ V(StringViewIterSlice, 0xfba4, _, "stringview_iter.slice") \
V(StringNewWtf8Array, 0xfbb0, _, "string.new_wtf8_array") \ V(StringNewUtf8Array, 0xfbb0, _, "string.new_utf8_array") \
V(StringNewWtf16Array, 0xfbb1, _, "string.new_wtf16_array") \ V(StringNewWtf16Array, 0xfbb1, _, "string.new_wtf16_array") \
V(StringEncodeWtf8Array, 0xfbb2, _, "string.encode_wtf8_array") \ V(StringEncodeUtf8Array, 0xfbb2, _, "string.encode_utf8_array") \
V(StringEncodeWtf16Array, 0xfbb3, _, "string.encode_wtf16_array") V(StringEncodeWtf16Array, 0xfbb3, _, "string.encode_wtf16_array") \
V(StringNewLossyUtf8Array, 0xfbb4, _, "string.new_lossy_utf8_array") \
V(StringNewWtf8Array, 0xfbb5, _, "string.new_wtf8_array") \
V(StringEncodeLossyUtf8Array, 0xfbb6, _, "string.encode_lossy_utf8_array") \
V(StringEncodeWtf8Array, 0xfbb7, _, "string.encode_wtf8_array")
// All opcodes. // All opcodes.
#define FOREACH_OPCODE(V) \ #define FOREACH_OPCODE(V) \

View File

@ -114,15 +114,16 @@ function makeWtf8TestDataSegment() {
kGCPrefix, kExprArrayNewData, i8_array, data_index kGCPrefix, kExprArrayNewData, i8_array, data_index
]).index; ]).index;
for (let [policy, name] of [[kWtf8PolicyAccept, "new_wtf8"], for (let [instr, name] of
[kWtf8PolicyReject, "new_utf8"], [[kExprStringNewWtf8Array, "new_wtf8"],
[kWtf8PolicyReplace, "new_utf8_sloppy"]]) { [kExprStringNewUtf8Array, "new_utf8"],
[kExprStringNewLossyUtf8Array, "new_utf8_sloppy"]]) {
builder.addFunction(name, kSig_w_ii) builder.addFunction(name, kSig_w_ii)
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprCallFunction, make_i8_array, kExprCallFunction, make_i8_array,
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8Array), policy ...GCInstr(instr)
]); ]);
} }
@ -133,7 +134,7 @@ function makeWtf8TestDataSegment() {
...wasmI32Const("ascii".length), ...wasmI32Const("ascii".length),
kGCPrefix, kExprArrayNewData, i8_array, ascii_data_index, kGCPrefix, kExprArrayNewData, i8_array, ascii_data_index,
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8Array), kWtf8PolicyAccept ...GCInstr(kExprStringNewWtf8Array)
]); ]);
let instance = builder.instantiate(); let instance = builder.instantiate();
@ -268,7 +269,9 @@ function makeWtf16TestDataSegment() {
let kSig_w_wii = let kSig_w_wii =
makeSig([kWasmStringRef, kWasmI32, kWasmI32], makeSig([kWasmStringRef, kWasmI32, kWasmI32],
[kWasmStringRef]); [kWasmStringRef]);
for (let [policy, name] of ["utf8", "wtf8", "replace"].entries()) { for (let [instr, name] of [[kExprStringEncodeUtf8Array, "utf8"],
[kExprStringEncodeWtf8Array, "wtf8"],
[kExprStringEncodeLossyUtf8Array, "replace"]]) {
// Allocate an array that's exactly the expected size, and encode // Allocate an array that's exactly the expected size, and encode
// into it. Then decode it. // into it. Then decode it.
// (str, length, offset=0) -> str // (str, length, offset=0) -> str
@ -286,14 +289,14 @@ function makeWtf16TestDataSegment() {
kExprLocalGet, 0, kExprLocalGet, 0,
kExprLocalGet, 3, kExprLocalGet, 3,
kExprLocalGet, 2, kExprLocalGet, 2,
...GCInstr(kExprStringEncodeWtf8Array), policy, ...GCInstr(instr),
kExprLocalSet, 4, kExprLocalSet, 4,
// Read buffer. // Read buffer.
kExprLocalGet, 3, kExprLocalGet, 3,
kExprLocalGet, 2, kExprLocalGet, 2,
kExprLocalGet, 2, kExprLocalGet, 4, kExprI32Add, kExprLocalGet, 2, kExprLocalGet, 4, kExprI32Add,
...GCInstr(kExprStringNewWtf8Array), kWtf8PolicyAccept, ...GCInstr(kExprStringNewWtf8Array)
]); ]);
} }
@ -303,17 +306,17 @@ function makeWtf16TestDataSegment() {
kExprRefNull, kStringRefCode, kExprRefNull, kStringRefCode,
kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, i8_array, kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, i8_array,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringEncodeWtf8Array), 0, ...GCInstr(kExprStringEncodeWtf8Array)
]); ]);
builder.addFunction("encode_null_array", kSig_i_v) builder.addFunction("encode_null_array", kSig_i_v)
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, i8_array, kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, i8_array,
kExprI32Const, 0, kExprI32Const, 0, kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringNewWtf8Array), kWtf8PolicyAccept, ...GCInstr(kExprStringNewWtf8Array),
kExprRefNull, i8_array, kExprRefNull, i8_array,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringEncodeWtf8Array), kWtf8PolicyAccept, ...GCInstr(kExprStringEncodeWtf8Array)
]); ]);
let instance = builder.instantiate(); let instance = builder.instantiate();

View File

@ -162,21 +162,21 @@ function makeWtf8TestDataSegment() {
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8), 0, kWtf8PolicyReject ...GCInstr(kExprStringNewUtf8), 0
]); ]);
builder.addFunction("string_new_wtf8", kSig_w_ii) builder.addFunction("string_new_wtf8", kSig_w_ii)
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8), 0, kWtf8PolicyAccept ...GCInstr(kExprStringNewWtf8), 0
]); ]);
builder.addFunction("string_new_utf8_sloppy", kSig_w_ii) builder.addFunction("string_new_utf8_sloppy", kSig_w_ii)
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8), 0, kWtf8PolicyReplace ...GCInstr(kExprStringNewLossyUtf8), 0
]); ]);
let instance = builder.instantiate(); let instance = builder.instantiate();
@ -282,50 +282,34 @@ function makeWtf16TestDataSegment() {
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 0,
...GCInstr(kExprStringMeasureWtf8), 0 ...GCInstr(kExprStringMeasureUtf8)
]); ]);
builder.addFunction("string_measure_wtf8", kSig_i_w) builder.addFunction("string_measure_wtf8", kSig_i_w)
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 0,
...GCInstr(kExprStringMeasureWtf8), 1 ...GCInstr(kExprStringMeasureWtf8)
]);
builder.addFunction("string_measure_wtf8_replace", kSig_i_w)
.exportFunc()
.addBody([
kExprLocalGet, 0,
...GCInstr(kExprStringMeasureWtf8), 2
]); ]);
builder.addFunction("string_measure_utf8_null", kSig_i_v) builder.addFunction("string_measure_utf8_null", kSig_i_v)
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprRefNull, kStringRefCode, kExprRefNull, kStringRefCode,
...GCInstr(kExprStringMeasureWtf8), 0 ...GCInstr(kExprStringMeasureUtf8)
]); ]);
builder.addFunction("string_measure_wtf8_null", kSig_i_v) builder.addFunction("string_measure_wtf8_null", kSig_i_v)
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprRefNull, kStringRefCode, kExprRefNull, kStringRefCode,
...GCInstr(kExprStringMeasureWtf8), 1 ...GCInstr(kExprStringMeasureWtf8)
]);
builder.addFunction("string_measure_wtf8_replace_null", kSig_i_v)
.exportFunc()
.addBody([
kExprRefNull, kStringRefCode,
...GCInstr(kExprStringMeasureWtf8), 2
]); ]);
let instance = builder.instantiate(); let instance = builder.instantiate();
for (let str of interestingStrings) { for (let str of interestingStrings) {
let wtf8 = encodeWtf8(str); let wtf8 = encodeWtf8(str);
assertEquals(wtf8.length, instance.exports.string_measure_wtf8(str)); assertEquals(wtf8.length, instance.exports.string_measure_wtf8(str));
assertEquals(wtf8.length,
instance.exports.string_measure_wtf8_replace(str));
if (HasIsolatedSurrogate(str)) { if (HasIsolatedSurrogate(str)) {
assertEquals(-1, instance.exports.string_measure_utf8(str)); assertEquals(-1, instance.exports.string_measure_utf8(str));
} else { } else {
@ -337,8 +321,6 @@ function makeWtf16TestDataSegment() {
WebAssembly.RuntimeError, "dereferencing a null pointer"); WebAssembly.RuntimeError, "dereferencing a null pointer");
assertThrows(() => instance.exports.string_measure_wtf8_null(), assertThrows(() => instance.exports.string_measure_wtf8_null(),
WebAssembly.RuntimeError, "dereferencing a null pointer"); WebAssembly.RuntimeError, "dereferencing a null pointer");
assertThrows(() => instance.exports.string_measure_wtf8_replace_null(),
WebAssembly.RuntimeError, "dereferencing a null pointer");
})(); })();
(function TestStringMeasureWtf16() { (function TestStringMeasureWtf16() {
@ -372,13 +354,15 @@ function makeWtf16TestDataSegment() {
builder.addMemory(1, undefined, true /* exported */, false); builder.addMemory(1, undefined, true /* exported */, false);
for (let [policy, name] of ["utf8", "wtf8", "replace"].entries()) { for (let [instr, name] of [[kExprStringEncodeUtf8, "utf8"],
[kExprStringEncodeWtf8, "wtf8"],
[kExprStringEncodeLossyUtf8, "replace"]]) {
builder.addFunction("encode_" + name, kSig_i_wi) builder.addFunction("encode_" + name, kSig_i_wi)
.exportFunc() .exportFunc()
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 0,
kExprLocalGet, 1, kExprLocalGet, 1,
...GCInstr(kExprStringEncodeWtf8), 0, policy, ...GCInstr(instr), 0,
]); ]);
} }
@ -860,9 +844,10 @@ function makeWtf16TestDataSegment() {
...GCInstr(kExprStringViewWtf8Advance) ...GCInstr(kExprStringViewWtf8Advance)
]); ]);
for (let [name, policy] of Object.entries({utf8: kWtf8PolicyReject, for (let [instr, name] of
wtf8: kWtf8PolicyAccept, [[kExprStringViewWtf8EncodeUtf8, "utf8"],
replace: kWtf8PolicyReplace})) { [kExprStringViewWtf8EncodeWtf8, "wtf8"],
[kExprStringViewWtf8EncodeLossyUtf8, "replace"]]) {
builder.addFunction(`encode_${name}`, kSig_ii_wiii) builder.addFunction(`encode_${name}`, kSig_ii_wiii)
.exportFunc() .exportFunc()
.addBody([ .addBody([
@ -871,7 +856,7 @@ function makeWtf16TestDataSegment() {
kExprLocalGet, 1, kExprLocalGet, 1,
kExprLocalGet, 2, kExprLocalGet, 2,
kExprLocalGet, 3, kExprLocalGet, 3,
...GCInstr(kExprStringViewWtf8Encode), 0, policy ...GCInstr(instr), 0
]); ]);
} }
builder.addFunction("encode_null", kSig_v_v) builder.addFunction("encode_null", kSig_v_v)
@ -881,7 +866,7 @@ function makeWtf16TestDataSegment() {
kExprI32Const, 0, kExprI32Const, 0,
kExprI32Const, 0, kExprI32Const, 0,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringViewWtf8Encode), 0, kWtf8PolicyAccept, ...GCInstr(kExprStringViewWtf8EncodeWtf8), 0,
kExprDrop, kExprDrop,
kExprDrop kExprDrop
]); ]);

View File

@ -72,22 +72,20 @@ let kSig_w_zi = makeSig([kWasmStringViewIter, kWasmI32],
builder.addMemory(0, undefined, false, false); builder.addMemory(0, undefined, false, false);
builder.addFunction("string.new_wtf8/reject", kSig_w_ii) builder.addFunction("string.new_utf8", kSig_w_ii)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8), 0, kWtf8PolicyReject ...GCInstr(kExprStringNewUtf8), 0
]); ]);
builder.addFunction("string.new_lossy_utf8", kSig_w_ii)
builder.addFunction("string.new_wtf8/accept", kSig_w_ii)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8), 0, kWtf8PolicyAccept ...GCInstr(kExprStringNewLossyUtf8), 0
]); ]);
builder.addFunction("string.new_wtf8", kSig_w_ii)
builder.addFunction("string.new_wtf8/replace", kSig_w_ii)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8), 0, kWtf8PolicyReplace ...GCInstr(kExprStringNewWtf8), 0
]); ]);
builder.addFunction("string.new_wtf16", kSig_w_ii) builder.addFunction("string.new_wtf16", kSig_w_ii)
@ -102,22 +100,15 @@ let kSig_w_zi = makeSig([kWasmStringViewIter, kWasmI32],
...GCInstr(kExprStringConst), 0 ...GCInstr(kExprStringConst), 0
]); ]);
builder.addFunction("string.measure_wtf8/utf-8", kSig_i_w) builder.addFunction("string.measure_utf8", kSig_i_w)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 0,
...GCInstr(kExprStringMeasureWtf8), kWtf8PolicyReject ...GCInstr(kExprStringMeasureUtf8)
]); ]);
builder.addFunction("string.measure_wtf8", kSig_i_w)
builder.addFunction("string.measure_wtf8/wtf-8", kSig_i_w)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 0,
...GCInstr(kExprStringMeasureWtf8), kWtf8PolicyAccept ...GCInstr(kExprStringMeasureWtf8)
]);
builder.addFunction("string.measure_wtf8/replace", kSig_i_w)
.addBody([
kExprLocalGet, 0,
...GCInstr(kExprStringMeasureWtf8), kWtf8PolicyReplace
]); ]);
builder.addFunction("string.measure_wtf16", kSig_i_w) builder.addFunction("string.measure_wtf16", kSig_i_w)
@ -126,20 +117,20 @@ let kSig_w_zi = makeSig([kWasmStringViewIter, kWasmI32],
...GCInstr(kExprStringMeasureWtf16) ...GCInstr(kExprStringMeasureWtf16)
]); ]);
builder.addFunction("string.encode_wtf8/utf-8", kSig_i_wi) builder.addFunction("string.encode_utf8", kSig_i_wi)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringEncodeWtf8), 0, kWtf8PolicyAccept ...GCInstr(kExprStringEncodeUtf8), 0
]); ]);
builder.addFunction("string.encode_wtf8/wtf-8", kSig_i_wi) builder.addFunction("string.encode_lossy_utf8", kSig_i_wi)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringEncodeWtf8), 0, kWtf8PolicyReject ...GCInstr(kExprStringEncodeLossyUtf8), 0
]); ]);
builder.addFunction("string.encode_wtf8/replace", kSig_i_wi) builder.addFunction("string.encode_wtf8", kSig_i_wi)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringEncodeWtf8), 0, kWtf8PolicyReplace ...GCInstr(kExprStringEncodeWtf8), 0
]); ]);
builder.addFunction("string.encode_wtf16", kSig_i_wi) builder.addFunction("string.encode_wtf16", kSig_i_wi)
@ -172,22 +163,20 @@ let kSig_w_zi = makeSig([kWasmStringViewIter, kWasmI32],
...GCInstr(kExprStringViewWtf8Advance) ...GCInstr(kExprStringViewWtf8Advance)
]); ]);
builder.addFunction("stringview_wtf8.encode/utf-8", kSig_ii_xiii) builder.addFunction("stringview_wtf8.encode_utf8", kSig_ii_xiii)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 2, kExprLocalGet, 3, kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 2, kExprLocalGet, 3,
...GCInstr(kExprStringViewWtf8Encode), 0, 0 ...GCInstr(kExprStringViewWtf8EncodeUtf8), 0
]); ]);
builder.addFunction("stringview_wtf8.encode_lossy_utf8", kSig_ii_xiii)
builder.addFunction("stringview_wtf8.encode/wtf-8", kSig_ii_xiii)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 2, kExprLocalGet, 3, kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 2, kExprLocalGet, 3,
...GCInstr(kExprStringViewWtf8Encode), 0, 1 ...GCInstr(kExprStringViewWtf8EncodeLossyUtf8), 0
]); ]);
builder.addFunction("stringview_wtf8.encode_wtf8", kSig_ii_xiii)
builder.addFunction("stringview_wtf8.encode/replace", kSig_ii_xiii)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 2, kExprLocalGet, 3, kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 2, kExprLocalGet, 3,
...GCInstr(kExprStringViewWtf8Encode), 0, 2 ...GCInstr(kExprStringViewWtf8EncodeWtf8), 0
]); ]);
builder.addFunction("stringview_wtf8.slice", kSig_w_xii) builder.addFunction("stringview_wtf8.slice", kSig_w_xii)
@ -259,28 +248,26 @@ let kSig_w_zi = makeSig([kWasmStringViewIter, kWasmI32],
let i8_array = builder.addArray(kWasmI8, true); let i8_array = builder.addArray(kWasmI8, true);
let i16_array = builder.addArray(kWasmI16, true); let i16_array = builder.addArray(kWasmI16, true);
builder.addFunction("string.new_wtf8_array/accept", kSig_w_v) builder.addFunction("string.new_utf8_array", kSig_w_v)
.addBody([ .addBody([
kExprRefNull, i8_array, kExprRefNull, i8_array,
kExprI32Const, 0, kExprI32Const, 0,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringNewWtf8Array), kWtf8PolicyAccept ...GCInstr(kExprStringNewWtf8Array)
]); ]);
builder.addFunction("string.new_lossy_utf8_array", kSig_w_v)
builder.addFunction("string.new_wtf8_array/reject", kSig_w_v)
.addBody([ .addBody([
kExprRefNull, i8_array, kExprRefNull, i8_array,
kExprI32Const, 0, kExprI32Const, 0,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringNewWtf8Array), kWtf8PolicyReject ...GCInstr(kExprStringNewLossyUtf8Array)
]); ]);
builder.addFunction("string.new_wtf8_array", kSig_w_v)
builder.addFunction("string.new_wtf8_array/replace", kSig_w_v)
.addBody([ .addBody([
kExprRefNull, i8_array, kExprRefNull, i8_array,
kExprI32Const, 0, kExprI32Const, 0,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringNewWtf8Array), kWtf8PolicyReplace ...GCInstr(kExprStringNewWtf8Array)
]); ]);
builder.addFunction("string.new_wtf16_array", kSig_w_v) builder.addFunction("string.new_wtf16_array", kSig_w_v)
@ -291,28 +278,26 @@ let kSig_w_zi = makeSig([kWasmStringViewIter, kWasmI32],
...GCInstr(kExprStringNewWtf16Array) ...GCInstr(kExprStringNewWtf16Array)
]); ]);
builder.addFunction("string.encode_wtf8_array/accept", kSig_i_v) builder.addFunction("string.encode_utf8_array", kSig_i_v)
.addBody([ .addBody([
kExprRefNull, kStringRefCode, kExprRefNull, kStringRefCode,
kExprRefNull, i8_array, kExprRefNull, i8_array,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringEncodeWtf8Array), kWtf8PolicyAccept ...GCInstr(kExprStringEncodeUtf8Array)
]); ]);
builder.addFunction("string.encode_lossy_utf8_array", kSig_i_v)
builder.addFunction("string.encode_wtf8_array/reject", kSig_i_v)
.addBody([ .addBody([
kExprRefNull, kStringRefCode, kExprRefNull, kStringRefCode,
kExprRefNull, i8_array, kExprRefNull, i8_array,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringEncodeWtf8Array), kWtf8PolicyReject ...GCInstr(kExprStringEncodeLossyUtf8Array)
]); ]);
builder.addFunction("string.encode_wtf8_array", kSig_i_v)
builder.addFunction("string.encode_wtf8_array/replace", kSig_i_v)
.addBody([ .addBody([
kExprRefNull, kStringRefCode, kExprRefNull, kStringRefCode,
kExprRefNull, i8_array, kExprRefNull, i8_array,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringEncodeWtf8Array), kWtf8PolicyReplace ...GCInstr(kExprStringEncodeWtf8Array)
]); ]);
builder.addFunction("string.encode_wtf16_array", kSig_i_v) builder.addFunction("string.encode_wtf16_array", kSig_i_v)
@ -340,7 +325,7 @@ assertInvalid(
builder.addFunction("string.new_wtf8/no-mem", kSig_w_ii) builder.addFunction("string.new_wtf8/no-mem", kSig_w_ii)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8), 0, kWtf8PolicyAccept ...GCInstr(kExprStringNewWtf8), 0
]); ]);
}, },
/memory instruction with no memory/); /memory instruction with no memory/);
@ -351,7 +336,7 @@ assertInvalid(
builder.addFunction("string.new_wtf8/bad-mem", kSig_w_ii) builder.addFunction("string.new_wtf8/bad-mem", kSig_w_ii)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringNewWtf8), 1, kWtf8PolicyAccept ...GCInstr(kExprStringNewWtf8), 1
]); ]);
}, },
/expected memory index 0, found 1/); /expected memory index 0, found 1/);
@ -361,7 +346,7 @@ assertInvalid(
builder.addFunction("string.encode_wtf8/no-mem", kSig_i_wi) builder.addFunction("string.encode_wtf8/no-mem", kSig_i_wi)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringEncodeWtf8), 0, kWtf8PolicyAccept ...GCInstr(kExprStringEncodeWtf8), 0
]); ]);
}, },
/memory instruction with no memory/); /memory instruction with no memory/);
@ -372,45 +357,11 @@ assertInvalid(
builder.addFunction("string.encode_wtf8/bad-mem", kSig_i_wi) builder.addFunction("string.encode_wtf8/bad-mem", kSig_i_wi)
.addBody([ .addBody([
kExprLocalGet, 0, kExprLocalGet, 1, kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringEncodeWtf8), 1, kWtf8PolicyAccept ...GCInstr(kExprStringEncodeWtf8), 1
]); ]);
}, },
/expected memory index 0, found 1/); /expected memory index 0, found 1/);
assertInvalid(
builder => {
builder.addMemory(0, undefined, false, false);
builder.addFunction("string.encode_wtf8/bad-policy", kSig_i_wi)
.addBody([
kExprLocalGet, 0, kExprLocalGet, 1,
...GCInstr(kExprStringEncodeWtf8), 0, 3
]);
},
/expected wtf8 policy 0, 1, or 2, but found 3/);
assertInvalid(
builder => {
builder.addFunction("string.measure_wtf8/bad-policy", kSig_i_w)
.addBody([
kExprLocalGet, 0,
...GCInstr(kExprStringMeasureWtf8), 3
]);
},
/expected wtf8 policy 0, 1, or 2, but found 3/);
assertInvalid(
builder => {
let i8_array = builder.addArray(kWasmI8, true);
builder.addFunction("string.new_wtf8_array/bad-policy", kSig_w_v)
.addBody([
kExprRefNull, i8_array,
kExprI32Const, 0,
kExprI32Const, 0,
...GCInstr(kExprStringNewWtf8Array), 3
]);
},
/expected wtf8 policy 0, 1, or 2, but found 3/);
assertInvalid( assertInvalid(
builder => { builder => {
let i16_array = builder.addArray(kWasmI16, true); let i16_array = builder.addArray(kWasmI16, true);
@ -419,7 +370,7 @@ assertInvalid(
kExprRefNull, i16_array, kExprRefNull, i16_array,
kExprI32Const, 0, kExprI32Const, 0,
kExprI32Const, 0, kExprI32Const, 0,
...GCInstr(kExprStringNewWtf8Array), kWtf8PolicyAccept ...GCInstr(kExprStringNewWtf8Array)
]); ]);
}, },
/string.new_wtf8_array\[0\] expected array of i8, found ref.null of type \(ref null 0\)/); /string.new_wtf8_array\[0\] expected array of i8, found ref.null of type \(ref null 0\)/);
@ -449,7 +400,7 @@ assertInvalid(
kExprLocalGet, 0, kExprLocalGet, 0,
kExprLocalGet, 1, kExprLocalGet, 1,
kExprLocalGet, 2, kExprLocalGet, 2,
...GCInstr(kExprStringEncodeWtf8Array), kWtf8PolicyAccept, ...GCInstr(kExprStringEncodeWtf8Array)
]); ]);
}, },
/string.encode_wtf8_array\[1\] expected array of mutable i8, found local.get of type \(ref 0\)/); /string.encode_wtf8_array\[1\] expected array of mutable i8, found local.get of type \(ref 0\)/);

View File

@ -526,20 +526,27 @@ let kExprBrOnNonI31 = 0x65;
let kExprBrOnNonArray = 0x67; let kExprBrOnNonArray = 0x67;
let kExprExternInternalize = 0x70; let kExprExternInternalize = 0x70;
let kExprExternExternalize = 0x71; let kExprExternExternalize = 0x71;
let kExprStringNewWtf8 = 0x80; let kExprStringNewUtf8 = 0x80;
let kExprStringNewWtf16 = 0x81; let kExprStringNewWtf16 = 0x81;
let kExprStringConst = 0x82; let kExprStringConst = 0x82;
let kExprStringMeasureUtf8 = 0x83;
let kExprStringMeasureWtf8 = 0x84; let kExprStringMeasureWtf8 = 0x84;
let kExprStringMeasureWtf16 = 0x85; let kExprStringMeasureWtf16 = 0x85;
let kExprStringEncodeWtf8 = 0x86; let kExprStringEncodeUtf8 = 0x86;
let kExprStringEncodeWtf16 = 0x87; let kExprStringEncodeWtf16 = 0x87;
let kExprStringConcat = 0x88; let kExprStringConcat = 0x88;
let kExprStringEq = 0x89; let kExprStringEq = 0x89;
let kExprStringIsUsvSequence = 0x8a; let kExprStringIsUsvSequence = 0x8a;
let kExprStringNewLossyUtf8 = 0x8b;
let kExprStringNewWtf8 = 0x8c;
let kExprStringEncodeLossyUtf8 = 0x8d;
let kExprStringEncodeWtf8 = 0x8e;
let kExprStringAsWtf8 = 0x90; let kExprStringAsWtf8 = 0x90;
let kExprStringViewWtf8Advance = 0x91; let kExprStringViewWtf8Advance = 0x91;
let kExprStringViewWtf8Encode = 0x92; let kExprStringViewWtf8EncodeUtf8 = 0x92;
let kExprStringViewWtf8Slice = 0x93; let kExprStringViewWtf8Slice = 0x93;
let kExprStringViewWtf8EncodeLossyUtf8 = 0x94;
let kExprStringViewWtf8EncodeWtf8 = 0x95;
let kExprStringAsWtf16 = 0x98; let kExprStringAsWtf16 = 0x98;
let kExprStringViewWtf16Length = 0x99; let kExprStringViewWtf16Length = 0x99;
let kExprStringViewWtf16GetCodeunit = 0x9a; let kExprStringViewWtf16GetCodeunit = 0x9a;
@ -550,10 +557,14 @@ let kExprStringViewIterNext = 0xa1
let kExprStringViewIterAdvance = 0xa2; let kExprStringViewIterAdvance = 0xa2;
let kExprStringViewIterRewind = 0xa3 let kExprStringViewIterRewind = 0xa3
let kExprStringViewIterSlice = 0xa4; let kExprStringViewIterSlice = 0xa4;
let kExprStringNewWtf8Array = 0xb0; let kExprStringNewUtf8Array = 0xb0;
let kExprStringNewWtf16Array = 0xb1; let kExprStringNewWtf16Array = 0xb1;
let kExprStringEncodeWtf8Array = 0xb2; let kExprStringEncodeUtf8Array = 0xb2;
let kExprStringEncodeWtf16Array = 0xb3; let kExprStringEncodeWtf16Array = 0xb3;
let kExprStringNewLossyUtf8Array = 0xb4;
let kExprStringNewWtf8Array = 0xb5;
let kExprStringEncodeLossyUtf8Array = 0xb6;
let kExprStringEncodeWtf8Array = 0xb7;
// Numeric opcodes. // Numeric opcodes.
let kExprI32SConvertSatF32 = 0x00; let kExprI32SConvertSatF32 = 0x00;
@ -883,11 +894,6 @@ let kExprI32x4TruncSatF64x2UZero = 0xfd;
let kExprF64x2ConvertLowI32x4S = 0xfe; let kExprF64x2ConvertLowI32x4S = 0xfe;
let kExprF64x2ConvertLowI32x4U = 0xff; let kExprF64x2ConvertLowI32x4U = 0xff;
// WTF-8 parsing policies.
let kWtf8PolicyReject = 0;
let kWtf8PolicyAccept = 1;
let kWtf8PolicyReplace = 2;
// Compilation hint constants. // Compilation hint constants.
let kCompilationHintStrategyDefault = 0x00; let kCompilationHintStrategyDefault = 0x00;
let kCompilationHintStrategyLazy = 0x01; let kCompilationHintStrategyLazy = 0x01;

View File

@ -4864,8 +4864,8 @@ TEST_F(WasmOpcodeLengthTest, GCOpcodes) {
ExpectLength(3, 0xfb, 0x07, 0x42); ExpectLength(3, 0xfb, 0x07, 0x42);
ExpectLength(4, 0xfb, 0x07, 0x80, 0x00); ExpectLength(4, 0xfb, 0x07, 0x80, 0x00);
// string.new_wtf8 with $mem=0, $policy=0. // string.new_utf8 with $mem=0.
ExpectLength(5, 0xfb, 0x80, 0x01, 0x00, 0x00); ExpectLength(4, 0xfb, 0x80, 0x01, 0x00);
// string.as_wtf8. // string.as_wtf8.
ExpectLength(3, 0xfb, 0x90, 0x01); ExpectLength(3, 0xfb, 0x90, 0x01);