[stringrefs] Faster string.new_wtf16_array

This is achieved by moving the implementation from a C++ runtime function to a Torque builtin.

Bug: v8:12868
Change-Id: Iede756af683d97c720040144a4ec02872da4c453
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4020425
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84266}
This commit is contained in:
Jakob Kummerow 2022-11-15 11:58:28 +01:00 committed by V8 LUCI CQ
parent cf3f222543
commit cd5322de13
5 changed files with 90 additions and 54 deletions

View File

@ -44,7 +44,6 @@ extern runtime WasmStringNewWtf8Array(
Context, Smi, WasmArray, Smi, Smi): String;
extern runtime WasmStringNewWtf16(
Context, WasmInstanceObject, Smi, Number, Number): String;
extern runtime WasmStringNewWtf16Array(Context, WasmArray, Smi, Smi): String;
extern runtime WasmStringConst(Context, WasmInstanceObject, Smi): String;
extern runtime WasmStringMeasureUtf8(Context, String): Number;
extern runtime WasmStringMeasureWtf8(Context, String): Number;
@ -831,20 +830,107 @@ builtin WasmStringNewWtf16(
LoadContextFromInstance(instance), instance, SmiFromUint32(memory),
WasmUint32ToNumber(offset), WasmUint32ToNumber(size));
}
// Iterator over 16-bit code units stored in a WasmArray that yields each code
// unit reduced to its low 8 bits. {offset} and {end_offset} are byte offsets
// into {array}; iteration stops once {offset} reaches {end_offset}.
struct TwoByteToOneByteIterator {
  // Returns the low byte of the code unit at the current offset and advances
  // by one code unit; jumps to NoMore when the end offset has been reached.
  macro Next(): char8 labels NoMore {
    if (this.offset == this.end_offset) goto NoMore;
    const codeUnit: char16 =
        *torque_internal::unsafe::NewReference<char16>(this.array, this.offset);
    // Step over the two bytes of the code unit that was just read.
    this.offset += 2;
    return %RawDownCast<char8>(codeUnit & 0xFF);
  }

  array: WasmArray;
  offset: intptr;
  end_offset: intptr;
}
// Builds a String from the 16-bit code units stored in array[start, end).
//
// - Traps with kWasmTrapArrayOutOfBounds if {end} exceeds the array length or
//   if {end} is smaller than {start}.
// - Returns kEmptyString for an empty range.
// - Throws an invalid-string-length error if the range is longer than
//   kStringMaxLength.
// - Otherwise scans the range once: if no code unit exceeds 0xFF, the result
//   is allocated with AllocateNonEmptySeqOneByteString, else with
//   AllocateSeqTwoByteString.
builtin WasmStringNewWtf16Array(
    array: WasmArray, start: uint32, end: uint32): String {
  try {
    if (array.length < end) goto OffsetOutOfRange;
    if (end < start) goto OffsetOutOfRange;
    const length: uint32 = end - start;
    if (length == 0) return kEmptyString;
    if (length > kStringMaxLength) goto TooLong;
    const intptrLength = Convert<intptr>(length);
    const arrayContent = torque_internal::unsafe::NewConstSlice<char16>(
        array, kWasmArrayHeaderSize, Convert<intptr>(array.length));
    const substring =
        Subslice(arrayContent, Convert<intptr>(start), intptrLength)
        otherwise goto OffsetOutOfRange;

    // Ideas for additional future improvements:
    // (1) We could add a fast path for very short strings, e.g. <= 8 chars,
    //     and just allocate two-byte strings for them. That would save time
    //     here, and would only waste a couple of bytes at most. A concern is
    //     that such strings couldn't take one-byte fast paths later on, e.g.
    //     in toLower/toUpper case conversions.
    // (2) We could load more than one array element at a time, e.g. using
    //     intptr-wide loads, or possibly even wider SIMD instructions. We'd
    //     have to make sure that non-aligned start offsets are handled,
    //     and the implementation would become more platform-specific.
    // (3) We could shift the problem around by allocating two-byte strings
    //     here and checking whether they're one-byte-compatible later, e.g.
    //     when promoting them from new to old space. Drawback: rewriting
    //     strings to different maps isn't great for optimized code that's
    //     based on collected type feedback, or that wants to elide duplicate
    //     map checks within the function.
    // (4) We could allocate space for a two-byte string, then optimistically
    //     start writing one-byte characters into it, and then either restart
    //     in two-byte mode if needed, or return the over-allocated bytes to
    //     the allocator in the end.
    // (5) We could standardize a `string.new_ascii_array` instruction, which
    //     could safely produce one-byte strings without checking characters.
    //     See https://github.com/WebAssembly/stringref/issues/53.

    try {
      // To reduce the amount of branching, check 8 code units at a time. The
      // tradeoff for choosing 8 is that we want to check for early termination
      // of the loop often (to avoid unnecessary work) but not too often
      // (because each check has a cost).
      let i: intptr = 0;
      // Negative for ranges shorter than 8 code units, in which case the
      // unrolled loop below is skipped entirely.
      const eightElementLoopEnd = intptrLength - 8;
      while (i <= eightElementLoopEnd) {
        const bits = Convert<uint32>(*substring.UncheckedAtIndex(i)) |
            Convert<uint32>(*substring.UncheckedAtIndex(i + 1)) |
            Convert<uint32>(*substring.UncheckedAtIndex(i + 2)) |
            Convert<uint32>(*substring.UncheckedAtIndex(i + 3)) |
            Convert<uint32>(*substring.UncheckedAtIndex(i + 4)) |
            Convert<uint32>(*substring.UncheckedAtIndex(i + 5)) |
            Convert<uint32>(*substring.UncheckedAtIndex(i + 6)) |
            Convert<uint32>(*substring.UncheckedAtIndex(i + 7));
        if (bits > 0xFF) goto TwoByte;
        i += 8;
      }
      // Handle the remaining (fewer than 8) code units with a single check
      // after OR-ing them all together.
      let bits: uint32 = 0;
      while (i < intptrLength) {
        bits |= Convert<uint32>(*substring.UncheckedAtIndex(i));
        i += 1;
      }
      if (bits > 0xFF) goto TwoByte;
    } label TwoByte {
      return AllocateSeqTwoByteString(length, substring.Iterator());
    }

    // No code unit exceeded 0xFF, so every one of them can be narrowed to a
    // single byte. The iterator takes byte offsets into the array object.
    return AllocateNonEmptySeqOneByteString(length, TwoByteToOneByteIterator{
      array: array,
      offset: kWasmArrayHeaderSize +
          torque_internal::TimesSizeOf<char16>(Convert<intptr>(start)),
      end_offset: kWasmArrayHeaderSize +
          torque_internal::TimesSizeOf<char16>(Convert<intptr>(end))
    });
  } label OffsetOutOfRange deferred {
    const context = LoadContextFromFrame();
    const error = MessageTemplate::kWasmTrapArrayOutOfBounds;
    runtime::ThrowWasmError(context, SmiConstant(error));
    unreachable;
  } label TooLong deferred {
    ThrowInvalidStringLength(LoadContextFromFrame());
  }
}
builtin WasmStringConst(index: uint32): String {
const instance = LoadInstanceFromFrame();
tail runtime::WasmStringConst(

View File

@ -829,39 +829,6 @@ MaybeHandle<String> Factory::NewStringFromUtf8(
allocation);
}
namespace {

// Decoder type plugged into NewStringFromBytes for 16-bit input. The input is
// taken as-is: it is never reported as invalid, and decoding is a plain copy
// of the code units into the destination buffer.
struct Wtf16Decoder {
  // Records the number of code units in {data} and whether all of them pass
  // String::IsOneByte.
  explicit Wtf16Decoder(const base::Vector<const uint16_t>& data)
      : length_(data.length()),
        is_one_byte_(String::IsOneByte(data.begin(), length_)) {}

  // This decoder never rejects its input.
  bool is_invalid() const { return false; }

  bool is_one_byte() const { return is_one_byte_; }

  // Length of the resulting string in 16-bit code units.
  int utf16_length() const { return length_; }

  // Copies {length_} code units from {data} to {out}.
  template <typename Char>
  void Decode(Char* out, const base::Vector<const uint16_t>& data) {
    CopyChars(out, data.begin(), length_);
  }

  // Keep {length_} declared before {is_one_byte_}: the constructor reads
  // {length_} while initializing {is_one_byte_}.
  int length_;
  bool is_one_byte_;
};

}  // namespace
// Creates a string from the 16-bit elements of {array} in the range
// [start, end). Callers must ensure start <= end <= array->length(); this is
// only DCHECKed here.
MaybeHandle<String> Factory::NewStringFromUtf16(Handle<WasmArray> array,
                                                uint32_t start, uint32_t end,
                                                AllocationType allocation) {
  DCHECK_EQ(sizeof(uint16_t), array->type()->element_type().value_kind_size());
  DCHECK_LE(start, end);
  DCHECK_LE(end, array->length());
  // {end - start} always fits into the int length stored by Wtf16Decoder.
  static_assert(WasmArray::MaxLength(sizeof(uint16_t)) <= kMaxInt);
  // The element address is computed inside the callback rather than cached
  // up front.
  // NOTE(review): presumably so that NewStringFromBytes can re-read it after
  // an allocation that may move the array; confirm against that helper.
  auto code_units_in_range = [&]() -> base::Vector<const uint16_t> {
    const auto* elements =
        reinterpret_cast<const uint16_t*>(array->ElementAddress(0));
    return base::Vector<const uint16_t>(elements + start, end - start);
  };
  return NewStringFromBytes<Wtf16Decoder>(isolate(), code_units_in_range,
                                          allocation, MessageTemplate::kNone);
}
#endif // V8_ENABLE_WEBASSEMBLY
MaybeHandle<String> Factory::NewStringFromUtf8SubString(

View File

@ -272,10 +272,6 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
Handle<ByteArray> array, uint32_t start, uint32_t end,
unibrow::Utf8Variant utf8_variant,
AllocationType allocation = AllocationType::kYoung);
// Creates a string from the 16-bit elements of {array} in the range
// [start, end).
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromUtf16(
Handle<WasmArray> array, uint32_t start, uint32_t end,
AllocationType allocation = AllocationType::kYoung);
#endif // V8_ENABLE_WEBASSEMBLY
V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromUtf8SubString(

View File

@ -993,18 +993,6 @@ RUNTIME_FUNCTION(Runtime_WasmStringNewWtf16) {
{codeunits, size_in_codeunits}));
}
// Runtime entry point taking three arguments: a WasmArray, a start index and
// an end index. Returns the string created by Factory::NewStringFromUtf16 for
// that range, or traps if no string could be created.
RUNTIME_FUNCTION(Runtime_WasmStringNewWtf16Array) {
  ClearThreadInWasmScope clear_thread_in_wasm(isolate);
  DCHECK_EQ(3, args.length());
  HandleScope handle_scope(isolate);
  Handle<WasmArray> code_units(WasmArray::cast(args[0]), isolate);
  uint32_t range_start = NumberToUint32(args[1]);
  uint32_t range_end = NumberToUint32(args[2]);
  RETURN_RESULT_OR_TRAP(isolate->factory()->NewStringFromUtf16(
      code_units, range_start, range_end));
}
// Returns the new string if the operation succeeds. Otherwise traps.
RUNTIME_FUNCTION(Runtime_WasmStringConst) {
ClearThreadInWasmScope flag_scope(isolate);

View File

@ -633,7 +633,6 @@ namespace internal {
F(WasmStringNewWtf8, 5, 1) \
F(WasmStringNewWtf8Array, 4, 1) \
F(WasmStringNewWtf16, 4, 1) \
F(WasmStringNewWtf16Array, 3, 1) \
F(WasmStringConst, 2, 1) \
F(WasmStringMeasureUtf8, 1, 1) \
F(WasmStringMeasureWtf8, 1, 1) \