[stringrefs] Faster string.new_wtf16_array

Speed up string.new_wtf16_array by moving its implementation from a runtime function to Torque.

Bug: v8:12868
Change-Id: Iede756af683d97c720040144a4ec02872da4c453
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4020425
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84266}
2022-11-15 11:58:28 +01:00 · 2022-11-15 11:58:28 +01:00 · cd5322de13
commit cd5322de13
parent cf3f222543
5 changed files with 90 additions and 54 deletions
--- a/src/builtins/wasm.tq
+++ b/src/builtins/wasm.tq
@ -44,7 +44,6 @@ extern runtime WasmStringNewWtf8Array(
    Context, Smi, WasmArray, Smi, Smi): String;
 extern runtime WasmStringNewWtf16(
    Context, WasmInstanceObject, Smi, Number, Number): String;
-extern runtime WasmStringNewWtf16Array(Context, WasmArray, Smi, Smi): String;
 extern runtime WasmStringConst(Context, WasmInstanceObject, Smi): String;
 extern runtime WasmStringMeasureUtf8(Context, String): Number;
 extern runtime WasmStringMeasureWtf8(Context, String): Number;
@ -831,20 +830,107 @@ builtin WasmStringNewWtf16(
      LoadContextFromInstance(instance), instance, SmiFromUint32(memory),
      WasmUint32ToNumber(offset), WasmUint32ToNumber(size));
 }
+
+struct TwoByteToOneByteIterator {  // Yields the low byte of each char16 read from `array`.
+  macro Next(): char8 labels NoMore {
+    if (this.offset == this.end_offset) goto NoMore;  // Range exhausted.
+    const raw: char16 = *torque_internal::unsafe::NewReference<char16>(
+        this.array, this.offset);
+    const result: char8 = %RawDownCast<char8>(raw & 0xFF);  // High byte is dropped.
+    this.offset += 2;  // Step to the next element; 2 is presumably sizeof(char16).
+    return result;
+  }
+  // Reads start at `offset` and stop when it equals `end_offset` (exact match, in steps of 2).
+  array: WasmArray;
+  offset: intptr;
+  end_offset: intptr;
+}
+
 builtin WasmStringNewWtf16Array(
    array: WasmArray, start: uint32, end: uint32): String {
-  const context = LoadContextFromFrame();
  try {
    if (array.length < end) goto OffsetOutOfRange;
    if (end < start) goto OffsetOutOfRange;
-    tail runtime::WasmStringNewWtf16Array(
-        context, array, SmiFromUint32(start), SmiFromUint32(end));
+    const length: uint32 = end - start;  // No underflow: end >= start was checked above.
+    if (length == 0) return kEmptyString;
+    if (length > kStringMaxLength) goto TooLong;
+    const intptrLength = Convert<intptr>(length);
+    const arrayContent = torque_internal::unsafe::NewConstSlice<char16>(
+        array, kWasmArrayHeaderSize, Convert<intptr>(array.length));
+    const substring =
+        Subslice(arrayContent, Convert<intptr>(start), intptrLength)
+        otherwise goto OffsetOutOfRange;
+
+    // Ideas for additional future improvements:
+    // (1) We could add a fast path for very short strings, e.g. <= 8 chars,
+    //     and just allocate two-byte strings for them. That would save time
+    //     here, and would only waste a couple of bytes at most. A concern is
+    //     that such strings couldn't take one-byte fast paths later on, e.g.
+    //     in toLower/toUpper case conversions.
+    // (2) We could load more than one array element at a time, e.g. using
+    //     intptr-wide loads, or possibly even wider SIMD instructions. We'd
+    //     have to make sure that non-aligned start offsets are handled,
+    //     and the implementation would become more platform-specific.
+    // (3) We could shift the problem around by allocating two-byte strings
+    //     here and checking whether they're one-byte-compatible later, e.g.
+    //     when promoting them from new to old space. Drawback: rewriting
+    //     strings to different maps isn't great for optimized code that's
+    //     based on collected type feedback, or that wants to elide duplicate
+    //     map checks within the function.
+    // (4) We could allocate space for a two-byte string, then optimistically
+    //     start writing one-byte characters into it, and then either restart
+    //     in two-byte mode if needed, or return the over-allocated bytes to
+    //     the allocator in the end.
+    // (5) We could standardize a `string.new_ascii_array` instruction, which
+    //     could safely produce one-byte strings without checking characters.
+    //     See https://github.com/WebAssembly/stringref/issues/53.
+    // Scan for any code unit above 0xFF; if none is found, a one-byte string suffices.
+    try {
+      // To reduce the amount of branching, check 8 code units at a time. The
+      // tradeoff for choosing 8 is that we want to check for early termination
+      // of the loop often (to avoid unnecessary work) but not too often
+      // (because each check has a cost).
+      let i: intptr = 0;
+      const eightElementLoopEnd = intptrLength - 8;  // Negative if fewer than 8 units; the loop below is then skipped.
+      while (i <= eightElementLoopEnd) {
+        const bits = Convert<uint32>(*substring.UncheckedAtIndex(i)) |
+            Convert<uint32>(*substring.UncheckedAtIndex(i + 1)) |
+            Convert<uint32>(*substring.UncheckedAtIndex(i + 2)) |
+            Convert<uint32>(*substring.UncheckedAtIndex(i + 3)) |
+            Convert<uint32>(*substring.UncheckedAtIndex(i + 4)) |
+            Convert<uint32>(*substring.UncheckedAtIndex(i + 5)) |
+            Convert<uint32>(*substring.UncheckedAtIndex(i + 6)) |
+            Convert<uint32>(*substring.UncheckedAtIndex(i + 7));
+        if (bits > 0xFF) goto TwoByte;
+        i += 8;
+      }
+      let bits: uint32 = 0;  // Accumulates the remaining (fewer than 8) code units.
+      while (i < intptrLength) {
+        bits |= Convert<uint32>(*substring.UncheckedAtIndex(i));
+        i += 1;
+      }
+      if (bits > 0xFF) goto TwoByte;
+    } label TwoByte {
+      return AllocateSeqTwoByteString(length, substring.Iterator());
+    }
+    // Reached only if no code unit exceeded 0xFF, so truncating each to one byte is lossless.
+    return AllocateNonEmptySeqOneByteString(length, TwoByteToOneByteIterator{
+      array: array,
+      offset: kWasmArrayHeaderSize +
+          torque_internal::TimesSizeOf<char16>(Convert<intptr>(start)),
+      end_offset: kWasmArrayHeaderSize +
+          torque_internal::TimesSizeOf<char16>(Convert<intptr>(end))
+    });
  } label OffsetOutOfRange deferred {
+    const context = LoadContextFromFrame();  // Loaded only on this error path.
    const error = MessageTemplate::kWasmTrapArrayOutOfBounds;
    runtime::ThrowWasmError(context, SmiConstant(error));
    unreachable;
+  } label TooLong deferred {
+    ThrowInvalidStringLength(LoadContextFromFrame());
  }
 }
+
 builtin WasmStringConst(index: uint32): String {
  const instance = LoadInstanceFromFrame();
  tail runtime::WasmStringConst(
--- a/src/heap/factory.cc
+++ b/src/heap/factory.cc
@ -829,39 +829,6 @@ MaybeHandle<String> Factory::NewStringFromUtf8(
                                  allocation);
 }

-namespace {
-struct Wtf16Decoder {
-  int length_;
-  bool is_one_byte_;
-  explicit Wtf16Decoder(const base::Vector<const uint16_t>& data)
-      : length_(data.length()),
-        is_one_byte_(String::IsOneByte(data.begin(), length_)) {}
-  bool is_invalid() const { return false; }
-  bool is_one_byte() const { return is_one_byte_; }
-  int utf16_length() const { return length_; }
-  template <typename Char>
-  void Decode(Char* out, const base::Vector<const uint16_t>& data) {
-    CopyChars(out, data.begin(), length_);
-  }
-};
-}  // namespace
-
-MaybeHandle<String> Factory::NewStringFromUtf16(Handle<WasmArray> array,
-                                                uint32_t start, uint32_t end,
-                                                AllocationType allocation) {
-  DCHECK_EQ(sizeof(uint16_t), array->type()->element_type().value_kind_size());
-  DCHECK_LE(start, end);
-  DCHECK_LE(end, array->length());
-  // {end - start} can never be more than what the Utf8Decoder can handle.
-  static_assert(WasmArray::MaxLength(sizeof(uint16_t)) <= kMaxInt);
-  auto peek_bytes = [&]() -> base::Vector<const uint16_t> {
-    const uint16_t* contents =
-        reinterpret_cast<const uint16_t*>(array->ElementAddress(0));
-    return {contents + start, end - start};
-  };
-  return NewStringFromBytes<Wtf16Decoder>(isolate(), peek_bytes, allocation,
-                                          MessageTemplate::kNone);
-}
 #endif  // V8_ENABLE_WEBASSEMBLY

 MaybeHandle<String> Factory::NewStringFromUtf8SubString(
--- a/src/heap/factory.h
+++ b/src/heap/factory.h
@ -272,10 +272,6 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
      Handle<ByteArray> array, uint32_t start, uint32_t end,
      unibrow::Utf8Variant utf8_variant,
      AllocationType allocation = AllocationType::kYoung);
-
-  V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromUtf16(
-      Handle<WasmArray> array, uint32_t start, uint32_t end,
-      AllocationType allocation = AllocationType::kYoung);
 #endif  // V8_ENABLE_WEBASSEMBLY

  V8_WARN_UNUSED_RESULT MaybeHandle<String> NewStringFromUtf8SubString(
--- a/src/runtime/runtime-wasm.cc
+++ b/src/runtime/runtime-wasm.cc
@ -993,18 +993,6 @@ RUNTIME_FUNCTION(Runtime_WasmStringNewWtf16) {
      {codeunits, size_in_codeunits}));
 }

-RUNTIME_FUNCTION(Runtime_WasmStringNewWtf16Array) {
-  ClearThreadInWasmScope flag_scope(isolate);
-  DCHECK_EQ(3, args.length());
-  HandleScope scope(isolate);
-  Handle<WasmArray> array(WasmArray::cast(args[0]), isolate);
-  uint32_t start = NumberToUint32(args[1]);
-  uint32_t end = NumberToUint32(args[2]);
-
-  RETURN_RESULT_OR_TRAP(
-      isolate->factory()->NewStringFromUtf16(array, start, end));
-}
-
 // Returns the new string if the operation succeeds.  Otherwise traps.
 RUNTIME_FUNCTION(Runtime_WasmStringConst) {
  ClearThreadInWasmScope flag_scope(isolate);
--- a/src/runtime/runtime.h
+++ b/src/runtime/runtime.h
@ -633,7 +633,6 @@ namespace internal {
  F(WasmStringNewWtf8, 5, 1)          \
  F(WasmStringNewWtf8Array, 4, 1)     \
  F(WasmStringNewWtf16, 4, 1)         \
-  F(WasmStringNewWtf16Array, 3, 1)    \
  F(WasmStringConst, 2, 1)            \
  F(WasmStringMeasureUtf8, 1, 1)      \
  F(WasmStringMeasureWtf8, 1, 1)      \