// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Flags: --experimental-wasm-stringref --experimental-wasm-gc

d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");

let kSig_w_v = makeSig([], [kWasmStringRef]);
let kSig_w_ii = makeSig([kWasmI32, kWasmI32], [kWasmStringRef]);

// Encodes a JS string as an array of WTF-8 bytes.
//
// Paired surrogates are encoded as a single four-byte sequence; an isolated
// surrogate is encoded as a three-byte sequence (this is what distinguishes
// the output from strict UTF-8).
//
// Throws an Error if a code point above 0x10ffff is encountered.
function encodeWtf8(str) {
  // String iterator coalesces surrogate pairs.
  let out = [];
  for (let ch of str) {
    let codepoint = ch.codePointAt(0);
    if (codepoint <= 0x7f) {
      out.push(codepoint);
    } else if (codepoint <= 0x7ff) {
      out.push(0xc0 | (codepoint >> 6));
      out.push(0x80 | (codepoint & 0x3f));
    } else if (codepoint <= 0xffff) {
      out.push(0xe0 | (codepoint >> 12));
      out.push(0x80 | ((codepoint >> 6) & 0x3f));
      out.push(0x80 | (codepoint & 0x3f));
    } else if (codepoint <= 0x10ffff) {
      out.push(0xf0 | (codepoint >> 18));
      out.push(0x80 | ((codepoint >> 12) & 0x3f));
      out.push(0x80 | ((codepoint >> 6) & 0x3f));
      out.push(0x80 | (codepoint & 0x3f));
    } else {
      throw new Error("bad codepoint " + codepoint);
    }
  }
  return out;
}

// Strings shared by all tests below: empty, ASCII, one-byte, two-byte,
// a surrogate pair, and isolated surrogates in various positions.
let interestingStrings = ['',
                          'ascii',
                          'latin \xa9 1',
                          'two \ucccc byte',
                          'surrogate \ud800\udc000 pair',
                          'isolated \ud800 leading',
                          'isolated \udc00 trailing',
                          '\ud800 isolated leading at beginning',
                          '\udc00 isolated trailing at beginning',
                          'isolated leading at end \ud800',
                          'isolated trailing at end \udc00',
                          'swapped surrogate \udc00\ud800 pair'];

// Returns true if `codepoint` lies in the surrogate range [0xD800, 0xDFFF].
function IsSurrogate(codepoint) {
  return 0xD800 <= codepoint && codepoint <= 0xDFFF;
}

// Returns true if `str` contains a surrogate that is not part of a pair.
// The string iterator yields paired surrogates as one code point above
// 0xFFFF, so any surrogate value seen here is an isolated one.
function HasIsolatedSurrogate(str) {
  for (let codepoint of str) {
    let value = codepoint.codePointAt(0);
    if (IsSurrogate(value)) return true;
  }
  return false;
}

// Returns a copy of `str` in which every isolated surrogate is replaced by
// `replacement` (U+FFFD by default). Properly paired surrogates are kept.
function ReplaceIsolatedSurrogates(str, replacement='\ufffd') {
  let replaced = '';
  for (let codepoint of str) {
    replaced +=
      IsSurrogate(codepoint.codePointAt(0)) ? replacement : codepoint;
  }
  return replaced;
}

// Builds the byte data used by TestStringNewWtf8Array.
//
// Returns { valid, invalid, data }:
//   - valid:   maps each entry of interestingStrings to the { offset, length }
//              (both in bytes) of its WTF-8 encoding within `data`.
//   - invalid: maps each malformed byte string to its { offset, length }
//              within `data`.
//   - data:    a Uint8Array holding all encodings back to back.
function makeWtf8TestDataSegment() {
  let data = [];
  let valid = {};
  let invalid = {};

  for (let str of interestingStrings) {
    let bytes = encodeWtf8(str);
    valid[str] = { offset: data.length, length: bytes.length };
    for (let byte of bytes) {
      data.push(byte);
    }
  }
  // Each character of these strings is copied into `data` as one raw byte
  // (via charCodeAt), so the \x escapes spell out the exact byte sequences.
  for (let bytes of ['trailing high byte \xa9',
                     'interstitial high \xa9 byte',
                     'invalid \xc0 byte',
                     'invalid three-byte \xed\xd0\x80',
                     'surrogate \xed\xa0\x80\xed\xb0\x80 pair']) {
    invalid[bytes] = { offset: data.length, length: bytes.length };
    for (let i = 0; i < bytes.length; i++) {
      data.push(bytes.charCodeAt(i));
    }
  }

  return { valid, invalid, data: Uint8Array.from(data) };
}

// Decodes slices of an i8 array into strings under each of the three WTF-8
// policies, and checks bounds handling of the (start, end) arguments.
(function TestStringNewWtf8Array() {
  let builder = new WasmModuleBuilder();

  let data = makeWtf8TestDataSegment();
  let data_index = builder.addPassiveDataSegment(data.data);

  let ascii_data_index =
      builder.addPassiveDataSegment(Uint8Array.from(encodeWtf8("ascii")));

  let i8_array = builder.addArray(kWasmI8, true);

  // Helper that materializes the whole test data segment as an i8 array.
  let make_i8_array = builder.addFunction(
      "make_i8_array", makeSig([], [wasmRefType(i8_array)]))
    .addBody([
      ...wasmI32Const(0),
      ...wasmI32Const(data.data.length),
      kGCPrefix, kExprArrayNewDataStatic, i8_array, data_index
    ]).index;

  // One exported (start, end) -> string decoder per policy.
  for (let [policy, name] of [[kWtf8PolicyAccept, "new_wtf8"],
                              [kWtf8PolicyReject, "new_utf8"],
                              [kWtf8PolicyReplace, "new_utf8_sloppy"]]) {
    builder.addFunction(name, kSig_w_ii)
      .exportFunc()
      .addBody([
        kExprCallFunction, make_i8_array,
        kExprLocalGet, 0, kExprLocalGet, 1,
        kGCPrefix, kExprStringNewWtf8Array, policy
      ]);
  }

  // Decodes a slice of a five-element array holding "ascii", so that
  // out-of-range (start, end) values can be probed.
  builder.addFunction("bounds_check", kSig_w_ii)
    .exportFunc()
    .addBody([
      ...wasmI32Const(0),
      ...wasmI32Const("ascii".length),
      kGCPrefix, kExprArrayNewDataStatic, i8_array, ascii_data_index,
      kExprLocalGet, 0, kExprLocalGet, 1,
      kGCPrefix, kExprStringNewWtf8Array, kWtf8PolicyAccept
    ]);

  let instance = builder.instantiate();
  for (let [str, {offset, length}] of Object.entries(data.valid)) {
    let start = offset;
    let end = offset + length;
    assertEquals(str, instance.exports.new_wtf8(start, end));
    if (HasIsolatedSurrogate(str)) {
      assertThrows(() => instance.exports.new_utf8(start, end),
                   WebAssembly.RuntimeError, "invalid UTF-8 string");

      // Isolated surrogates have the three-byte pattern ED [A0,BF]
      // [80,BF].  When the sloppy decoder gets to the second byte, it
      // will reject the sequence, and then retry parsing at the second
      // byte.  Seeing the second byte can't start a sequence, it
      // replaces the second byte and continues with the next, which
      // also can't start a sequence.  The result is that one isolated
      // surrogate is replaced by three U+FFFD codepoints.
      assertEquals(ReplaceIsolatedSurrogates(str, '\ufffd\ufffd\ufffd'),
                   instance.exports.new_utf8_sloppy(start, end));
    } else {
      assertEquals(str, instance.exports.new_utf8(start, end));
      assertEquals(str, instance.exports.new_utf8_sloppy(start, end));
    }
  }
  for (let [str, {offset, length}] of Object.entries(data.invalid)) {
    let start = offset;
    let end = offset + length;
    assertThrows(() => instance.exports.new_wtf8(start, end),
                 WebAssembly.RuntimeError, "invalid WTF-8 string");
    assertThrows(() => instance.exports.new_utf8(start, end),
                 WebAssembly.RuntimeError, "invalid UTF-8 string");
  }

  assertEquals("ascii", instance.exports.bounds_check(0, "ascii".length));
  assertEquals("", instance.exports.bounds_check("ascii".length,
                                                 "ascii".length));
  assertEquals("i", instance.exports.bounds_check("ascii".length - 1,
                                                  "ascii".length));
  assertThrows(() => instance.exports.bounds_check(0, 100),
               WebAssembly.RuntimeError, "array element access out of bounds");
  assertThrows(() => instance.exports.bounds_check("ascii".length,
                                                   "ascii".length + 1),
               WebAssembly.RuntimeError, "array element access out of bounds");
})();

// Encodes a JS string as little-endian 16-bit code units, returned as an
// array of bytes (two bytes per code unit, low byte first).
function encodeWtf16LE(str) {
  // Iterate by index rather than with the string iterator: every UTF-16
  // code unit, including each half of a surrogate pair, is emitted as is.
  let out = [];
  for (let i = 0; i < str.length; i++) {
    // Declared locally; a bare assignment here would create a global.
    let codeunit = str.charCodeAt(i);
    out.push(codeunit & 0xff);
    out.push(codeunit >> 8);
  }
  return out;
}

// Builds the byte data used by TestStringNewWtf16Array.
//
// Returns { valid, data }:
//   - valid: maps each entry of interestingStrings to { offset, length },
//            where `offset` is a BYTE offset into `data` and `length` is
//            the number of 16-bit code units (str.length).
//   - data:  a Uint8Array holding all little-endian encodings back to back.
function makeWtf16TestDataSegment() {
  let data = [];
  let valid = {};

  for (let str of interestingStrings) {
    valid[str] = { offset: data.length, length: str.length };
    for (let byte of encodeWtf16LE(str)) {
      data.push(byte);
    }
  }

  return { valid, data: Uint8Array.from(data) };
}

// Decodes slices of an i16 array into strings and checks bounds handling
// of the (start, end) arguments.
(function TestStringNewWtf16Array() {
  let builder = new WasmModuleBuilder();

  let data = makeWtf16TestDataSegment();
  let data_index = builder.addPassiveDataSegment(data.data);
  let ascii_data_index =
      builder.addPassiveDataSegment(Uint8Array.from(encodeWtf16LE("ascii")));

  let i16_array = builder.addArray(kWasmI16, true);

  // Helper that materializes the whole test data segment as an i16 array;
  // the element count is half the segment's byte length.
  let make_i16_array = builder.addFunction(
      "make_i16_array", makeSig([], [wasmRefType(i16_array)]))
    .addBody([
      ...wasmI32Const(0),
      ...wasmI32Const(data.data.length / 2),
      kGCPrefix, kExprArrayNewDataStatic, i16_array, data_index
    ]).index;

  builder.addFunction("new_wtf16", kSig_w_ii)
    .exportFunc()
    .addBody([
      kExprCallFunction, make_i16_array,
      kExprLocalGet, 0, kExprLocalGet, 1,
      kGCPrefix, kExprStringNewWtf16Array
    ]);

  // Decodes a slice of a five-element array holding "ascii", so that
  // out-of-range (start, end) values can be probed.
  builder.addFunction("bounds_check", kSig_w_ii)
    .exportFunc()
    .addBody([
      ...wasmI32Const(0),
      ...wasmI32Const("ascii".length),
      kGCPrefix, kExprArrayNewDataStatic, i16_array, ascii_data_index,
      kExprLocalGet, 0, kExprLocalGet, 1,
      kGCPrefix, kExprStringNewWtf16Array
    ]);

  let instance = builder.instantiate();
  for (let [str, {offset, length}] of Object.entries(data.valid)) {
    // `offset` is in bytes; array indices are in 16-bit elements.
    let start = offset / 2;
    let end = start + length;
    assertEquals(str, instance.exports.new_wtf16(start, end));
  }

  assertEquals("ascii", instance.exports.bounds_check(0, "ascii".length));
  assertEquals("", instance.exports.bounds_check("ascii".length,
                                                 "ascii".length));
  assertEquals("i", instance.exports.bounds_check("ascii".length - 1,
                                                  "ascii".length));
  assertThrows(() => instance.exports.bounds_check(0, 100),
               WebAssembly.RuntimeError, "array element access out of bounds");
  assertThrows(() => instance.exports.bounds_check("ascii".length,
                                                   "ascii".length + 1),
               WebAssembly.RuntimeError, "array element access out of bounds");
})();

// Encodes strings into an i8 array under each WTF-8 policy, decodes the
// written bytes back, and checks the round trip, null operands, and
// out-of-bounds writes.
(function TestStringEncodeWtf8Array() {
  let builder = new WasmModuleBuilder();

  let i8_array = builder.addArray(kWasmI8, true);

  let kSig_w_wii =
      makeSig([kWasmStringRef, kWasmI32, kWasmI32],
              [kWasmStringRef]);
  // The array index (0, 1, 2) is used directly as the policy immediate.
  // NOTE(review): presumably these match kWtf8PolicyReject / Accept /
  // Replace from wasm-module-builder.js -- confirm.
  for (let [policy, name] of ["utf8", "wtf8", "replace"].entries()) {
    // Allocate an array that's exactly the expected size, and encode
    // into it.  Then decode it.
    // (str, length, offset=0) -> str
    builder.addFunction("encode_" + name, kSig_w_wii)
      .exportFunc()
      .addLocals(wasmRefNullType(i8_array), 1)
      .addLocals(kWasmI32, 1)
      .addBody([
        // Allocate buffer.
        kExprLocalGet, 1,
        kGCPrefix, kExprArrayNewDefault, i8_array,
        kExprLocalSet, 3,

        // Write buffer, store number of bytes written.
        kExprLocalGet, 0,
        kExprLocalGet, 3,
        kExprLocalGet, 2,
        kGCPrefix, kExprStringEncodeWtf8Array, policy,
        kExprLocalSet, 4,

        // Read buffer.
        kExprLocalGet, 3,
        kExprLocalGet, 2,
        kExprLocalGet, 2, kExprLocalGet, 4, kExprI32Add,
        kGCPrefix, kExprStringNewWtf8Array, kWtf8PolicyAccept,
      ]);
  }

  // Encode with a null string operand and a valid (empty) array.
  builder.addFunction("encode_null_string", kSig_i_v)
    .exportFunc()
    .addBody([
        kExprRefNull, kStringRefCode,
        kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, i8_array,
        kExprI32Const, 0,
        kGCPrefix, kExprStringEncodeWtf8Array, 0,
      ]);
  // Encode a valid (empty) string into a null array operand.
  builder.addFunction("encode_null_array", kSig_i_v)
    .exportFunc()
    .addBody([
        kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, i8_array,
        kExprI32Const, 0, kExprI32Const, 0,
        kGCPrefix, kExprStringNewWtf8Array, kWtf8PolicyAccept,
        kExprRefNull, i8_array,
        kExprI32Const, 0,
        kGCPrefix, kExprStringEncodeWtf8Array, kWtf8PolicyAccept,
      ]);

  let instance = builder.instantiate();
  for (let str of interestingStrings) {
    let wtf8 = encodeWtf8(str);
    assertEquals(str, instance.exports.encode_wtf8(str, wtf8.length, 0));
    assertEquals(str, instance.exports.encode_wtf8(str, wtf8.length + 20,
                                                   10));
  }

  for (let str of interestingStrings) {
    let wtf8 = encodeWtf8(str);
    if (HasIsolatedSurrogate(str)) {
      assertThrows(() => instance.exports.encode_utf8(str, wtf8.length, 0),
          WebAssembly.RuntimeError,
          "Failed to encode string as UTF-8: contains unpaired surrogate");
    } else {
      assertEquals(str, instance.exports.encode_utf8(str, wtf8.length, 0));
      // Exercise the UTF-8 encoder (not the WTF-8 one, already covered by
      // the loop above) with an oversized buffer and a non-zero offset.
      assertEquals(str,
                   instance.exports.encode_utf8(str, wtf8.length + 20, 10));
    }
  }

  for (let str of interestingStrings) {
    let replaced = ReplaceIsolatedSurrogates(str);
    if (!HasIsolatedSurrogate(str)) assertEquals(str, replaced);
    // Size the buffer for the replaced string, since that is what the
    // encoder is expected to write.
    let wtf8 = encodeWtf8(replaced);
    assertEquals(replaced,
                 instance.exports.encode_replace(str, wtf8.length, 0));
    assertEquals(replaced,
                 instance.exports.encode_replace(str, wtf8.length + 20, 10));
  }

  assertThrows(() => instance.exports.encode_null_array(),
               WebAssembly.RuntimeError, "dereferencing a null pointer");
  assertThrows(() => instance.exports.encode_null_string(),
               WebAssembly.RuntimeError, "dereferencing a null pointer");

  // An exactly-sized buffer combined with offset 1 leaves no room for the
  // last byte, so every policy must report an out-of-bounds access.
  for (let str of interestingStrings) {
    let wtf8 = encodeWtf8(str);
    let message = "array element access out of bounds";
    assertThrows(() => instance.exports.encode_wtf8(str, wtf8.length, 1),
                 WebAssembly.RuntimeError, message);
    assertThrows(() => instance.exports.encode_utf8(str, wtf8.length, 1),
                 WebAssembly.RuntimeError, message);
    assertThrows(() => instance.exports.encode_replace(str, wtf8.length, 1),
                 WebAssembly.RuntimeError, message);
  }
})();

// Encodes strings into an i16 array, decodes the written code units back,
// and checks the round trip, null operands, and out-of-bounds writes.
(function TestStringEncodeWtf16Array() {
  let builder = new WasmModuleBuilder();

  let i16_array = builder.addArray(kWasmI16, true);

  let kSig_w_wii =
      makeSig([kWasmStringRef, kWasmI32, kWasmI32],
              [kWasmStringRef]);
  // Allocate an array and encode into it.  Then decode it.
  // (str, length, offset=0) -> str
  builder.addFunction("encode", kSig_w_wii)
    .exportFunc()
    .addLocals(wasmRefNullType(i16_array), 1)
    .addLocals(kWasmI32, 1)
    .addBody([
      // Allocate buffer.
      kExprLocalGet, 1,
      kGCPrefix, kExprArrayNewDefault, i16_array,
      kExprLocalSet, 3,

      // Write buffer, store number of code units written.
      kExprLocalGet, 0,
      kExprLocalGet, 3,
      kExprLocalGet, 2,
      kGCPrefix, kExprStringEncodeWtf16Array,
      kExprLocalSet, 4,

      // Read buffer.
      kExprLocalGet, 3,
      kExprLocalGet, 2,
      kExprLocalGet, 2, kExprLocalGet, 4, kExprI32Add,
      kGCPrefix, kExprStringNewWtf16Array,
    ]);

  // Encode with a null string operand and a valid (empty) array.
  builder.addFunction("encode_null_string", kSig_i_v)
    .exportFunc()
    .addBody([
        kExprRefNull, kStringRefCode,
        kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, i16_array,
        kExprI32Const, 0,
        kGCPrefix, kExprStringEncodeWtf16Array
      ]);
  // Encode a valid (empty) string into a null array operand.
  builder.addFunction("encode_null_array", kSig_i_v)
    .exportFunc()
    .addBody([
        kExprI32Const, 0, kGCPrefix, kExprArrayNewDefault, i16_array,
        kExprI32Const, 0, kExprI32Const, 0,
        kGCPrefix, kExprStringNewWtf16Array,
        kExprRefNull, i16_array,
        kExprI32Const, 0,
        kGCPrefix, kExprStringEncodeWtf16Array
      ]);

  let instance = builder.instantiate();
  for (let str of interestingStrings) {
    assertEquals(str, instance.exports.encode(str, str.length, 0));
    assertEquals(str, instance.exports.encode(str, str.length + 20, 10));
  }

  assertThrows(() => instance.exports.encode_null_array(),
               WebAssembly.RuntimeError, "dereferencing a null pointer");
  assertThrows(() => instance.exports.encode_null_string(),
               WebAssembly.RuntimeError, "dereferencing a null pointer");

  // An exactly-sized buffer combined with offset 1 leaves no room for the
  // last code unit.
  for (let str of interestingStrings) {
    let message = "array element access out of bounds";
    assertThrows(() => instance.exports.encode(str, str.length, 1),
                 WebAssembly.RuntimeError, message);
  }
})();