[wasm] Add UTF-8 validation
Names passed for imports and exports are checked during decoding, leading to errors if they are not valid UTF-8. Function names are not checked during decoding; instead, looking them up at runtime returns undefined if they are not valid UTF-8. We need to do these checks on the Wasm side, since the factory methods assume that they receive valid UTF-8 strings. R=titzer@chromium.org, yangguo@chromium.org Review-Url: https://codereview.chromium.org/1967023004 Cr-Commit-Position: refs/heads/master@{#36208}
This commit is contained in:
parent
4aec7ba1aa
commit
f0523e3046
@ -137,6 +137,12 @@ unsigned Utf8::Length(uchar c, int previous) {
|
||||
}
|
||||
}
|
||||
|
||||
// Decides whether the code point {c} counts as a valid character.
// Rejected are: values in 0xD800..0xDFFF (surrogates), the block
// 0xFDD0..0xFDEF, everything above 0x10FFFF, any value whose low 16 bits are
// 0xFFFE or 0xFFFF, and kBadChar.
bool Utf8::IsValidCharacter(uchar c) {
  if (c < 0xD800u) return true;                // Below the surrogate range.
  if (c < 0xE000u) return false;               // Surrogate code point.
  if (c < 0xFDD0u) return true;                // Up to the 0xFDD0 block.
  if (c <= 0xFDEFu) return false;              // Block 0xFDD0..0xFDEF.
  if (c > 0x10FFFFu) return false;             // Outside the code space.
  if ((c & 0xFFFEu) == 0xFFFEu) return false;  // Ends in FFFE or FFFF.
  return c != kBadChar;
}
|
||||
|
||||
} // namespace unibrow
|
||||
|
||||
#endif // V8_UNICODE_INL_H_
|
||||
|
@ -305,6 +305,20 @@ uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
|
||||
0x03C82080;
|
||||
}
|
||||
|
||||
bool Utf8::Validate(const byte* bytes, size_t length) {
|
||||
size_t cursor = 0;
|
||||
|
||||
// Performance optimization: Skip over single-byte values first.
|
||||
while (cursor < length && bytes[cursor] <= kMaxOneByteChar) {
|
||||
++cursor;
|
||||
}
|
||||
|
||||
while (cursor < length) {
|
||||
uchar c = ValueOf(bytes + cursor, length - cursor, &cursor);
|
||||
if (!IsValidCharacter(c)) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Uppercase: point.category == 'Lu'
|
||||
|
||||
|
@ -155,6 +155,11 @@ class Utf8 {
|
||||
// UTF-8.
|
||||
static const unsigned kMax16BitCodeUnitSize = 3;
|
||||
static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor);
|
||||
|
||||
// Excludes non-characters from the set of valid code points.
|
||||
static inline bool IsValidCharacter(uchar c);
|
||||
|
||||
static bool Validate(const byte* str, size_t length);
|
||||
};
|
||||
|
||||
struct Uppercase {
|
||||
|
@ -267,13 +267,13 @@ class ModuleDecoder : public Decoder {
|
||||
for (uint32_t i = 0; i < functions_count; i++) {
|
||||
WasmFunction* function = &module->functions[i];
|
||||
function->name_offset =
|
||||
consume_string(&function->name_length, "function name");
|
||||
consume_string(&function->name_length, false);
|
||||
|
||||
uint32_t local_names_count =
|
||||
consume_u32v(&length, "local names count");
|
||||
for (uint32_t j = 0; j < local_names_count; j++) {
|
||||
uint32_t unused = 0;
|
||||
uint32_t offset = consume_string(&unused, "local name");
|
||||
uint32_t offset = consume_string(&unused, false);
|
||||
USE(unused);
|
||||
USE(offset);
|
||||
}
|
||||
@ -372,13 +372,13 @@ class ModuleDecoder : public Decoder {
|
||||
|
||||
import->sig_index = consume_sig_index(module, &import->sig);
|
||||
const byte* pos = pc_;
|
||||
import->module_name_offset = consume_string(
|
||||
&import->module_name_length, "import module name");
|
||||
import->module_name_offset =
|
||||
consume_string(&import->module_name_length, true);
|
||||
if (import->module_name_length == 0) {
|
||||
error(pos, "import module name cannot be NULL");
|
||||
}
|
||||
import->function_name_offset = consume_string(
|
||||
&import->function_name_length, "import function name");
|
||||
import->function_name_offset =
|
||||
consume_string(&import->function_name_length, true);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -402,7 +402,7 @@ class ModuleDecoder : public Decoder {
|
||||
|
||||
WasmFunction* func;
|
||||
exp->func_index = consume_func_index(module, &func);
|
||||
exp->name_offset = consume_string(&exp->name_length, "export name");
|
||||
exp->name_offset = consume_string(&exp->name_length, true);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -500,7 +500,9 @@ class ModuleDecoder : public Decoder {
|
||||
|
||||
// Decodes a single global entry inside a module starting at {pc_}.
|
||||
void DecodeGlobalInModule(WasmGlobal* global) {
|
||||
global->name_offset = consume_string(&global->name_length, "global name");
|
||||
global->name_offset = consume_string(&global->name_length, false);
|
||||
DCHECK(unibrow::Utf8::Validate(start_ + global->name_offset,
|
||||
global->name_length));
|
||||
global->type = mem_type();
|
||||
global->offset = 0;
|
||||
global->exported = consume_u8("exported") != 0;
|
||||
@ -529,13 +531,13 @@ class ModuleDecoder : public Decoder {
|
||||
decl_bits & kDeclFunctionExport ? " exported" : "",
|
||||
(decl_bits & kDeclFunctionImport) == 0 ? " body" : "");
|
||||
|
||||
function->exported = decl_bits & kDeclFunctionExport;
|
||||
|
||||
if (decl_bits & kDeclFunctionName) {
|
||||
function->name_offset =
|
||||
consume_string(&function->name_length, "function name");
|
||||
consume_string(&function->name_length, function->exported);
|
||||
}
|
||||
|
||||
function->exported = decl_bits & kDeclFunctionExport;
|
||||
|
||||
// Imported functions have no locals or body.
|
||||
if (decl_bits & kDeclFunctionImport) {
|
||||
function->external = true;
|
||||
@ -639,11 +641,14 @@ class ModuleDecoder : public Decoder {
|
||||
|
||||
// Reads a length-prefixed string, checking that it is within bounds. Returns
|
||||
// the offset of the string, and the length as an out parameter.
|
||||
uint32_t consume_string(uint32_t* length, const char* name = nullptr) {
|
||||
uint32_t consume_string(uint32_t* length, bool validate_utf8) {
|
||||
int varint_length;
|
||||
*length = consume_u32v(&varint_length, "string length");
|
||||
uint32_t offset = pc_offset();
|
||||
TRACE(" +%u %-20s: (%u bytes)\n", offset, "string", *length);
|
||||
if (validate_utf8 && !unibrow::Utf8::Validate(pc_, *length)) {
|
||||
error(pc_, "no valid UTF-8 string");
|
||||
}
|
||||
consume_bytes(*length);
|
||||
return offset;
|
||||
}
|
||||
|
@ -60,6 +60,9 @@ Handle<Object> GetWasmFunctionNameFromTable(Handle<ByteArray> func_names_array,
|
||||
: func_names_array->get_int(func_index + 2);
|
||||
ScopedVector<byte> buffer(next_offset - offset);
|
||||
func_names_array->copy_out(offset, buffer.start(), next_offset - offset);
|
||||
if (!unibrow::Utf8::Validate(buffer.start(), buffer.length())) {
|
||||
return undefined();
|
||||
}
|
||||
MaybeHandle<Object> maybe_name =
|
||||
func_names_array->GetIsolate()->factory()->NewStringFromUtf8(
|
||||
Vector<const char>::cast(buffer));
|
||||
|
@ -586,9 +586,9 @@ bool FinishCompilation(Isolate* isolate, WasmModule* module,
|
||||
DCHECK_EQ(i, func.func_index);
|
||||
WasmName str = module->GetName(func.name_offset, func.name_length);
|
||||
WasmName str_null = {nullptr, 0};
|
||||
Handle<String> name = factory->InternalizeUtf8String(str);
|
||||
Handle<Code> code = Handle<Code>::null();
|
||||
Handle<JSFunction> function = Handle<JSFunction>::null();
|
||||
Handle<String> function_name = Handle<String>::null();
|
||||
if (func.external) {
|
||||
// Lookup external function in FFI object.
|
||||
MaybeHandle<JSFunction> function =
|
||||
@ -613,8 +613,9 @@ bool FinishCompilation(Isolate* isolate, WasmModule* module,
|
||||
return false;
|
||||
}
|
||||
if (func.exported) {
|
||||
function_name = factory->InternalizeUtf8String(str);
|
||||
function = compiler::CompileJSToWasmWrapper(
|
||||
isolate, &module_env, name, code, instance.js_object, i);
|
||||
isolate, &module_env, function_name, code, instance.js_object, i);
|
||||
record_code_size(total_code_size, function->code());
|
||||
}
|
||||
}
|
||||
@ -629,7 +630,8 @@ bool FinishCompilation(Isolate* isolate, WasmModule* module,
|
||||
// module.
|
||||
desc.set_value(function);
|
||||
Maybe<bool> status = JSReceiver::DefineOwnProperty(
|
||||
isolate, instance.js_object, name, &desc, Object::THROW_ON_ERROR);
|
||||
isolate, instance.js_object, function_name, &desc,
|
||||
Object::THROW_ON_ERROR);
|
||||
if (!status.IsJust())
|
||||
thrower.Error("export of %.*s failed.", str.length(), str.start());
|
||||
}
|
||||
|
121
test/mjsunit/wasm/unicode-validation.js
Normal file
121
test/mjsunit/wasm/unicode-validation.js
Normal file
@ -0,0 +1,121 @@
|
||||
// Copyright 2016 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --expose-wasm
|
||||
|
||||
load("test/mjsunit/wasm/wasm-constants.js");
|
||||
load("test/mjsunit/wasm/wasm-module-builder.js");
|
||||
|
||||
// Maps every UTF-16 code unit of {s} to its low byte and returns the bytes as
// an array.
function toByteArray(s) {
  return Array.from({length: s.length}, function(unused, index) {
    return s.charCodeAt(index) & 0xff;
  });
}
|
||||
|
||||
// Builds a string with one character per element of {arr}, using each element
// as a char code. Strings are passed through unchanged.
function toString(arr) {
  if (typeof arr === "string") return arr;
  return [...arr].map(function(code) {
    return String.fromCharCode(code);
  }).join("");
}
|
||||
|
||||
// Interprets the byte array {arr} as UTF-8 and returns the decoded JavaScript
// string. Strings and undefined are returned as they are. decodeURIComponent
// throws a URIError if the bytes cannot be decoded.
function toUTF8(arr) {
  var needs_decoding = typeof arr !== "string" && arr !== undefined;
  if (!needs_decoding) return arr;
  var one_char_per_byte = toString(arr);
  return decodeURIComponent(escape(one_char_per_byte));
}
|
||||
|
||||
// JavaScript-side estimate of whether {arr} holds valid UTF-8. Strings and
// undefined always count as valid. A byte array is invalid if decoding it
// raises a URIError, or if any code unit of the decoded string has the low
// bits 0xfffe / 0xffff.
function isValidUtf8(arr) {
  if (arr === undefined || typeof arr === "string") return true;

  var decoded;
  try {
    decoded = toUTF8(arr);
  } catch (e) {
    // Only decoding failures mean "invalid"; anything else is a real error.
    if (e instanceof URIError) return false;
    throw e;
  }

  for (var index = 0; index < decoded.length; ++index) {
    var low_bits = decoded.charCodeAt(index) & 0xfffe;
    if (low_bits == 0xfffe) return false;
  }
  return true;
}
|
||||
|
||||
// Builds a module with one import ({imported_module_name},
// {imported_function_name}) and one function named {internal_function_name}
// that calls the import and is exported as {exported_function_name}. Then
// instantiates it and asserts that instantiation throws an error mentioning
// "UTF-8" exactly when {shouldThrow} is true.
function checkImportsAndExports(imported_module_name, imported_function_name,
    internal_function_name, exported_function_name, shouldThrow) {
  var module_builder = new WasmModuleBuilder();

  module_builder.addImportWithModule(imported_module_name,
                                     imported_function_name, kSig_v_v);

  var internal_function =
      module_builder.addFunction(internal_function_name, kSig_v_v);
  internal_function.addBody([kExprCallImport, kArity0, 0])
      .exportAs(exported_function_name);

  // Sanity check: does JavaScript agree with our shouldThrow annotation?
  // The internal function name is deliberately not part of this check.
  var validated_names = [imported_module_name, imported_function_name,
                         exported_function_name];
  var any_name_invalid = validated_names.some(function(candidate) {
    return !isValidUtf8(candidate);
  });
  assertEquals(shouldThrow, any_name_invalid,
               "JavaScript does not agree with our shouldThrow expectation");

  // For valid names, the ffi object must be keyed by the decoded strings.
  if (!shouldThrow) {
    imported_module_name = toUTF8(imported_module_name);
    imported_function_name = toUTF8(imported_function_name);
  }

  var ffi = {};
  if (imported_function_name === undefined) {
    ffi[imported_module_name] = function() { };
  } else {
    var module_object = {};
    module_object[imported_function_name] = function() { };
    ffi[imported_module_name] = module_object;
  }

  var threw = false;
  try {
    module_builder.instantiate(ffi);
  } catch (err) {
    threw = true;
    if (!shouldThrow) print(err);
    assertTrue(shouldThrow, "Should not throw error on valid names");
    assertContains("UTF-8", err.toString());
  }
  assertEquals(shouldThrow, threw,
               "Should throw validation error on invalid names");
}
|
||||
|
||||
// Uses {name} as the module name of the import; the exported function name is
// passed as undefined.
function checkImportedModuleName(name, shouldThrow) {
  var exported_function_name = undefined;
  checkImportsAndExports(name, "imp", "func", exported_function_name,
                         shouldThrow);
}
|
||||
|
||||
// Uses {name} as the function name of the import; all other names are plain
// ASCII.
function checkImportedFunctionName(name, shouldThrow) {
  var imported_function_name = name;
  checkImportsAndExports("module", imported_function_name, "func", "func",
                         shouldThrow);
}
|
||||
|
||||
// Uses {name} as the name under which the function is exported; all other
// names are plain ASCII.
function checkExportedFunctionName(name, shouldThrow) {
  var exported_function_name = name;
  checkImportsAndExports("module", "func", "func", exported_function_name,
                         shouldThrow);
}
|
||||
|
||||
// Uses {name} as the internal (non-exported) function name. Instantiation is
// always expected to succeed here, whatever {name} contains.
function checkInternalFunctionName(name) {
  var shouldThrow = false;
  checkImportsAndExports("module", "func", name, "func", shouldThrow);
}
|
||||
|
||||
// Runs {name} through every position a name can take: import module name,
// import function name, export name (each with the given {shouldThrow}
// expectation), and finally the internal function name.
function checkAll(name, shouldThrow) {
  var validated_positions = [checkImportedModuleName,
                             checkImportedFunctionName,
                             checkExportedFunctionName];
  for (var check of validated_positions) check(name, shouldThrow);
  checkInternalFunctionName(name);
}
|
||||
|
||||
// Test vectors as [name, shouldThrow] pairs, checked in the order listed.
(function() {
  var test_cases = [
    ["ascii", false],
    ["some math: (½)² = ¼", false],
    ["中国历史系列条目\n北", false],
    [toByteArray("\xef\xb7\x8f"), false],
    [toByteArray("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), false],
    [toByteArray("\xff"), true],
    [toByteArray("\xed\xa0\x8f"), true],      // surrogate code points
    [toByteArray("\xe0\x82\x80"), true],      // overlong sequence
    [toByteArray("\xf4\x90\x80\x80"), true],  // beyond limit: U+110000
    [toByteArray("\xef\xbf\xbe"), true],      // non-character; U+FFFE
    [toByteArray("with\x00null"), false]
  ];
  for (var test_case of test_cases) checkAll(test_case[0], test_case[1]);
})();
|
@ -75,9 +75,7 @@ WasmModuleBuilder.prototype.addImportWithModule = function(module, name, sig) {
|
||||
}
|
||||
|
||||
// Adds an import that has only a module name ({name}); the function name of
// the import is undefined. Delegates to addImportWithModule.
//
// The result of the delegate is returned: the inline implementation this
// replaces returned the index of the new import (this.imports.length - 1),
// and dropping the return would hand callers undefined instead.
// NOTE(review): assumes addImportWithModule returns that index -- confirm.
WasmModuleBuilder.prototype.addImport = function(name, sig) {
  return this.addImportWithModule(name, undefined, sig);
}
|
||||
|
||||
WasmModuleBuilder.prototype.addDataSegment = function(addr, data, init) {
|
||||
@ -107,9 +105,19 @@ function emit_u32(bytes, val) {
|
||||
}
|
||||
|
||||
// Appends {string} to {bytes} as a varint length prefix followed by the
// string's bytes. {string} is either a JavaScript string, which is emitted
// UTF-8 encoded, or an Array of bytes, which is emitted as is.
function emit_string(bytes, string) {
  // When testing illegal names, we pass a byte array directly.
  if (string instanceof Array) {
    emit_varint(bytes, string.length);
    emit_bytes(bytes, string);
    return;
  }

  // This is the hacky way to convert a JavaScript string to a UTF8 encoded
  // string only containing single-byte characters. The length prefix must
  // count encoded bytes, not UTF-16 code units.
  var string_utf8 = unescape(encodeURIComponent(string));
  emit_varint(bytes, string_utf8.length);
  for (var i = 0; i < string_utf8.length; i++) {
    emit_u8(bytes, string_utf8.charCodeAt(i));
  }
}
|
||||
|
||||
@ -309,8 +317,8 @@ WasmModuleBuilder.prototype.toArray = function(debug) {
|
||||
emit_varint(bytes, wasm.functions.length);
|
||||
for (func of wasm.functions) {
|
||||
var name = func.name == undefined ? "" : func.name;
|
||||
emit_string(bytes, name);
|
||||
emit_u8(bytes, 0); // local names count == 0
|
||||
emit_string(bytes, name);
|
||||
emit_u8(bytes, 0); // local names count == 0
|
||||
}
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user