Fix out-of-range access in unibrow::Utf8::CalculateValue.

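CalculateValue must not read bytes beyond max_length while validating a
candidate UTF-8 sequence. The check that enough continuation bytes are present
now happens first; only then is str[1] inspected for overlong encodings,
surrogate halves, and code points outside the Unicode range.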

BUG=chromium:667260, chromium:662822

Review-Url: https://codereview.chromium.org/2520053003
Cr-Commit-Position: refs/heads/master@{#41165}
This commit is contained in:
jbroman 2016-11-22 01:27:41 -08:00 committed by Commit bot
parent 8c4988f738
commit 9d524bd33d
5 changed files with 52 additions and 14 deletions

View File

@ -7,10 +7,11 @@
#include <sys/types.h>
#include "src/globals.h"
#include "src/utils.h"
namespace unibrow {
class Utf8DecoderBase {
class V8_EXPORT_PRIVATE Utf8DecoderBase {
public:
// Initialization done in subclass.
inline Utf8DecoderBase();

View File

@ -235,35 +235,31 @@ uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
while (count < max_count && IsContinuationCharacter(str[count])) {
count++;
}
*cursor += count;
// Check overly long sequences & other conditions. Use length as error
// indicator.
// There must be enough continuation characters.
if (count != length) return kBadChar;
// Check overly long sequences & other conditions.
if (length == 3) {
if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
// Overlong three-byte sequence?
length = 0;
return kBadChar;
} else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
// High and low surrogate halves?
length = 0;
return kBadChar;
}
} else if (length == 4) {
if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
// Overlong four-byte sequence.
length = 0;
return kBadChar;
} else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
// Code points outside of the unicode range.
length = 0;
return kBadChar;
}
}
if (count != length) {
// All invalid encodings should land here.
*cursor += count;
return kBadChar;
}
// All errors have been handled, so we only have to assemble the result.
*cursor += length;
switch (length) {
case 1:
return str[0];

View File

@ -120,6 +120,7 @@ v8_executable("unittests") {
"source-position-table-unittest.cc",
"test-utils.cc",
"test-utils.h",
"unicode-unittest.cc",
"value-serializer-unittest.cc",
"wasm/asm-types-unittest.cc",
"wasm/ast-decoder-unittest.cc",

View File

@ -0,0 +1,39 @@
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <memory>
#include <string>
#include "src/unicode-decoder.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace v8 {
namespace internal {
namespace {
using Utf8Decoder = unibrow::Utf8Decoder<512>;
// Hands the bytes of |str| to |decoder| via Utf8Decoder::Reset().
//
// The bytes are first copied into a dedicated heap allocation of exactly
// str.length() bytes (no terminating NUL is copied), so that
// AddressSanitizer's heap-buffer-overflow logic can see what's going on
// relative to the true end of the input.
void Decode(Utf8Decoder* decoder, const std::string& str) {
  const auto byte_count = str.length();
  std::unique_ptr<char[]> heap_copy(new char[byte_count]);
  memcpy(heap_copy.get(), str.data(), byte_count);
  // NOTE(review): |heap_copy| is released when this function returns, so this
  // presumably relies on Reset() consuming the bytes before it returns -
  // confirm against Utf8Decoder.
  decoder->Reset(heap_copy.get(), byte_count);
}
} // namespace
// Decodes inputs that end immediately after a multi-byte lead byte.
//
// The test body contains no explicit expectations; per the comment in
// Decode(), it is meant to be checked by AddressSanitizer.
TEST(UnicodeTest, ReadOffEndOfUtf8String) {
  // Each input is a single lead byte with no continuation bytes before the
  // string ends.
  const char* const kTruncatedInputs[] = {"\xE0", "\xED", "\xF0", "\xF4"};

  Utf8Decoder decoder;
  for (const char* input : kTruncatedInputs) {
    Decode(&decoder, input);
  }
}
} // namespace internal
} // namespace v8

View File

@ -118,6 +118,7 @@
'source-position-table-unittest.cc',
'test-utils.h',
'test-utils.cc',
'unicode-unittest.cc',
'value-serializer-unittest.cc',
'zone/segmentpool-unittest.cc',
'zone/zone-chunk-list-unittest.cc',