Fix out-of-range access in unibrow::Utf8::CalculateValue.
This code should not read bytes outside the permitted range in order to check whether a possible UTF-8 value is in range. Instead, the length check should occur before any such range checks.

BUG=chromium:667260, chromium:662822
Review-Url: https://codereview.chromium.org/2520053003
Cr-Commit-Position: refs/heads/master@{#41165}
This commit is contained in:
parent
8c4988f738
commit
9d524bd33d
@ -7,10 +7,11 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
#include "src/globals.h"
|
||||
#include "src/utils.h"
|
||||
|
||||
namespace unibrow {
|
||||
|
||||
class Utf8DecoderBase {
|
||||
class V8_EXPORT_PRIVATE Utf8DecoderBase {
|
||||
public:
|
||||
// Initialization done in subclass.
|
||||
inline Utf8DecoderBase();
|
||||
|
@ -235,35 +235,31 @@ uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
|
||||
while (count < max_count && IsContinuationCharacter(str[count])) {
|
||||
count++;
|
||||
}
|
||||
*cursor += count;
|
||||
|
||||
// Check overly long sequences & other conditions. Use length as error
|
||||
// indicator.
|
||||
// There must be enough continuation characters.
|
||||
if (count != length) return kBadChar;
|
||||
|
||||
// Check overly long sequences & other conditions.
|
||||
if (length == 3) {
|
||||
if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
|
||||
// Overlong three-byte sequence?
|
||||
length = 0;
|
||||
return kBadChar;
|
||||
} else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
|
||||
// High and low surrogate halves?
|
||||
length = 0;
|
||||
return kBadChar;
|
||||
}
|
||||
} else if (length == 4) {
|
||||
if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
|
||||
// Overlong four-byte sequence.
|
||||
length = 0;
|
||||
return kBadChar;
|
||||
} else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
|
||||
// Code points outside of the unicode range.
|
||||
length = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (count != length) {
|
||||
// All invalid encodings should land here.
|
||||
*cursor += count;
|
||||
return kBadChar;
|
||||
}
|
||||
}
|
||||
|
||||
// All errors have been handled, so we only have to assemble the result.
|
||||
*cursor += length;
|
||||
switch (length) {
|
||||
case 1:
|
||||
return str[0];
|
||||
|
@ -120,6 +120,7 @@ v8_executable("unittests") {
|
||||
"source-position-table-unittest.cc",
|
||||
"test-utils.cc",
|
||||
"test-utils.h",
|
||||
"unicode-unittest.cc",
|
||||
"value-serializer-unittest.cc",
|
||||
"wasm/asm-types-unittest.cc",
|
||||
"wasm/ast-decoder-unittest.cc",
|
||||
|
39
test/unittests/unicode-unittest.cc
Normal file
39
test/unittests/unicode-unittest.cc
Normal file
@ -0,0 +1,39 @@
|
||||
// Copyright 2016 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "src/unicode-decoder.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
namespace {
|
||||
|
||||
using Utf8Decoder = unibrow::Utf8Decoder<512>;
|
||||
|
||||
// Copies the bytes of |str| into a freshly allocated heap buffer of exactly
// str.length() bytes (no terminating NUL) and hands that buffer to
// decoder->Reset(). The buffer is released when this function returns.
void Decode(Utf8Decoder* decoder, const std::string& str) {
  // Put the string in its own buffer on the heap to make sure that
  // AddressSanitizer's heap-buffer-overflow logic can see what's going on.
  const size_t length = str.length();
  std::unique_ptr<char[]> buffer(new char[length]);
  // std::string::copy is declared by <string>, which this file includes, so
  // the copy does not depend on <cstring> being pulled in transitively (as
  // the previous memcpy call did). It copies exactly |length| characters and
  // appends no terminator.
  str.copy(buffer.get(), length);
  decoder->Reset(buffer.get(), length);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Feeds the decoder inputs that consist of a single multi-byte lead byte and
// nothing else. The test body makes no explicit assertions; presumably it is
// meant to be run under a memory sanitizer (see the helper's AddressSanitizer
// note) so that any read past the end of the one-byte buffer is reported.
TEST(UnicodeTest, ReadOffEndOfUtf8String) {
  Utf8Decoder decoder;

  // Not enough continuation bytes before string ends.
  static const char* const kLoneLeadBytes[] = {"\xE0", "\xED", "\xF0", "\xF4"};
  for (const char* lead_byte : kLoneLeadBytes) {
    Decode(&decoder, lead_byte);
  }
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
@ -118,6 +118,7 @@
|
||||
'source-position-table-unittest.cc',
|
||||
'test-utils.h',
|
||||
'test-utils.cc',
|
||||
'unicode-unittest.cc',
|
||||
'value-serializer-unittest.cc',
|
||||
'zone/segmentpool-unittest.cc',
|
||||
'zone/zone-chunk-list-unittest.cc',
|
||||
|
Loading…
Reference in New Issue
Block a user