Fix out-of-range access in unibrow::Utf8::CalculateValue.
This code should not access bytes out of the permitted range in order to check the range of a possible UTF-8 value. Instead, the length check should occur before such checks. BUG=chromium:667260, chromium:662822 Review-Url: https://codereview.chromium.org/2520053003 Cr-Commit-Position: refs/heads/master@{#41165}
This commit is contained in:
parent
8c4988f738
commit
9d524bd33d
@ -7,10 +7,11 @@
|
|||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include "src/globals.h"
|
#include "src/globals.h"
|
||||||
|
#include "src/utils.h"
|
||||||
|
|
||||||
namespace unibrow {
|
namespace unibrow {
|
||||||
|
|
||||||
class Utf8DecoderBase {
|
class V8_EXPORT_PRIVATE Utf8DecoderBase {
|
||||||
public:
|
public:
|
||||||
// Initialization done in subclass.
|
// Initialization done in subclass.
|
||||||
inline Utf8DecoderBase();
|
inline Utf8DecoderBase();
|
||||||
|
@ -235,35 +235,31 @@ uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
|
|||||||
while (count < max_count && IsContinuationCharacter(str[count])) {
|
while (count < max_count && IsContinuationCharacter(str[count])) {
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
*cursor += count;
|
||||||
|
|
||||||
// Check overly long sequences & other conditions. Use length as error
|
// There must be enough continuation characters.
|
||||||
// indicator.
|
if (count != length) return kBadChar;
|
||||||
|
|
||||||
|
// Check overly long sequences & other conditions.
|
||||||
if (length == 3) {
|
if (length == 3) {
|
||||||
if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
|
if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
|
||||||
// Overlong three-byte sequence?
|
// Overlong three-byte sequence?
|
||||||
length = 0;
|
return kBadChar;
|
||||||
} else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
|
} else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
|
||||||
// High and low surrogate halves?
|
// High and low surrogate halves?
|
||||||
length = 0;
|
return kBadChar;
|
||||||
}
|
}
|
||||||
} else if (length == 4) {
|
} else if (length == 4) {
|
||||||
if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
|
if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
|
||||||
// Overlong four-byte sequence.
|
// Overlong four-byte sequence.
|
||||||
length = 0;
|
return kBadChar;
|
||||||
} else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
|
} else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
|
||||||
// Code points outside of the unicode range.
|
// Code points outside of the unicode range.
|
||||||
length = 0;
|
return kBadChar;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (count != length) {
|
|
||||||
// All invalid encodings should land here.
|
|
||||||
*cursor += count;
|
|
||||||
return kBadChar;
|
|
||||||
}
|
|
||||||
|
|
||||||
// All errors have been handled, so we only have to assemble the result.
|
// All errors have been handled, so we only have to assemble the result.
|
||||||
*cursor += length;
|
|
||||||
switch (length) {
|
switch (length) {
|
||||||
case 1:
|
case 1:
|
||||||
return str[0];
|
return str[0];
|
||||||
|
@ -120,6 +120,7 @@ v8_executable("unittests") {
|
|||||||
"source-position-table-unittest.cc",
|
"source-position-table-unittest.cc",
|
||||||
"test-utils.cc",
|
"test-utils.cc",
|
||||||
"test-utils.h",
|
"test-utils.h",
|
||||||
|
"unicode-unittest.cc",
|
||||||
"value-serializer-unittest.cc",
|
"value-serializer-unittest.cc",
|
||||||
"wasm/asm-types-unittest.cc",
|
"wasm/asm-types-unittest.cc",
|
||||||
"wasm/ast-decoder-unittest.cc",
|
"wasm/ast-decoder-unittest.cc",
|
||||||
|
39
test/unittests/unicode-unittest.cc
Normal file
39
test/unittests/unicode-unittest.cc
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
// Copyright 2016 the V8 project authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "src/unicode-decoder.h"
|
||||||
|
#include "testing/gtest/include/gtest/gtest.h"
|
||||||
|
|
||||||
|
namespace v8 {
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using Utf8Decoder = unibrow::Utf8Decoder<512>;
|
||||||
|
|
||||||
|
void Decode(Utf8Decoder* decoder, const std::string& str) {
|
||||||
|
// Put the string in its own buffer on the heap to make sure that
|
||||||
|
// AddressSanitizer's heap-buffer-overflow logic can see what's going on.
|
||||||
|
std::unique_ptr<char[]> buffer(new char[str.length()]);
|
||||||
|
memcpy(buffer.get(), str.data(), str.length());
|
||||||
|
decoder->Reset(buffer.get(), str.length());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
TEST(UnicodeTest, ReadOffEndOfUtf8String) {
|
||||||
|
Utf8Decoder decoder;
|
||||||
|
|
||||||
|
// Not enough continuation bytes before string ends.
|
||||||
|
Decode(&decoder, "\xE0");
|
||||||
|
Decode(&decoder, "\xED");
|
||||||
|
Decode(&decoder, "\xF0");
|
||||||
|
Decode(&decoder, "\xF4");
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace v8
|
@ -118,6 +118,7 @@
|
|||||||
'source-position-table-unittest.cc',
|
'source-position-table-unittest.cc',
|
||||||
'test-utils.h',
|
'test-utils.h',
|
||||||
'test-utils.cc',
|
'test-utils.cc',
|
||||||
|
'unicode-unittest.cc',
|
||||||
'value-serializer-unittest.cc',
|
'value-serializer-unittest.cc',
|
||||||
'zone/segmentpool-unittest.cc',
|
'zone/segmentpool-unittest.cc',
|
||||||
'zone/zone-chunk-list-unittest.cc',
|
'zone/zone-chunk-list-unittest.cc',
|
||||||
|
Loading…
Reference in New Issue
Block a user