Script streaming: fix split UTF-8 character handling.
Invalid UTF-8 data can contain more than four consecutive bytes which look like they're part of a multi-byte character, and collecting all of them was overflowing a buffer. BUG=chromium:417891 LOG=n R=yangguo@chromium.org Review URL: https://codereview.chromium.org/607043002 git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@24251 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
b9583b5f98
commit
cc2c6e6339
@ -411,13 +411,17 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters(
|
||||
|
||||
// Move bytes which are part of an incomplete character from the end of the
|
||||
// current chunk to utf8_split_char_buffer_. They will be converted when the
|
||||
// next data chunk arrives.
|
||||
// next data chunk arrives. Note that all valid UTF-8 characters are at most 4
|
||||
// bytes long, but if the data is invalid, we can have character values bigger
|
||||
// than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes.
|
||||
while (current_data_length_ > current_data_offset_ &&
|
||||
(c = current_data_[current_data_length_ - 1]) >
|
||||
unibrow::Utf8::kMaxOneByteChar) {
|
||||
unibrow::Utf8::kMaxOneByteChar &&
|
||||
utf8_split_char_buffer_length_ < 4) {
|
||||
--current_data_length_;
|
||||
++utf8_split_char_buffer_length_;
|
||||
}
|
||||
CHECK(utf8_split_char_buffer_length_ <= 4);
|
||||
for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
|
||||
utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];
|
||||
}
|
||||
|
@ -23381,3 +23381,23 @@ TEST(StreamingProducesParserCache) {
|
||||
CHECK(cached_data->data != NULL);
|
||||
CHECK_GT(cached_data->length, 0);
|
||||
}
|
||||
|
||||
|
||||
TEST(StreamingScriptWithInvalidUtf8) {
|
||||
// Regression test for a crash: test that invalid UTF-8 bytes at the end of a
|
||||
// chunk don't produce a crash.
|
||||
// Five consecutive non-ASCII bytes; they are copied over the end of chunk1.
const char* reference = "\xeb\x91\x80\x80\x80";
|
||||
char chunk1[] =
|
||||
"function foo() {\n"
|
||||
" // This function will contain an UTF-8 character which is not in\n"
|
||||
" // ASCII.\n"
|
||||
" var foobXXXXX"; // Too many bytes which look like incomplete chars!
|
||||
char chunk2[] =
|
||||
"r = 13;\n"
|
||||
" return foob\xeb\x91\x80\x80\x80r;\n"
|
||||
"}\n";
|
||||
// Overwrite the trailing "XXXXX" placeholder of chunk1 with the five
// reference bytes, so that the chunk ends with them.
for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i];
|
||||
|
||||
const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
|
||||
RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user