From cc2c6e6339c2620a4a0b38bd6a2e0112044d426e Mon Sep 17 00:00:00 2001 From: "marja@chromium.org" Date: Fri, 26 Sep 2014 11:17:31 +0000 Subject: [PATCH] Script streaming: fix split UTF-8 character handling. Invalid UTF-8 data can contain too many characters which look like they're part of a multi-byte character, and that was overflowing a buffer. BUG=chromium:417891 LOG=n . R=yangguo@chromium.org Review URL: https://codereview.chromium.org/607043002 git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@24251 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/scanner-character-streams.cc | 8 ++++++-- test/cctest/test-api.cc | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/src/scanner-character-streams.cc b/src/scanner-character-streams.cc index 31b4ee47c4..d06f479f94 100644 --- a/src/scanner-character-streams.cc +++ b/src/scanner-character-streams.cc @@ -411,13 +411,17 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters( // Move bytes which are part of an incomplete character from the end of the // current chunk to utf8_split_char_buffer_. They will be converted when the - // next data chunk arrives. + // next data chunk arrives. Note that all valid UTF-8 characters are at most 4 + // bytes long, but if the data is invalid, we can have character values bigger + // than unibrow::Utf8::kMaxOneByteChar for more than 4 consecutive bytes. 
while (current_data_length_ > current_data_offset_ && (c = current_data_[current_data_length_ - 1]) > - unibrow::Utf8::kMaxOneByteChar) { + unibrow::Utf8::kMaxOneByteChar && + utf8_split_char_buffer_length_ < 4) { --current_data_length_; ++utf8_split_char_buffer_length_; } + CHECK(utf8_split_char_buffer_length_ <= 4); for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) { utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i]; } diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc index b4cf5cf4a9..66dc5a0c4a 100644 --- a/test/cctest/test-api.cc +++ b/test/cctest/test-api.cc @@ -23381,3 +23381,23 @@ TEST(StreamingProducesParserCache) { CHECK(cached_data->data != NULL); CHECK_GT(cached_data->length, 0); } + + +TEST(StreamingScriptWithInvalidUtf8) { + // Regression test for a crash: test that invalid UTF-8 bytes at the end of a + // chunk don't produce a crash. + const char* reference = "\xeb\x91\x80\x80\x80"; + char chunk1[] = + "function foo() {\n" + " // This function will contain an UTF-8 character which is not in\n" + " // ASCII.\n" + " var foobXXXXX"; // Too many bytes which look like incomplete chars! + char chunk2[] = + "r = 13;\n" + " return foob\xeb\x91\x80\x80\x80r;\n" + "}\n"; + for (int i = 0; i < 5; ++i) chunk1[strlen(chunk1) - 5 + i] = reference[i]; + + const char* chunks[] = {chunk1, chunk2, "foo();", NULL}; + RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false); +}