Script streaming: UTF-8 handling fix.
The problem was that there can be several multi-byte UTF-8 characters near the splitting point of the data chunks, and the code didn't handle this properly. This was also the source of crbug.com/417891 - I thought the crash could only happen when V8 is passed invalid UTF-8 data, but it can also happen in the above-mentioned case. After the fix, we handle the valid UTF-8 case and also guard against invalid UTF-8 data. R=yangguo@chromium.org BUG=chromium:417891 LOG=N Review URL: https://codereview.chromium.org/654503002 git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@24547 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
f60bc89083
commit
363ac55a5d
@ -420,6 +420,12 @@ void ExternalStreamingStream::HandleUtf8SplitCharacters(
|
|||||||
utf8_split_char_buffer_length_ < 4) {
|
utf8_split_char_buffer_length_ < 4) {
|
||||||
--current_data_length_;
|
--current_data_length_;
|
||||||
++utf8_split_char_buffer_length_;
|
++utf8_split_char_buffer_length_;
|
||||||
|
if (c >= (3 << 6)) {
|
||||||
|
// 3 << 6 = 0b11000000; this is the first byte of the multi-byte
|
||||||
|
// character. No need to copy the previous characters into the conversion
|
||||||
|
// buffer (even if they're multi-byte).
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
CHECK(utf8_split_char_buffer_length_ <= 4);
|
CHECK(utf8_split_char_buffer_length_ <= 4);
|
||||||
for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
|
for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
|
||||||
|
@ -23813,3 +23813,24 @@ TEST(StreamingScriptWithInvalidUtf8) {
|
|||||||
const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
|
const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
|
||||||
RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);
|
RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
TEST(StreamingUtf8ScriptWithMultipleMultibyteCharactersSomeSplit) {
|
||||||
|
// Regression test: Stream data where there are several multi-byte UTF-8
|
||||||
|
// characters in a sequence and one of them is split between two data chunks.
|
||||||
|
// The three-byte UTF-8 sequence whose bytes are patched into the chunk
// boundary below.
const char* reference = "\xeb\x91\x80";
|
||||||
|
// chunk1 and chunk2 are mutable char arrays (not pointers to literals)
// because their 'X' placeholder bytes are overwritten further down.
char chunk1[] =
|
||||||
|
"function foo() {\n"
|
||||||
|
" // This function will contain an UTF-8 character which is not in\n"
|
||||||
|
" // ASCII.\n"
|
||||||
|
" var foob\xeb\x91\x80X";
|
||||||
|
char chunk2[] =
|
||||||
|
"XXr = 13;\n"
|
||||||
|
" return foob\xeb\x91\x80\xeb\x91\x80r;\n"
|
||||||
|
"}\n";
|
||||||
|
// Replace the trailing 'X' of chunk1 with the first byte of |reference| and
// the two leading 'X's of chunk2 with its remaining two bytes. The result is
// a complete multi-byte character immediately followed by one that straddles
// the chunk boundary (1 byte in chunk1, 2 bytes in chunk2).
chunk1[strlen(chunk1) - 1] = reference[0];
|
||||||
|
chunk2[0] = reference[1];
|
||||||
|
chunk2[1] = reference[2];
|
||||||
|
const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
|
||||||
|
RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user