Return kBadChar for longest subpart of incomplete utf-8 character.
This brings the two utf-8 decoders (bulk + incremental) in line. Technically, either behaviour was correct, since the utf-8 spec demands incomplete utf-8 be handled, but does not specify how. Unicode recommends that "the maximal subpart at that offset should be replaced by a single U+FFFD," and with this change we consistently do that. More details + spec references in the bug. BUG=chromium:662822 Review-Url: https://codereview.chromium.org/2493143003 Cr-Commit-Position: refs/heads/master@{#41025}
This commit is contained in:
parent
0188c3fba3
commit
fd40ebb1e6
125
src/unicode.cc
125
src/unicode.cc
@ -228,80 +228,56 @@ static inline bool IsContinuationCharacter(byte chr) {
|
||||
// This method decodes an UTF-8 value according to RFC 3629.
|
||||
uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
|
||||
size_t length = NonASCIISequenceLength(str[0]);
|
||||
if (length == 0 || max_length < length) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
if (length == 2) {
|
||||
if (!IsContinuationCharacter(str[1])) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
*cursor += 2;
|
||||
return ((str[0] << 6) + str[1]) - 0x00003080;
|
||||
|
||||
// Check continuation characters.
|
||||
size_t max_count = std::min(length, max_length);
|
||||
size_t count = 1;
|
||||
while (count < max_count && IsContinuationCharacter(str[count])) {
|
||||
count++;
|
||||
}
|
||||
|
||||
// Check overly long sequences & other conditions. Use length as error
|
||||
// indicator.
|
||||
if (length == 3) {
|
||||
switch (str[0]) {
|
||||
case 0xE0:
|
||||
// Overlong three-byte sequence.
|
||||
if (str[1] < 0xA0 || str[1] > 0xBF) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
break;
|
||||
case 0xED:
|
||||
// High and low surrogate halves.
|
||||
if (str[1] < 0x80 || str[1] > 0x9F) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (!IsContinuationCharacter(str[1])) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
|
||||
// Overlong three-byte sequence?
|
||||
length = 0;
|
||||
} else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
|
||||
// High and low surrogate halves?
|
||||
length = 0;
|
||||
}
|
||||
if (!IsContinuationCharacter(str[2])) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
*cursor += 3;
|
||||
return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;
|
||||
}
|
||||
DCHECK(length == 4);
|
||||
switch (str[0]) {
|
||||
case 0xF0:
|
||||
} else if (length == 4) {
|
||||
if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
|
||||
// Overlong four-byte sequence.
|
||||
if (str[1] < 0x90 || str[1] > 0xBF) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
break;
|
||||
case 0xF4:
|
||||
length = 0;
|
||||
} else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
|
||||
// Code points outside of the unicode range.
|
||||
if (str[1] < 0x80 || str[1] > 0x8F) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (!IsContinuationCharacter(str[1])) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
}
|
||||
length = 0;
|
||||
}
|
||||
}
|
||||
if (!IsContinuationCharacter(str[2])) {
|
||||
*cursor += 1;
|
||||
|
||||
if (count != length) {
|
||||
// All invalid encodings should land here.
|
||||
*cursor += count;
|
||||
return kBadChar;
|
||||
}
|
||||
if (!IsContinuationCharacter(str[3])) {
|
||||
*cursor += 1;
|
||||
return kBadChar;
|
||||
|
||||
// All errors have been handled, so we only have to assemble the result.
|
||||
*cursor += length;
|
||||
switch (length) {
|
||||
case 1:
|
||||
return str[0];
|
||||
case 2:
|
||||
return ((str[0] << 6) + str[1]) - 0x00003080;
|
||||
case 3:
|
||||
return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;
|
||||
case 4:
|
||||
return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -
|
||||
0x03C82080;
|
||||
}
|
||||
*cursor += 4;
|
||||
return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -
|
||||
0x03C82080;
|
||||
|
||||
UNREACHABLE();
|
||||
return kBadChar;
|
||||
}
|
||||
|
||||
uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {
|
||||
@ -323,9 +299,10 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {
|
||||
// with one shift.
|
||||
uint8_t mask = 0x7f >> kind;
|
||||
|
||||
// Store the kind - 1 (i.e., remaining bytes) in the top byte, value
|
||||
// in the bottom three.
|
||||
*buffer = (kind - 1) << 24 | (next & mask);
|
||||
// Store the kind in the top nibble, and kind - 1 (i.e., remaining bytes)
|
||||
// in 2nd nibble, and the value in the bottom three. The 2nd nibble is
|
||||
// intended as a counter about how many bytes are still needed.
|
||||
*buffer = kind << 28 | (kind - 1) << 24 | (next & mask);
|
||||
return kIncomplete;
|
||||
} else {
|
||||
// No buffer, and not the start of a 1-byte char (handled at the
|
||||
@ -354,15 +331,19 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {
|
||||
// We're inside of a character, as described by buffer.
|
||||
|
||||
// How many bytes (excluding this one) do we still expect?
|
||||
uint8_t count = (*buffer >> 24) - 1;
|
||||
uint8_t bytes_expected = *buffer >> 28;
|
||||
uint8_t bytes_left = (*buffer >> 24) & 0x0f;
|
||||
bytes_left--;
|
||||
// Update the value.
|
||||
uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F);
|
||||
if (count) {
|
||||
*buffer = count << 24 | value;
|
||||
if (bytes_left) {
|
||||
*buffer = (bytes_expected << 28 | bytes_left << 24 | value);
|
||||
return kIncomplete;
|
||||
} else {
|
||||
*buffer = 0;
|
||||
return value;
|
||||
bool sequence_was_too_long = (bytes_expected == 2 && value < 0x80) ||
|
||||
(bytes_expected == 3 && value < 0x800);
|
||||
return sequence_was_too_long ? kBadChar : value;
|
||||
}
|
||||
} else {
|
||||
// Within a character, but not a continuation character? Then the
|
||||
|
@ -700,74 +700,26 @@ TEST(RegExpScanning) {
|
||||
TestScanRegExp("/=?/", "=?");
|
||||
}
|
||||
|
||||
static int Ucs2CharLength(unibrow::uchar c) {
|
||||
if (c == unibrow::Utf8::kIncomplete || c == unibrow::Utf8::kBufferEmpty) {
|
||||
return 0;
|
||||
} else if (c < 0xffff) {
|
||||
return 1;
|
||||
} else {
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
static int Utf8LengthHelper(const char* s) {
|
||||
int len = i::StrLength(s);
|
||||
int character_length = len;
|
||||
for (int i = 0; i < len; i++) {
|
||||
unsigned char c = s[i];
|
||||
int input_offset = 0;
|
||||
int output_adjust = 0;
|
||||
if (c > 0x7f) {
|
||||
if (c < 0xc0) continue;
|
||||
if (c >= 0xf0) {
|
||||
if (c >= 0xf8) {
|
||||
// 5 and 6 byte UTF-8 sequences turn into a kBadChar for each UTF-8
|
||||
// byte.
|
||||
continue; // Handle first UTF-8 byte.
|
||||
}
|
||||
if ((c & 7) == 0 && ((s[i + 1] & 0x30) == 0)) {
|
||||
// This 4 byte sequence could have been coded as a 3 byte sequence.
|
||||
// Record a single kBadChar for the first byte and continue.
|
||||
continue;
|
||||
}
|
||||
input_offset = 3;
|
||||
// 4 bytes of UTF-8 turn into 2 UTF-16 code units.
|
||||
character_length -= 2;
|
||||
} else if (c >= 0xe0) {
|
||||
if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) {
|
||||
// This 3 byte sequence could have been coded as a 2 byte sequence.
|
||||
// Record a single kBadChar for the first byte and continue.
|
||||
continue;
|
||||
}
|
||||
if (c == 0xed) {
|
||||
unsigned char d = s[i + 1];
|
||||
if ((d < 0x80) || (d > 0x9f)) {
|
||||
// This 3 byte sequence is part of a surrogate pair which is not
|
||||
// supported by UTF-8. Record a single kBadChar for the first byte
|
||||
// and continue.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
input_offset = 2;
|
||||
// 3 bytes of UTF-8 turn into 1 UTF-16 code unit.
|
||||
output_adjust = 2;
|
||||
} else {
|
||||
if ((c & 0x1e) == 0) {
|
||||
// This 2 byte sequence could have been coded as a 1 byte sequence.
|
||||
// Record a single kBadChar for the first byte and continue.
|
||||
continue;
|
||||
}
|
||||
input_offset = 1;
|
||||
// 2 bytes of UTF-8 turn into 1 UTF-16 code unit.
|
||||
output_adjust = 1;
|
||||
}
|
||||
bool bad = false;
|
||||
for (int j = 1; j <= input_offset; j++) {
|
||||
if ((s[i + j] & 0xc0) != 0x80) {
|
||||
// Bad UTF-8 sequence turns the first in the sequence into kBadChar,
|
||||
// which is a single UTF-16 code unit.
|
||||
bad = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!bad) {
|
||||
i += input_offset;
|
||||
character_length -= output_adjust;
|
||||
}
|
||||
}
|
||||
unibrow::Utf8::Utf8IncrementalBuffer buffer(unibrow::Utf8::kBufferEmpty);
|
||||
int length = 0;
|
||||
for (; *s != '\0'; s++) {
|
||||
unibrow::uchar tmp = unibrow::Utf8::ValueOfIncremental(*s, &buffer);
|
||||
length += Ucs2CharLength(tmp);
|
||||
}
|
||||
return character_length;
|
||||
unibrow::uchar tmp = unibrow::Utf8::ValueOfIncrementalFinish(&buffer);
|
||||
length += Ucs2CharLength(tmp);
|
||||
return length;
|
||||
}
|
||||
|
||||
|
||||
@ -994,169 +946,206 @@ TEST(ScopePositions) {
|
||||
};
|
||||
|
||||
const SourceData source_data[] = {
|
||||
{ " with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::SLOPPY },
|
||||
{ " with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::SLOPPY },
|
||||
{ " with ({}) ", "{\n"
|
||||
" block;\n"
|
||||
" }", "\n"
|
||||
" more;", i::WITH_SCOPE, i::SLOPPY },
|
||||
{ " with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::SLOPPY },
|
||||
{ " with ({}) ", "statement", "\n"
|
||||
" more;", i::WITH_SCOPE, i::SLOPPY },
|
||||
{ " with ({})\n"
|
||||
" ", "statement;", "\n"
|
||||
" more;", i::WITH_SCOPE, i::SLOPPY },
|
||||
{ " try {} catch ", "(e) { block; }", " more;",
|
||||
i::CATCH_SCOPE, i::SLOPPY },
|
||||
{ " try {} catch ", "(e) { block; }", "; more;",
|
||||
i::CATCH_SCOPE, i::SLOPPY },
|
||||
{ " try {} catch ", "(e) {\n"
|
||||
" block;\n"
|
||||
" }", "\n"
|
||||
" more;", i::CATCH_SCOPE, i::SLOPPY },
|
||||
{ " try {} catch ", "(e) { block; }", " finally { block; } more;",
|
||||
i::CATCH_SCOPE, i::SLOPPY },
|
||||
{ " start;\n"
|
||||
" ", "{ let block; }", " more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " start;\n"
|
||||
" ", "{ let block; }", "; more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " start;\n"
|
||||
" ", "{\n"
|
||||
" let block;\n"
|
||||
" }", "\n"
|
||||
" more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " start;\n"
|
||||
" function fun", "(a,b) { infunction; }", " more;",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
{ " start;\n"
|
||||
" function fun", "(a,b) {\n"
|
||||
" infunction;\n"
|
||||
" }", "\n"
|
||||
" more;", i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
{ " start;\n", "(a,b) => a + b", "; more;",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
{ " start;\n", "(a,b) => { return a+b; }", "\nmore;",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
{ " start;\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
{ " for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;",
|
||||
i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;",
|
||||
i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x = 1 ; x < 10; ++ x) {\n"
|
||||
" block;\n"
|
||||
" }", "\n"
|
||||
" more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;",
|
||||
i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x = 1 ; x < 10; ++ x) statement", "\n"
|
||||
" more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x = 1 ; x < 10; ++ x)\n"
|
||||
" statement;", "\n"
|
||||
" more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x in {}) { block; }", " more;",
|
||||
i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x in {}) { block; }", "; more;",
|
||||
i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x in {}) {\n"
|
||||
" block;\n"
|
||||
" }", "\n"
|
||||
" more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x in {}) statement;", " more;",
|
||||
i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x in {}) statement", "\n"
|
||||
" more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
{ " for ", "(let x in {})\n"
|
||||
" statement;", "\n"
|
||||
" more;", i::BLOCK_SCOPE, i::STRICT },
|
||||
// Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw
|
||||
// the preparser off in terms of byte offsets.
|
||||
// 6 byte encoding.
|
||||
{ " 'foo\355\240\201\355\260\211';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// 4 byte encoding.
|
||||
{ " 'foo\360\220\220\212';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// 3 byte encoding of \u0fff.
|
||||
{ " 'foo\340\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Broken 6 byte encoding with missing last byte.
|
||||
{ " 'foo\355\240\201\355\211';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Broken 3 byte encoding of \u0fff with missing last byte.
|
||||
{ " 'foo\340\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Broken 3 byte encoding of \u0fff with missing 2 last bytes.
|
||||
{ " 'foo\340';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Broken 3 byte encoding of \u00ff should be a 2 byte encoding.
|
||||
{ " 'foo\340\203\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Broken 3 byte encoding of \u007f should be a 2 byte encoding.
|
||||
{ " 'foo\340\201\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Unpaired lead surrogate.
|
||||
{ " 'foo\355\240\201';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Unpaired lead surrogate where following code point is a 3 byte sequence.
|
||||
{ " 'foo\355\240\201\340\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Unpaired lead surrogate where following code point is a 4 byte encoding
|
||||
// of a trail surrogate.
|
||||
{ " 'foo\355\240\201\360\215\260\211';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Unpaired trail surrogate.
|
||||
{ " 'foo\355\260\211';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// 2 byte encoding of \u00ff.
|
||||
{ " 'foo\303\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Broken 2 byte encoding of \u00ff with missing last byte.
|
||||
{ " 'foo\303';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Broken 2 byte encoding of \u007f should be a 1 byte encoding.
|
||||
{ " 'foo\301\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Illegal 5 byte encoding.
|
||||
{ " 'foo\370\277\277\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Illegal 6 byte encoding.
|
||||
{ " 'foo\374\277\277\277\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Illegal 0xfe byte
|
||||
{ " 'foo\376\277\277\277\277\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
// Illegal 0xff byte
|
||||
{ " 'foo\377\277\277\277\277\277\277\277';\n"
|
||||
" (function fun", "(a,b) { infunction; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
{ " 'foo';\n"
|
||||
" (function fun", "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
{ " 'foo';\n"
|
||||
" (function fun", "(a,b) { 'bar\360\220\220\214'; }", ")();",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY },
|
||||
{ NULL, NULL, NULL, i::EVAL_SCOPE, i::SLOPPY }
|
||||
};
|
||||
{" with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::SLOPPY},
|
||||
{" with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::SLOPPY},
|
||||
{" with ({}) ",
|
||||
"{\n"
|
||||
" block;\n"
|
||||
" }",
|
||||
"\n"
|
||||
" more;",
|
||||
i::WITH_SCOPE, i::SLOPPY},
|
||||
{" with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::SLOPPY},
|
||||
{" with ({}) ", "statement",
|
||||
"\n"
|
||||
" more;",
|
||||
i::WITH_SCOPE, i::SLOPPY},
|
||||
{" with ({})\n"
|
||||
" ",
|
||||
"statement;",
|
||||
"\n"
|
||||
" more;",
|
||||
i::WITH_SCOPE, i::SLOPPY},
|
||||
{" try {} catch ", "(e) { block; }", " more;", i::CATCH_SCOPE,
|
||||
i::SLOPPY},
|
||||
{" try {} catch ", "(e) { block; }", "; more;", i::CATCH_SCOPE,
|
||||
i::SLOPPY},
|
||||
{" try {} catch ",
|
||||
"(e) {\n"
|
||||
" block;\n"
|
||||
" }",
|
||||
"\n"
|
||||
" more;",
|
||||
i::CATCH_SCOPE, i::SLOPPY},
|
||||
{" try {} catch ", "(e) { block; }", " finally { block; } more;",
|
||||
i::CATCH_SCOPE, i::SLOPPY},
|
||||
{" start;\n"
|
||||
" ",
|
||||
"{ let block; }", " more;", i::BLOCK_SCOPE, i::STRICT},
|
||||
{" start;\n"
|
||||
" ",
|
||||
"{ let block; }", "; more;", i::BLOCK_SCOPE, i::STRICT},
|
||||
{" start;\n"
|
||||
" ",
|
||||
"{\n"
|
||||
" let block;\n"
|
||||
" }",
|
||||
"\n"
|
||||
" more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" start;\n"
|
||||
" function fun",
|
||||
"(a,b) { infunction; }", " more;", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
{" start;\n"
|
||||
" function fun",
|
||||
"(a,b) {\n"
|
||||
" infunction;\n"
|
||||
" }",
|
||||
"\n"
|
||||
" more;",
|
||||
i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
{" start;\n", "(a,b) => a + b", "; more;", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
{" start;\n", "(a,b) => { return a+b; }", "\nmore;", i::FUNCTION_SCOPE,
|
||||
i::SLOPPY},
|
||||
{" start;\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
{" for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" for ",
|
||||
"(let x = 1 ; x < 10; ++ x) {\n"
|
||||
" block;\n"
|
||||
" }",
|
||||
"\n"
|
||||
" more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" for ", "(let x = 1 ; x < 10; ++ x) statement",
|
||||
"\n"
|
||||
" more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" for ",
|
||||
"(let x = 1 ; x < 10; ++ x)\n"
|
||||
" statement;",
|
||||
"\n"
|
||||
" more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" for ", "(let x in {}) { block; }", " more;", i::BLOCK_SCOPE,
|
||||
i::STRICT},
|
||||
{" for ", "(let x in {}) { block; }", "; more;", i::BLOCK_SCOPE,
|
||||
i::STRICT},
|
||||
{" for ",
|
||||
"(let x in {}) {\n"
|
||||
" block;\n"
|
||||
" }",
|
||||
"\n"
|
||||
" more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" for ", "(let x in {}) statement;", " more;", i::BLOCK_SCOPE,
|
||||
i::STRICT},
|
||||
{" for ", "(let x in {}) statement",
|
||||
"\n"
|
||||
" more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
{" for ",
|
||||
"(let x in {})\n"
|
||||
" statement;",
|
||||
"\n"
|
||||
" more;",
|
||||
i::BLOCK_SCOPE, i::STRICT},
|
||||
// Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw
|
||||
// the preparser off in terms of byte offsets.
|
||||
// 2 surrogates, encode a character that doesn't need a surrogate.
|
||||
{" 'foo\355\240\201\355\260\211';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// 4 byte encoding.
|
||||
{" 'foo\360\220\220\212';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// 3 byte encoding of \u0fff.
|
||||
{" 'foo\340\277\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// 3 byte surrogate, followed by broken 2-byte surrogate w/ impossible 2nd
|
||||
// byte and last byte missing.
|
||||
{" 'foo\355\240\201\355\211';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Broken 3 byte encoding of \u0fff with missing last byte.
|
||||
{" 'foo\340\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Broken 3 byte encoding of \u0fff with missing 2 last bytes.
|
||||
{" 'foo\340';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Broken 3 byte encoding of \u00ff should be a 2 byte encoding.
|
||||
{" 'foo\340\203\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Broken 3 byte encoding of \u007f should be a 2 byte encoding.
|
||||
{" 'foo\340\201\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Unpaired lead surrogate.
|
||||
{" 'foo\355\240\201';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Unpaired lead surrogate where following code point is a 3 byte
|
||||
// sequence.
|
||||
{" 'foo\355\240\201\340\277\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Unpaired lead surrogate where following code point is a 4 byte encoding
|
||||
// of a trail surrogate.
|
||||
{" 'foo\355\240\201\360\215\260\211';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Unpaired trail surrogate.
|
||||
{" 'foo\355\260\211';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// 2 byte encoding of \u00ff.
|
||||
{" 'foo\303\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Broken 2 byte encoding of \u00ff with missing last byte.
|
||||
{" 'foo\303';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Broken 2 byte encoding of \u007f should be a 1 byte encoding.
|
||||
{" 'foo\301\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Illegal 5 byte encoding.
|
||||
{" 'foo\370\277\277\277\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Illegal 6 byte encoding.
|
||||
{" 'foo\374\277\277\277\277\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Illegal 0xfe byte
|
||||
{" 'foo\376\277\277\277\277\277\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
// Illegal 0xff byte
|
||||
{" 'foo\377\277\277\277\277\277\277\277';\n"
|
||||
" (function fun",
|
||||
"(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY},
|
||||
{" 'foo';\n"
|
||||
" (function fun",
|
||||
"(a,b) { 'bar\355\240\201\355\260\213'; }", ")();", i::FUNCTION_SCOPE,
|
||||
i::SLOPPY},
|
||||
{" 'foo';\n"
|
||||
" (function fun",
|
||||
"(a,b) { 'bar\360\220\220\214'; }", ")();", i::FUNCTION_SCOPE,
|
||||
i::SLOPPY},
|
||||
{NULL, NULL, NULL, i::EVAL_SCOPE, i::SLOPPY}};
|
||||
|
||||
i::Isolate* isolate = CcTest::i_isolate();
|
||||
i::Factory* factory = isolate->factory();
|
||||
|
Loading…
Reference in New Issue
Block a user