From fd40ebb1e64274ae3529f8bbe6dad6adc76cb391 Mon Sep 17 00:00:00 2001 From: vogelheim Date: Wed, 16 Nov 2016 03:02:54 -0800 Subject: [PATCH] Return kBadChar for longest subpart of incomplete utf-8 character. This brings the two utf-8 decoders (bulk + incremental) in line. Technically, either behaviour was correct, since the utf-8 spec demands incomplete utf-8 be handled, but does not specify how. Unicode recommends that "the maximal subpart at that offset should be replaced by a single U+FFFD," and with this change we consistently do that. More details + spec references in the bug. BUG=chromium:662822 Review-Url: https://codereview.chromium.org/2493143003 Cr-Commit-Position: refs/heads/master@{#41025} --- src/unicode.cc | 125 +++++----- test/cctest/test-parsing.cc | 445 ++++++++++++++++++------------------ 2 files changed, 270 insertions(+), 300 deletions(-) diff --git a/src/unicode.cc b/src/unicode.cc index eb6fb462f3..a63174c47d 100644 --- a/src/unicode.cc +++ b/src/unicode.cc @@ -228,80 +228,56 @@ static inline bool IsContinuationCharacter(byte chr) { // This method decodes an UTF-8 value according to RFC 3629. uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) { size_t length = NonASCIISequenceLength(str[0]); - if (length == 0 || max_length < length) { - *cursor += 1; - return kBadChar; - } - if (length == 2) { - if (!IsContinuationCharacter(str[1])) { - *cursor += 1; - return kBadChar; - } - *cursor += 2; - return ((str[0] << 6) + str[1]) - 0x00003080; + + // Check continuation characters. + size_t max_count = std::min(length, max_length); + size_t count = 1; + while (count < max_count && IsContinuationCharacter(str[count])) { + count++; } + + // Check overly long sequences & other conditions. Use length as error + // indicator. if (length == 3) { - switch (str[0]) { - case 0xE0: - // Overlong three-byte sequence. - if (str[1] < 0xA0 || str[1] > 0xBF) { - *cursor += 1; - return kBadChar; - } - break; - case 0xED: - // High and low surrogate halves. - if (str[1] < 0x80 || str[1] > 0x9F) { - *cursor += 1; - return kBadChar; - } - break; - default: - if (!IsContinuationCharacter(str[1])) { - *cursor += 1; - return kBadChar; - } + if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) { + // Overlong three-byte sequence? + length = 0; + } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) { + // High and low surrogate halves? + length = 0; } - if (!IsContinuationCharacter(str[2])) { - *cursor += 1; - return kBadChar; - } - *cursor += 3; - return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080; - } - DCHECK(length == 4); - switch (str[0]) { - case 0xF0: + } else if (length == 4) { + if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) { // Overlong four-byte sequence. - if (str[1] < 0x90 || str[1] > 0xBF) { - *cursor += 1; - return kBadChar; - } - break; - case 0xF4: + length = 0; + } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) { // Code points outside of the unicode range. - if (str[1] < 0x80 || str[1] > 0x8F) { - *cursor += 1; - return kBadChar; - } - break; - default: - if (!IsContinuationCharacter(str[1])) { - *cursor += 1; - return kBadChar; - } + length = 0; + } } - if (!IsContinuationCharacter(str[2])) { - *cursor += 1; + + if (count != length) { + // All invalid encodings should land here. + *cursor += count; return kBadChar; } - if (!IsContinuationCharacter(str[3])) { - *cursor += 1; - return kBadChar; + + // All errors have been handled, so we only have to assemble the result. + *cursor += length; + switch (length) { + case 1: + return str[0]; + case 2: + return ((str[0] << 6) + str[1]) - 0x00003080; + case 3: + return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080; + case 4: + return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) - + 0x03C82080; } - *cursor += 4; - return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) - - 0x03C82080; + + UNREACHABLE(); + return kBadChar; } uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) { @@ -323,9 +299,10 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) { // with one shift. uint8_t mask = 0x7f >> kind; - // Store the kind - 1 (i.e., remaining bytes) in the top byte, value - // in the bottom three. - *buffer = (kind - 1) << 24 | (next & mask); + // Store the kind in the top nibble, and kind - 1 (i.e., remaining bytes) + // in 2nd nibble, and the value in the bottom three. The 2nd nibble is + // intended as a counter about how many bytes are still needed. + *buffer = kind << 28 | (kind - 1) << 24 | (next & mask); return kIncomplete; } else { // No buffer, and not the start of a 1-byte char (handled at the @@ -354,15 +331,19 @@ uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) { // We're inside of a character, as described by buffer. // How many bytes (excluding this one) do we still expect? - uint8_t count = (*buffer >> 24) - 1; + uint8_t bytes_expected = *buffer >> 28; + uint8_t bytes_left = (*buffer >> 24) & 0x0f; + bytes_left--; // Update the value. uint32_t value = ((*buffer & 0xffffff) << 6) | (next & 0x3F); - if (count) { - *buffer = count << 24 | value; + if (bytes_left) { + *buffer = (bytes_expected << 28 | bytes_left << 24 | value); return kIncomplete; } else { *buffer = 0; - return value; + bool sequence_was_too_long = (bytes_expected == 2 && value < 0x80) || + (bytes_expected == 3 && value < 0x800); + return sequence_was_too_long ? kBadChar : value; } } else { // Within a character, but not a continuation character? Then the diff --git a/test/cctest/test-parsing.cc b/test/cctest/test-parsing.cc index 60663b2a19..5a5a734236 100644 --- a/test/cctest/test-parsing.cc +++ b/test/cctest/test-parsing.cc @@ -700,74 +700,26 @@ TEST(RegExpScanning) { TestScanRegExp("/=?/", "=?"); } +static int Ucs2CharLength(unibrow::uchar c) { + if (c == unibrow::Utf8::kIncomplete || c == unibrow::Utf8::kBufferEmpty) { + return 0; + } else if (c < 0xffff) { + return 1; + } else { + return 2; + } +} static int Utf8LengthHelper(const char* s) { - int len = i::StrLength(s); - int character_length = len; - for (int i = 0; i < len; i++) { - unsigned char c = s[i]; - int input_offset = 0; - int output_adjust = 0; - if (c > 0x7f) { - if (c < 0xc0) continue; - if (c >= 0xf0) { - if (c >= 0xf8) { - // 5 and 6 byte UTF-8 sequences turn into a kBadChar for each UTF-8 - // byte. - continue; // Handle first UTF-8 byte. - } - if ((c & 7) == 0 && ((s[i + 1] & 0x30) == 0)) { - // This 4 byte sequence could have been coded as a 3 byte sequence. - // Record a single kBadChar for the first byte and continue. - continue; - } - input_offset = 3; - // 4 bytes of UTF-8 turn into 2 UTF-16 code units. - character_length -= 2; - } else if (c >= 0xe0) { - if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) { - // This 3 byte sequence could have been coded as a 2 byte sequence. - // Record a single kBadChar for the first byte and continue. - continue; - } - if (c == 0xed) { - unsigned char d = s[i + 1]; - if ((d < 0x80) || (d > 0x9f)) { - // This 3 byte sequence is part of a surrogate pair which is not - // supported by UTF-8. Record a single kBadChar for the first byte - // and continue. - continue; - } - } - input_offset = 2; - // 3 bytes of UTF-8 turn into 1 UTF-16 code unit. - output_adjust = 2; - } else { - if ((c & 0x1e) == 0) { - // This 2 byte sequence could have been coded as a 1 byte sequence. - // Record a single kBadChar for the first byte and continue. - continue; - } - input_offset = 1; - // 2 bytes of UTF-8 turn into 1 UTF-16 code unit. - output_adjust = 1; - } - bool bad = false; - for (int j = 1; j <= input_offset; j++) { - if ((s[i + j] & 0xc0) != 0x80) { - // Bad UTF-8 sequence turns the first in the sequence into kBadChar, - // which is a single UTF-16 code unit. - bad = true; - break; - } - } - if (!bad) { - i += input_offset; - character_length -= output_adjust; - } - } + unibrow::Utf8::Utf8IncrementalBuffer buffer(unibrow::Utf8::kBufferEmpty); + int length = 0; + for (; *s != '\0'; s++) { + unibrow::uchar tmp = unibrow::Utf8::ValueOfIncremental(*s, &buffer); + length += Ucs2CharLength(tmp); } - return character_length; + unibrow::uchar tmp = unibrow::Utf8::ValueOfIncrementalFinish(&buffer); + length += Ucs2CharLength(tmp); + return length; } @@ -994,169 +946,206 @@ TEST(ScopePositions) { }; const SourceData source_data[] = { - { " with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::SLOPPY }, - { " with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::SLOPPY }, - { " with ({}) ", "{\n" - " block;\n" - " }", "\n" - " more;", i::WITH_SCOPE, i::SLOPPY }, - { " with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::SLOPPY }, - { " with ({}) ", "statement", "\n" - " more;", i::WITH_SCOPE, i::SLOPPY }, - { " with ({})\n" - " ", "statement;", "\n" - " more;", i::WITH_SCOPE, i::SLOPPY }, - { " try {} catch ", "(e) { block; }", " more;", - i::CATCH_SCOPE, i::SLOPPY }, - { " try {} catch ", "(e) { block; }", "; more;", - i::CATCH_SCOPE, i::SLOPPY }, - { " try {} catch ", "(e) {\n" - " block;\n" - " }", "\n" - " more;", i::CATCH_SCOPE, i::SLOPPY }, - { " try {} catch ", "(e) { block; }", " finally { block; } more;", - i::CATCH_SCOPE, i::SLOPPY }, - { " start;\n" - " ", "{ let block; }", " more;", i::BLOCK_SCOPE, i::STRICT }, - { " start;\n" - " ", "{ let block; }", "; more;", i::BLOCK_SCOPE, i::STRICT }, - { " start;\n" - " ", "{\n" - " let block;\n" - " }", "\n" - " more;", i::BLOCK_SCOPE, i::STRICT }, - { " start;\n" - " function fun", "(a,b) { infunction; }", " more;", - i::FUNCTION_SCOPE, i::SLOPPY }, - { " start;\n" - " function fun", "(a,b) {\n" - " infunction;\n" - " }", "\n" - " more;", i::FUNCTION_SCOPE, i::SLOPPY }, - { " start;\n", "(a,b) => a + b", "; more;", - i::FUNCTION_SCOPE, i::SLOPPY }, - { " start;\n", "(a,b) => { return a+b; }", "\nmore;", - i::FUNCTION_SCOPE, i::SLOPPY }, - { " start;\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - { " for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;", - i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;", - i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x = 1 ; x < 10; ++ x) {\n" - " block;\n" - " }", "\n" - " more;", i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;", - i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x = 1 ; x < 10; ++ x) statement", "\n" - " more;", i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x = 1 ; x < 10; ++ x)\n" - " statement;", "\n" - " more;", i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x in {}) { block; }", " more;", - i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x in {}) { block; }", "; more;", - i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x in {}) {\n" - " block;\n" - " }", "\n" - " more;", i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x in {}) statement;", " more;", - i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x in {}) statement", "\n" - " more;", i::BLOCK_SCOPE, i::STRICT }, - { " for ", "(let x in {})\n" - " statement;", "\n" - " more;", i::BLOCK_SCOPE, i::STRICT }, - // Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw - // the preparser off in terms of byte offsets. - // 6 byte encoding. - { " 'foo\355\240\201\355\260\211';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // 4 byte encoding. - { " 'foo\360\220\220\212';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // 3 byte encoding of \u0fff. - { " 'foo\340\277\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Broken 6 byte encoding with missing last byte. - { " 'foo\355\240\201\355\211';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Broken 3 byte encoding of \u0fff with missing last byte. - { " 'foo\340\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Broken 3 byte encoding of \u0fff with missing 2 last bytes. - { " 'foo\340';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Broken 3 byte encoding of \u00ff should be a 2 byte encoding. - { " 'foo\340\203\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Broken 3 byte encoding of \u007f should be a 2 byte encoding. - { " 'foo\340\201\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Unpaired lead surrogate. - { " 'foo\355\240\201';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Unpaired lead surrogate where following code point is a 3 byte sequence. - { " 'foo\355\240\201\340\277\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Unpaired lead surrogate where following code point is a 4 byte encoding - // of a trail surrogate. - { " 'foo\355\240\201\360\215\260\211';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Unpaired trail surrogate. - { " 'foo\355\260\211';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // 2 byte encoding of \u00ff. - { " 'foo\303\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Broken 2 byte encoding of \u00ff with missing last byte. - { " 'foo\303';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Broken 2 byte encoding of \u007f should be a 1 byte encoding. - { " 'foo\301\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Illegal 5 byte encoding. - { " 'foo\370\277\277\277\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Illegal 6 byte encoding. - { " 'foo\374\277\277\277\277\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Illegal 0xfe byte - { " 'foo\376\277\277\277\277\277\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - // Illegal 0xff byte - { " 'foo\377\277\277\277\277\277\277\277';\n" - " (function fun", "(a,b) { infunction; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - { " 'foo';\n" - " (function fun", "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - { " 'foo';\n" - " (function fun", "(a,b) { 'bar\360\220\220\214'; }", ")();", - i::FUNCTION_SCOPE, i::SLOPPY }, - { NULL, NULL, NULL, i::EVAL_SCOPE, i::SLOPPY } - }; + {" with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::SLOPPY}, + {" with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::SLOPPY}, + {" with ({}) ", + "{\n" + " block;\n" + " }", + "\n" + " more;", + i::WITH_SCOPE, i::SLOPPY}, + {" with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::SLOPPY}, + {" with ({}) ", "statement", + "\n" + " more;", + i::WITH_SCOPE, i::SLOPPY}, + {" with ({})\n" + " ", + "statement;", + "\n" + " more;", + i::WITH_SCOPE, i::SLOPPY}, + {" try {} catch ", "(e) { block; }", " more;", i::CATCH_SCOPE, + i::SLOPPY}, + {" try {} catch ", "(e) { block; }", "; more;", i::CATCH_SCOPE, + i::SLOPPY}, + {" try {} catch ", + "(e) {\n" + " block;\n" + " }", + "\n" + " more;", + i::CATCH_SCOPE, i::SLOPPY}, + {" try {} catch ", "(e) { block; }", " finally { block; } more;", + i::CATCH_SCOPE, i::SLOPPY}, + {" start;\n" + " ", + "{ let block; }", " more;", i::BLOCK_SCOPE, i::STRICT}, + {" start;\n" + " ", + "{ let block; }", "; more;", i::BLOCK_SCOPE, i::STRICT}, + {" start;\n" + " ", + "{\n" + " let block;\n" + " }", + "\n" + " more;", + i::BLOCK_SCOPE, i::STRICT}, + {" start;\n" + " function fun", + "(a,b) { infunction; }", " more;", i::FUNCTION_SCOPE, i::SLOPPY}, + {" start;\n" + " function fun", + "(a,b) {\n" + " infunction;\n" + " }", + "\n" + " more;", + i::FUNCTION_SCOPE, i::SLOPPY}, + {" start;\n", "(a,b) => a + b", "; more;", i::FUNCTION_SCOPE, i::SLOPPY}, + {" start;\n", "(a,b) => { return a+b; }", "\nmore;", i::FUNCTION_SCOPE, + i::SLOPPY}, + {" start;\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + {" for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;", + i::BLOCK_SCOPE, i::STRICT}, + {" for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;", + i::BLOCK_SCOPE, i::STRICT}, + {" for ", + "(let x = 1 ; x < 10; ++ x) {\n" + " block;\n" + " }", + "\n" + " more;", + i::BLOCK_SCOPE, i::STRICT}, + {" for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;", + i::BLOCK_SCOPE, i::STRICT}, + {" for ", "(let x = 1 ; x < 10; ++ x) statement", + "\n" + " more;", + i::BLOCK_SCOPE, i::STRICT}, + {" for ", + "(let x = 1 ; x < 10; ++ x)\n" + " statement;", + "\n" + " more;", + i::BLOCK_SCOPE, i::STRICT}, + {" for ", "(let x in {}) { block; }", " more;", i::BLOCK_SCOPE, + i::STRICT}, + {" for ", "(let x in {}) { block; }", "; more;", i::BLOCK_SCOPE, + i::STRICT}, + {" for ", + "(let x in {}) {\n" + " block;\n" + " }", + "\n" + " more;", + i::BLOCK_SCOPE, i::STRICT}, + {" for ", "(let x in {}) statement;", " more;", i::BLOCK_SCOPE, + i::STRICT}, + {" for ", "(let x in {}) statement", + "\n" + " more;", + i::BLOCK_SCOPE, i::STRICT}, + {" for ", + "(let x in {})\n" + " statement;", + "\n" + " more;", + i::BLOCK_SCOPE, i::STRICT}, + // Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw + // the preparser off in terms of byte offsets. + // 2 surrogates, encode a character that doesn't need a surrogate. + {" 'foo\355\240\201\355\260\211';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // 4 byte encoding. + {" 'foo\360\220\220\212';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // 3 byte encoding of \u0fff. + {" 'foo\340\277\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // 3 byte surrogate, followed by broken 2-byte surrogate w/ impossible 2nd + // byte and last byte missing. + {" 'foo\355\240\201\355\211';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Broken 3 byte encoding of \u0fff with missing last byte. + {" 'foo\340\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Broken 3 byte encoding of \u0fff with missing 2 last bytes. + {" 'foo\340';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Broken 3 byte encoding of \u00ff should be a 2 byte encoding. + {" 'foo\340\203\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Broken 3 byte encoding of \u007f should be a 2 byte encoding. + {" 'foo\340\201\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Unpaired lead surrogate. + {" 'foo\355\240\201';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Unpaired lead surrogate where following code point is a 3 byte + // sequence. + {" 'foo\355\240\201\340\277\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Unpaired lead surrogate where following code point is a 4 byte encoding + // of a trail surrogate. + {" 'foo\355\240\201\360\215\260\211';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Unpaired trail surrogate. + {" 'foo\355\260\211';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // 2 byte encoding of \u00ff. + {" 'foo\303\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Broken 2 byte encoding of \u00ff with missing last byte. + {" 'foo\303';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Broken 2 byte encoding of \u007f should be a 1 byte encoding. + {" 'foo\301\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Illegal 5 byte encoding. + {" 'foo\370\277\277\277\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Illegal 6 byte encoding. + {" 'foo\374\277\277\277\277\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Illegal 0xfe byte + {" 'foo\376\277\277\277\277\277\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + // Illegal 0xff byte + {" 'foo\377\277\277\277\277\277\277\277';\n" + " (function fun", + "(a,b) { infunction; }", ")();", i::FUNCTION_SCOPE, i::SLOPPY}, + {" 'foo';\n" + " (function fun", + "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();", i::FUNCTION_SCOPE, + i::SLOPPY}, + {" 'foo';\n" + " (function fun", + "(a,b) { 'bar\360\220\220\214'; }", ")();", i::FUNCTION_SCOPE, + i::SLOPPY}, + {NULL, NULL, NULL, i::EVAL_SCOPE, i::SLOPPY}}; i::Isolate* isolate = CcTest::i_isolate(); i::Factory* factory = isolate->factory();