Ignore UTF-8 BOM if it is at the beginning of a proto file
This commit is contained in:
parent
fde6e89f99
commit
b2d2cf8b48
@ -229,6 +229,32 @@ TEST_F(ParserTest, WarnIfSyntaxIdentifierOmmitted) {
|
||||
|
||||
// Alias of the ParserTest fixture; the TEST_F cases below that exercise
// message parsing are registered under this name.
typedef ParserTest ParseMessageTest;
|
||||
|
||||
// Checks that input beginning with the three-byte UTF-8 byte order mark
// (0xEF 0xBB 0xBF) is expected to parse to the same descriptor as the message
// definition that follows the mark.
TEST_F(ParseMessageTest, IgnoreBOM) {
  // The BOM lives in its own escaped literal instead of being patched over
  // placeholder characters: patching requires exactly three placeholder bytes,
  // and with fewer the writes to input[1] / input[2] overwrite the leading
  // characters of the "message" keyword. Keeping it as a separate literal also
  // prevents the hex escape from absorbing any following hex-digit character.
  const char input[] = "\xEF\xBB\xBF"
                       "message TestMessage {\n"
                       " required int32 foo = 1;\n"
                       "}\n";
  ExpectParsesTo(input,
                 "message_type {"
                 " name: \"TestMessage\""
                 " field { name:\"foo\" label:LABEL_REQUIRED type:TYPE_INT32 number:1 }"
                 "}");
}
|
||||
|
||||
// Checks that input whose first byte is 0xEF but which does not continue with
// the remaining UTF-8 BOM bytes is expected to produce a BOM-specific error
// followed by the generic top-level-statement error.
TEST_F(ParseMessageTest, BOMError) {
  char proto_text[] = " message TestMessage {\n"
                      " required int32 foo = 1;\n"
                      "}\n";
  // Replace only the first byte, leaving an incomplete byte order mark.
  proto_text[0] = static_cast<char>(0xEF);

  const char* const expected_errors =
      "0:1: Proto file starts with 0xEF but not UTF-8 BOM. "
      "Only UTF-8 is accepted for proto file.\n"
      "0:0: Expected top-level statement (e.g. \"message\").\n";
  ExpectHasErrors(proto_text, expected_errors);
}
|
||||
|
||||
TEST_F(ParseMessageTest, SimpleMessage) {
|
||||
ExpectParsesTo(
|
||||
"message TestMessage {\n"
|
||||
|
@ -762,6 +762,15 @@ bool Tokenizer::NextWithComments(string* prev_trailing_comments,
|
||||
next_leading_comments);
|
||||
|
||||
if (current_.type == TYPE_START) {
|
||||
// Ignore unicode byte order mark(BOM) if it appears at the file
|
||||
// beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
|
||||
if (TryConsume((char)0xEF)) {
|
||||
if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) {
|
||||
AddError("Proto file starts with 0xEF but not UTF-8 BOM. "
|
||||
"Only UTF-8 is accepted for proto file.");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
collector.DetachFromPrev();
|
||||
} else {
|
||||
// A comment appearing on the same line must be attached to the previous
|
||||
|
Loading…
Reference in New Issue
Block a user