Ensure errors are reported from the last chunk of a file.

- Fix case where no error was provide if the consumer rejected something in the
  final chunk of input.
- Expose a more testable interface for the parser.
- Test all the basic behaviors of the parser.
This commit is contained in:
Thomas Van Lenten 2021-09-30 15:46:52 -04:00
parent 9466151605
commit a7823bbd45
3 changed files with 131 additions and 13 deletions

View File

@ -1675,16 +1675,12 @@ bool Parser::ParseChunk(StringPiece chunk) {
}
bool Parser::Finish() {
if (leftover_.empty()) {
return true;
}
// Force a newline onto the end to finish parsing.
leftover_ += "\n";
p_ = StringPiece(leftover_);
if (!ParseLoop()) {
// If there is still something to go, flush it with a newline.
if (!leftover_.empty() && !ParseChunk("\n")) {
return false;
}
return p_.empty(); // Everything used?
// This really should never fail if ParseChunk succeeded, but check to be sure.
return leftover_.empty();
}
bool Parser::ParseLoop() {
@ -1703,6 +1699,11 @@ bool Parser::ParseLoop() {
return true;
}
std::string ParserErrorString(const Parser& parser, const std::string& name) {
return std::string("error: ") + name + " Line " +
StrCat(parser.last_line()) + ", " + parser.error_str();
}
} // namespace
LineConsumer::LineConsumer() {}
@ -1723,22 +1724,31 @@ bool ParseSimpleFile(const std::string& path, LineConsumer* line_consumer,
io::FileInputStream file_stream(fd);
file_stream.SetCloseOnDelete(true);
return ParseSimpleStream(file_stream, path, line_consumer, out_error);
}
bool ParseSimpleStream(io::ZeroCopyInputStream& input_stream,
const std::string& stream_name,
LineConsumer* line_consumer,
std::string* out_error) {
Parser parser(line_consumer);
const void* buf;
int buf_len;
while (file_stream.Next(&buf, &buf_len)) {
while (input_stream.Next(&buf, &buf_len)) {
if (buf_len == 0) {
continue;
}
if (!parser.ParseChunk(StringPiece(static_cast<const char*>(buf), buf_len))) {
*out_error =
std::string("error: ") + path +
" Line " + StrCat(parser.last_line()) + ", " + parser.error_str();
*out_error = ParserErrorString(parser, stream_name);
return false;
}
}
return parser.Finish();
if (!parser.Finish()) {
*out_error = ParserErrorString(parser, stream_name);
return false;
}
return true;
}
ImportWriter::ImportWriter(

View File

@ -38,6 +38,7 @@
#include <google/protobuf/descriptor.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/port_def.inc>
@ -287,6 +288,11 @@ bool PROTOC_EXPORT ParseSimpleFile(const std::string& path,
LineConsumer* line_consumer,
std::string* out_error);
bool PROTOC_EXPORT ParseSimpleStream(io::ZeroCopyInputStream& input_stream,
const std::string& stream_name,
LineConsumer* line_consumer,
std::string* out_error);
// Helper class for parsing framework import mappings and generating
// import statements.
class PROTOC_EXPORT ImportWriter {

View File

@ -29,6 +29,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <google/protobuf/compiler/objectivec/objectivec_helpers.h>
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
#include <google/protobuf/testing/googletest.h>
#include <gtest/gtest.h>
@ -242,6 +243,107 @@ TEST(ObjCHelperDeathTest, TextFormatDecodeData_Failures) {
}
#endif // PROTOBUF_HAS_DEATH_TEST
class TestLineCollector : public LineConsumer {
public:
TestLineCollector(std::vector<std::string>* inout_lines,
const std::string* reject_line = nullptr)
: lines_(inout_lines), reject_(reject_line) {}
bool ConsumeLine(const StringPiece& line, std::string* out_error) override {
if (reject_) {
if (*reject_ == line) {
*out_error = std::string("Rejected '") + *reject_ + "'";
return false;
}
}
if (lines_) {
lines_->emplace_back(line);
}
return true;
}
private:
std::vector<std::string>* lines_;
const std::string* reject_;
};
const int kBlockSizes[] = {-1, 1, 2, 5, 64};
const int kBlockSizeCount = GOOGLE_ARRAYSIZE(kBlockSizes);
TEST(ObjCHelper, ParseSimple_BasicsSuccess) {
const std::vector<std::pair<std::string, std::vector<std::string>>> tests = {
{"", {}},
{"a", {"a"}},
{"a c", {"a c"}},
{" a c ", {"a c"}},
{"\ta c ", {"a c"}},
{"abc\n", {"abc"}},
{"abc\nd f", {"abc", "d f"}},
{"\n abc \n def \n\n", {"abc", "def"}},
};
for (const auto& test : tests) {
for (int i = 0; i < kBlockSizeCount; i++) {
io::ArrayInputStream input(test.first.data(), test.first.size(), kBlockSizes[i]);
std::string err_str;
std::vector<std::string> lines;
TestLineCollector collector(&lines);
EXPECT_TRUE(ParseSimpleStream(input, "dummy", &collector, &err_str));
EXPECT_EQ(lines, test.second);
EXPECT_TRUE(err_str.empty());
}
}
}
TEST(ObjCHelper, ParseSimple_DropsComments) {
const std::vector<std::pair<std::string, std::vector<std::string>>> tests = {
{"# nothing", {}},
{"#", {}},
{"##", {}},
{"\n# nothing\n", {}},
{"a # same line", {"a"}},
{"a # same line\n", {"a"}},
{"a\n# line\nc", {"a", "c"}},
{"# n o t # h i n g #", {}},
{"## n o # t h i n g #", {}},
{"a# n o t # h i n g #", {"a"}},
{"a\n## n o # t h i n g #", {"a"}},
};
for (const auto& test : tests) {
for (int i = 0; i < kBlockSizeCount; i++) {
io::ArrayInputStream input(test.first.data(), test.first.size(), kBlockSizes[i]);
std::string err_str;
std::vector<std::string> lines;
TestLineCollector collector(&lines);
EXPECT_TRUE(ParseSimpleStream(input, "dummy", &collector, &err_str));
EXPECT_EQ(lines, test.second);
EXPECT_TRUE(err_str.empty());
}
}
}
TEST(ObjCHelper, ParseSimple_RejectLines) {
const std::vector<std::tuple<std::string, std::string, int>> tests = {
{"a\nb\nc", "a", 1},
{"a\nb\nc", "b", 2},
{"a\nb\nc", "c", 3},
{"a\nb\nc\n", "c", 3},
};
for (const auto& test : tests) {
for (int i = 0; i < kBlockSizeCount; i++) {
io::ArrayInputStream input(std::get<0>(test).data(), std::get<0>(test).size(),
kBlockSizes[i]);
std::string err_str;
TestLineCollector collector(nullptr, &std::get<1>(test));
EXPECT_FALSE(ParseSimpleStream(input, "dummy", &collector, &err_str));
std::string expected_err = StrCat("error: dummy Line ", std::get<2>(test), ", Rejected '", std::get<1>(test), "'");
EXPECT_EQ(err_str, expected_err);
}
}
}
// TODO(thomasvl): Should probably add some unittests for all the special cases
// of name mangling (class name, field name, enum names). Rather than doing
// this with an ObjC test in the objectivec directory, we should be able to