Add proto2 to proto3 util

This commit is contained in:
Yilun Chong 2018-05-30 17:06:45 -07:00
parent d69fd037f8
commit 193af9f4c1
7 changed files with 349 additions and 46 deletions

View File

@ -349,11 +349,11 @@ gogo_proto_middleman: protoc-gen-gogoproto
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-gogoproto --gogoproto_out=$$oldpwd/tmp/gogo_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) )
touch gogo_proto_middleman
new_data = $$(for data in $(all_data); do echo "tmp$${data\#$(srcdir)}"; done | xargs)
gogo_data = $$(for data in $(all_data); do echo "tmp/gogo_data$${data\#$(srcdir)}"; done | xargs)
generate_gogo_data: protoc_middleman protoc_middleman2 gogo-data-scrubber
mkdir -p `dirname $(new_data)`
./gogo-data-scrubber $(all_data) $(new_data)
mkdir -p `dirname $(gogo_data)`
./gogo-data-scrubber $(all_data) $(gogo_data)
touch generate_gogo_data
make_tmp_dir_gogo:
@ -408,8 +408,6 @@ gogoslick_protoc_middleman: make_tmp_dir_gogo $(top_srcdir)/src/protoc$(EXEEXT)
oldpwd=`pwd` && ( cd $(srcdir)/tmp/gogo_proto && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$$oldpwd/$(top_srcdir)/src --gogoslick_out=$$oldpwd/tmp/gogoslick $(benchmarks_protoc_inputs_proto2_message4) )
touch gogoslick_protoc_middleman
gogo_data = $$(find . -type f -name "dataset.*.pb" -path "./tmp/*")
generate-gogo-benchmark-code:
@echo '#! /bin/bash' > generate-gogo-benchmark-code
@echo 'cp $(srcdir)/go/go_benchmark_test.go tmp/$$1/benchmark_code/$$1_benchmark1_test.go' >> generate-gogo-benchmark-code
@ -453,7 +451,7 @@ gogoslick: gogoslick_protoc_middleman generate_gogo_data gogo-benchmark generat
############ UTIL RULES BEGIN ############
bin_PROGRAMS += protoc-gen-gogoproto gogo-data-scrubber
bin_PROGRAMS += protoc-gen-gogoproto gogo-data-scrubber protoc-gen-proto2_to_proto3 proto3-data-stripper
protoc_gen_gogoproto_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la
protoc_gen_gogoproto_SOURCES = util/protoc-gen-gogoproto.cc
@ -468,9 +466,40 @@ nodist_gogo_data_scrubber_SOURCES = \
$(benchmarks_protoc_outputs_proto2) \
$(benchmarks_protoc_outputs_proto2_header) \
$(benchmarks_protoc_outputs_header)
protoc_gen_proto2_to_proto3_LDADD = $(top_srcdir)/src/libprotobuf.la $(top_srcdir)/src/libprotoc.la
protoc_gen_proto2_to_proto3_SOURCES = util/protoc-gen-proto2_to_proto3.cc
protoc_gen_proto2_to_proto3_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(srcdir)/util
proto3_data_stripper_LDADD = $(top_srcdir)/src/libprotobuf.la
proto3_data_stripper_SOURCES = util/proto3_data_stripper.cc
proto3_data_stripper_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)/cpp -I$(srcdir)/util
util/proto3_data_stripper-proto3_data_stripper.$(OBJEXT): $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2) $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header)
nodist_proto3_data_stripper_SOURCES = \
$(benchmarks_protoc_outputs) \
$(benchmarks_protoc_outputs_proto2) \
$(benchmarks_protoc_outputs_proto2_header) \
$(benchmarks_protoc_outputs_header)
############ UTIL RULES END ############
############ PROTO3 PREPARATION BEGIN #############
proto3_proto_middleman: protoc-gen-proto2_to_proto3
mkdir -p "tmp/proto3_proto"
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I$(srcdir) -I$(top_srcdir) --plugin=protoc-gen-proto2_to_proto3 --proto2_to_proto3_out=$$oldpwd/tmp/proto3_proto $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(benchmarks_protoc_inputs_proto2) )
touch proto3_proto_middleman
proto3_data = $$(for data in $(all_data); do echo "tmp/proto3_data$${data\#$(srcdir)}"; done | xargs)
generate_proto3_data: protoc_middleman protoc_middleman2 proto3-data-stripper
mkdir -p `dirname $(proto3_data)`
./proto3-data-stripper $(all_data) $(proto3_data)
touch generate_proto3_data
############ PROTO3 PREPARATION END #############
MAINTAINERCLEANFILES = \
Makefile.in
@ -513,7 +542,9 @@ CLEANFILES = \
gogoslick_protoc_middleman \
gogoslick \
gogo-benchmark \
gogo/cpp_no_group/cpp_benchmark.*
gogo/cpp_no_group/cpp_benchmark.* \
proto3_proto_middleman \
generate_proto3_data
clean-local:

View File

@ -0,0 +1,63 @@
#ifndef PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
#define PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_
#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"
using google::protobuf::FieldDescriptor;
using google::protobuf::Message;
using google::protobuf::Reflection;
namespace google {
namespace protobuf {
namespace util {
class DataStripper {
public:
void StripMessage(Message *message) {
std::vector<const FieldDescriptor*> set_fields;
const Reflection* reflection = message->GetReflection();
reflection->ListFields(*message, &set_fields);
for (size_t i = 0; i < set_fields.size(); i++) {
const FieldDescriptor* field = set_fields[i];
if (ShouldBeClear(field)) {
reflection->ClearField(message, field);
}
if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
if (field->is_repeated()) {
for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
StripMessage(reflection->MutableRepeatedMessage(message, field, j));
}
} else {
StripMessage(reflection->MutableMessage(message, field));
}
}
}
reflection->MutableUnknownFields(message)->Clear();
}
private:
virtual bool ShouldBeClear(const FieldDescriptor *field) = 0;
};
class GogoDataStripper : public DataStripper {
private:
virtual bool ShouldBeClear(const FieldDescriptor *field) {
return field->type() == FieldDescriptor::TYPE_GROUP;
}
};
class Proto3DataStripper : public DataStripper {
private:
virtual bool ShouldBeClear(const FieldDescriptor *field) {
return field->type() == FieldDescriptor::TYPE_GROUP ||
field->is_extension();
}
};
} // namespace util
} // namespace protobuf
} // namespace google
#endif // PROTOBUF_BENCHMARKS_UTIL_DATA_PROTO2_TO_PROTO3_UTIL_H_

View File

@ -4,43 +4,11 @@
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"
#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"
#include "data_proto2_to_proto3_util.h"
#include <fstream>
using google::protobuf::FieldDescriptor;
using google::protobuf::Message;
using google::protobuf::Reflection;
class DataGroupStripper {
public:
static void StripMessage(Message *message) {
std::vector<const FieldDescriptor*> set_fields;
const Reflection* reflection = message->GetReflection();
reflection->ListFields(*message, &set_fields);
for (size_t i = 0; i < set_fields.size(); i++) {
const FieldDescriptor* field = set_fields[i];
if (field->type() == FieldDescriptor::TYPE_GROUP) {
reflection->ClearField(message, field);
}
if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
if (field->is_repeated()) {
for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
StripMessage(reflection->MutableRepeatedMessage(message, field, j));
}
} else {
StripMessage(reflection->MutableMessage(message, field));
}
}
}
reflection->MutableUnknownFields(message)->Clear();
}
};
using google::protobuf::util::GogoDataStripper;
std::string ReadFile(const std::string& name) {
std::ifstream file(name.c_str());
@ -91,7 +59,8 @@ int main(int argc, char *argv[]) {
for (int i = 0; i < dataset.payload_size(); i++) {
message->ParseFromString(dataset.payload(i));
DataGroupStripper::StripMessage(message);
GogoDataStripper stripper;
stripper.StripMessage(message);
dataset.set_payload(i, message->SerializeAsString());
}

View File

@ -0,0 +1,74 @@
#include "benchmarks.pb.h"
#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"
#include "data_proto2_to_proto3_util.h"
#include <fstream>
using google::protobuf::util::Proto3DataStripper;
std::string ReadFile(const std::string& name) {
std::ifstream file(name.c_str());
GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
<< name
<< "', please make sure you are running this command from the benchmarks"
<< " directory.\n";
return std::string((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
}
int main(int argc, char *argv[]) {
if (argc % 2 == 0 || argc == 1) {
std::cerr << "Usage: [input_files] [output_file_names] where " <<
"input_files are one to one mapping to output_file_names." <<
std::endl;
return 1;
}
for (int i = argc / 2; i > 0; i--) {
const std::string &input_file = argv[i];
const std::string &output_file = argv[i + argc / 2];
std::cerr << "Generating " << input_file
<< " to " << output_file << std::endl;
benchmarks::BenchmarkDataset dataset;
Message* message;
std::string dataset_payload = ReadFile(input_file);
GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
<< "Can' t parse data file " << input_file;
if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
message = new benchmarks::proto3::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
message = new benchmarks::proto2::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
message = new benchmarks::proto2::GoogleMessage2;
} else if (dataset.message_name() ==
"benchmarks.google_message3.GoogleMessage3") {
message = new benchmarks::google_message3::GoogleMessage3;
} else if (dataset.message_name() ==
"benchmarks.google_message4.GoogleMessage4") {
message = new benchmarks::google_message4::GoogleMessage4;
} else {
std::cerr << "Unknown message type: " << dataset.message_name();
exit(1);
}
for (int i = 0; i < dataset.payload_size(); i++) {
message->ParseFromString(dataset.payload(i));
Proto3DataStripper stripper;
stripper.StripMessage(message);
dataset.set_payload(i, message->SerializeAsString());
}
std::ofstream ofs(output_file);
ofs << dataset.SerializeAsString();
ofs.close();
}
return 0;
}

View File

@ -12,7 +12,7 @@ using google::protobuf::FileDescriptor;
using google::protobuf::DescriptorPool;
using google::protobuf::io::Printer;
using google::protobuf::util::SchemaGroupStripper;
using google::protobuf::util::SchemaAddZeroEnumValue;
using google::protobuf::util::EnumScrubber;
namespace google {
namespace protobuf {
@ -74,7 +74,7 @@ class GoGoProtoGenerator : public CodeGenerator {
file->CopyTo(&new_file);
SchemaGroupStripper::StripFile(file, &new_file);
SchemaAddZeroEnumValue enum_scrubber;
EnumScrubber enum_scrubber;
enum_scrubber.ScrubFile(&new_file);
string filename = file->name();

View File

@ -0,0 +1,108 @@
#include "google/protobuf/compiler/code_generator.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "google/protobuf/io/printer.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include "schema_proto2_to_proto3_util.h"
#include "google/protobuf/compiler/plugin.h"
using google::protobuf::FileDescriptorProto;
using google::protobuf::FileDescriptor;
using google::protobuf::DescriptorPool;
using google::protobuf::io::Printer;
using google::protobuf::util::SchemaGroupStripper;
using google::protobuf::util::EnumScrubber;
using google::protobuf::util::ExtensionStripper;
using google::protobuf::util::FieldScrubber;
namespace google {
namespace protobuf {
namespace compiler {
namespace {
string StripProto(string filename) {
if (filename.substr(filename.size() - 11) == ".protodevel") {
// .protodevel
return filename.substr(0, filename.size() - 11);
} else {
// .proto
return filename.substr(0, filename.size() - 6);
}
}
DescriptorPool new_pool_;
} // namespace
class Proto2ToProto3Generator : public CodeGenerator {
public:
virtual bool GenerateAll(const std::vector<const FileDescriptor*>& files,
const string& parameter,
GeneratorContext* context,
string* error) const {
for (int i = 0; i < files.size(); i++) {
for (auto file : files) {
bool can_generate =
(new_pool_.FindFileByName(file->name()) == nullptr);
for (int j = 0; j < file->dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->dependency(j)->name()) != nullptr);
}
for (int j = 0; j < file->public_dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->public_dependency(j)->name()) != nullptr);
}
for (int j = 0; j < file->weak_dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->weak_dependency(j)->name()) != nullptr);
}
if (can_generate) {
Generate(file, parameter, context, error);
break;
}
}
}
return true;
}
virtual bool Generate(const FileDescriptor* file,
const string& parameter,
GeneratorContext* context,
string* error) const {
FileDescriptorProto new_file;
file->CopyTo(&new_file);
SchemaGroupStripper::StripFile(file, &new_file);
EnumScrubber enum_scrubber;
enum_scrubber.ScrubFile(&new_file);
ExtensionStripper::StripFile(&new_file);
FieldScrubber::ScrubFile(&new_file);
new_file.set_syntax("proto3");
string filename = file->name();
string basename = StripProto(filename);
std::vector<std::pair<string,string>> option_pairs;
ParseGeneratorParameter(parameter, &option_pairs);
std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
context->Open(basename + ".proto"));
string content = new_pool_.BuildFile(new_file)->DebugString();
Printer printer(output.get(), '$');
printer.WriteRaw(content.c_str(), content.size());
return true;
}
};
} // namespace compiler
} // namespace protobuf
} // namespace google
int main(int argc, char* argv[]) {
google::protobuf::compiler::Proto2ToProto3Generator generator;
return google::protobuf::compiler::PluginMain(argc, argv, &generator);
}

View File

@ -74,10 +74,10 @@ class SchemaGroupStripper {
};
class SchemaAddZeroEnumValue {
class EnumScrubber {
public:
SchemaAddZeroEnumValue()
EnumScrubber()
: total_added_(0) {
}
@ -130,6 +130,64 @@ class SchemaAddZeroEnumValue {
int total_added_;
};
class ExtensionStripper {
public:
static void StripFile(FileDescriptorProto *file) {
for (int i = 0; i < file->mutable_message_type()->size(); i++) {
StripMessage(file->mutable_message_type(i));
}
file->mutable_extension()->Clear();
}
private:
static void StripMessage(DescriptorProto *message_type) {
message_type->mutable_extension()->Clear();
message_type->clear_extension_range();
for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) {
StripMessage(message_type->mutable_nested_type(i));
}
}
};
class FieldScrubber {
public:
static void ScrubFile(FileDescriptorProto *file) {
for (int i = 0; i < file->mutable_message_type()->size(); i++) {
ScrubMessage(file->mutable_message_type(i));
}
for (int i = 0; i < file->mutable_extension()->size(); i++) {
file->mutable_extension(i)->clear_default_value();
if (ShouldClearLabel(file->mutable_extension(i))) {
file->mutable_extension(i)->clear_label();
}
}
}
private:
static bool ShouldClearLabel(const FieldDescriptorProto *field) {
return field->label() == FieldDescriptorProto::LABEL_OPTIONAL ||
field->label() == FieldDescriptorProto::LABEL_REQUIRED;
}
static void ScrubMessage(DescriptorProto *message_type) {
message_type->mutable_extension()->Clear();
for (int i = 0; i < message_type->mutable_extension()->size(); i++) {
message_type->mutable_extension(i)->clear_default_value();
if (ShouldClearLabel(message_type->mutable_extension(i))) {
message_type->mutable_extension(i)->clear_label();
}
}
for (int i = 0; i < message_type->mutable_field()->size(); i++) {
message_type->mutable_field(i)->clear_default_value();
if (ShouldClearLabel(message_type->mutable_field(i))) {
message_type->mutable_field(i)->clear_label();
}
}
for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) {
ScrubMessage(message_type->mutable_nested_type(i));
}
}
};
} // namespace util
} // namespace protobuf
} // namespace google