diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
index 5aa35c660..f0cb9402a 100644
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
@@ -1,9 +1,11 @@
+benchmarks_protoc_inputs_benchmark_wrapper =                               \
+	benchmarks.proto
+
 benchmarks_protoc_inputs =                                                 \
-	benchmarks.proto                                                         \
-	datasets/google_message1/benchmark_message1_proto3.proto
+	datasets/google_message1/proto3/benchmark_message1_proto3.proto
 
 benchmarks_protoc_inputs_proto2 =                                          \
-	datasets/google_message1/benchmark_message1_proto2.proto                 \
+	datasets/google_message1/proto2/benchmark_message1_proto2.proto          \
 	datasets/google_message2/benchmark_message2.proto                        \
 	datasets/google_message3/benchmark_message3.proto                        \
 	datasets/google_message3/benchmark_message3_1.proto                      \
@@ -26,7 +28,7 @@ make_tmp_dir:
 if USE_EXTERNAL_PROTOC
 
 protoc_middleman: make_tmp_dir $(benchmarks_protoc_inputs)
-	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=./tmp $(benchmarks_protoc_inputs)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=./tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper)
 	touch protoc_middleman
 
 protoc_middleman2: make_tmp_dir $(benchmarks_protoc_inputs_proto2)
@@ -38,8 +40,8 @@ else
 # We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is
 # relative to srcdir, which may not be the same as the current directory when
 # building out-of-tree.
-protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
-	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) )
+protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper)
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) )
 	touch protoc_middleman
 
 protoc_middleman2:  make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
@@ -54,14 +56,14 @@ all_data = `find . -type f -name "dataset.*.pb"`
 
 benchmarks_protoc_outputs =                                                \
 	benchmarks.pb.cc                                                         \
-	datasets/google_message1/benchmark_message1_proto3.pb.cc
+	datasets/google_message1/proto3/benchmark_message1_proto3.pb.cc
   
 benchmarks_protoc_outputs_header =                                         \
 	benchmarks.pb.h                                                          \
-	datasets/google_message1/benchmark_message1_proto3.pb.h
+	datasets/google_message1/proto3/benchmark_message1_proto3.pb.h
 
 benchmarks_protoc_outputs_proto2_header =                                  \
-	datasets/google_message1/benchmark_message1_proto2.pb.h                  \
+	datasets/google_message1/proto2/benchmark_message1_proto2.pb.h           \
 	datasets/google_message2/benchmark_message2.pb.h                         \
 	datasets/google_message3/benchmark_message3.pb.h                         \
 	datasets/google_message3/benchmark_message3_1.pb.h                       \
@@ -78,7 +80,7 @@ benchmarks_protoc_outputs_proto2_header =                                  \
 	datasets/google_message4/benchmark_message4_3.pb.h
 
 benchmarks_protoc_outputs_proto2 =                                         \
-	datasets/google_message1/benchmark_message1_proto2.pb.cc                 \
+	datasets/google_message1/proto2/benchmark_message1_proto2.pb.cc          \
 	datasets/google_message2/benchmark_message2.pb.cc                        \
 	datasets/google_message3/benchmark_message3.pb.cc                        \
 	datasets/google_message3/benchmark_message3_1.pb.cc                      \
@@ -224,6 +226,81 @@ python-cpp-generated-code: python-cpp-generated-code-benchmark
 
 ############# PYTHON RULES END ##############
 
+############# GO RULES BEGIN ##############
+# Proto2 inputs grouped per dataset directory; the Go rules below run protoc once per group.
+benchmarks_protoc_inputs_proto2_message1 =                                 \
+	datasets/google_message1/proto2/benchmark_message1_proto2.proto
+
+benchmarks_protoc_inputs_proto2_message2 =                                 \
+	datasets/google_message2/benchmark_message2.proto
+
+benchmarks_protoc_inputs_proto2_message3 =                                 \
+	datasets/google_message3/benchmark_message3.proto                        \
+	datasets/google_message3/benchmark_message3_1.proto                      \
+	datasets/google_message3/benchmark_message3_2.proto                      \
+	datasets/google_message3/benchmark_message3_3.proto                      \
+	datasets/google_message3/benchmark_message3_4.proto                      \
+	datasets/google_message3/benchmark_message3_5.proto                      \
+	datasets/google_message3/benchmark_message3_6.proto                      \
+	datasets/google_message3/benchmark_message3_7.proto                      \
+	datasets/google_message3/benchmark_message3_8.proto
+
+benchmarks_protoc_inputs_proto2_message4 =                                 \
+	datasets/google_message4/benchmark_message4.proto                        \
+	datasets/google_message4/benchmark_message4_1.proto                      \
+	datasets/google_message4/benchmark_message4_2.proto                      \
+	datasets/google_message4/benchmark_message4_3.proto
+
+if USE_EXTERNAL_PROTOC
+# Generate Go sources into ./tmp with the external $(PROTOC); each rule touches a stamp named after itself.
+go_protoc_middleman: make_tmp_dir $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_benchmark_wrapper)
+	touch go_protoc_middleman
+
+go_protoc_middleman2: make_tmp_dir $(benchmarks_protoc_inputs_proto2_message1) $(benchmarks_protoc_inputs_proto2_message2) $(benchmarks_protoc_inputs_proto2_message3) $(benchmarks_protoc_inputs_proto2_message4)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_proto2_message1)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_proto2_message2)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_proto2_message3)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_proto2_message4)
+	touch go_protoc_middleman2
+
+else
+
+# We cd to $(srcdir) before running protoc because the input paths are relative
+# to srcdir, which may differ from the build directory when building out-of-tree.
+# Each rule touches a stamp named after itself so make can tell when it is current.
+go_protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) $(well_known_type_protoc_inputs)
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_benchmark_wrapper) )
+	touch go_protoc_middleman
+
+go_protoc_middleman2:  make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2_message1) $(benchmarks_protoc_inputs_proto2_message2) $(benchmarks_protoc_inputs_proto2_message3) $(benchmarks_protoc_inputs_proto2_message4) $(well_known_type_protoc_inputs)
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2_message1) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2_message2) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2_message3) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2_message4) )
+	touch go_protoc_middleman2
+
+endif
+# Write the go-benchmark wrapper script: it runs `go test -bench` once per dataset file argument.
+go-benchmark: go_protoc_middleman go_protoc_middleman2
+	@echo "Writing shortcut script go-benchmark..."
+	@echo '#! /bin/sh' > go-benchmark
+	@echo 'mkdir -p tmp_cc && mv *.cc tmp_cc' >> go-benchmark
+	@echo 'for file in "$$@"; do' >> go-benchmark
+	@echo '  echo "Testing go benchmark for data file: $$file";' >> go-benchmark
+	@echo '  go test -bench=. -- "$$file";' >> go-benchmark
+	@echo 'done' >> go-benchmark
+	@echo 'mv tmp_cc/* . && rm -rf tmp_cc' >> go-benchmark
+	@chmod +x go-benchmark
+# Build the go-benchmark script and run it on every dataset file found by $(all_data).
+go: go_protoc_middleman go_protoc_middleman2 go-benchmark
+	./go-benchmark $(all_data)
+
+############# GO RULES END ##############
+
+
 MAINTAINERCLEANFILES =                                                     \
 	Makefile.in
 
@@ -241,7 +318,10 @@ CLEANFILES =                                                               \
 	python_cpp_proto_library                                                 \
 	python-pure-python-benchmark                                             \
 	python-cpp-reflection-benchmark                                          \
-	python-cpp-generated-code-benchmark
+	python-cpp-generated-code-benchmark                                      \
+	go-benchmark                                                             \
+	go_protoc_middleman                                                      \
+	go_protoc_middleman2
 
 clean-local:
 	-rm -rf tmp/*
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 459c7b9d3..183280ff5 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -36,6 +36,18 @@ $ sudo apt-get install python3-dev
 ```
 And you also need to make sure `pkg-config` is installed.
 
+### Go
+Go protobuf lives in a [separate repository](https://github.com/golang/protobuf). You
+need to install Go, plus the Go protobuf `proto` package and `protoc-gen-go`, before
+running the benchmark. The benchmark itself is driven by Go's
+[testing package](https://golang.org/pkg/testing/).
+
+To install the Go protobuf `proto` package and `protoc-gen-go`, run:
+```
+$ go get -u github.com/golang/protobuf/protoc-gen-go
+$ go get -u github.com/golang/protobuf/proto
+```
+
 ### Big data
 
 There's some optional big testing data which is not included in the directory
@@ -87,6 +99,11 @@ $ make python-cpp-reflection
 $ make python-cpp-generated-code
 ```
 
+### Go
+```
+$ make go
+```
+
 To run a specific dataset:
 
 ### Java:
@@ -126,6 +143,13 @@ $ make python-cpp-generated-code-benchmark
 $ ./python-cpp-generated-code-benchmark $(specific generated dataset file name)
 ```
 
+### Go:
+```
+$ make go-benchmark
+$ ./go-benchmark $(specific generated dataset file name)
+```
+
+
 ## Benchmark datasets
 
 Each data set is in the format of benchmarks.proto:
diff --git a/benchmarks/cpp_benchmark.cc b/benchmarks/cpp_benchmark.cc
index 0ba4dc52d..f8b552917 100644
--- a/benchmarks/cpp_benchmark.cc
+++ b/benchmarks/cpp_benchmark.cc
@@ -32,8 +32,8 @@
 #include <iostream>
 #include "benchmark/benchmark_api.h"
 #include "benchmarks.pb.h"
-#include "datasets/google_message1/benchmark_message1_proto2.pb.h"
-#include "datasets/google_message1/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
 #include "datasets/google_message2/benchmark_message2.pb.h"
 #include "datasets/google_message3/benchmark_message3.pb.h"
 #include "datasets/google_message4/benchmark_message4.pb.h"
diff --git a/benchmarks/datasets/google_message1/benchmark_message1_proto2.proto b/benchmarks/datasets/google_message1/proto2/benchmark_message1_proto2.proto
similarity index 100%
rename from benchmarks/datasets/google_message1/benchmark_message1_proto2.proto
rename to benchmarks/datasets/google_message1/proto2/benchmark_message1_proto2.proto
diff --git a/benchmarks/datasets/google_message1/dataset.google_message1_proto2.pb b/benchmarks/datasets/google_message1/proto2/dataset.google_message1_proto2.pb
similarity index 100%
rename from benchmarks/datasets/google_message1/dataset.google_message1_proto2.pb
rename to benchmarks/datasets/google_message1/proto2/dataset.google_message1_proto2.pb
diff --git a/benchmarks/datasets/google_message1/benchmark_message1_proto3.proto b/benchmarks/datasets/google_message1/proto3/benchmark_message1_proto3.proto
similarity index 100%
rename from benchmarks/datasets/google_message1/benchmark_message1_proto3.proto
rename to benchmarks/datasets/google_message1/proto3/benchmark_message1_proto3.proto
diff --git a/benchmarks/datasets/google_message1/dataset.google_message1_proto3.pb b/benchmarks/datasets/google_message1/proto3/dataset.google_message1_proto3.pb
similarity index 100%
rename from benchmarks/datasets/google_message1/dataset.google_message1_proto3.pb
rename to benchmarks/datasets/google_message1/proto3/dataset.google_message1_proto3.pb
diff --git a/benchmarks/go_benchmark_test.go b/benchmarks/go_benchmark_test.go
new file mode 100644
index 000000000..6f10d8135
--- /dev/null
+++ b/benchmarks/go_benchmark_test.go
@@ -0,0 +1,135 @@
+package main
+
+import (
+	"errors"
+	"io/ioutil"
+	"flag"
+	"testing"
+	"os"
+
+	benchmarkWrapper "./tmp"
+	proto "github.com/golang/protobuf/proto"
+	googleMessage1Proto3 "./tmp/datasets/google_message1/proto3"
+	googleMessage1Proto2 "./tmp/datasets/google_message1/proto2"
+	googleMessage2 "./tmp/datasets/google_message2"
+	googleMessage3 "./tmp/datasets/google_message3"
+	googleMessage4 "./tmp/datasets/google_message4"
+
+)
+
+// Data holds the benchmark inputs; TestMain fills it from the dataset file.
+type Data struct {
+	// Marshalled is a slice of marshalled protocol
+	// buffers. 1:1 with Unmarshalled.
+	Marshalled [][]byte
+
+	// Unmarshalled is a slice of unmarshalled protocol
+	// buffers. 1:1 with Marshalled.
+	Unmarshalled []proto.Message
+
+	count int // number of messages; set to len(Unmarshalled) by TestMain
+}
+
+var data *Data // dataset shared by all benchmarks; populated in TestMain
+var counter int // running message index; never reset, so it carries over between benchmarks
+
+type GetDefaultInstanceFunction func() proto.Message // returns a new, empty message
+var getDefaultInstance GetDefaultInstanceFunction // chosen by generateGetDefaltInstanceFunction
+
+// generateGetDefaltInstanceFunction points getDefaultInstance at a constructor for
+// the message type named by dataset.MessageName. An unrecognised name yields an
+// error and leaves getDefaultInstance untouched.
+func generateGetDefaltInstanceFunction(dataset benchmarkWrapper.BenchmarkDataset) error {
+	var newMessage GetDefaultInstanceFunction
+	switch name := dataset.MessageName; name {
+	case "benchmarks.proto3.GoogleMessage1":
+		newMessage = func() proto.Message { return &googleMessage1Proto3.GoogleMessage1{} }
+	case "benchmarks.proto2.GoogleMessage1":
+		newMessage = func() proto.Message { return &googleMessage1Proto2.GoogleMessage1{} }
+	case "benchmarks.proto2.GoogleMessage2":
+		newMessage = func() proto.Message { return &googleMessage2.GoogleMessage2{} }
+	case "benchmarks.google_message3.GoogleMessage3":
+		newMessage = func() proto.Message { return &googleMessage3.GoogleMessage3{} }
+	case "benchmarks.google_message4.GoogleMessage4":
+		newMessage = func() proto.Message { return &googleMessage4.GoogleMessage4{} }
+	default:
+		return errors.New("Unknown message type: " + name)
+	}
+	getDefaultInstance = newMessage
+	return nil
+}
+
+// TestMain loads the dataset named by the first non-flag argument before running the benchmarks.
+func TestMain(m *testing.M) {
+	flag.Parse()
+	rawData, err := ioutil.ReadFile(flag.Arg(0))
+	if err != nil {
+		panic("Couldn't read file " + flag.Arg(0) + ": " + err.Error())
+	}
+	var dataset benchmarkWrapper.BenchmarkDataset
+	if err = proto.Unmarshal(rawData, &dataset); err == nil {
+		err = generateGetDefaltInstanceFunction(dataset) // errors on an unknown message type
+	}
+	if err != nil {
+		panic("Can't load BenchmarkDataset from " + flag.Arg(0) + ": " + err.Error())
+	}
+	var messages []proto.Message
+	for _, payload := range dataset.Payload {
+		msg := getDefaultInstance() // not named m, which would shadow the *testing.M
+		if err := proto.Unmarshal(payload, msg); err != nil {
+			panic("Can't unmarshal a payload of " + dataset.MessageName + ": " + err.Error())
+		}
+		messages = append(messages, msg)
+	}
+	data = &Data{Marshalled: dataset.Payload, Unmarshalled: messages, count: len(messages)}
+	os.Exit(m.Run())
+}
+
+// BenchmarkUnmarshal decodes the payloads round-robin, each into a fresh message.
+func BenchmarkUnmarshal(b *testing.B) {
+	b.ReportAllocs()
+	for n := b.N; n > 0; n-- {
+		idx := counter % data.count
+		if err := proto.Unmarshal(data.Marshalled[idx], getDefaultInstance()); err != nil {
+			b.Fatalf("can't unmarshal message %d %v", idx, err)
+		}
+		counter++
+	}
+}
+
+// BenchmarkMarshal encodes the pre-decoded messages in round-robin order.
+func BenchmarkMarshal(b *testing.B) {
+	b.ReportAllocs()
+	for n := b.N; n > 0; n, counter = n-1, counter+1 {
+		idx := counter % data.count
+		if _, err := proto.Marshal(data.Unmarshalled[idx]); err != nil {
+			b.Fatalf("can't marshal message %d %+v: %v", idx, data.Unmarshalled[idx], err)
+		}
+	}
+}
+
+// BenchmarkSize calls proto.Size on the pre-decoded messages in round-robin order.
+func BenchmarkSize(b *testing.B) {
+	b.ReportAllocs()
+	for n := b.N; n > 0; n, counter = n-1, counter+1 {
+		proto.Size(data.Unmarshalled[counter%data.count])
+	}
+}
+
+// BenchmarkClone calls proto.Clone on the pre-decoded messages in round-robin order.
+func BenchmarkClone(b *testing.B) {
+	b.ReportAllocs()
+	for n := b.N; n > 0; n, counter = n-1, counter+1 {
+		proto.Clone(data.Unmarshalled[counter%data.count])
+	}
+}
+
+// BenchmarkMerge merges each pre-decoded message, round-robin, into a fresh message.
+func BenchmarkMerge(b *testing.B) {
+	b.ReportAllocs()
+	for n := b.N; n > 0; n-- {
+		proto.Merge(getDefaultInstance(), data.Unmarshalled[counter%data.count])
+		counter++
+	}
+}
+
diff --git a/benchmarks/py_benchmark.py b/benchmarks/py_benchmark.py
index 48234f033..8858f4aac 100755
--- a/benchmarks/py_benchmark.py
+++ b/benchmarks/py_benchmark.py
@@ -17,8 +17,8 @@ elif sys.argv[1] != "false":
   raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
 # END CPP GENERATED MESSAGE
 
-import datasets.google_message1.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
-import datasets.google_message1.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
+import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
+import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
 import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
 import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2
 import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2