Commit Brian Olson's gzip stream implementations.

2009-04-16 22:30:56 +00:00 · 2009-04-16 22:30:56 +00:00 · e59427a62c
commit e59427a62c
parent f663b1602d
12 changed files with 864 additions and 4 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -19,6 +19,9 @@
  * Message interface has method ParseFromBoundedZeroCopyStream() which parses
    a limited number of bytes from an input stream rather than parsing until
    EOF.
+  * GzipInputStream and GzipOutputStream support reading/writing gzip- or
+    zlib-compressed streams if zlib is available.
+    (google/protobuf/io/gzip_stream.h)

  Java
  * Fixed bug where Message.mergeFrom(Message) failed to merge extensions.
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@ -34,7 +34,7 @@ Documentation:
 Maven packaging:
  Gregory Kick <gak@google.com>

-Non-Google patch contributors:
+Patch contributors:
  Kevin Ko <kevin.s.ko@gmail.com>
    * Small patch to handle trailing slashes in --proto_path flag.
  Johan Euphrosine <proppy@aminche.com>
@ -57,3 +57,5 @@ Non-Google patch contributors:
    * Slicing support for repeated scalar fields for the Python API.
  Oleg Smolsky <oleg.smolsky@gmail.com>
    * MS Visual Studio error format option.
+  Brian Olson <brianolson@google.com>
+    * gzip/zlib I/O support.
--- a/configure.ac
+++ b/configure.ac
@ -15,6 +15,11 @@ AC_CONFIG_HEADERS([config.h])
 AC_CONFIG_MACRO_DIR([m4])
 AM_INIT_AUTOMAKE

+AC_ARG_WITH([zlib],
+  [AS_HELP_STRING([--with-zlib],
+    [include classes for streaming compressed data in and out @<:@default=check@:>@])],
+  [],[with_zlib=check])
+
 # Checks for programs.
 AC_PROG_CC
 AC_PROG_CXX
@ -38,6 +43,16 @@ AC_FUNC_MEMCMP
 AC_FUNC_STRTOD
 AC_CHECK_FUNCS([ftruncate memset mkdir strchr strerror strtol])

+HAVE_ZLIB=0
+AS_IF([test "$with_zlib" != no],
+  [AC_SEARCH_LIBS([zlibVersion], [z],
+    [AC_DEFINE([HAVE_ZLIB], [1], [Enable classes using zlib compression.])
+     HAVE_ZLIB=1],
+    [if test "$with_zlib" != check; then
+      AC_MSG_FAILURE([--with-zlib was given, but test for zlib failed])
+     fi])])
+AM_CONDITIONAL([HAVE_ZLIB], [test $HAVE_ZLIB = 1])
+
 ACX_PTHREAD
 AC_CXX_STL_HASH

--- a/src/Makefile.am
+++ b/src/Makefile.am
@ -1,5 +1,15 @@
 ## Process this file with automake to produce Makefile.in

+if HAVE_ZLIB
+GZCHECKPROGRAMS = zcgzip zcgunzip
+GZHEADERS = google/protobuf/io/gzip_stream.h
+GZTESTS = google/protobuf/io/gzip_stream_unittest.sh
+else
+GZCHECKPROGRAMS =
+GZHEADERS =
+GZTESTS =
+endif
+
 if GCC
 # These are good warnings to turn on by default
 AM_CXXFLAGS = $(PTHREAD_CFLAGS) -Wall -Wwrite-strings -Woverloaded-virtual -Wno-sign-compare
@ -40,6 +50,7 @@ nobase_include_HEADERS =                                       \
  google/protobuf/wire_format.h                                \
  google/protobuf/wire_format_inl.h                            \
  google/protobuf/io/coded_stream.h                            \
+  $(GZHEADERS)                                                 \
  google/protobuf/io/printer.h                                 \
  google/protobuf/io/tokenizer.h                               \
  google/protobuf/io/zero_copy_stream.h                        \
@ -83,6 +94,7 @@ libprotobuf_la_SOURCES =                                       \
  google/protobuf/unknown_field_set.cc                         \
  google/protobuf/wire_format.cc                               \
  google/protobuf/io/coded_stream.cc                           \
+  google/protobuf/io/gzip_stream.cc                            \
  google/protobuf/io/printer.cc                                \
  google/protobuf/io/tokenizer.cc                              \
  google/protobuf/io/zero_copy_stream.cc                       \
@ -159,6 +171,8 @@ protoc_inputs =                                                \
 EXTRA_DIST =                                                   \
  $(protoc_inputs)                                             \
  solaris/libstdc++.la                                         \
+  google/protobuf/io/gzip_stream.h                             \
+  google/protobuf/io/gzip_stream_unittest.sh                   \
  google/protobuf/testdata/golden_message                      \
  google/protobuf/testdata/golden_packed_fields_message        \
  google/protobuf/testdata/text_format_unittest_data.txt       \
@ -206,7 +220,7 @@ unittest_proto_middleman: protoc$(EXEEXT) $(protoc_inputs)

 $(protoc_outputs): unittest_proto_middleman

-check_PROGRAMS = protobuf-test
+check_PROGRAMS = protobuf-test $(GZCHECKPROGRAMS)
 protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la
 protobuf_test_SOURCES =                                        \
  google/protobuf/stubs/common_unittest.cc                     \
@ -256,6 +270,14 @@ protobuf_test_SOURCES =                                        \
  gtest/internal/gtest-port.h                                  \
  gtest/internal/gtest-string.h

+if HAVE_ZLIB
+zcgzip_LDADD = $(PTHREAD_LIBS) libprotobuf.la
+zcgzip_SOURCES = google/protobuf/testing/zcgzip.cc
+
+zcgunzip_LDADD = $(PTHREAD_LIBS) libprotobuf.la
+zcgunzip_SOURCES = google/protobuf/testing/zcgunzip.cc
+endif
+
 nodist_protobuf_test_SOURCES = $(protoc_outputs)

-TESTS = protobuf-test
+TESTS = protobuf-test $(GZTESTS)
--- a/src/google/protobuf/io/gzip_stream.cc
+++ b/src/google/protobuf/io/gzip_stream.cc
@ -0,0 +1,296 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2009 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: brianolson@google.com (Brian Olson)
+//  Based on original Protocol Buffers design by
+//  Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the implementation of classes GzipInputStream and
+// GzipOutputStream.
+
+#include "config.h"
+
+#if HAVE_ZLIB
+#include <google/protobuf/io/gzip_stream.h>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+static const int kDefaultBufferSize = 65536;  // 64kB; used by both stream constructors when buffer_size == -1.
+
+// Creates a decompressing stream that pulls compressed bytes from sub_stream.
+//
+//   sub_stream   source of compressed data; only the pointer is stored.
+//   format       expected container format (AUTO, GZIP or ZLIB).
+//   buffer_size  size in bytes of the decompression output buffer, or -1 to
+//                use kDefaultBufferSize.
+//
+// zlib itself is not initialized here: inflateInit2() is called lazily by
+// Inflate() once the first chunk of input is available.
+GzipInputStream::GzipInputStream(
+    ZeroCopyInputStream* sub_stream, Format format, int buffer_size)
+    : format_(format), sub_stream_(sub_stream), zerror_(Z_OK) {
+  // The destructor always calls inflateEnd(), which inspects `state`.  Because
+  // inflateInit2() may never run (no data read), `state` must be given a
+  // well-defined "not initialized" value instead of being left indeterminate.
+  zcontext_.state = Z_NULL;
+  zcontext_.zalloc = Z_NULL;
+  zcontext_.zfree = Z_NULL;
+  zcontext_.opaque = Z_NULL;
+  zcontext_.total_out = 0;
+  // A NULL next_in is how Inflate() recognizes that no input has been
+  // installed yet and that inflateInit2() still has to be called.
+  zcontext_.next_in = NULL;
+  zcontext_.avail_in = 0;
+  zcontext_.total_in = 0;
+  zcontext_.msg = NULL;
+  if (buffer_size == -1) {
+    output_buffer_length_ = kDefaultBufferSize;
+  } else {
+    output_buffer_length_ = buffer_size;
+  }
+  output_buffer_ = operator new(output_buffer_length_);
+  GOOGLE_CHECK(output_buffer_ != NULL);
+  zcontext_.next_out = static_cast<Bytef*>(output_buffer_);
+  zcontext_.avail_out = output_buffer_length_;
+  // Nothing has been decompressed yet, so there are no unread bytes.
+  output_position_ = output_buffer_;
+}
+GzipInputStream::~GzipInputStream() {  // Frees the output buffer and asks zlib to release its inflate state.
+  operator delete(output_buffer_);
+  zerror_ = inflateEnd(&zcontext_);  // Called unconditionally, even if Inflate() never ran inflateInit2().
+}
+
+int GzipInputStream::Inflate(int flush) {  // Runs one inflate() pass into output_buffer_; returns the zlib status code.
+  if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) {
+    // The previous inflate() filled the output buffer; keep the current input parameters and only drain more output.
+  } else if (zcontext_.avail_in == 0) {  // zlib has consumed all of its input: fetch the next chunk.
+    const void* in;
+    int in_size;
+    bool first = zcontext_.next_in == NULL;  // next_in stays NULL until the first chunk is installed below.
+    bool ok = sub_stream_->Next(&in, &in_size);
+    if (!ok) {
+      zcontext_.next_out = NULL;  // A NULL next_out is the marker Next() checks for "sub-stream ended".
+      zcontext_.avail_out = 0;
+      return Z_STREAM_END;
+    }
+    zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in));  // zlib's next_in is non-const, hence the const_cast.
+    zcontext_.avail_in = in_size;
+    if (first) {  // Lazy zlib initialization on the first chunk of input.
+      int windowBitsFormat = 0;
+      switch (format_) {
+        case GZIP: windowBitsFormat = 16; break;  // +16: gzip wrapper (see zlib inflateInit2 docs).
+        case AUTO: windowBitsFormat = 32; break;  // +32: auto-detect gzip or zlib header.
+        case ZLIB: windowBitsFormat = 0; break;  // +0: zlib wrapper.
+      }
+      int error = inflateInit2(&zcontext_,
+        /* windowBits */15 | windowBitsFormat);
+      if (error != Z_OK) {
+        return error;
+      }
+    }
+  }
+  zcontext_.next_out = static_cast<Bytef*>(output_buffer_);  // Every pass writes from the start of the buffer.
+  zcontext_.avail_out = output_buffer_length_;
+  output_position_ = output_buffer_;  // Nothing from this pass has been handed to the caller yet.
+  int error = inflate(&zcontext_, flush);
+  return error;
+}
+
+// Hands the caller every decompressed byte that has not been returned yet,
+// i.e. the range [output_position_, zcontext_.next_out), and then marks that
+// range as consumed.
+void GzipInputStream::DoNextOutput(const void** data, int* size) {
+  const uintptr_t unread_begin =
+      reinterpret_cast<uintptr_t>(output_position_);
+  const uintptr_t unread_end =
+      reinterpret_cast<uintptr_t>(zcontext_.next_out);
+  *data = output_position_;
+  *size = unread_end - unread_begin;
+  output_position_ = zcontext_.next_out;
+}
+
+// implements ZeroCopyInputStream ----------------------------------
+bool GzipInputStream::Next(const void** data, int* size) {  // Yields the next run of decompressed bytes; false on error or end of data.
+  bool ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END)
+      || (zerror_ == Z_BUF_ERROR);
+  if ((!ok) || (zcontext_.next_out == NULL)) {  // Earlier zlib error, or Inflate() already saw the sub-stream end.
+    return false;
+  }
+  if (zcontext_.next_out != output_position_) {  // Unread bytes remain in the buffer (e.g. after BackUp()): return those first.
+    DoNextOutput(data, size);
+    return true;
+  }
+  if (zerror_ == Z_STREAM_END) {  // Buffer drained and the compressed stream is finished.
+    *data = NULL;
+    *size = 0;
+    return false;
+  }
+  zerror_ = Inflate(Z_NO_FLUSH);
+  if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) {
+    // The underlying stream's Next returned false inside Inflate.
+    return false;
+  }
+  ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END)
+      || (zerror_ == Z_BUF_ERROR);
+  if (!ok) {
+    return false;
+  }
+  DoNextOutput(data, size);  // May report *size == 0 if this inflate() pass produced no output.
+  return true;
+}
+// Returns the last `count` bytes handed out by Next() to the unread region by
+// moving the read position backwards inside the output buffer.
+void GzipInputStream::BackUp(int count) {
+  uintptr_t position = reinterpret_cast<uintptr_t>(output_position_);
+  position -= count;
+  output_position_ = reinterpret_cast<void*>(position);
+}
+// Discards the next `count` decompressed bytes.
+//
+// Returns true if `count` bytes were skipped, false if Next() failed (error or
+// end of stream) before that many bytes were available.
+bool GzipInputStream::Skip(int count) {
+  const void* data;
+  // Next() does not write *size on every failure path, so start from a known
+  // value instead of reading an indeterminate one below.
+  int size = 0;
+  bool ok = Next(&data, &size);
+  while (ok && (size < count)) {
+    count -= size;
+    ok = Next(&data, &size);
+  }
+  // Give back the surplus of the last chunk, but only if that chunk was really
+  // produced: after a failed Next() there is nothing valid to back up over.
+  if (ok && (size > count)) {
+    BackUp(size - count);
+  }
+  return ok;
+}
+// Returns the number of decompressed bytes consumed by the caller so far.
+//
+// zcontext_.total_out counts every byte zlib has produced, including bytes
+// still sitting unread in [output_position_, next_out) after a BackUp().
+// Those pending bytes have not been consumed, so they are subtracted.
+int64 GzipInputStream::ByteCount() const {
+  int64 consumed = zcontext_.total_out;
+  // Inflate() sets next_out to NULL once the sub-stream is exhausted; the
+  // pointer difference is meaningless in that state, so skip the adjustment.
+  if (zcontext_.next_out != NULL) {
+    const uintptr_t pending =
+        reinterpret_cast<uintptr_t>(zcontext_.next_out) -
+        reinterpret_cast<uintptr_t>(output_position_);
+    consumed -= static_cast<int64>(pending);
+  }
+  return consumed;
+}
+
+// =========================================================================
+
+// Creates a compressing stream that writes its compressed output to
+// sub_stream.  buffer_size is the size in bytes of the staging buffer handed
+// out by Next(); -1 selects kDefaultBufferSize.  The outcome of
+// deflateInit2() is recorded in zerror_.
+GzipOutputStream::GzipOutputStream(
+    ZeroCopyOutputStream* sub_stream, Format format, int buffer_size)
+  : sub_stream_(sub_stream), sub_data_(NULL), sub_data_size_(0) {
+  input_buffer_length_ =
+      (buffer_size == -1) ? kDefaultBufferSize : buffer_size;
+  input_buffer_ = operator new(input_buffer_length_);
+  GOOGLE_CHECK(input_buffer_ != NULL);
+
+  // Default allocator hooks.
+  zcontext_.zalloc = Z_NULL;
+  zcontext_.zfree = Z_NULL;
+  zcontext_.opaque = Z_NULL;
+  // No input staged yet.
+  zcontext_.next_in = NULL;
+  zcontext_.avail_in = 0;
+  zcontext_.total_in = 0;
+  // No output buffer obtained from the sub-stream yet.
+  zcontext_.next_out = NULL;
+  zcontext_.avail_out = 0;
+  zcontext_.total_out = 0;
+  zcontext_.msg = NULL;
+
+  // Anything other than ZLIB is written in GZIP format.
+  const int windowBitsFormat = (format == ZLIB) ? 0 : 16;
+  zerror_ = deflateInit2(
+      &zcontext_,
+      Z_BEST_COMPRESSION,
+      Z_DEFLATED,
+      /* windowBits */15 | windowBitsFormat,
+      /* memLevel (default) */8,
+      Z_DEFAULT_STRATEGY);
+}
+// Finishes the compressed stream (Close() refuses to run a second time once
+// the stream is already closed) and then releases the staging buffer.
+GzipOutputStream::~GzipOutputStream() {
+  Close();
+  if (input_buffer_ == NULL) {
+    return;
+  }
+  operator delete(input_buffer_);
+}
+
+// private
+int GzipOutputStream::Deflate(int flush) {  // Compresses staged input into sub_stream_ buffers; returns the zlib status code.
+  int error = Z_OK;
+  do {
+    if ((sub_data_ == NULL) || (zcontext_.avail_out == 0)) {  // No output buffer held, or the current one is full.
+      bool ok = sub_stream_->Next(&sub_data_, &sub_data_size_);
+      if (!ok) {
+        sub_data_ = NULL;
+        sub_data_size_ = 0;
+        return Z_BUF_ERROR;  // Sub-stream failure is reported with zlib's "no progress possible" code.
+      }
+      GOOGLE_CHECK_GT(sub_data_size_, 0);
+      zcontext_.next_out = static_cast<Bytef*>(sub_data_);
+      zcontext_.avail_out = sub_data_size_;
+    }
+    error = deflate(&zcontext_, flush);
+  } while (error == Z_OK && zcontext_.avail_out == 0);  // A full output buffer means deflate() may have more to write.
+  if (((flush == Z_FULL_FLUSH) || (flush == Z_FINISH))
+      && (zcontext_.avail_out != sub_data_size_)) {
+    // Return the unused tail of the buffer so the sub-stream knows how much was really written.
+    sub_stream_->BackUp(zcontext_.avail_out);
+    // The buffer now belongs to the sub-stream again; fetch a new one next time.
+    sub_data_ = NULL;
+    sub_data_size_ = 0;
+  }
+  return error;
+}
+
+// implements ZeroCopyOutputStream ---------------------------------
+bool GzipOutputStream::Next(void** data, int* size) {  // Compresses what was staged so far, then hands out the whole staging buffer again.
+  if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) {  // Closed stream or earlier hard error.
+    return false;
+  }
+  if (zcontext_.avail_in != 0) {  // Data from the previous Next() is still waiting to be compressed.
+    zerror_ = Deflate(Z_NO_FLUSH);
+    if (zerror_ != Z_OK) {
+      return false;
+    }
+  }
+  if (zcontext_.avail_in == 0) {
+    // All input was consumed: reset the buffer and assume the caller fills it completely (BackUp() corrects that).
+    zcontext_.next_in = static_cast<Bytef*>(input_buffer_);
+    zcontext_.avail_in = input_buffer_length_;
+    *data = input_buffer_;
+    *size = input_buffer_length_;
+  } else {
+    // The loop in Deflate should consume all avail_in; note that *data and *size are left unset on this path.
+    GOOGLE_LOG(DFATAL) << "Deflate left bytes unconsumed";
+  }
+  return true;
+}
+// Declares that the last `count` bytes of the buffer returned by Next() were
+// not written, shrinking the amount of staged input accordingly.
+void GzipOutputStream::BackUp(int count) {
+  GOOGLE_CHECK_GE(zcontext_.avail_in, count);
+  zcontext_.avail_in = zcontext_.avail_in - count;
+}
+// Uncompressed bytes accepted so far: those zlib has already consumed plus
+// those still staged in the input buffer.
+int64 GzipOutputStream::ByteCount() const {
+  const int64 accepted = zcontext_.total_in + zcontext_.avail_in;
+  return accepted;
+}
+
+// Compresses all data written so far and pushes it to the sub-stream with a
+// zlib full flush.
+//
+// Returns true if the flush succeeded or there was nothing to flush, false
+// otherwise.
+bool GzipOutputStream::Flush() {
+  // One call is enough: Deflate() already loops while deflate() fills its
+  // output buffer.  Calling it again after a successful flush would only
+  // yield Z_BUF_ERROR and make a completed flush look like a failure.
+  zerror_ = Deflate(Z_FULL_FLUSH);
+  // Z_BUF_ERROR with no pending input and free output space means deflate()
+  // simply had nothing to do, which is not an error.
+  return (zerror_ == Z_OK) ||
+      (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 &&
+       zcontext_.avail_out != 0);
+}
+
+bool GzipOutputStream::Close() {  // Writes all remaining compressed data and tears down zlib; true if deflateEnd() succeeded.
+  if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) {  // Already closed (Z_STREAM_END) or in a hard error state.
+    return false;
+  }
+  do {
+    zerror_ = Deflate(Z_FINISH);
+  } while (zerror_ == Z_OK);  // Keep finishing until deflate() reports something other than Z_OK.
+  zerror_ = deflateEnd(&zcontext_);
+  bool ok = zerror_ == Z_OK;
+  zerror_ = Z_STREAM_END;  // Makes later Next()/Close() calls fail fast, including the Close() in the destructor.
+  return ok;
+}
+
+}  // namespace io
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // HAVE_ZLIB
--- a/src/google/protobuf/io/gzip_stream.h
+++ b/src/google/protobuf/io/gzip_stream.h
@ -0,0 +1,178 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2009 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: brianolson@google.com (Brian Olson)
+//  Based on original Protocol Buffers design by
+//  Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the definition for classes GzipInputStream and
+// GzipOutputStream.
+//
+// GzipInputStream decompresses data from an underlying
+// ZeroCopyInputStream and provides the decompressed data as a
+// ZeroCopyInputStream.
+//
+// GzipOutputStream is a ZeroCopyOutputStream that compresses data to
+// an underlying ZeroCopyOutputStream.
+
+#ifndef GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
+
+#include <zlib.h>
+
+#include <google/protobuf/io/zero_copy_stream.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// A ZeroCopyInputStream that reads compressed data through zlib
+class LIBPROTOBUF_EXPORT GzipInputStream : public ZeroCopyInputStream {
+ public:
+  // Selects which container format the constructor tells zlib to expect.
+  enum Format {
+    // zlib will autodetect whether the stream has a gzip or a zlib header.
+    AUTO = 0,
+
+    // GZIP streams have some extra header data for file attributes.
+    GZIP = 1,
+
+    // Simpler zlib stream format.
+    ZLIB = 2,
+  };
+
+  // buffer_size may be -1 to select the default of 64kB; format defaults to AUTO.
+  explicit GzipInputStream(
+      ZeroCopyInputStream* sub_stream,
+      Format format = AUTO,
+      int buffer_size = -1);
+  virtual ~GzipInputStream();
+
+  // Return last error message or NULL if no error.
+  inline const char* ZlibErrorMessage() const {
+    return zcontext_.msg;
+  }
+  inline int ZlibErrorCode() const {  // Most recent zlib status code recorded by this stream.
+    return zerror_;
+  }
+
+  // implements ZeroCopyInputStream ----------------------------------
+  bool Next(const void** data, int* size);
+  void BackUp(int count);
+  bool Skip(int count);
+  int64 ByteCount() const;
+
+ private:
+  Format format_;
+
+  ZeroCopyInputStream* sub_stream_;  // Source of compressed bytes.
+
+  z_stream zcontext_;  // zlib inflate state.
+  int zerror_;  // Last zlib status code.
+
+  void* output_buffer_;  // Buffer that receives decompressed bytes.
+  void* output_position_;  // Start of the bytes in output_buffer_ not yet returned by Next().
+  size_t output_buffer_length_;  // Size of output_buffer_ in bytes.
+
+  int Inflate(int flush);  // Runs one inflate() pass; returns the zlib status code.
+  void DoNextOutput(const void** data, int* size);  // Returns the unread bytes and marks them consumed.
+
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipInputStream);
+};
+
+
+class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {  // A ZeroCopyOutputStream that writes compressed data through zlib.
+ public:
+  // Selects which container format the stream writes.
+  enum Format {
+    // GZIP streams have some extra header data for file attributes.
+    GZIP = 1,
+
+    // Simpler zlib stream format.
+    ZLIB = 2,
+  };
+
+  // buffer_size may be -1 to select the default of 64kB; format defaults to GZIP.
+  explicit GzipOutputStream(
+      ZeroCopyOutputStream* sub_stream,
+      Format format = GZIP,
+      int buffer_size = -1);
+  virtual ~GzipOutputStream();
+
+  // Return last error message or NULL if no error.
+  inline const char* ZlibErrorMessage() const {
+    return zcontext_.msg;
+  }
+  inline int ZlibErrorCode() const {  // Most recent zlib status code recorded by this stream.
+    return zerror_;
+  }
+
+  // Flushes data written so far to zipped data in the underlying stream.
+  // It is the caller's responsibility to flush the underlying stream if
+  // necessary.
+  // Compression may be less efficient stopping and starting around flushes.
+  // Returns true if no error.
+  bool Flush();
+
+  // Writes out all data and closes the gzip stream.
+  // It is the caller's responsibility to close the underlying stream if
+  // necessary.
+  // Returns true if no error.
+  bool Close();
+
+  // implements ZeroCopyOutputStream ---------------------------------
+  bool Next(void** data, int* size);
+  void BackUp(int count);
+  int64 ByteCount() const;
+
+ private:
+  ZeroCopyOutputStream* sub_stream_;  // Destination for compressed bytes.
+  // Buffer most recently obtained from sub_stream_->Next(); NULL when none is held.
+  void* sub_data_;
+  int sub_data_size_;
+
+  z_stream zcontext_;  // zlib deflate state.
+  int zerror_;  // Last zlib status code.
+  void* input_buffer_;  // Staging buffer handed to callers by Next().
+  size_t input_buffer_length_;  // Size of input_buffer_ in bytes.
+
+  // Do some compression.
+  // Takes zlib flush mode.
+  // Returns zlib error code.
+  int Deflate(int flush);
+
+  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipOutputStream);
+};
+
+}  // namespace io
+}  // namespace protobuf
+}  // namespace google
+
+#endif  // GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
--- a/src/google/protobuf/io/gzip_stream_unittest.sh
+++ b/src/google/protobuf/io/gzip_stream_unittest.sh
@ -0,0 +1,44 @@
+#!/bin/sh -x
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2009 Google Inc.  All rights reserved.
+# http://code.google.com/p/protobuf/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: brianolson@google.com (Brian Olson)
+#
+# Test compatibility between command line gzip/gunzip binaries and
+# ZeroCopyStream versions.
+
+TESTFILE=Makefile
+
+(./zcgzip < ${TESTFILE} | gunzip | cmp - ${TESTFILE}) && \
+(gzip < ${TESTFILE} | ./zcgunzip | cmp - ${TESTFILE})
+
+# Result of "(cmd) && (cmd)" implicitly becomes result of this script
+# and thus the test.
--- a/src/google/protobuf/io/zero_copy_stream_impl.cc
+++ b/src/google/protobuf/io/zero_copy_stream_impl.cc
@ -528,6 +528,10 @@ bool FileOutputStream::Close() {
  return copying_output_.Close() && flush_succeeded;
 }

+bool FileOutputStream::Flush() {  // Forwards to impl_.Flush() and returns its result; the file is not closed.
+  return impl_.Flush();
+}
+
 bool FileOutputStream::Next(void** data, int* size) {
  return impl_.Next(data, size);
 }
--- a/src/google/protobuf/io/zero_copy_stream_impl.h
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@ -431,6 +431,11 @@ class LIBPROTOBUF_EXPORT FileOutputStream : public ZeroCopyOutputStream {
  // Even if an error occurs, the file descriptor is closed when this returns.
  bool Close();

+  // Flushes FileOutputStream's buffers but does not close the
+  // underlying file. No special measures are taken to ensure that
+  // underlying operating system file object is synchronized to disk.
+  bool Flush();
+
  // By default, the file descriptor is not closed when the stream is
  // destroyed.  Call SetCloseOnDelete(true) to change that.  WARNING:
  // This leaves no way for the caller to detect if close() fails.  If
--- a/src/google/protobuf/io/zero_copy_stream_unittest.cc
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@ -46,6 +46,8 @@
 //   "parametized tests" so that one set of tests can be used on all the
 //   implementations.

+#include "config.h"
+
 #ifdef _MSC_VER
 #include <io.h>
 #else
@ -59,6 +61,9 @@
 #include <sstream>

 #include <google/protobuf/io/zero_copy_stream_impl.h>
+#if HAVE_ZLIB
+#include <google/protobuf/io/gzip_stream.h>
+#endif

 #include <google/protobuf/stubs/common.h>
 #include <google/protobuf/testing/googletest.h>
@ -141,6 +146,8 @@ bool IoTest::WriteToOutput(ZeroCopyOutputStream* output,
  }
 }

+#define MAX_REPEATED_ZEROS 100
+
 int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) {
  uint8* out = reinterpret_cast<uint8*>(data);
  int out_size = size;
@ -148,11 +155,19 @@ int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) {
  const void* in;
  int in_size = 0;

+  int repeated_zeros = 0;
+
  while (true) {
    if (!input->Next(&in, &in_size)) {
      return size - out_size;
    }
-    EXPECT_GT(in_size, 0);
+    EXPECT_GT(in_size, -1);
+    if (in_size == 0) {
+      repeated_zeros++;
+    } else {
+      repeated_zeros = 0;
+    }
+    EXPECT_LT(repeated_zeros, MAX_REPEATED_ZEROS);

    if (out_size <= in_size) {
      memcpy(out, in, out_size);
@ -263,6 +278,95 @@ TEST_F(IoTest, ArrayIo) {
  }
 }

+#if HAVE_ZLIB
+TEST_F(IoTest, GzipIo) {  // GZIP round trip through memory for every combination of block sizes.
+  const int kBufferSize = 2*1024;
+  uint8* buffer = new uint8[kBufferSize];  // Holds the compressed bytes between the write and read phases.
+  for (int i = 0; i < kBlockSizeCount; i++) {  // i: block size of the underlying output stream.
+    for (int j = 0; j < kBlockSizeCount; j++) {  // j: block size of the underlying input stream.
+      for (int z = 0; z < kBlockSizeCount; z++) {  // z: buffer size of both gzip streams.
+        int gzip_buffer_size = kBlockSizes[z];
+        int size;
+        {
+          ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
+          GzipOutputStream gzout(
+              &output, GzipOutputStream::GZIP, gzip_buffer_size);
+          WriteStuff(&gzout);
+          gzout.Close();  // Must run before ByteCount() so all compressed bytes are in `output`.
+          size = output.ByteCount();
+        }
+        {
+          ArrayInputStream input(buffer, size, kBlockSizes[j]);
+          GzipInputStream gzin(
+              &input, GzipInputStream::GZIP, gzip_buffer_size);
+          ReadStuff(&gzin);
+        }
+      }
+    }
+  }
+  delete [] buffer;
+}
+
+TEST_F(IoTest, ZlibIo) {  // ZLIB-format round trip through memory for every combination of block sizes.
+  const int kBufferSize = 2*1024;
+  uint8* buffer = new uint8[kBufferSize];  // Holds the compressed bytes between the write and read phases.
+  for (int i = 0; i < kBlockSizeCount; i++) {  // i: block size of the underlying output stream.
+    for (int j = 0; j < kBlockSizeCount; j++) {  // j: block size of the underlying input stream.
+      for (int z = 0; z < kBlockSizeCount; z++) {  // z: buffer size of both gzip streams.
+        int gzip_buffer_size = kBlockSizes[z];
+        int size;
+        {
+          ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
+          GzipOutputStream gzout(
+              &output, GzipOutputStream::ZLIB, gzip_buffer_size);
+          WriteStuff(&gzout);
+          gzout.Close();  // Must run before ByteCount() so all compressed bytes are in `output`.
+          size = output.ByteCount();
+        }
+        {
+          ArrayInputStream input(buffer, size, kBlockSizes[j]);
+          GzipInputStream gzin(
+              &input, GzipInputStream::ZLIB, gzip_buffer_size);
+          ReadStuff(&gzin);
+        }
+      }
+    }
+  }
+  delete [] buffer;
+}
+
+TEST_F(IoTest, ZlibIoInputAutodetect) {  // An AUTO-format reader must accept both ZLIB and GZIP output.
+  const int kBufferSize = 2*1024;
+  uint8* buffer = new uint8[kBufferSize];  // Reused for both the ZLIB and the GZIP pass.
+  int size;
+  {  // Pass 1: write in ZLIB format.
+    ArrayOutputStream output(buffer, kBufferSize);
+    GzipOutputStream gzout(&output, GzipOutputStream::ZLIB);
+    WriteStuff(&gzout);
+    gzout.Close();
+    size = output.ByteCount();
+  }
+  {  // Pass 1: read it back with format autodetection.
+    ArrayInputStream input(buffer, size);
+    GzipInputStream gzin(&input, GzipInputStream::AUTO);
+    ReadStuff(&gzin);
+  }
+  {  // Pass 2: write in GZIP format.
+    ArrayOutputStream output(buffer, kBufferSize);
+    GzipOutputStream gzout(&output, GzipOutputStream::GZIP);
+    WriteStuff(&gzout);
+    gzout.Close();
+    size = output.ByteCount();
+  }
+  {  // Pass 2: read it back with format autodetection.
+    ArrayInputStream input(buffer, size);
+    GzipInputStream gzin(&input, GzipInputStream::AUTO);
+    ReadStuff(&gzin);
+  }
+  delete [] buffer;
+}
+#endif
+
 // There is no string input, only string output.  Also, it doesn't support
 // explicit block sizes.  So, we'll only run one test and we'll use
 // ArrayInput to read back the results.
@ -310,6 +414,41 @@ TEST_F(IoTest, FileIo) {
  }
 }

+#if HAVE_ZLIB
+TEST_F(IoTest, GzipFileIo) {  // Gzip round trip through a real file for every pair of file-stream block sizes.
+  string filename = TestTempDir() + "/zero_copy_stream_test_file";
+
+  for (int i = 0; i < kBlockSizeCount; i++) {  // i: block size of the file output stream.
+    for (int j = 0; j < kBlockSizeCount; j++) {  // j: block size of the file input stream.
+      // Make a temporary file.
+      int file =
+        open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0777);
+      ASSERT_GE(file, 0);
+      {
+        FileOutputStream output(file, kBlockSizes[i]);
+        GzipOutputStream gzout(&output);  // Default arguments: GZIP format, default buffer size.
+        WriteStuffLarge(&gzout);
+        gzout.Close();  // Finish the gzip stream first ...
+        output.Flush();  // ... then flush the file stream's own buffer.
+        EXPECT_EQ(0, output.GetErrno());
+      }
+
+      // Rewind.
+      ASSERT_NE(lseek(file, 0, SEEK_SET), (off_t)-1);
+
+      {
+        FileInputStream input(file, kBlockSizes[j]);
+        GzipInputStream gzin(&input);  // Default arguments: AUTO format, default buffer size.
+        ReadStuffLarge(&gzin);
+        EXPECT_EQ(0, input.GetErrno());
+      }
+
+      close(file);
+    }
+  }
+}
+#endif
+
 // MSVC raises various debugging exceptions if we try to use a file
 // descriptor of -1, defeating our tests below.  This class will disable
 // these debug assertions while in scope.
--- a/src/google/protobuf/testing/zcgunzip.cc
+++ b/src/google/protobuf/testing/zcgunzip.cc
@ -0,0 +1,73 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2009 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: brianolson@google.com (Brian Olson)
+//  Based on original Protocol Buffers design by
+//  Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Test program to verify that GzipInputStream is compatible with command line
+// gunzip or java.util.zip.GzipInputStream
+//
+// Reads gzip stream on standard input and writes decompressed data to standard
+// output.
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+
+#include <google/protobuf/io/gzip_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+using google::protobuf::io::FileInputStream;
+using google::protobuf::io::GzipInputStream;
+
+// Decompresses standard input to standard output.
+//
+// Returns 0 once the input stream reports end of data, or 1 if writing to
+// standard output fails.
+int main(int argc, const char** argv) {
+  FileInputStream fin(STDIN_FILENO);
+  GzipInputStream in(&fin);
+
+  while (true) {
+    const void* inptr;
+    int inlen;
+    if (!in.Next(&inptr, &inlen)) {
+      break;
+    }
+    // write() may legitimately accept fewer bytes than requested (e.g. when
+    // stdout is a pipe), so keep writing until the whole chunk is out.  The
+    // result is checked explicitly rather than with assert(), which would
+    // disappear in NDEBUG builds.
+    const char* cursor = static_cast<const char*>(inptr);
+    int remaining = inlen;
+    while (remaining > 0) {
+      int written = write(STDOUT_FILENO, cursor, remaining);
+      if (written <= 0) {
+        fprintf(stderr, "write unexpectedly returned %d.\n", written);
+        return 1;
+      }
+      cursor += written;
+      remaining -= written;
+    }
+  }
+
+  return 0;
+}
--- a/src/google/protobuf/testing/zcgzip.cc
+++ b/src/google/protobuf/testing/zcgzip.cc
@ -0,0 +1,79 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2009 Google Inc.  All rights reserved.
+// http://code.google.com/p/protobuf/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: brianolson@google.com (Brian Olson)
+//  Based on original Protocol Buffers design by
+//  Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Test program to verify that GzipOutputStream is compatible with command line
+// gzip or java.util.zip.GzipOutputStream
+//
+// Reads data on standard input and writes compressed gzip stream to standard
+// output.
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+
+#include <google/protobuf/io/gzip_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+using google::protobuf::io::FileOutputStream;
+using google::protobuf::io::GzipOutputStream;
+
+// Compresses standard input to standard output in gzip format.
+//
+// Returns 0 on success, or 1 if the output stream cannot provide a buffer or
+// reading standard input fails.  The gzip stream is finished by the
+// GzipOutputStream destructor when main() returns.
+int main(int argc, const char** argv) {
+  FileOutputStream fout(STDOUT_FILENO);
+  GzipOutputStream out(&fout);
+  int status = 0;
+
+  while (true) {
+    void* outptr = NULL;
+    int outlen = 0;
+    bool ok;
+    // Ask for a buffer until a non-empty one is returned.
+    do {
+      ok = out.Next(&outptr, &outlen);
+    } while (ok && (outlen <= 0));
+    if (!ok) {
+      // Without a valid buffer there is nothing safe to read into; reading
+      // anyway would use an invalid pointer and length.
+      fprintf(stderr, "GzipOutputStream::Next() failed.\n");
+      status = 1;
+      break;
+    }
+    const int readlen = read(STDIN_FILENO, outptr, outlen);
+    if (readlen <= 0) {
+      // Nothing was placed in the buffer: give all of it back.
+      out.BackUp(outlen);
+      if (readlen < 0) {
+        perror("read");
+        status = 1;
+      }
+      break;
+    }
+    if (readlen < outlen) {
+      // Give back the part of the buffer that read() did not fill.
+      out.BackUp(outlen - readlen);
+    }
+  }
+
+  return status;
+}