From 4e43931eaf897d719bc718d5baf1ab84444fdfee Mon Sep 17 00:00:00 2001
From: Thomas Van Lenten <thomasvl@google.com>
Date: Mon, 6 Jul 2015 12:29:08 -0400
Subject: [PATCH] Add support for a file listing expected package to objc
 prefixes for validation.

- Add a env var to pass a set of expected prefixes for validation.
- Report warnings/errors based on the expected prefixes vs. the data in the files compiled.
- Use some helpers from common directory.
---
 objectivec/generate_descriptors_proto.sh      |   5 +-
 .../compiler/objectivec/objectivec_file.cc    |   3 -
 .../objectivec/objectivec_generator.cc        |   7 +-
 .../objectivec/objectivec_generator.h         |   1 +
 .../compiler/objectivec/objectivec_helpers.cc | 358 ++++++++++++++----
 .../compiler/objectivec/objectivec_helpers.h  |   8 +-
 6 files changed, 288 insertions(+), 94 deletions(-)

diff --git a/objectivec/generate_descriptors_proto.sh b/objectivec/generate_descriptors_proto.sh
index b3ecf3989..84ba07380 100755
--- a/objectivec/generate_descriptors_proto.sh
+++ b/objectivec/generate_descriptors_proto.sh
@@ -11,7 +11,6 @@ set -eu
 
 readonly ScriptDir=$(dirname "$(echo $0 | sed -e "s,^\([^/]\),$(pwd)/\1,")")
 readonly ProtoRootDir="${ScriptDir}/.."
-readonly ProtoC="${ProtoRootDir}/src/protoc"
 
 pushd "${ProtoRootDir}" > /dev/null
 
@@ -35,7 +34,7 @@ fi
 cd src
 make $@ protoc
 
-declare -a RUNTIME_PROTO_FILES=(\
+declare -a RUNTIME_PROTO_FILES=( \
   google/protobuf/any.proto \
   google/protobuf/api.proto \
   google/protobuf/descriptor.proto \
@@ -49,5 +48,3 @@ declare -a RUNTIME_PROTO_FILES=(\
   google/protobuf/wrappers.proto)
 
 ./protoc --objc_out="${ProtoRootDir}/objectivec" ${RUNTIME_PROTO_FILES[@]}
-
-popd > /dev/null
diff --git a/src/google/protobuf/compiler/objectivec/objectivec_file.cc b/src/google/protobuf/compiler/objectivec/objectivec_file.cc
index e60ae5a6e..184a84a37 100644
--- a/src/google/protobuf/compiler/objectivec/objectivec_file.cc
+++ b/src/google/protobuf/compiler/objectivec/objectivec_file.cc
@@ -54,9 +54,6 @@ FileGenerator::FileGenerator(const FileDescriptor *file)
     : file_(file),
       root_class_name_(FileClassName(file)),
       is_public_dep_(false) {
-  // Validate the objc prefix.
-  ValidateObjCClassPrefix(file_);
-
   for (int i = 0; i < file_->enum_type_count(); i++) {
     EnumGenerator *generator = new EnumGenerator(file_->enum_type(i));
     enum_generators_.push_back(generator);
diff --git a/src/google/protobuf/compiler/objectivec/objectivec_generator.cc b/src/google/protobuf/compiler/objectivec/objectivec_generator.cc
index 4449087a1..375b4e0f5 100644
--- a/src/google/protobuf/compiler/objectivec/objectivec_generator.cc
+++ b/src/google/protobuf/compiler/objectivec/objectivec_generator.cc
@@ -57,8 +57,13 @@ bool ObjectiveCGenerator::Generate(const FileDescriptor* file,
     return false;
   }
 
-  FileGenerator file_generator(file);
+  // Validate the objc prefix/package pairing.
+  if (!ValidateObjCClassPrefix(file, error)) {
+    // *error will have been filled in.
+    return false;
+  }
 
+  FileGenerator file_generator(file);
   string filepath = FilePath(file);
 
   // Generate header.
diff --git a/src/google/protobuf/compiler/objectivec/objectivec_generator.h b/src/google/protobuf/compiler/objectivec/objectivec_generator.h
index 24286ac92..09266b042 100644
--- a/src/google/protobuf/compiler/objectivec/objectivec_generator.h
+++ b/src/google/protobuf/compiler/objectivec/objectivec_generator.h
@@ -53,6 +53,7 @@ class LIBPROTOC_EXPORT ObjectiveCGenerator : public CodeGenerator {
  private:
   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ObjectiveCGenerator);
 };
+
 }  // namespace objectivec
 }  // namespace compiler
 }  // namespace protobuf
diff --git a/src/google/protobuf/compiler/objectivec/objectivec_helpers.cc b/src/google/protobuf/compiler/objectivec/objectivec_helpers.cc
index 45d122d16..b724d35c8 100644
--- a/src/google/protobuf/compiler/objectivec/objectivec_helpers.cc
+++ b/src/google/protobuf/compiler/objectivec/objectivec_helpers.cc
@@ -28,10 +28,18 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+#ifdef _MSC_VER
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
 #include <climits>
+#include <errno.h>
+#include <fcntl.h>
 #include <fstream>
 #include <iostream>
 #include <sstream>
+#include <stdlib.h>
 #include <vector>
 
 #include <google/protobuf/stubs/hash.h>
@@ -39,6 +47,7 @@
 #include <google/protobuf/io/coded_stream.h>
 #include <google/protobuf/io/zero_copy_stream_impl.h>
 #include <google/protobuf/descriptor.pb.h>
+#include <google/protobuf/stubs/common.h>
 #include <google/protobuf/stubs/strutil.h>
 
 // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
@@ -51,45 +60,6 @@ namespace objectivec {
 
 namespace {
 
-// islower()/isupper()/tolower()/toupper() change based on locale.
-//
-// src/google/protobuf/stubs/strutil.h:150 has the same pattern. For the
-// Objective C plugin, test failures were seen on TravisCI because isupper('A')
-// was coming back false for some server's locale.  This approach avoids any
-// such issues.
-
-bool IsLower(const char c) {
-  return ('a' <= c && c <= 'z');
-}
-
-bool IsUpper(const char c) {
-  return ('A' <= c && c <= 'Z');
-}
-
-char ToLower(char c) {
-  if ('A' <= c && c <= 'Z') {
-    c += 'a' - 'A';
-  }
-  return c;
-}
-
-// toupper() changes based on locale.  We don't want this!
-char ToUpper(char c) {
-  if ('a' <= c && c <= 'z') {
-    c += 'A' - 'a';
-  }
-  return c;
-}
-
-string TrimString(const string& s) {
-  string::size_type start = s.find_first_not_of(" \n\r\t");
-  if (start == string::npos) {
-    return "";
-  }
-  string::size_type end = s.find_last_not_of(" \n\r\t") + 1;
-  return s.substr(start, end - start);
-}
-
 hash_set<string> MakeWordsMap(const char* const words[], size_t num_words) {
   hash_set<string> result;
   for (int i = 0; i < num_words; i++) {
@@ -115,7 +85,7 @@ string UnderscoresToCamelCase(const string& input, bool first_capitalized) {
   bool last_char_was_upper = false;
   for (int i = 0; i < input.size(); i++) {
     char c = input[i];
-    if (c >= '0' && c <= '9') {
+    if (ascii_isdigit(c)) {
       if (!last_char_was_number) {
         values.push_back(current);
         current = "";
@@ -123,7 +93,7 @@ string UnderscoresToCamelCase(const string& input, bool first_capitalized) {
       current += c;
       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
       last_char_was_number = true;
-    } else if (IsLower(c)) {
+    } else if (ascii_islower(c)) {
       // lowercase letter can follow a lowercase or uppercase letter
       if (!last_char_was_lower && !last_char_was_upper) {
         values.push_back(current);
@@ -132,12 +102,12 @@ string UnderscoresToCamelCase(const string& input, bool first_capitalized) {
       current += c;  // already lower
       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
       last_char_was_lower = true;
-    } else if (IsUpper(c)) {
+    } else if (ascii_isupper(c)) {
       if (!last_char_was_upper) {
         values.push_back(current);
         current = "";
       }
-      current += ToLower(c);
+      current += ascii_tolower(c);
       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
       last_char_was_upper = true;
     } else {
@@ -151,7 +121,7 @@ string UnderscoresToCamelCase(const string& input, bool first_capitalized) {
     bool all_upper = (kUpperSegments.count(value) > 0);
     for (int j = 0; j < value.length(); j++) {
       if (j == 0 || all_upper) {
-        value[j] = ToUpper(value[j]);
+        value[j] = ascii_toupper(value[j]);
       } else {
         // Nothing, already in lower.
       }
@@ -163,7 +133,7 @@ string UnderscoresToCamelCase(const string& input, bool first_capitalized) {
     result += *i;
   }
   if ((result.length() != 0) && !first_capitalized) {
-    result[0] = ToLower(result[0]);
+    result[0] = ascii_tolower(result[0]);
   }
   return result;
 }
@@ -272,7 +242,7 @@ bool IsSpecialName(const string& name, const string* special_names,
         // If name is longer than the retained_name[i] that it matches
         // the next character must be not lower case (newton vs newTon vs
         // new_ton).
-        return !IsLower(name[length]);
+        return !ascii_islower(name[length]);
       } else {
         return true;
       }
@@ -342,30 +312,6 @@ string FileClassPrefix(const FileDescriptor* file) {
   return result;
 }
 
-void ValidateObjCClassPrefix(const FileDescriptor* file) {
-  string prefix = file->options().objc_class_prefix();
-  if (prefix.length() > 0) {
-    // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
-    // error cases, so it seems to be ok to use as a back door for errors.
-    if (!IsUpper(prefix[0])) {
-      cerr << endl
-           << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
-           << prefix << "\";' in '" << file->name() << "';"
-           << " it should start with a capital letter."
-           << endl;
-      cerr.flush();
-    }
-    if (prefix.length() < 3) {
-      cerr << endl
-           << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
-           << prefix << "\";' in '" << file->name() << "';"
-           << " Apple recommends they should be at least 3 characters long."
-           << endl;
-      cerr.flush();
-    }
-  }
-}
-
 string FileClassName(const FileDescriptor* file) {
   string name = FileClassPrefix(file);
   name += UnderscoresToCamelCase(StripProto(BaseFileName(file)), true);
@@ -453,10 +399,10 @@ string UnCamelCaseEnumShortName(const string& name) {
   string result;
   for (int i = 0; i < name.size(); i++) {
     char c = name[i];
-    if (i > 0 && c >= 'A' && c <= 'Z') {
+    if (i > 0 && ascii_isupper(c)) {
       result += '_';
     }
-    result += ToUpper(c);
+    result += ascii_toupper(c);
   }
   return result;
 }
@@ -487,7 +433,7 @@ string FieldNameCapitalized(const FieldDescriptor* field) {
   // name.
   string result = FieldName(field);
   if (result.length() > 0) {
-    result[0] = ToUpper(result[0]);
+    result[0] = ascii_toupper(result[0]);
   }
   return result;
 }
@@ -511,7 +457,7 @@ string OneofNameCapitalized(const OneofDescriptor* descriptor) {
   // Use the common handling and then up-case the first letter.
   string result = OneofName(descriptor);
   if (result.length() > 0) {
-    result[0] = ToUpper(result[0]);
+    result[0] = ascii_toupper(result[0]);
   }
   return result;
 }
@@ -526,8 +472,8 @@ string UnCamelCaseFieldName(const string& name, const FieldDescriptor* field) {
   }
   if (field->type() == FieldDescriptor::TYPE_GROUP) {
     if (worker.length() > 0) {
-      if (worker[0] >= 'a' && worker[0] <= 'z') {
-        worker[0] = ToUpper(worker[0]);
+      if (ascii_islower(worker[0])) {
+        worker[0] = ascii_toupper(worker[0]);
       }
     }
     return worker;
@@ -535,11 +481,11 @@ string UnCamelCaseFieldName(const string& name, const FieldDescriptor* field) {
     string result;
     for (int i = 0; i < worker.size(); i++) {
       char c = worker[i];
-      if (c >= 'A' && c <= 'Z') {
+      if (ascii_isupper(c)) {
         if (i > 0) {
           result += '_';
         }
-        result += ToLower(c);
+        result += ascii_tolower(c);
       } else {
         result += c;
       }
@@ -831,6 +777,252 @@ string BuildCommentsString(const SourceLocation& location) {
   return final_comments;
 }
 
+namespace {
+
+// Internal helper class that parses the expected package to prefix mappings
+// file.
+class Parser {
+ public:
+  Parser(map<string, string>* inout_package_to_prefix_map)
+      : prefix_map_(inout_package_to_prefix_map), line_(0) {}
+
+  // Parses a check of input, returning success/failure.
+  bool ParseChunk(StringPiece chunk);
+
+  // Should be called to finish parsing (after all input has been provided via
+  // ParseChunk()).  Returns success/failure.
+  bool Finish();
+
+  int last_line() const { return line_; }
+  string error_str() const { return error_str_; }
+
+ private:
+  bool ParseLoop();
+
+  map<string, string>* prefix_map_;
+  int line_;
+  string error_str_;
+  StringPiece p_;
+  string leftover_;
+};
+
+bool Parser::ParseChunk(StringPiece chunk) {
+  if (!leftover_.empty()) {
+    chunk.AppendToString(&leftover_);
+    p_ = StringPiece(leftover_);
+  } else {
+    p_ = chunk;
+  }
+  bool result = ParseLoop();
+  if (p_.empty()) {
+    leftover_.clear();
+  } else {
+    leftover_ = p_.ToString();
+  }
+  return result;
+}
+
+bool Parser::Finish() {
+  if (leftover_.empty()) {
+    return true;
+  }
+  // Force a newline onto the end to finish parsing.
+  p_ = StringPiece(leftover_ + "\n");
+  if (!ParseLoop()) {
+    return false;
+  }
+  return p_.empty();  // Everything used?
+}
+
+static bool ascii_isnewline(char c) { return c == '\n' || c == '\r'; }
+
+bool ReadLine(StringPiece* input, StringPiece* line) {
+  for (int len = 0; len < input->size(); ++len) {
+    if (ascii_isnewline((*input)[len])) {
+      *line = StringPiece(input->data(), len);
+      ++len;  // advance over the newline
+      *input = StringPiece(input->data() + len, input->size() - len);
+      return true;
+    }
+  }
+  return false;  // Ran out of input with no newline.
+}
+
+void TrimWhitespace(StringPiece* input) {
+  while (!input->empty() && ascii_isspace(*input->data())) {
+    input->remove_prefix(1);
+  }
+  while (!input->empty() && ascii_isspace((*input)[input->length() - 1])) {
+    input->remove_suffix(1);
+  }
+}
+
+void RemoveComment(StringPiece* input) {
+  int offset = input->find('#');
+  if (offset != StringPiece::npos) {
+    input->remove_suffix(input->length() - offset);
+  }
+}
+
+bool Parser::ParseLoop() {
+  StringPiece line;
+  while (ReadLine(&p_, &line)) {
+    ++line_;
+    RemoveComment(&line);
+    TrimWhitespace(&line);
+    if (line.size() == 0) {
+      continue;  // Blank line.
+    }
+    int offset = line.find('=');
+    if (offset == StringPiece::npos) {
+      error_str_ =
+          string("Line without equal sign: '") + line.ToString() + "'.";
+      return false;
+    }
+    StringPiece package(line, 0, offset);
+    StringPiece prefix(line, offset + 1, line.length() - offset - 1);
+    TrimWhitespace(&package);
+    TrimWhitespace(&prefix);
+    // Don't really worry about error checking the the package/prefix for
+    // being valid.  Assume the file is validated when it is created/edited.
+    (*prefix_map_)[package.ToString()] = prefix.ToString();
+  }
+  return true;
+}
+
+bool LoadExpectedPackagePrefixes(map<string, string>* prefix_map,
+                                 string* out_expect_file_path,
+                                 string* out_error) {
+  const char* file_path = getenv("GPB_OBJC_EXPECTED_PACKAGE_PREFIXES");
+  if (file_path == NULL) {
+    return true;
+  }
+
+  int fd;
+  do {
+    fd = open(file_path, O_RDONLY);
+  } while (fd < 0 && errno == EINTR);
+  if (fd < 0) {
+    *out_error =
+        string(file_path) + ":0:0: error: Unable to open." + strerror(errno);
+    return false;
+  }
+  io::FileInputStream file_stream(fd);
+  file_stream.SetCloseOnDelete(true);
+  *out_expect_file_path = file_path;
+
+  Parser parser(prefix_map);
+  const void* buf;
+  int buf_len;
+  while (file_stream.Next(&buf, &buf_len)) {
+    if (buf_len == 0) {
+      continue;
+    }
+
+    if (!parser.ParseChunk(StringPiece(static_cast<const char*>(buf), buf_len))) {
+      *out_error = string(file_path) + ":" + SimpleItoa(parser.last_line()) +
+                   ":0: error: " + parser.error_str();
+      return false;
+    }
+  }
+  return parser.Finish();
+}
+
+}  // namespace
+
+bool ValidateObjCClassPrefix(const FileDescriptor* file, string* out_error) {
+  const string prefix = file->options().objc_class_prefix();
+  const string package = file->package();
+
+  // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
+  // error cases, so it seems to be ok to use as a back door for warnings.
+
+  // First Check: Warning - if there is a prefix, ensure it is is a reasonable
+  // value according to Apple's rules.
+  if (prefix.length()) {
+    if (!ascii_isupper(prefix[0])) {
+      cerr << endl
+           << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
+           << prefix << "\";' in '" << file->name() << "';"
+           << " it should start with a capital letter." << endl;
+      cerr.flush();
+    }
+    if (prefix.length() < 3) {
+      cerr << endl
+           << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
+           << prefix << "\";' in '" << file->name() << "';"
+           << " Apple recommends they should be at least 3 characters long."
+           << endl;
+      cerr.flush();
+    }
+  }
+
+  // Load any expected package prefixes to validate against those.
+  map<string, string> expected_package_prefixes;
+  string expect_file_path;
+  if (!LoadExpectedPackagePrefixes(&expected_package_prefixes,
+                                   &expect_file_path, out_error)) {
+    return false;
+  }
+
+  // If there are no expected prefixes, out of here.
+  if (expected_package_prefixes.size() == 0) {
+    return true;
+  }
+
+  // Second Check: Error - See if there was an expected prefix for the package
+  // and report if it doesn't match.
+  map<string, string>::iterator package_match =
+      expected_package_prefixes.find(package);
+  if (package_match != expected_package_prefixes.end()) {
+    // There was an entry, and...
+    if (package_match->second == prefix) {
+      // ...it matches.  All good, out of here!
+      return true;
+    } else {
+      // ...it didn't match!
+      *out_error = "protoc:0: error: Expected 'option objc_class_prefix = \"" +
+                   package_match->second + "\";' in '" + file->name() + "'";
+      if (prefix.length()) {
+        *out_error += "; but found '" + prefix + "' instead";
+      }
+      *out_error += ".";
+      return false;
+    }
+  }
+
+  // Third Check: Error - If there was a prefix make sure it wasn't expected
+  // for a different package instead (overlap is allowed, but it has to be
+  // listed as an expected overlap).
+  if (prefix.length()) {
+    for (map<string, string>::iterator i = expected_package_prefixes.begin();
+         i != expected_package_prefixes.end(); ++i) {
+      if (i->second == prefix) {
+        *out_error =
+            "protoc:0: error: Found 'option objc_class_prefix = \"" + prefix +
+            "\";' in '" + file->name() +
+            "'; that prefix is already used for 'package " + i->first +
+            ";'. It can only be reused by listing it in the expected file (" +
+            expect_file_path + ").";
+        return false;  // Only report first usage of the prefix.
+      }
+    }
+  }
+
+  // Fourth Check: Warning - If there was a prefix, and it wasn't expected,
+  // issue a warning suggesting it gets added to the file.
+  if (prefix.length()) {
+    cerr << endl
+         << "protoc:0: warning: Found 'option objc_class_prefix = \"" << prefix
+         << "\";' in '" << file->name() << "';"
+         << " should you add it to the expected prefixes file ("
+         << expect_file_path << ")?" << endl;
+    cerr.flush();
+  }
+
+  return true;
+}
+
 void TextFormatDecodeData::AddString(int32 key,
                                      const string& input_for_decode,
                                      const string& desired_output) {
@@ -898,7 +1090,7 @@ class DecodeDataBuilder {
 
   void AddChar(const char desired) {
     ++segment_len_;
-    is_all_upper_ &= IsUpper(desired);
+    is_all_upper_ &= ascii_isupper(desired);
   }
 
   void Push() {
@@ -913,9 +1105,9 @@ class DecodeDataBuilder {
   bool AddFirst(const char desired, const char input) {
     if (desired == input) {
       op_ = kOpAsIs;
-    } else if (desired == ToUpper(input)) {
+    } else if (desired == ascii_toupper(input)) {
       op_ = kOpFirstUpper;
-    } else if (desired == ToLower(input)) {
+    } else if (desired == ascii_tolower(input)) {
       op_ = kOpFirstLower;
     } else {
       // Can't be transformed to match.
@@ -953,7 +1145,7 @@ bool DecodeDataBuilder::AddCharacter(const char desired, const char input) {
   if (desired == input) {
     // If we aren't transforming it, or we're upper casing it and it is
     // supposed to be uppercase; just add it to the segment.
-    if ((op_ != kOpAllUpper) || IsUpper(desired)) {
+    if ((op_ != kOpAllUpper) || ascii_isupper(desired)) {
       AddChar(desired);
       return true;
     }
@@ -965,7 +1157,7 @@ bool DecodeDataBuilder::AddCharacter(const char desired, const char input) {
 
   // If we need to uppercase, and everything so far has been uppercase,
   // promote op to AllUpper.
-  if ((desired == ToUpper(input)) && is_all_upper_) {
+  if ((desired == ascii_toupper(input)) && is_all_upper_) {
     op_ = kOpAllUpper;
     AddChar(desired);
     return true;
diff --git a/src/google/protobuf/compiler/objectivec/objectivec_helpers.h b/src/google/protobuf/compiler/objectivec/objectivec_helpers.h
index 10d51a349..072a2e57c 100644
--- a/src/google/protobuf/compiler/objectivec/objectivec_helpers.h
+++ b/src/google/protobuf/compiler/objectivec/objectivec_helpers.h
@@ -62,9 +62,6 @@ string FileName(const FileDescriptor* file);
 // declared in the proto package.
 string FilePath(const FileDescriptor* file);
 
-// Checks the prefix for a given file and outputs any warnings/errors needed.
-void ValidateObjCClassPrefix(const FileDescriptor* file);
-
 // Gets the name of the root class we'll generate in the file.  This class
 // is not meant for external consumption, but instead contains helpers that
 // the rest of the the classes need
@@ -145,6 +142,11 @@ string BuildFlagsString(const vector<string>& strings);
 
 string BuildCommentsString(const SourceLocation& location);
 
+// Checks the prefix for a given file and outputs any warnings needed, if
+// there are flat out errors, then out_error is filled in and the result is
+// false.
+bool ValidateObjCClassPrefix(const FileDescriptor* file, string *out_error);
+
 // Generate decode data needed for ObjC's GPBDecodeTextFormatName() to transform
 // the input into the the expected output.
 class LIBPROTOC_EXPORT TextFormatDecodeData {