Convert scanner buffers to use standard character types.

R=marja@chromium.org
BUG=
Review URL: https://codereview.chromium.org/198583003

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19883 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2014-03-13 09:15:14 +00:00 · 2014-03-13 09:15:14 +00:00 · 619d812a02
commit 619d812a02
parent ee6b885d25
4 changed files with 71 additions and 64 deletions
--- a/src/parser.cc
+++ b/src/parser.cc
@ -3512,8 +3512,8 @@ class SingletonLogger : public ParserRecorder {
  };

  // Logs a symbol creation of a literal or identifier.
-  virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
-  virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { }
+  virtual void LogOneByteSymbol(int start, Vector<const uint8_t> literal) { }
+  virtual void LogTwoByteSymbol(int start, Vector<const uint16_t> literal) { }

  // Logs an error message and marks the log as containing an error.
  // Further logging will be ignored, and ExtractData will return a vector
--- a/src/preparse-data.h
+++ b/src/preparse-data.h
@ -52,8 +52,8 @@ class ParserRecorder {
                           StrictMode strict_mode) = 0;

  // Logs a symbol creation of a literal or identifier.
-  virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
-  virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { }
+  virtual void LogOneByteSymbol(int start, Vector<const uint8_t> literal) = 0;
+  virtual void LogTwoByteSymbol(int start, Vector<const uint16_t> literal) = 0;

  // Logs an error message and marks the log as containing an error.
  // Further logging will be ignored, and ExtractData will return a vector
@ -148,8 +148,8 @@ class FunctionLoggingParserRecorder : public ParserRecorder {
 class PartialParserRecorder : public FunctionLoggingParserRecorder {
 public:
  PartialParserRecorder() : FunctionLoggingParserRecorder() { }
-  virtual void LogAsciiSymbol(int start, Vector<const char> literal) { }
-  virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { }
+  virtual void LogOneByteSymbol(int start, Vector<const uint8_t> literal) { }
+  virtual void LogTwoByteSymbol(int start, Vector<const uint16_t> literal) { }
  virtual ~PartialParserRecorder() { }
  virtual Vector<unsigned> ExtractData();
  virtual int symbol_position() { return 0; }
@ -165,13 +165,13 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
  CompleteParserRecorder();
  virtual ~CompleteParserRecorder() { }

-  virtual void LogAsciiSymbol(int start, Vector<const char> literal) {
+  virtual void LogOneByteSymbol(int start, Vector<const uint8_t> literal) {
    if (!is_recording_) return;
    int hash = vector_hash(literal);
-    LogSymbol(start, hash, true, Vector<const byte>::cast(literal));
+    LogSymbol(start, hash, true, literal);
  }

-  virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) {
+  virtual void LogTwoByteSymbol(int start, Vector<const uint16_t> literal) {
    if (!is_recording_) return;
    int hash = vector_hash(literal);
    LogSymbol(start, hash, false, Vector<const byte>::cast(literal));
--- a/src/scanner.cc
+++ b/src/scanner.cc
@ -909,7 +909,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
  KEYWORD("yield", Token::YIELD)


-static Token::Value KeywordOrIdentifierToken(const char* input,
+static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
                                             int input_length,
                                             bool harmony_scoping,
                                             bool harmony_modules) {
@ -985,7 +985,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {
  literal.Complete();

  if (next_.literal_chars->is_one_byte()) {
-    Vector<const char> chars = next_.literal_chars->one_byte_literal();
+    Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
    return KeywordOrIdentifierToken(chars.start(),
                                    chars.length(),
                                    harmony_scoping_,
@ -1123,7 +1123,7 @@ Handle<String> Scanner::AllocateNextLiteralString(Isolate* isolate,
        Vector<const uint8_t>::cast(next_literal_one_byte_string()), tenured);
  } else {
    return isolate->factory()->NewStringFromTwoByte(
-          next_literal_utf16_string(), tenured);
+          next_literal_two_byte_string(), tenured);
  }
 }

@ -1131,10 +1131,10 @@ Handle<String> Scanner::AllocateNextLiteralString(Isolate* isolate,
 Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) {
  if (is_literal_one_byte()) {
    return isolate->factory()->InternalizeOneByteString(
-        Vector<const uint8_t>::cast(literal_one_byte_string()));
+        literal_one_byte_string());
  } else {
    return isolate->factory()->InternalizeTwoByteString(
-        literal_utf16_string());
+        literal_two_byte_string());
  }
 }

@ -1142,7 +1142,7 @@ Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) {
 double Scanner::DoubleValue() {
  ASSERT(is_literal_one_byte());
  return StringToDouble(
-      unicode_cache_, literal_one_byte_string(),
+      unicode_cache_, Vector<const char>::cast(literal_one_byte_string()),
      ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
 }

@ -1154,32 +1154,32 @@ int Scanner::FindNumber(DuplicateFinder* finder, int value) {

 int Scanner::FindSymbol(DuplicateFinder* finder, int value) {
  if (is_literal_one_byte()) {
-    return finder->AddAsciiSymbol(literal_one_byte_string(), value);
+    return finder->AddOneByteSymbol(literal_one_byte_string(), value);
  }
-  return finder->AddUtf16Symbol(literal_utf16_string(), value);
+  return finder->AddTwoByteSymbol(literal_two_byte_string(), value);
 }


 void Scanner::LogSymbol(ParserRecorder* log, int position) {
  if (is_literal_one_byte()) {
-    log->LogAsciiSymbol(position, literal_one_byte_string());
+    log->LogOneByteSymbol(position, literal_one_byte_string());
  } else {
-    log->LogUtf16Symbol(position, literal_utf16_string());
+    log->LogTwoByteSymbol(position, literal_two_byte_string());
  }
 }


-int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) {
-  return AddSymbol(Vector<const byte>::cast(key), true, value);
+int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
+  return AddSymbol(key, true, value);
 }


-int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) {
-  return AddSymbol(Vector<const byte>::cast(key), false, value);
+int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
+  return AddSymbol(Vector<const uint8_t>::cast(key), false, value);
 }


-int DuplicateFinder::AddSymbol(Vector<const byte> key,
+int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,
                               bool is_one_byte,
                               int value) {
  uint32_t hash = Hash(key, is_one_byte);
@ -1192,15 +1192,16 @@ int DuplicateFinder::AddSymbol(Vector<const byte> key,
 }


-int DuplicateFinder::AddNumber(Vector<const char> key, int value) {
+int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
  ASSERT(key.length() > 0);
  // Quick check for already being in canonical form.
  if (IsNumberCanonical(key)) {
-    return AddAsciiSymbol(key, value);
+    return AddOneByteSymbol(key, value);
  }

  int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
-  double double_value = StringToDouble(unicode_constants_, key, flags, 0.0);
+  double double_value = StringToDouble(
+      unicode_constants_, Vector<const char>::cast(key), flags, 0.0);
  int length;
  const char* string;
  if (!std::isfinite(double_value)) {
@ -1216,7 +1217,7 @@ int DuplicateFinder::AddNumber(Vector<const char> key, int value) {
 }


-bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) {
+bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
  // Test for a safe approximation of number literals that are already
  // in canonical form: max 15 digits, no leading zeroes, except an
  // integer part that is a single zero, and no trailing zeros below
@ -1235,7 +1236,7 @@ bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) {
  pos++;
  bool invalid_last_digit = true;
  while (pos < length) {
-    byte digit = number[pos] - '0';
+    uint8_t digit = number[pos] - '0';
    if (digit > '9' - '0') return false;
    invalid_last_digit = (digit == 0);
    pos++;
@ -1244,7 +1245,7 @@ bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) {
 }


-uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_one_byte) {
+uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
  // Primitive hash function, almost identical to the one used
  // for strings (except that it's seeded by the length and ASCII-ness).
  int length = key.length();
@ -1280,7 +1281,7 @@ bool DuplicateFinder::Match(void* first, void* second) {
 }


-byte* DuplicateFinder::BackupKey(Vector<const byte> bytes,
+byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
                                 bool is_one_byte) {
  uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
  backing_store_.StartSequence();
@ -1290,15 +1291,18 @@ byte* DuplicateFinder::BackupKey(Vector<const byte> bytes,
    if (one_byte_length >= (1 << 14)) {
      if (one_byte_length >= (1 << 21)) {
        if (one_byte_length >= (1 << 28)) {
-          backing_store_.Add(static_cast<byte>((one_byte_length >> 28) | 0x80));
+          backing_store_.Add(
+              static_cast<uint8_t>((one_byte_length >> 28) | 0x80));
        }
-        backing_store_.Add(static_cast<byte>((one_byte_length >> 21) | 0x80u));
+        backing_store_.Add(
+            static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));
      }
-      backing_store_.Add(static_cast<byte>((one_byte_length >> 14) | 0x80u));
+      backing_store_.Add(
+          static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));
    }
-    backing_store_.Add(static_cast<byte>((one_byte_length >> 7) | 0x80u));
+    backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
  }
-  backing_store_.Add(static_cast<byte>(one_byte_length & 0x7f));
+  backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

  backing_store_.AddBlock(bytes);
  return backing_store_.EndSequence().start();
--- a/src/scanner.h
+++ b/src/scanner.h
@ -120,8 +120,8 @@ class Utf16CharacterStream {
  virtual bool ReadBlock() = 0;
  virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;

-  const uc16* buffer_cursor_;
-  const uc16* buffer_end_;
+  const uint16_t* buffer_cursor_;
+  const uint16_t* buffer_end_;
  unsigned pos_;
 };

@ -169,32 +169,32 @@ class DuplicateFinder {
        backing_store_(16),
        map_(&Match) { }

-  int AddAsciiSymbol(Vector<const char> key, int value);
-  int AddUtf16Symbol(Vector<const uint16_t> key, int value);
+  int AddOneByteSymbol(Vector<const uint8_t> key, int value);
+  int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
  // Add a a number literal by converting it (if necessary)
  // to the string that ToString(ToNumber(literal)) would generate.
  // and then adding that string with AddAsciiSymbol.
  // This string is the actual value used as key in an object literal,
  // and the one that must be different from the other keys.
-  int AddNumber(Vector<const char> key, int value);
+  int AddNumber(Vector<const uint8_t> key, int value);

 private:
-  int AddSymbol(Vector<const byte> key, bool is_one_byte, int value);
+  int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
  // Backs up the key and its length in the backing store.
  // The backup is stored with a base 127 encoding of the
-  // length (plus a bit saying whether the string is ASCII),
+  // length (plus a bit saying whether the string is one byte),
  // followed by the bytes of the key.
-  byte* BackupKey(Vector<const byte> key, bool is_one_byte);
+  uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);

  // Compare two encoded keys (both pointing into the backing store)
  // for having the same base-127 encoded lengths and ASCII-ness,
  // and then having the same 'length' bytes following.
  static bool Match(void* first, void* second);
  // Creates a hash from a sequence of bytes.
-  static uint32_t Hash(Vector<const byte> key, bool is_one_byte);
+  static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
  // Checks whether a string containing a JS number is its canonical
  // form.
-  static bool IsNumberCanonical(Vector<const char> key);
+  static bool IsNumberCanonical(Vector<const uint8_t> key);

  // Size of buffer. Sufficient for using it to call DoubleToCString in
  // from conversions.h.
@ -230,10 +230,10 @@ class LiteralBuffer {
        position_ += kOneByteSize;
        return;
      }
-      ConvertToUtf16();
+      ConvertToTwoByte();
    }
    ASSERT(code_unit < 0x10000u);
-    *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit;
+    *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
    position_ += kUC16Size;
  }

@ -244,18 +244,18 @@ class LiteralBuffer {
        (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
  }

-  Vector<const uc16> utf16_literal() {
+  Vector<const uint16_t> two_byte_literal() {
    ASSERT(!is_one_byte_);
    ASSERT((position_ & 0x1) == 0);
-    return Vector<const uc16>(
-        reinterpret_cast<const uc16*>(backing_store_.start()),
+    return Vector<const uint16_t>(
+        reinterpret_cast<const uint16_t*>(backing_store_.start()),
        position_ >> 1);
  }

-  Vector<const char> one_byte_literal() {
+  Vector<const uint8_t> one_byte_literal() {
    ASSERT(is_one_byte_);
-    return Vector<const char>(
-        reinterpret_cast<const char*>(backing_store_.start()),
+    return Vector<const uint8_t>(
+        reinterpret_cast<const uint8_t*>(backing_store_.start()),
        position_);
  }

@ -286,7 +286,7 @@ class LiteralBuffer {
    backing_store_ = new_store;
  }

-  void ConvertToUtf16() {
+  void ConvertToTwoByte() {
    ASSERT(is_one_byte_);
    Vector<byte> new_store;
    int new_content_size = position_ * kUC16Size;
@ -298,7 +298,7 @@ class LiteralBuffer {
      new_store = backing_store_;
    }
    uint8_t* src = backing_store_.start();
-    uc16* dst = reinterpret_cast<uc16*>(new_store.start());
+    uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
    for (int i = position_ - 1; i >= 0; i--) {
      dst[i] = src[i];
    }
@ -408,7 +408,9 @@ class Scanner {
    if (is_literal_one_byte() &&
        literal_length() == length &&
        !literal_contains_escapes()) {
-      return !strncmp(literal_one_byte_string().start(), data, length);
+      const char* token =
+          reinterpret_cast<const char*>(literal_one_byte_string().start());
+      return !strncmp(token, data, length);
    }
    return false;
  }
@ -416,7 +418,8 @@ class Scanner {
    if (is_literal_one_byte() &&
        literal_length() == 3 &&
        !literal_contains_escapes()) {
-      const char* token = literal_one_byte_string().start();
+      const char* token =
+          reinterpret_cast<const char*>(literal_one_byte_string().start());
      *is_get = strncmp(token, "get", 3) == 0;
      *is_set = !*is_get && strncmp(token, "set", 3) == 0;
    }
@ -551,13 +554,13 @@ class Scanner {
  // numbers.
  // These functions only give the correct result if the literal
  // was scanned between calls to StartLiteral() and TerminateLiteral().
-  Vector<const char> literal_one_byte_string() {
+  Vector<const uint8_t> literal_one_byte_string() {
    ASSERT_NOT_NULL(current_.literal_chars);
    return current_.literal_chars->one_byte_literal();
  }
-  Vector<const uc16> literal_utf16_string() {
+  Vector<const uint16_t> literal_two_byte_string() {
    ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->utf16_literal();
+    return current_.literal_chars->two_byte_literal();
  }
  bool is_literal_one_byte() {
    ASSERT_NOT_NULL(current_.literal_chars);
@ -569,13 +572,13 @@ class Scanner {
  }
  // Returns the literal string for the next token (the token that
  // would be returned if Next() were called).
-  Vector<const char> next_literal_one_byte_string() {
+  Vector<const uint8_t> next_literal_one_byte_string() {
    ASSERT_NOT_NULL(next_.literal_chars);
    return next_.literal_chars->one_byte_literal();
  }
-  Vector<const uc16> next_literal_utf16_string() {
+  Vector<const uint16_t> next_literal_two_byte_string() {
    ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->utf16_literal();
+    return next_.literal_chars->two_byte_literal();
  }
  bool is_next_literal_one_byte() {
    ASSERT_NOT_NULL(next_.literal_chars);