Replace the use CharacterStreams in Heap::AllocateSymbolInternal and String::ComputeHash

R=yangguo@chromium.org BUG= Review URL: https://chromiumcodereview.appspot.com/11593007 Patch from Dan Carney <dcarney@google.com>. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13242 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2012-12-19 13:27:20 +00:00 · 2012-12-19 13:27:20 +00:00 · 9569b20db2
commit 9569b20db2
parent 362218a037
10 changed files with 435 additions and 363 deletions
--- a/src/heap-inl.h
+++ b/src/heap-inl.h
@ -98,12 +98,34 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
 }


+template<>
+bool inline Heap::IsOneByte(Vector<const char> str, int chars) {
+  // TODO(dcarney): incorporate Latin-1 check when Latin-1 is supported?
+  // ASCII only check.
+  return chars == str.length();
+}
+
+
+template<>
+bool inline Heap::IsOneByte(String* str, int chars) {
+  return str->IsOneByteRepresentation();
+}
+
+
 MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
                                  int chars,
                                  uint32_t hash_field) {
-  unibrow::Utf8InputBuffer<> buffer(str.start(),
-                                    static_cast<unsigned>(str.length()));
-  return AllocateInternalSymbol(&buffer, chars, hash_field);
+  if (IsOneByte(str, chars)) return AllocateAsciiSymbol(str, hash_field);
+  return AllocateInternalSymbol<false>(str, chars, hash_field);
+}
+
+
+template<typename T>
+MaybeObject* Heap::AllocateInternalSymbol(T t, int chars, uint32_t hash_field) {
+  if (IsOneByte(t, chars)) {
+    return AllocateInternalSymbol<true>(t, chars, hash_field);
+  }
+  return AllocateInternalSymbol<false>(t, chars, hash_field);
 }


--- a/src/heap.cc
+++ b/src/heap.cc
@ -3302,8 +3302,8 @@ static inline bool Between(uint32_t character, uint32_t from, uint32_t to) {

 MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString(
    Heap* heap,
-    uint32_t c1,
-    uint32_t c2) {
+    uint16_t c1,
+    uint16_t c2) {
  String* symbol;
  // Numeric strings have a different hash algorithm not known by
  // LookupTwoCharsSymbolIfExists, so we skip this step for such strings.
@ -3352,8 +3352,8 @@ MaybeObject* Heap::AllocateConsString(String* first, String* second) {
  // dictionary.  Check whether we already have the string in the symbol
  // table to prevent creation of many unneccesary strings.
  if (length == 2) {
-    unsigned c1 = first->Get(0);
-    unsigned c2 = second->Get(0);
+    uint16_t c1 = first->Get(0);
+    uint16_t c2 = second->Get(0);
    return MakeOrFindTwoCharacterString(this, c1, c2);
  }

@ -3467,8 +3467,8 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
    // Optimization for 2-byte strings often used as keys in a decompression
    // dictionary.  Check whether we already have the string in the symbol
    // table to prevent creation of many unneccesary strings.
-    unsigned c1 = buffer->Get(start);
-    unsigned c2 = buffer->Get(start + 1);
+    uint16_t c1 = buffer->Get(start);
+    uint16_t c2 = buffer->Get(start + 1);
    return MakeOrFindTwoCharacterString(this, c1, c2);
  }

@ -4624,27 +4624,88 @@ Map* Heap::SymbolMapForString(String* string) {
 }


-MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
+template<typename T>
+class AllocateInternalSymbolHelper {
+ public:
+  static void WriteOneByteData(T t, char* chars, int len);
+  static void WriteTwoByteData(T t, uint16_t* chars, int len);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper);
+};
+
+
+template<>
+class AllocateInternalSymbolHelper< Vector<const char> > {
+ public:
+  static inline void WriteOneByteData(Vector<const char> vector,
+                                      char* chars,
+                                      int len) {
+    // Only works for ascii.
+    ASSERT(vector.length() == len);
+    memcpy(chars, vector.start(), len);
+  }
+
+  static inline void WriteTwoByteData(Vector<const char> vector,
+                                      uint16_t* chars,
+                                      int len) {
+    const uint8_t* stream = reinterpret_cast<const uint8_t*>(vector.start());
+    unsigned stream_length = vector.length();
+    while (stream_length != 0) {
+      unsigned consumed = 0;
+      uint32_t c = unibrow::Utf8::ValueOf(stream, stream_length, &consumed);
+      ASSERT(c != unibrow::Utf8::kBadChar);
+      ASSERT(consumed <= stream_length);
+      stream_length -= consumed;
+      stream += consumed;
+      if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+        len -= 2;
+        if (len < 0) break;
+        *chars++ = unibrow::Utf16::LeadSurrogate(c);
+        *chars++ = unibrow::Utf16::TrailSurrogate(c);
+      } else {
+        len -= 1;
+        if (len < 0) break;
+        *chars++ = c;
+      }
+    }
+    ASSERT(stream_length == 0);
+    ASSERT(len == 0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper);
+};
+
+
+template<>
+class AllocateInternalSymbolHelper<String*> {
+ public:
+  static inline void WriteOneByteData(String* s, char* chars, int len) {
+    ASSERT(s->length() == len);
+    String::WriteToFlat(s, chars, 0, len);
+  }
+
+  static inline void WriteTwoByteData(String* s, uint16_t* chars, int len) {
+    ASSERT(s->length() == len);
+    String::WriteToFlat(s, chars, 0, len);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper<String*>);
+};
+
+
+template<bool is_one_byte, typename T>
+MaybeObject* Heap::AllocateInternalSymbol(T t,
                                          int chars,
                                          uint32_t hash_field) {
+  typedef AllocateInternalSymbolHelper<T> H;
  ASSERT(chars >= 0);
-  // Ensure the chars matches the number of characters in the buffer.
-  ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length());
-  // Determine whether the string is ASCII.
-  bool is_ascii = true;
-  while (buffer->has_more()) {
-    if (buffer->GetNext() > unibrow::Utf8::kMaxOneByteChar) {
-      is_ascii = false;
-      break;
-    }
-  }
-  buffer->Rewind();
-
  // Compute map and object size.
  int size;
  Map* map;

-  if (is_ascii) {
+  if (is_one_byte) {
    if (chars > SeqOneByteString::kMaxLength) {
      return Failure::OutOfMemoryException();
    }
@ -4674,21 +4735,26 @@ MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,

  ASSERT_EQ(size, answer->Size());

-  // Fill in the characters.
-  int i = 0;
-  while (i < chars) {
-    uint32_t character = buffer->GetNext();
-    if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
-      answer->Set(i++, unibrow::Utf16::LeadSurrogate(character));
-      answer->Set(i++, unibrow::Utf16::TrailSurrogate(character));
-    } else {
-      answer->Set(i++, character);
-    }
+  if (is_one_byte) {
+    H::WriteOneByteData(t, SeqOneByteString::cast(answer)->GetChars(), chars);
+  } else {
+    H::WriteTwoByteData(t, SeqTwoByteString::cast(answer)->GetChars(), chars);
  }
  return answer;
 }


+// Need explicit instantiations.
+template
+MaybeObject* Heap::AllocateInternalSymbol<true>(String*, int, uint32_t);
+template
+MaybeObject* Heap::AllocateInternalSymbol<false>(String*, int, uint32_t);
+template
+MaybeObject* Heap::AllocateInternalSymbol<false>(Vector<const char>,
+                                                 int,
+                                                 uint32_t);
+
+
 MaybeObject* Heap::AllocateRawOneByteString(int length,
                                            PretenureFlag pretenure) {
  if (length < 0 || length > SeqOneByteString::kMaxLength) {
--- a/src/heap.h
+++ b/src/heap.h
@ -764,12 +764,16 @@ class Heap {
        Vector<const uc16> str,
        uint32_t hash_field);

-  MUST_USE_RESULT MaybeObject* AllocateInternalSymbol(
-      unibrow::CharacterStream* buffer, int chars, uint32_t hash_field);
+  template<typename T>
+  static inline bool IsOneByte(T t, int chars);

-  MUST_USE_RESULT MaybeObject* AllocateExternalSymbol(
-      Vector<const char> str,
-      int chars);
+  template<typename T>
+  MUST_USE_RESULT inline MaybeObject* AllocateInternalSymbol(
+      T t, int chars, uint32_t hash_field);
+
+  template<bool is_one_byte, typename T>
+  MUST_USE_RESULT MaybeObject* AllocateInternalSymbol(
+      T t, int chars, uint32_t hash_field);

  // Allocates and partially initializes a String.  There are two String
  // encodings: ASCII and two byte.  These functions allocate a string of the
--- a/src/json-parser.h
+++ b/src/json-parser.h
@ -631,7 +631,17 @@ Handle<String> JsonParser<seq_ascii>::ScanJsonString() {
                                                        position_);
      }
      if (c0 < 0x20) return Handle<String>::null();
-      running_hash = StringHasher::AddCharacterCore(running_hash, c0);
+      if (static_cast<uint32_t>(c0) >
+          unibrow::Utf16::kMaxNonSurrogateCharCode) {
+        running_hash =
+            StringHasher::AddCharacterCore(running_hash,
+                                           unibrow::Utf16::LeadSurrogate(c0));
+        running_hash =
+            StringHasher::AddCharacterCore(running_hash,
+                                           unibrow::Utf16::TrailSurrogate(c0));
+      } else {
+        running_hash = StringHasher::AddCharacterCore(running_hash, c0);
+      }
      position++;
      if (position >= source_length_) return Handle<String>::null();
      c0 = seq_source_->SeqOneByteStringGet(position);
--- a/src/objects-inl.h
+++ b/src/objects-inl.h
@ -2584,8 +2584,7 @@ void String::Visit(

      case kConsStringTag | kOneByteStringTag:
      case kConsStringTag | kTwoByteStringTag:
-        string = cons_op.Operate(ConsString::cast(string), &offset, &type,
-            &length);
+        string = cons_op.Operate(string, &offset, &type, &length);
        if (string == NULL) return;
        slice_offset = offset;
        ASSERT(length == static_cast<unsigned>(string->length()));
@ -2777,6 +2776,11 @@ const uint16_t* ExternalTwoByteString::ExternalTwoByteStringGetData(
 }


+String* ConsStringNullOp::Operate(String*, unsigned*, int32_t*, unsigned*) {
+  return NULL;
+}
+
+
 unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) {
  return depth & kDepthMask;
 }
@ -2805,42 +2809,38 @@ void ConsStringIteratorOp::Pop() {
 }


-void ConsStringIteratorOp::Reset() {
-  depth_ = 0;
-  maximum_depth_ = 0;
-}
-
-
 bool ConsStringIteratorOp::HasMore() {
  return depth_ != 0;
 }


-bool ConsStringIteratorOp::ContinueOperation(ContinueResponse* response) {
+void ConsStringIteratorOp::Reset() {
+  depth_ = 0;
+}
+
+
+String* ConsStringIteratorOp::ContinueOperation(int32_t* type_out,
+                                                unsigned* length_out) {
  bool blew_stack;
-  int32_t type;
-  unsigned length;
-  String* string = NextLeaf(&blew_stack, &type, &length);
+  String* string = NextLeaf(&blew_stack, type_out, length_out);
  // String found.
  if (string != NULL) {
-    consumed_ += length;
-    response->string_ = string;
-    response->offset_ = 0;
-    response->length_ = length;
-    response->type_ = type;
-    return true;
+    // Verify output.
+    ASSERT(*length_out == static_cast<unsigned>(string->length()));
+    ASSERT(*type_out == string->map()->instance_type());
+    return string;
  }
  // Traversal complete.
-  if (!blew_stack) return false;
-  // Restart search.
-  Reset();
-  // TODO(dcarney) This is unnecessary.
-  // After a reset, we don't need a String::Visit
-  response->string_ = root_;
-  response->offset_ = consumed_;
-  response->length_ = root_length_;
-  response->type_ = root_type_;
-  return true;
+  if (!blew_stack) return NULL;
+  // Restart search from root.
+  unsigned offset_out;
+  string = Search(&offset_out, type_out, length_out);
+  // Verify output.
+  ASSERT(string == NULL || offset_out == 0);
+  ASSERT(string == NULL ||
+         *length_out == static_cast<unsigned>(string->length()));
+  ASSERT(string == NULL || *type_out == string->map()->instance_type());
+  return string;
 }


@ -2857,18 +2857,24 @@ StringCharacterStream::StringCharacterStream(
    end_(NULL),
    op_(op) {
  op->Reset();
-  String::Visit(string,
-      offset, *this, *op, string->map()->instance_type(), string->length());
+  int32_t type = string->map()->instance_type();
+  unsigned length = string->length();
+  String::Visit(string, offset, *this, *op, type, length);
 }


 bool StringCharacterStream::HasMore() {
  if (buffer8_ != end_) return true;
  if (!op_->HasMore()) return false;
-  ConsStringIteratorOp::ContinueResponse response;
-  if (!op_->ContinueOperation(&response)) return false;
-  String::Visit(response.string_,
-      response.offset_, *this, *op_, response.type_, response.length_);
+  unsigned length;
+  int32_t type;
+  String* string = op_->ContinueOperation(&type, &length);
+  if (string == NULL) return false;
+  ASSERT(!string->IsConsString());
+  ASSERT(string->length() != 0);
+  ConsStringNullOp null_op;
+  String::Visit(string, 0, *this, null_op, type, length);
+  ASSERT(buffer8_ != end_);
  return true;
 }

@ -5138,7 +5144,7 @@ bool StringHasher::has_trivial_hash() {
 }


-uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint32_t c) {
+uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint16_t c) {
  running_hash += c;
  running_hash += (running_hash << 10);
  running_hash ^= (running_hash >> 6);
@ -5157,66 +5163,62 @@ uint32_t StringHasher::GetHashCore(uint32_t running_hash) {
 }


-void StringHasher::AddCharacter(uint32_t c) {
-  if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
-    AddSurrogatePair(c);  // Not inlined.
-    return;
-  }
+void StringHasher::AddCharacter(uint16_t c) {
  // Use the Jenkins one-at-a-time hash function to update the hash
  // for the given character.
  raw_running_hash_ = AddCharacterCore(raw_running_hash_, c);
-  // Incremental array index computation.
-  if (is_array_index_) {
-    if (c < '0' || c > '9') {
+}
+
+
+bool StringHasher::UpdateIndex(uint16_t c) {
+  ASSERT(is_array_index_);
+  if (c < '0' || c > '9') {
+    is_array_index_ = false;
+    return false;
+  }
+  int d = c - '0';
+  if (is_first_char_) {
+    is_first_char_ = false;
+    if (c == '0' && length_ > 1) {
      is_array_index_ = false;
-    } else {
-      int d = c - '0';
-      if (is_first_char_) {
-        is_first_char_ = false;
-        if (c == '0' && length_ > 1) {
-          is_array_index_ = false;
-          return;
-        }
-      }
-      if (array_index_ > 429496729U - ((d + 2) >> 3)) {
-        is_array_index_ = false;
-      } else {
-        array_index_ = array_index_ * 10 + d;
+      return false;
+    }
+  }
+  if (array_index_ > 429496729U - ((d + 2) >> 3)) {
+    is_array_index_ = false;
+    return false;
+  }
+  array_index_ = array_index_ * 10 + d;
+  return true;
+}
+
+
+template<typename Char>
+inline void StringHasher::AddCharacters(const Char* chars, int length) {
+  ASSERT(sizeof(Char) == 1 || sizeof(Char) == 2);
+  int i = 0;
+  if (is_array_index_) {
+    for (; i < length; i++) {
+      AddCharacter(chars[i]);
+      if (!UpdateIndex(chars[i])) {
+        i++;
+        break;
      }
    }
  }
-}
-
-
-void StringHasher::AddCharacterNoIndex(uint32_t c) {
-  ASSERT(!is_array_index());
-  if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
-    AddSurrogatePairNoIndex(c);  // Not inlined.
-    return;
+  for (; i < length; i++) {
+    ASSERT(!is_array_index_);
+    AddCharacter(chars[i]);
  }
-  raw_running_hash_ = AddCharacterCore(raw_running_hash_, c);
-}
-
-
-uint32_t StringHasher::GetHash() {
-  // Get the calculated raw hash value and do some more bit ops to distribute
-  // the hash further. Ensure that we never return zero as the hash value.
-  return GetHashCore(raw_running_hash_);
 }


 template <typename schar>
-uint32_t HashSequentialString(const schar* chars, int length, uint32_t seed) {
+uint32_t StringHasher::HashSequentialString(const schar* chars,
+                                            int length,
+                                            uint32_t seed) {
  StringHasher hasher(length, seed);
-  if (!hasher.has_trivial_hash()) {
-    int i;
-    for (i = 0; hasher.is_array_index() && (i < length); i++) {
-      hasher.AddCharacter(chars[i]);
-    }
-    for (; i < length; i++) {
-      hasher.AddCharacterNoIndex(chars[i]);
-    }
-  }
+  if (!hasher.has_trivial_hash()) hasher.AddCharacters(chars, length);
  return hasher.GetHashField();
 }

--- a/src/objects.cc
+++ b/src/objects.cc
@ -7029,24 +7029,36 @@ void StringInputBuffer::Seek(unsigned pos) {
 }


-String* ConsStringIteratorOp::Operate(ConsString* cons_string,
-    unsigned* offset_out, int32_t* type_out, unsigned* length_out) {
-  ASSERT(*length_out == (unsigned)cons_string->length());
-  ASSERT(depth_ == 0);
-  // Push the root string.
-  PushLeft(cons_string);
+String* ConsStringIteratorOp::Operate(String* string,
+                                      unsigned* offset_out,
+                                      int32_t* type_out,
+                                      unsigned* length_out) {
+  ASSERT(string->IsConsString());
+  ConsString* cons_string = ConsString::cast(string);
+  // Set up search data.
  root_ = cons_string;
-  root_type_ = *type_out;
-  root_length_ = *length_out;
  consumed_ = *offset_out;
-  unsigned targetOffset = *offset_out;
+  // Now search.
+  return Search(offset_out, type_out, length_out);
+}
+
+
+String* ConsStringIteratorOp::Search(unsigned* offset_out,
+                                     int32_t* type_out,
+                                     unsigned* length_out) {
+  ConsString* cons_string = root_;
+  // Reset the stack, pushing the root string.
+  depth_ = 1;
+  maximum_depth_ = 1;
+  frames_[0] = cons_string;
+  const unsigned consumed = consumed_;
  unsigned offset = 0;
  while (true) {
    // Loop until the string is found which contains the target offset.
    String* string = cons_string->first();
    unsigned length = string->length();
    int32_t type;
-    if (targetOffset < offset + length) {
+    if (consumed < offset + length) {
      // Target offset is in the left branch.
      // Keep going if we're still in a ConString.
      type = string->map()->instance_type();
@ -7075,6 +7087,7 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,
      // Account for the possibility of an empty right leaf.
      // This happens only if we have asked for an offset outside the string.
      if (length == 0) {
+        // Reset depth so future operations will return null immediately.
        Reset();
        return NULL;
      }
@ -7085,9 +7098,8 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,
    }
    ASSERT(length != 0);
    // Adjust return values and exit.
-    unsigned innerOffset = targetOffset - offset;
-    consumed_ += length - innerOffset;
-    *offset_out = innerOffset;
+    consumed_ = offset + length;
+    *offset_out = consumed - offset;
    *type_out = type;
    *length_out = length;
    return string;
@ -7097,8 +7109,9 @@ String* ConsStringIteratorOp::Operate(ConsString* cons_string,
 }


-String* ConsStringIteratorOp::NextLeaf(
-    bool* blew_stack, int32_t* type_out, unsigned* length_out) {
+String* ConsStringIteratorOp::NextLeaf(bool* blew_stack,
+                                       int32_t* type_out,
+                                       unsigned* length_out) {
  while (true) {
    // Tree traversal complete.
    if (depth_ == 0) {
@ -7122,6 +7135,7 @@ String* ConsStringIteratorOp::NextLeaf(
      if (length == 0) continue;
      *length_out = length;
      *type_out = type;
+      consumed_ += length;
      return string;
    }
    cons_string = ConsString::cast(string);
@ -7134,8 +7148,11 @@ String* ConsStringIteratorOp::NextLeaf(
      type = string->map()->instance_type();
      if ((type & kStringRepresentationMask) != kConsStringTag) {
        AdjustMaximumDepth();
+        unsigned length = (unsigned) string->length();
+        ASSERT(length != 0);
+        *length_out = length;
        *type_out = type;
-        *length_out = string->length();
+        consumed_ += length;
        return string;
      }
      cons_string = ConsString::cast(string);
@ -7674,28 +7691,62 @@ bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
 }


+class IteratingStringHasher: public StringHasher {
+ public:
+  static inline uint32_t Hash(String* string, uint32_t seed) {
+    const unsigned len = static_cast<unsigned>(string->length());
+    IteratingStringHasher hasher(len, seed);
+    if (hasher.has_trivial_hash()) {
+      return hasher.GetHashField();
+    }
+    int32_t type = string->map()->instance_type();
+    ConsStringNullOp null_op;
+    String::Visit(string, 0, hasher, null_op, type, len);
+    // Flat strings terminate immediately.
+    if (hasher.consumed_ == len) {
+      ASSERT(!string->IsConsString());
+      return hasher.GetHashField();
+    }
+    ASSERT(string->IsConsString());
+    // This is a ConsString, iterate across it.
+    ConsStringIteratorOp op;
+    unsigned offset = 0;
+    unsigned leaf_length = len;
+    string = op.Operate(string, &offset, &type, &leaf_length);
+    while (true) {
+      ASSERT(hasher.consumed_ < len);
+      String::Visit(string, 0, hasher, null_op, type, leaf_length);
+      if (hasher.consumed_ == len) break;
+      string = op.ContinueOperation(&type, &leaf_length);
+      // This should be taken care of by the length check.
+      ASSERT(string != NULL);
+    }
+    return hasher.GetHashField();
+  }
+  inline void VisitOneByteString(const uint8_t* chars, unsigned length) {
+    AddCharacters(chars, static_cast<int>(length));
+    consumed_ += length;
+  }
+  inline void VisitTwoByteString(const uint16_t* chars, unsigned length) {
+    AddCharacters(chars, static_cast<int>(length));
+    consumed_ += length;
+  }
+
+ private:
+  inline IteratingStringHasher(int len, uint32_t seed)
+    : StringHasher(len, seed),
+      consumed_(0) {}
+  unsigned consumed_;
+  DISALLOW_COPY_AND_ASSIGN(IteratingStringHasher);
+};
+
+
 uint32_t String::ComputeAndSetHash() {
  // Should only be called if hash code has not yet been computed.
  ASSERT(!HasHashCode());

-  const int len = length();
-
-  // Compute the hash code.
-  uint32_t field = 0;
-  if (StringShape(this).IsSequentialAscii()) {
-    field = HashSequentialString(SeqOneByteString::cast(this)->GetChars(),
-                                 len,
-                                 GetHeap()->HashSeed());
-  } else if (StringShape(this).IsSequentialTwoByte()) {
-    field = HashSequentialString(SeqTwoByteString::cast(this)->GetChars(),
-                                 len,
-                                 GetHeap()->HashSeed());
-  } else {
-    StringInputBuffer buffer(this);
-    field = ComputeHashField(&buffer, len, GetHeap()->HashSeed());
-  }
-
  // Store the hash code in the object.
+  uint32_t field = IteratingStringHasher::Hash(this, GetHeap()->HashSeed());
  set_hash_field(field);

  // Check the hash code is there.
@ -7799,57 +7850,59 @@ uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, int length) {
 }


-void StringHasher::AddSurrogatePair(uc32 c) {
-  uint16_t lead = unibrow::Utf16::LeadSurrogate(c);
-  AddCharacter(lead);
-  uint16_t trail = unibrow::Utf16::TrailSurrogate(c);
-  AddCharacter(trail);
-}
-
-
-void StringHasher::AddSurrogatePairNoIndex(uc32 c) {
-  uint16_t lead = unibrow::Utf16::LeadSurrogate(c);
-  AddCharacterNoIndex(lead);
-  uint16_t trail = unibrow::Utf16::TrailSurrogate(c);
-  AddCharacterNoIndex(trail);
-}
-
-
 uint32_t StringHasher::GetHashField() {
  if (length_ <= String::kMaxHashCalcLength) {
-    if (is_array_index()) {
-      return MakeArrayIndexHash(array_index(), length_);
+    if (is_array_index_) {
+      return MakeArrayIndexHash(array_index_, length_);
    }
-    return (GetHash() << String::kHashShift) | String::kIsNotArrayIndexMask;
+    return (GetHashCore(raw_running_hash_) << String::kHashShift) |
+           String::kIsNotArrayIndexMask;
  } else {
    return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask;
  }
 }


-uint32_t String::ComputeHashField(unibrow::CharacterStream* buffer,
-                                  int length,
-                                  uint32_t seed) {
+uint32_t StringHasher::ComputeHashField(unibrow::CharacterStream* buffer,
+                                        int length,
+                                        uint32_t seed) {
+  typedef unibrow::Utf16 u;
  StringHasher hasher(length, seed);
-
  // Very long strings have a trivial hash that doesn't inspect the
  // string contents.
  if (hasher.has_trivial_hash()) {
    return hasher.GetHashField();
  }
-
  // Do the iterative array index computation as long as there is a
  // chance this is an array index.
-  while (buffer->has_more() && hasher.is_array_index()) {
-    hasher.AddCharacter(buffer->GetNext());
+  if (hasher.is_array_index_) {
+    while (buffer->has_more()) {
+      uint32_t c = buffer->GetNext();
+      if (c > u::kMaxNonSurrogateCharCode) {
+        uint16_t c1 = u::LeadSurrogate(c);
+        uint16_t c2 = u::TrailSurrogate(c);
+        hasher.AddCharacter(c1);
+        hasher.AddCharacter(c2);
+        if (!hasher.UpdateIndex(c1)) break;
+        if (!hasher.UpdateIndex(c2)) break;
+      } else {
+        hasher.AddCharacter(c);
+        if (!hasher.UpdateIndex(c)) break;
+      }
+    }
  }
-
  // Process the remaining characters without updating the array
  // index.
  while (buffer->has_more()) {
-    hasher.AddCharacterNoIndex(buffer->GetNext());
+    ASSERT(!hasher.is_array_index_);
+    uint32_t c = buffer->GetNext();
+    if (c > u::kMaxNonSurrogateCharCode) {
+      hasher.AddCharacter(u::LeadSurrogate(c));
+      hasher.AddCharacter(u::TrailSurrogate(c));
+    } else {
+      hasher.AddCharacter(c);
+    }
  }
-
  return hasher.GetHashField();
 }

@ -11667,7 +11720,7 @@ class Utf8SymbolKey : public HashTableKey {
    unibrow::Utf8InputBuffer<> buffer(string_.start(),
                                      static_cast<unsigned>(string_.length()));
    chars_ = buffer.Utf16Length();
-    hash_field_ = String::ComputeHashField(&buffer, chars_, seed_);
+    hash_field_ = StringHasher::ComputeHashField(&buffer, chars_, seed_);
    uint32_t result = hash_field_ >> String::kHashShift;
    ASSERT(result != 0);  // Ensure that the hash value of 0 is never computed.
    return result;
@ -11697,29 +11750,9 @@ class SequentialSymbolKey : public HashTableKey {
      : string_(string), hash_field_(0), seed_(seed) { }

  uint32_t Hash() {
-    StringHasher hasher(string_.length(), seed_);
-
-    // Very long strings have a trivial hash that doesn't inspect the
-    // string contents.
-    if (hasher.has_trivial_hash()) {
-      hash_field_ = hasher.GetHashField();
-    } else {
-      int i = 0;
-      // Do the iterative array index computation as long as there is a
-      // chance this is an array index.
-      while (i < string_.length() && hasher.is_array_index()) {
-        hasher.AddCharacter(static_cast<uc32>(string_[i]));
-        i++;
-      }
-
-      // Process the remaining characters without updating the array
-      // index.
-      while (i < string_.length()) {
-        hasher.AddCharacterNoIndex(static_cast<uc32>(string_[i]));
-        i++;
-      }
-      hash_field_ = hasher.GetHashField();
-    }
+    hash_field_ = StringHasher::HashSequentialString<Char>(string_.start(),
+                                                           string_.length(),
+                                                           seed_);

    uint32_t result = hash_field_ >> String::kHashShift;
    ASSERT(result != 0);  // Ensure that the hash value of 0 is never computed.
@ -11764,32 +11797,9 @@ class SubStringAsciiSymbolKey : public HashTableKey {
  uint32_t Hash() {
    ASSERT(length_ >= 0);
    ASSERT(from_ + length_ <= string_->length());
-    StringHasher hasher(length_, string_->GetHeap()->HashSeed());
-
-    // Very long strings have a trivial hash that doesn't inspect the
-    // string contents.
-    if (hasher.has_trivial_hash()) {
-      hash_field_ = hasher.GetHashField();
-    } else {
-      int i = 0;
-      // Do the iterative array index computation as long as there is a
-      // chance this is an array index.
-      while (i < length_ && hasher.is_array_index()) {
-        hasher.AddCharacter(static_cast<uc32>(
-            string_->SeqOneByteStringGet(i + from_)));
-        i++;
-      }
-
-      // Process the remaining characters without updating the array
-      // index.
-      while (i < length_) {
-        hasher.AddCharacterNoIndex(static_cast<uc32>(
-            string_->SeqOneByteStringGet(i + from_)));
-        i++;
-      }
-      hash_field_ = hasher.GetHashField();
-    }
-
+    char* chars = string_->GetChars() + from_;
+    hash_field_ = StringHasher::HashSequentialString(
+        chars, length_, string_->GetHeap()->HashSeed());
    uint32_t result = hash_field_ >> String::kHashShift;
    ASSERT(result != 0);  // Ensure that the hash value of 0 is never computed.
    return result;
@ -11864,8 +11874,7 @@ class SymbolKey : public HashTableKey {
      return string_;
    }
    // Otherwise allocate a new symbol.
-    StringInputBuffer buffer(string_);
-    return heap->AllocateInternalSymbol(&buffer,
+    return heap->AllocateInternalSymbol(string_,
                                        string_->length(),
                                        string_->hash_field());
  }
@ -12635,7 +12644,7 @@ MaybeObject* SymbolTable::LookupString(String* string, Object** s) {
 // algorithm.
 class TwoCharHashTableKey : public HashTableKey {
 public:
-  TwoCharHashTableKey(uint32_t c1, uint32_t c2, uint32_t seed)
+  TwoCharHashTableKey(uint16_t c1, uint16_t c2, uint32_t seed)
    : c1_(c1), c2_(c2) {
    // Char 1.
    uint32_t hash = seed;
@ -12651,17 +12660,17 @@ class TwoCharHashTableKey : public HashTableKey {
    hash ^= hash >> 11;
    hash += hash << 15;
    if ((hash & String::kHashBitMask) == 0) hash = StringHasher::kZeroHash;
+    hash_ = hash;
 #ifdef DEBUG
-    StringHasher hasher(2, seed);
-    hasher.AddCharacter(c1);
-    hasher.AddCharacter(c2);
    // If this assert fails then we failed to reproduce the two-character
    // version of the string hashing algorithm above.  One reason could be
    // that we were passed two digits as characters, since the hash
    // algorithm is different in that case.
-    ASSERT_EQ(static_cast<int>(hasher.GetHash()), static_cast<int>(hash));
+    uint16_t chars[2] = {c1, c2};
+    uint32_t check_hash = StringHasher::HashSequentialString(chars, 2, seed);
+    hash = (hash << String::kHashShift) | String::kIsNotArrayIndexMask;
+    ASSERT_EQ(static_cast<int32_t>(hash), static_cast<int32_t>(check_hash));
 #endif
-    hash_ = hash;
  }

  bool IsMatch(Object* o) {
@ -12686,8 +12695,8 @@ class TwoCharHashTableKey : public HashTableKey {
  }

 private:
-  uint32_t c1_;
-  uint32_t c2_;
+  uint16_t c1_;
+  uint16_t c2_;
  uint32_t hash_;
 };

@ -12706,8 +12715,8 @@ bool SymbolTable::LookupSymbolIfExists(String* string, String** symbol) {
 }


-bool SymbolTable::LookupTwoCharsSymbolIfExists(uint32_t c1,
-                                               uint32_t c2,
+bool SymbolTable::LookupTwoCharsSymbolIfExists(uint16_t c1,
+                                               uint16_t c2,
                                               String** symbol) {
  TwoCharHashTableKey key(c1, c2, GetHeap()->HashSeed());
  int entry = FindEntry(&key);
--- a/src/objects.h
+++ b/src/objects.h
@ -3043,7 +3043,7 @@ class SymbolTable: public HashTable<SymbolTableShape, HashTableKey*> {
  // true if it is found, assigning the symbol to the given output
  // parameter.
  bool LookupSymbolIfExists(String* str, String** symbol);
-  bool LookupTwoCharsSymbolIfExists(uint32_t c1, uint32_t c2, String** symbol);
+  bool LookupTwoCharsSymbolIfExists(uint16_t c1, uint16_t c2, String** symbol);

  // Casting.
  static inline SymbolTable* cast(Object* obj);
@ -6929,30 +6929,14 @@ class StringHasher {
 public:
  explicit inline StringHasher(int length, uint32_t seed);

-  // Returns true if the hash of this string can be computed without
-  // looking at the contents.
-  inline bool has_trivial_hash();
+  template <typename schar>
+  static inline uint32_t HashSequentialString(const schar* chars,
+                                              int length,
+                                              uint32_t seed);

-  // Add a character to the hash and update the array index calculation.
-  inline void AddCharacter(uint32_t c);
-
-  // Adds a character to the hash but does not update the array index
-  // calculation.  This can only be called when it has been verified
-  // that the input is not an array index.
-  inline void AddCharacterNoIndex(uint32_t c);
-
-  // Add a character above 0xffff as a surrogate pair.  These can get into
-  // the hasher through the routines that take a UTF-8 string and make a symbol.
-  void AddSurrogatePair(uc32 c);
-  void AddSurrogatePairNoIndex(uc32 c);
-
-  // Returns the value to store in the hash field of a string with
-  // the given length and contents.
-  uint32_t GetHashField();
-
-  // Returns true if the characters seen so far make up a legal array
-  // index.
-  bool is_array_index() { return is_array_index_; }
+  static uint32_t ComputeHashField(unibrow::CharacterStream* buffer,
+                                   int length,
+                                   uint32_t seed);

  // Calculated hash value for a string consisting of 1 to
  // String::kMaxArrayIndexSize digits with no leading zeros (except "0").
@ -6964,51 +6948,36 @@ class StringHasher {
  // use 27 instead.
  static const int kZeroHash = 27;

- private:
-  uint32_t array_index() {
-    ASSERT(is_array_index());
-    return array_index_;
-  }
-
-  inline uint32_t GetHash();
-
  // Reusable parts of the hashing algorithm.
-  INLINE(static uint32_t AddCharacterCore(uint32_t running_hash, uint32_t c));
+  INLINE(static uint32_t AddCharacterCore(uint32_t running_hash, uint16_t c));
  INLINE(static uint32_t GetHashCore(uint32_t running_hash));

+ protected:
+  // Returns the value to store in the hash field of a string with
+  // the given length and contents.
+  uint32_t GetHashField();
+  // Returns true if the hash of this string can be computed without
+  // looking at the contents.
+  inline bool has_trivial_hash();
+  // Adds a block of characters to the hash.
+  template<typename Char>
+  inline void AddCharacters(const Char* chars, int len);
+
+ private:
+  // Add a character to the hash.
+  inline void AddCharacter(uint16_t c);
+  // Update index. Returns true if string is still an index.
+  inline bool UpdateIndex(uint16_t c);
+
  int length_;
  uint32_t raw_running_hash_;
  uint32_t array_index_;
  bool is_array_index_;
  bool is_first_char_;
-  friend class TwoCharHashTableKey;
-
-  template <bool seq_ascii> friend class JsonParser;
+  DISALLOW_COPY_AND_ASSIGN(StringHasher);
 };


-class IncrementalAsciiStringHasher {
- public:
-  explicit inline IncrementalAsciiStringHasher(uint32_t seed, char first_char);
-  inline void AddCharacter(uc32 c);
-  inline uint32_t GetHash();
-
- private:
-  int length_;
-  uint32_t raw_running_hash_;
-  uint32_t array_index_;
-  bool is_array_index_;
-  char first_char_;
-};
-
-
-// Calculates string hash.
-template <typename schar>
-inline uint32_t HashSequentialString(const schar* chars,
-                                     int length,
-                                     uint32_t seed);
-
-
 // The characteristics of a string are stored in its map.  Retrieving these
 // few bits of information is moderately expensive, involving two memory
 // loads where the second is dependent on the first.  To improve efficiency
@ -7227,10 +7196,6 @@ class String: public HeapObject {
  // Returns a hash value used for the property table
  inline uint32_t Hash();

-  static uint32_t ComputeHashField(unibrow::CharacterStream* buffer,
-                                   int length,
-                                   uint32_t seed);
-
  static bool ComputeArrayIndex(unibrow::CharacterStream* buffer,
                                uint32_t* index,
                                int length);
@ -7870,22 +7835,30 @@ class StringInputBuffer: public unibrow::InputBuffer<String, String*, 1024> {
 };


+// A ConsStringOp that returns null.
+// Useful when the operation to apply on a ConsString
+// requires an expensive data structure.
+class ConsStringNullOp {
+ public:
+  inline ConsStringNullOp() {}
+  static inline String* Operate(String*, unsigned*, int32_t*, unsigned*);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ConsStringNullOp);
+};
+
+
 // This maintains an off-stack representation of the stack frames required
 // to traverse a ConsString, allowing an entirely iterative and restartable
 // traversal of the entire string
 // Note: this class is not GC-safe.
 class ConsStringIteratorOp {
 public:
-  struct ContinueResponse {
-    String* string_;
-    unsigned offset_;
-    unsigned length_;
-    int32_t type_;
-  };
  inline ConsStringIteratorOp() {}
-  String* Operate(ConsString* cons_string, unsigned* offset_out,
-      int32_t* type_out, unsigned* length_out);
-  inline bool ContinueOperation(ContinueResponse* response);
+  String* Operate(String* string,
+                  unsigned* offset_out,
+                  int32_t* type_out,
+                  unsigned* length_out);
+  inline String* ContinueOperation(int32_t* type_out, unsigned* length_out);
  inline void Reset();
  inline bool HasMore();

@ -7902,6 +7875,9 @@ class ConsStringIteratorOp {
  inline void AdjustMaximumDepth();
  inline void Pop();
  String* NextLeaf(bool* blew_stack, int32_t* type_out, unsigned* length_out);
+  String* Search(unsigned* offset_out,
+                 int32_t* type_out,
+                 unsigned* length_out);

  unsigned depth_;
  unsigned maximum_depth_;
@ -7910,8 +7886,6 @@ class ConsStringIteratorOp {
  ConsString* frames_[kStackSize];
  unsigned consumed_;
  ConsString* root_;
-  int32_t root_type_;
-  unsigned root_length_;
  DISALLOW_COPY_AND_ASSIGN(ConsStringIteratorOp);
 };

@ -7919,8 +7893,9 @@ class ConsStringIteratorOp {
 // Note: this class is not GC-safe.
 class StringCharacterStream {
 public:
-  inline StringCharacterStream(
-      String* string, unsigned offset, ConsStringIteratorOp* op);
+  inline StringCharacterStream(String* string,
+                               unsigned offset,
+                               ConsStringIteratorOp* op);
  inline uint16_t GetNext();
  inline bool HasMore();
  inline void Reset(String* string, unsigned offset, ConsStringIteratorOp* op);
--- a/src/profile-generator.cc
+++ b/src/profile-generator.cc
@ -112,7 +112,7 @@ const char* StringsStorage::GetCopy(const char* src) {
  OS::StrNCpy(dst, src, len);
  dst[len] = '\0';
  uint32_t hash =
-      HashSequentialString(dst.start(), len, HEAP->HashSeed());
+      StringHasher::HashSequentialString(dst.start(), len, HEAP->HashSeed());
  return AddOrDisposeString(dst.start(), hash);
 }

@ -145,7 +145,7 @@ const char* StringsStorage::GetVFormatted(const char* format, va_list args) {
    DeleteArray(str.start());
    return format;
  }
-  uint32_t hash = HashSequentialString(
+  uint32_t hash = StringHasher::HashSequentialString(
      str.start(), len, HEAP->HashSeed());
  return AddOrDisposeString(str.start(), hash);
 }
@ -156,8 +156,8 @@ const char* StringsStorage::GetName(String* name) {
    int length = Min(kMaxNameSize, name->length());
    SmartArrayPointer<char> data =
        name->ToCString(DISALLOW_NULLS, ROBUST_STRING_TRAVERSAL, 0, length);
-    uint32_t hash =
-        HashSequentialString(*data, length, name->GetHeap()->HashSeed());
+    uint32_t hash = StringHasher::HashSequentialString(
+        *data, length, name->GetHeap()->HashSeed());
    return AddOrDisposeString(data.Detach(), hash);
  }
  return "";
@ -1451,9 +1451,9 @@ void HeapObjectsMap::RemoveDeadEntries() {
 SnapshotObjectId HeapObjectsMap::GenerateId(v8::RetainedObjectInfo* info) {
  SnapshotObjectId id = static_cast<SnapshotObjectId>(info->GetHash());
  const char* label = info->GetLabel();
-  id ^= HashSequentialString(label,
-                             static_cast<int>(strlen(label)),
-                             HEAP->HashSeed());
+  id ^= StringHasher::HashSequentialString(label,
+                                           static_cast<int>(strlen(label)),
+                                           HEAP->HashSeed());
  intptr_t element_count = info->GetElementCount();
  if (element_count != -1)
    id ^= ComputeIntegerHash(static_cast<uint32_t>(element_count),
@ -2940,9 +2940,10 @@ class NativeGroupRetainedObjectInfo : public v8::RetainedObjectInfo {
 NativeGroupRetainedObjectInfo* NativeObjectsExplorer::FindOrAddGroupInfo(
    const char* label) {
  const char* label_copy = collection_->names()->GetCopy(label);
-  uint32_t hash = HashSequentialString(label_copy,
-                                       static_cast<int>(strlen(label_copy)),
-                                       HEAP->HashSeed());
+  uint32_t hash = StringHasher::HashSequentialString(
+      label_copy,
+      static_cast<int>(strlen(label_copy)),
+      HEAP->HashSeed());
  HashMap::Entry* entry = native_groups_.Lookup(const_cast<char*>(label_copy),
                                                hash, true);
  if (entry->value == NULL) {
--- a/src/unicode.h
+++ b/src/unicode.h
@ -170,10 +170,6 @@ class Utf8 {
  // that match are coded as a 4 byte UTF-8 sequence.
  static const unsigned kBytesSavedByCombiningSurrogates = 2;
  static const unsigned kSizeOfUnmatchedSurrogate = 3;
-
- private:
-  template <unsigned s> friend class Utf8InputBuffer;
-  friend class Test;
  static inline uchar ValueOf(const byte* str,
                              unsigned length,
                              unsigned* cursor);
--- a/test/cctest/test-strings.cc
+++ b/test/cctest/test-strings.cc
@ -345,37 +345,24 @@ void AccumulateStats(Handle<String> cons_string, ConsStringStats* stats) {

 void AccumulateStatsWithOperator(
    ConsString* cons_string, ConsStringStats* stats) {
-  // Init op.
+  unsigned offset = 0;
+  int32_t type = cons_string->map()->instance_type();
+  unsigned length = static_cast<unsigned>(cons_string->length());
  ConsStringIteratorOp op;
-  op.Reset();
-  // Use response for initial search and on blown stack.
-  ConsStringIteratorOp::ContinueResponse response;
-  response.string_ = cons_string;
-  response.offset_ = 0;
-  response.type_ = cons_string->map()->instance_type();
-  response.length_ = (uint32_t) cons_string->length();
+  String* string = op.Operate(cons_string, &offset, &type, &length);
+  CHECK(string != NULL);
  while (true) {
-    String* string = op.Operate(ConsString::cast(response.string_),
-                                &response.offset_,
-                                &response.type_,
-                                &response.length_);
-    CHECK(string != NULL);
-    while (true) {
-      // Accumulate stats.
-      stats->leaves_++;
-      stats->chars_ += string->length();
-      // Check for completion.
-      bool keep_going_fast_check = op.HasMore();
-      bool keep_going = op.ContinueOperation(&response);
-      if (!keep_going) return;
-      // Verify no false positives for fast check.
-      CHECK(keep_going_fast_check);
-      CHECK(response.string_ != NULL);
-      // Blew stack. Restart outer loop.
-      if (response.string_->IsConsString()) break;
-      string = response.string_;
-    }
-  };
+    ASSERT(!string->IsConsString());
+    // Accumulate stats.
+    stats->leaves_++;
+    stats->chars_ += string->length();
+    // Check for completion.
+    bool keep_going_fast_check = op.HasMore();
+    string = op.ContinueOperation(&type, &length);
+    if (string == NULL) return;
+    // Verify no false positives for fast check.
+    CHECK(keep_going_fast_check);
+  }
 }