Keep track of the first non-ASCII word/char so the UTF-8 slow path does not rescan the ASCII prefix.

Review URL: https://chromiumcodereview.appspot.com/11194053 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12762 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2012-10-18 15:08:11 +00:00 · 2012-10-18 15:08:11 +00:00 · d04a1fff1f
commit d04a1fff1f
parent 852de2e1ab
4 changed files with 36 additions and 16 deletions
--- a/src/heap-inl.h
+++ b/src/heap-inl.h
@ -85,13 +85,16 @@ void PromotionQueue::ActivateGuardIfOnTheSamePage() {
 MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
                                          PretenureFlag pretenure) {
  // Check for ASCII first since this is the common case.
-  if (String::IsAscii(str.start(), str.length())) {
+  const char* start = str.start();
+  int length = str.length();
+  int non_ascii_start = String::NonAsciiStart(start, length);  // index at or before the first non-ASCII byte
+  if (non_ascii_start >= length) {  // the scan reached the end: all ASCII
    // If the string is ASCII, we do not need to convert the characters
    // since UTF8 is backwards compatible with ASCII.
    return AllocateStringFromAscii(str, pretenure);
  }
  // Non-ASCII and we need to decode.
-  return AllocateStringFromUtf8Slow(str, pretenure);
+  return AllocateStringFromUtf8Slow(str, non_ascii_start, pretenure);  // hand over the scan offset instead of rescanning
 }


--- a/src/heap.cc
+++ b/src/heap.cc
@ -4428,13 +4428,14 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,


 MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
+                                              int non_ascii_start,
                                              PretenureFlag pretenure) {
-  // Count the number of characters in the UTF-8 string and check if
-  // it is an ASCII string.
+  // Continue counting the number of characters in the UTF-8 string, starting
+  // from the first non-ascii character or word.
+  int chars = non_ascii_start;
  Access<UnicodeCache::Utf8Decoder>
      decoder(isolate_->unicode_cache()->utf8_decoder());
-  decoder->Reset(string.start(), string.length());
-  int chars = 0;
+  decoder->Reset(string.start() + non_ascii_start, string.length() - chars);
  while (decoder->has_more()) {
    uint32_t r = decoder->GetNext();
    if (r <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
--- a/src/heap.h
+++ b/src/heap.h
@ -705,6 +705,7 @@ class Heap {
      PretenureFlag pretenure = NOT_TENURED);
  MUST_USE_RESULT MaybeObject* AllocateStringFromUtf8Slow(
      Vector<const char> str,
+      int non_ascii_start,  // offset into str at which UTF-8 decoding begins
      PretenureFlag pretenure = NOT_TENURED);
  MUST_USE_RESULT MaybeObject* AllocateStringFromTwoByte(
      Vector<const uc16> str,
--- a/src/objects.h
+++ b/src/objects.h
@ -7413,32 +7413,47 @@ class String: public HeapObject {
                          int from,
                          int to);

-  static inline bool IsAscii(const char* chars, int length) {
+  // Returns an offset into chars at or before the first non-ASCII character:
+  // the word-wise scan reports the start of the word-sized chunk holding it,
+  // not the character itself. A return value >= length means all ASCII.
+  static inline int NonAsciiStart(const char* chars, int length) {
+    const char* start = chars;  // kept so the moving cursor can be turned into an offset
    const char* limit = chars + length;
 #ifdef V8_HOST_CAN_READ_UNALIGNED
    ASSERT(kMaxAsciiCharCode == 0x7F);
    const uintptr_t non_ascii_mask = kUintptrAllBitsSet / 0xFF * 0x80;
    while (chars + sizeof(uintptr_t) <= limit) {
      if (*reinterpret_cast<const uintptr_t*>(chars) & non_ascii_mask) {
-        return false;
+        return chars - start;  // start of the chunk, not the exact non-ASCII byte
      }
      chars += sizeof(uintptr_t);
    }
 #endif
    while (chars < limit) {
-      if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) return false;
+      if (static_cast<uint8_t>(*chars) > kMaxAsciiCharCodeU) {
+        return chars - start;  // exact index of the non-ASCII byte
+      }
      ++chars;
    }
-    return true;
+    return chars - start;  // chars == limit here, so this equals length; NOTE(review): ptrdiff_t is implicitly narrowed to int
+  }
+
+  static inline bool IsAscii(const char* chars, int length) {  // true iff no char in [chars, chars + length) exceeds kMaxAsciiCharCodeU
+    return NonAsciiStart(chars, length) >= length;  // the scan only returns length when it hit no non-ASCII char
+  }
+
+  static inline int NonAsciiStart(const uc16* chars, int length) {  // index of the first code unit above kMaxAsciiCharCodeU, or length if none
+    const uc16* limit = chars + length;
+    const uc16* start = chars;  // kept so the moving cursor can be turned into an index
+    while (chars < limit) {
+      if (*chars > kMaxAsciiCharCodeU) return chars - start;  // exact index; this overload has no word-wise scan
+      ++chars;
+    }
+    return chars - start;  // chars == limit here, so this equals length
  }

  static inline bool IsAscii(const uc16* chars, int length) {
-    const uc16* limit = chars + length;
-    while (chars < limit) {
-      if (*chars > kMaxAsciiCharCodeU) return false;
-      ++chars;
-    }
-    return true;
+    return NonAsciiStart(chars, length) >= length;  // all ASCII iff the scan ran to the end without a hit
  }

 protected: