From 702cc25def18bbce84a6531b70c335cef03f7eed Mon Sep 17 00:00:00 2001
From: "yangguo@chromium.org"
 <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Date: Mon, 3 Dec 2012 16:48:17 +0000
Subject: [PATCH] Optimize non-ASCII string splitting with single-character
 search pattern

Review URL: https://chromiumcodereview.appspot.com/11299163
Patch from Ben Noordhuis <ben@c9.io>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13119 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
---
 src/runtime.cc               | 59 ++++++++++++++++++++++++++++--------
 test/mjsunit/string-split.js | 17 +++++++++++
 2 files changed, 64 insertions(+), 12 deletions(-)
diff --git a/src/runtime.cc b/src/runtime.cc
index 14bbae7911..78deb66468 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector<const char> subject,
 }
 
 
+void FindTwoByteStringIndices(const Vector<const uc16> subject,
+                              uc16 pattern,
+                              ZoneList<int>* indices,
+                              unsigned int limit,
+                              Zone* zone) {
+  ASSERT(limit > 0);
+  const uc16* subject_start = subject.start();
+  const uc16* subject_end = subject_start + subject.length();
+  for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
+    if (*pos == pattern) {
+      indices->Add(static_cast<int>(pos - subject_start), zone);
+      limit--;
+    }
+  }
+}
+
+
 template <typename SubjectChar, typename PatternChar>
 void FindStringIndices(Isolate* isolate,
                        Vector<const SubjectChar> subject,
@@ -2826,19 +2843,37 @@ void FindStringIndicesDispatch(Isolate* isolate,
     } else {
       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
       if (pattern_content.IsAscii()) {
-        FindStringIndices(isolate,
-                          subject_vector,
-                          pattern_content.ToAsciiVector(),
-                          indices,
-                          limit,
-                          zone);
+        Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
+        if (pattern_vector.length() == 1) {
+          FindTwoByteStringIndices(subject_vector,
+                                   pattern_vector[0],
+                                   indices,
+                                   limit,
+                                   zone);
+        } else {
+          FindStringIndices(isolate,
+                            subject_vector,
+                            pattern_vector,
+                            indices,
+                            limit,
+                            zone);
+        }
       } else {
-        FindStringIndices(isolate,
-                          subject_vector,
-                          pattern_content.ToUC16Vector(),
-                          indices,
-                          limit,
-                          zone);
+        Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
+        if (pattern_vector.length() == 1) {
+          FindTwoByteStringIndices(subject_vector,
+                                   pattern_vector[0],
+                                   indices,
+                                   limit,
+                                   zone);
+        } else {
+          FindStringIndices(isolate,
+                            subject_vector,
+                            pattern_vector,
+                            indices,
+                            limit,
+                            zone);
+        }
       }
     }
   }
diff --git a/test/mjsunit/string-split.js b/test/mjsunit/string-split.js
index d8412f0eed..1308244cab 100644
--- a/test/mjsunit/string-split.js
+++ b/test/mjsunit/string-split.js
@@ -66,6 +66,23 @@ assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/
 
 assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/));
 
+assertArrayEquals(["Wenige", "sind", "auserwählt."],
+                  "Wenige sind auserwählt.".split(" "));
+
+assertArrayEquals([], "Wenige sind auserwählt.".split(" ", 0));
+
+assertArrayEquals(["Wenige"], "Wenige sind auserwählt.".split(" ", 1));
+
+assertArrayEquals(["Wenige", "sind"], "Wenige sind auserwählt.".split(" ", 2));
+
+assertArrayEquals(["Wenige", "sind", "auserwählt."],
+                  "Wenige sind auserwählt.".split(" ", 3));
+
+assertArrayEquals(["Wenige sind auserw", "hlt."],
+                  "Wenige sind auserwählt.".split("ä"));
+
+assertArrayEquals(["Wenige sind ", "."],
+                  "Wenige sind auserwählt.".split("auserwählt"));
 
 /* "ab".split(/((?=.))/)
  *