From 702cc25def18bbce84a6531b70c335cef03f7eed Mon Sep 17 00:00:00 2001 From: "yangguo@chromium.org" Date: Mon, 3 Dec 2012 16:48:17 +0000 Subject: [PATCH] Optimize non-ASCII string splitting with single-character search pattern Review URL: https://chromiumcodereview.appspot.com/11299163 Patch from Ben Noordhuis . git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13119 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/runtime.cc | 59 ++++++++++++++++++++++++++++-------- test/mjsunit/string-split.js | 17 +++++++++++ 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/src/runtime.cc b/src/runtime.cc index 14bbae7911..78deb66468 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector subject, } +void FindTwoByteStringIndices(const Vector subject, + uc16 pattern, + ZoneList* indices, + unsigned int limit, + Zone* zone) { + ASSERT(limit > 0); + const uc16* subject_start = subject.start(); + const uc16* subject_end = subject_start + subject.length(); + for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) { + if (*pos == pattern) { + indices->Add(static_cast(pos - subject_start), zone); + limit--; + } + } +} + + template void FindStringIndices(Isolate* isolate, Vector subject, @@ -2826,19 +2843,37 @@ void FindStringIndicesDispatch(Isolate* isolate, } else { Vector subject_vector = subject_content.ToUC16Vector(); if (pattern_content.IsAscii()) { - FindStringIndices(isolate, - subject_vector, - pattern_content.ToAsciiVector(), - indices, - limit, - zone); + Vector pattern_vector = pattern_content.ToAsciiVector(); + if (pattern_vector.length() == 1) { + FindTwoByteStringIndices(subject_vector, + pattern_vector[0], + indices, + limit, + zone); + } else { + FindStringIndices(isolate, + subject_vector, + pattern_vector, + indices, + limit, + zone); + } } else { - FindStringIndices(isolate, - subject_vector, - pattern_content.ToUC16Vector(), - indices, - limit, - zone); + Vector pattern_vector = pattern_content.ToUC16Vector(); + if (pattern_vector.length() == 1) { + FindTwoByteStringIndices(subject_vector, + pattern_vector[0], + indices, + limit, + zone); + } else { + FindStringIndices(isolate, + subject_vector, + pattern_vector, + indices, + limit, + zone); + } } } } diff --git a/test/mjsunit/string-split.js b/test/mjsunit/string-split.js index d8412f0eed..1308244cab 100644 --- a/test/mjsunit/string-split.js +++ b/test/mjsunit/string-split.js @@ -66,6 +66,23 @@ assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/ assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/)); +assertArrayEquals(["Wenige", "sind", "auserwählt."], + "Wenige sind auserwählt.".split(" ")); + +assertArrayEquals([], "Wenige sind auserwählt.".split(" ", 0)); + +assertArrayEquals(["Wenige"], "Wenige sind auserwählt.".split(" ", 1)); + +assertArrayEquals(["Wenige", "sind"], "Wenige sind auserwählt.".split(" ", 2)); + +assertArrayEquals(["Wenige", "sind", "auserwählt."], + "Wenige sind auserwählt.".split(" ", 3)); + +assertArrayEquals(["Wenige sind auserw", "hlt."], + "Wenige sind auserwählt.".split("ä")); + +assertArrayEquals(["Wenige sind ", "."], + "Wenige sind auserwählt.".split("auserwählt")); /* "ab".split(/((?=.))/) *