Optimize non-ASCII string splitting with single-character search pattern

Review URL: https://chromiumcodereview.appspot.com/11299163
Patch from Ben Noordhuis <ben@c9.io>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13119 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2012-12-03 16:48:17 +00:00
parent 00bde58530
commit 702cc25def
2 changed files with 64 additions and 12 deletions

View File

@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector<const char> subject,
}
void FindTwoByteStringIndices(const Vector<const uc16> subject,
uc16 pattern,
ZoneList<int>* indices,
unsigned int limit,
Zone* zone) {
ASSERT(limit > 0);
const uc16* subject_start = subject.start();
const uc16* subject_end = subject_start + subject.length();
for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
if (*pos == pattern) {
indices->Add(static_cast<int>(pos - subject_start), zone);
limit--;
}
}
}
template <typename SubjectChar, typename PatternChar>
void FindStringIndices(Isolate* isolate,
Vector<const SubjectChar> subject,
@ -2826,20 +2843,38 @@ void FindStringIndicesDispatch(Isolate* isolate,
} else {
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (pattern_content.IsAscii()) {
FindStringIndices(isolate,
subject_vector,
pattern_content.ToAsciiVector(),
Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector,
pattern_vector[0],
indices,
limit,
zone);
} else {
FindStringIndices(isolate,
subject_vector,
pattern_content.ToUC16Vector(),
pattern_vector,
indices,
limit,
zone);
}
} else {
Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
if (pattern_vector.length() == 1) {
FindTwoByteStringIndices(subject_vector,
pattern_vector[0],
indices,
limit,
zone);
} else {
FindStringIndices(isolate,
subject_vector,
pattern_vector,
indices,
limit,
zone);
}
}
}
}
}

View File

@ -66,6 +66,23 @@ assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/
// A lookahead regexp that matches before every character splits the subject
// into its individual characters.
assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/));
// Single-character " " separator applied to a subject that contains the
// non-ASCII character "ä".
assertArrayEquals(["Wenige", "sind", "auserwählt."],
"Wenige sind auserwählt.".split(" "));
// Same subject and separator with an explicit limit of 0, 1, 2 and 3: the
// result holds at most `limit` pieces.
assertArrayEquals([], "Wenige sind auserwählt.".split(" ", 0));
assertArrayEquals(["Wenige"], "Wenige sind auserwählt.".split(" ", 1));
assertArrayEquals(["Wenige", "sind"], "Wenige sind auserwählt.".split(" ", 2));
assertArrayEquals(["Wenige", "sind", "auserwählt."],
"Wenige sind auserwählt.".split(" ", 3));
// The non-ASCII character itself used as the single-character separator.
assertArrayEquals(["Wenige sind auserw", "hlt."],
"Wenige sind auserwählt.".split("ä"));
// Multi-character separator that contains the non-ASCII character.
assertArrayEquals(["Wenige sind ", "."],
"Wenige sind auserwählt.".split("auserwählt"));
/* "ab".split(/((?=.))/)
*