Optimize non-ASCII string splitting with single-character search pattern
Review URL: https://chromiumcodereview.appspot.com/11299163
Patch from Ben Noordhuis <ben@c9.io>.
git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13119 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
00bde58530
commit
702cc25def
@ -2762,6 +2762,23 @@ void FindAsciiStringIndices(Vector<const char> subject,
|
||||
}
|
||||
|
||||
|
||||
void FindTwoByteStringIndices(const Vector<const uc16> subject,
|
||||
uc16 pattern,
|
||||
ZoneList<int>* indices,
|
||||
unsigned int limit,
|
||||
Zone* zone) {
|
||||
ASSERT(limit > 0);
|
||||
const uc16* subject_start = subject.start();
|
||||
const uc16* subject_end = subject_start + subject.length();
|
||||
for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
|
||||
if (*pos == pattern) {
|
||||
indices->Add(static_cast<int>(pos - subject_start), zone);
|
||||
limit--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename SubjectChar, typename PatternChar>
|
||||
void FindStringIndices(Isolate* isolate,
|
||||
Vector<const SubjectChar> subject,
|
||||
@ -2826,19 +2843,37 @@ void FindStringIndicesDispatch(Isolate* isolate,
|
||||
} else {
|
||||
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
|
||||
if (pattern_content.IsAscii()) {
|
||||
FindStringIndices(isolate,
|
||||
subject_vector,
|
||||
pattern_content.ToAsciiVector(),
|
||||
indices,
|
||||
limit,
|
||||
zone);
|
||||
Vector<const char> pattern_vector = pattern_content.ToAsciiVector();
|
||||
if (pattern_vector.length() == 1) {
|
||||
FindTwoByteStringIndices(subject_vector,
|
||||
pattern_vector[0],
|
||||
indices,
|
||||
limit,
|
||||
zone);
|
||||
} else {
|
||||
FindStringIndices(isolate,
|
||||
subject_vector,
|
||||
pattern_vector,
|
||||
indices,
|
||||
limit,
|
||||
zone);
|
||||
}
|
||||
} else {
|
||||
FindStringIndices(isolate,
|
||||
subject_vector,
|
||||
pattern_content.ToUC16Vector(),
|
||||
indices,
|
||||
limit,
|
||||
zone);
|
||||
Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
|
||||
if (pattern_vector.length() == 1) {
|
||||
FindTwoByteStringIndices(subject_vector,
|
||||
pattern_vector[0],
|
||||
indices,
|
||||
limit,
|
||||
zone);
|
||||
} else {
|
||||
FindStringIndices(isolate,
|
||||
subject_vector,
|
||||
pattern_vector,
|
||||
indices,
|
||||
limit,
|
||||
zone);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -66,6 +66,23 @@ assertArrayEquals(["div", "#i", "d", ".class"], "div#id.class".split(/(?=[d#.])/
|
||||
|
||||
assertArrayEquals(["a", "b", "c"], "abc".split(/(?=.)/));
|
||||
|
||||
assertArrayEquals(["Wenige", "sind", "auserwählt."],
|
||||
"Wenige sind auserwählt.".split(" "));
|
||||
|
||||
assertArrayEquals([], "Wenige sind auserwählt.".split(" ", 0));
|
||||
|
||||
assertArrayEquals(["Wenige"], "Wenige sind auserwählt.".split(" ", 1));
|
||||
|
||||
assertArrayEquals(["Wenige", "sind"], "Wenige sind auserwählt.".split(" ", 2));
|
||||
|
||||
assertArrayEquals(["Wenige", "sind", "auserwählt."],
|
||||
"Wenige sind auserwählt.".split(" ", 3));
|
||||
|
||||
assertArrayEquals(["Wenige sind auserw", "hlt."],
|
||||
"Wenige sind auserwählt.".split("ä"));
|
||||
|
||||
assertArrayEquals(["Wenige sind ", "."],
|
||||
"Wenige sind auserwählt.".split("auserwählt"));
|
||||
|
||||
/* "ab".split(/((?=.))/)
|
||||
*
|
||||
|
Loading…
Reference in New Issue
Block a user