Simplify code by removing special-casing for single-character patterns

Review URL: http://codereview.chromium.org/3276004

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5380 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
sandholm@chromium.org 2010-08-31 09:22:53 +00:00
parent 663f378da5
commit 97ccb64a06

View File

@ -2818,40 +2818,6 @@ static int BoyerMooreIndexOf(Vector<const schar> subject,
}
// Forward search for a single character.
//
// Returns the index of the first element of |string|, at or after
// |start_index|, that equals |pattern_char|, or -1 if there is no such
// element. One-byte character types are scanned with memchr; wider types
// use a plain loop. |start_index| is not range-checked here.
template <typename schar>
static inline int SingleCharIndexOf(Vector<const schar> string,
                                    schar pattern_char,
                                    int start_index) {
  const int length = string.length();

  if (sizeof(schar) != 1) {
    // Wide characters: memchr only compares bytes, so scan element-wise.
    int i = start_index;
    while (i < length) {
      if (string[i] == pattern_char) return i;
      i++;
    }
    return -1;
  }

  // One-byte characters: let the C library do the scan.
  const schar* base = string.start();
  const void* hit = memchr(base + start_index,
                           pattern_char,
                           length - start_index);
  if (hit == NULL) return -1;
  // Convert the hit address back into an index relative to the vector start.
  return static_cast<int>(static_cast<const schar*>(hit) - base);
}
// Backward search for a single character.
//
// Walks |string| from |start_index| down to index 0 and returns the first
// index encountered whose element equals |pattern_char|, or -1 if none is
// found (including when |start_index| is negative). The upper bound of
// |start_index| is not checked here.
template <typename schar>
static int SingleCharLastIndexOf(Vector<const schar> string,
                                 schar pattern_char,
                                 int start_index) {
  int pos = start_index;
  while (pos >= 0 && string[pos] != pattern_char) {
    pos--;
  }
  // Either we stopped on a match (pos >= 0) or never found one.
  return (pos >= 0) ? pos : -1;
}
// Trivial string search for shorter strings.
// On return, if "complete" is set to true, the return value is the
// final result of searching for the pattern in the subject.
@ -2863,6 +2829,7 @@ static int SimpleIndexOf(Vector<const schar> subject,
Vector<const pchar> pattern,
int idx,
bool* complete) {
ASSERT(pattern.length() > 1);
// Badness is a count of how much work we have done. When we have
// done enough work we decide it's probably worth switching to a better
// algorithm.
@ -2925,12 +2892,12 @@ static int SimpleIndexOf(Vector<const schar> subject,
if (subject[i] != pattern_first_char) continue;
}
int j = 1;
do {
while (j < pattern.length()) {
if (pattern[j] != subject[i+j]) {
break;
}
j++;
} while (j < pattern.length());
}
if (j == pattern.length()) {
return i;
}
@ -3032,54 +2999,15 @@ int Runtime::StringMatch(Handle<String> sub,
int subject_length = sub->length();
if (start_index + pattern_length > subject_length) return -1;
if (!sub->IsFlat()) {
FlattenString(sub);
}
// Searching for one specific character is common. For one
// character patterns linear search is necessary, so any smart
// algorithm is unnecessary overhead.
if (pattern_length == 1) {
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
String* seq_sub = *sub;
if (seq_sub->IsConsString()) {
seq_sub = ConsString::cast(seq_sub)->first();
}
if (seq_sub->IsAsciiRepresentation()) {
uc16 pchar = pat->Get(0);
if (pchar > String::kMaxAsciiCharCode) {
return -1;
}
Vector<const char> ascii_vector =
seq_sub->ToAsciiVector().SubVector(start_index, subject_length);
const void* pos = memchr(ascii_vector.start(),
static_cast<const char>(pchar),
static_cast<size_t>(ascii_vector.length()));
if (pos == NULL) {
return -1;
}
return static_cast<int>(reinterpret_cast<const char*>(pos)
- ascii_vector.start() + start_index);
}
return SingleCharIndexOf(seq_sub->ToUC16Vector(),
pat->Get(0),
start_index);
}
if (!pat->IsFlat()) {
FlattenString(pat);
}
if (!sub->IsFlat()) FlattenString(sub);
if (!pat->IsFlat()) FlattenString(pat);
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining asciiness.
String* seq_sub = *sub;
if (seq_sub->IsConsString()) {
seq_sub = ConsString::cast(seq_sub)->first();
}
if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
String* seq_pat = *pat;
if (seq_pat->IsConsString()) {
seq_pat = ConsString::cast(seq_pat)->first();
}
if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first();
// dispatch on type of strings
if (seq_pat->IsAsciiRepresentation()) {
@ -3169,30 +3097,8 @@ static Object* Runtime_StringLastIndexOf(Arguments args) {
return Smi::FromInt(start_index);
}
if (!sub->IsFlat()) {
FlattenString(sub);
}
if (pat_length == 1) {
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
if (sub->IsAsciiRepresentation()) {
uc16 pchar = pat->Get(0);
if (pchar > String::kMaxAsciiCharCode) {
return Smi::FromInt(-1);
}
return Smi::FromInt(SingleCharLastIndexOf(sub->ToAsciiVector(),
static_cast<char>(pat->Get(0)),
start_index));
} else {
return Smi::FromInt(SingleCharLastIndexOf(sub->ToUC16Vector(),
pat->Get(0),
start_index));
}
}
if (!pat->IsFlat()) {
FlattenString(pat);
}
if (!sub->IsFlat()) FlattenString(sub);
if (!pat->IsFlat()) FlattenString(pat);
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
@ -3370,88 +3276,6 @@ static void SetLastMatchInfoNoCaptures(Handle<String> subject,
}
// Records every occurrence of |pattern_char| in |subject| that lies after
// position *match_pos.
//
// For each occurrence, the stretch of subject between the previous match and
// this one (if non-empty) is added to |builder| as a subject slice, followed
// by |pattern| itself. Once no further occurrence exists, the remaining tail
// of the subject (if non-empty) is added as a final slice.
//
// Returns false, with *match_pos set to the last match position handled so
// far, when |builder| reports that it lacks capacity for another match; the
// search can then be resumed from that position. Returns true when the
// search ran to completion, with *match_pos holding the position of the last
// match (or its incoming value if nothing was found).
template <typename schar>
static bool SearchCharMultiple(Vector<schar> subject,
                               String* pattern,
                               schar pattern_char,
                               FixedArrayBuilder* builder,
                               int* match_pos) {
  const int length = subject.length();
  int last_match = *match_pos;

  for (;;) {
    if (last_match >= length) break;

    // Bail out before searching so that no match is found without room to
    // record it.
    if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
      *match_pos = last_match;
      return false;
    }

    const int search_from = last_match + 1;
    const int found = SingleCharIndexOf(subject, pattern_char, search_from);
    if (found < 0) break;  // No more occurrences.

    // Emit the unmatched gap between the previous match and this one.
    if (found > search_from) {
      ReplacementStringBuilder::AddSubjectSlice(builder, search_from, found);
    }
    builder->Add(pattern);
    last_match = found;
  }

  // Emit whatever follows the last match.
  const int tail_start = last_match + 1;
  if (tail_start < length) {
    ReplacementStringBuilder::AddSubjectSlice(builder, tail_start, length);
  }
  *match_pos = last_match;
  return true;
}
// Finds all occurrences of a one-character |pattern| in a flat |subject|,
// collecting matches and the subject slices between them in |builder|.
//
// The search is run in rounds: each round first makes sure |builder| has
// room for at least one more match (which may allocate), then searches with
// allocation disallowed until the builder runs out of capacity or the
// subject is exhausted.
//
// Returns true and updates |last_match_info| with the final match if at
// least one match was found; returns false if there was none.
static bool SearchCharMultiple(Handle<String> subject,
                               Handle<String> pattern,
                               Handle<JSArray> last_match_info,
                               FixedArrayBuilder* builder) {
  ASSERT(subject->IsFlat());
  ASSERT_EQ(1, pattern->length());
  const uc16 pattern_char = pattern->Get(0);

  // Start as if the previous match sat just before the first character.
  int match_pos = -1;

  bool done = false;
  while (!done) {
    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
    AssertNoAllocation no_gc;
    if (!subject->IsAsciiRepresentation()) {
      Vector<const uc16> subject_vector = subject->ToUC16Vector();
      done = SearchCharMultiple<const uc16>(subject_vector,
                                            *pattern,
                                            pattern_char,
                                            builder,
                                            &match_pos);
    } else if (pattern_char > String::kMaxAsciiCharCode) {
      // A non-ASCII character cannot occur in an ASCII subject.
      done = true;
    } else {
      Vector<const char> subject_vector = subject->ToAsciiVector();
      const char pattern_ascii_char = static_cast<char>(pattern_char);
      done = SearchCharMultiple<const char>(subject_vector,
                                            *pattern,
                                            pattern_ascii_char,
                                            builder,
                                            &match_pos);
    }
  }

  if (match_pos < 0) return false;  // No matches at all.

  SetLastMatchInfoNoCaptures(subject,
                             last_match_info,
                             match_pos,
                             match_pos + 1);
  return true;
}
template <typename schar, typename pchar>
static bool SearchStringMultiple(Vector<schar> subject,
String* pattern,
@ -3529,7 +3353,6 @@ static bool SearchStringMultiple(Handle<String> subject,
FixedArrayBuilder* builder) {
ASSERT(subject->IsFlat());
ASSERT(pattern->IsFlat());
ASSERT(pattern->length() > 1);
// Treating as if a previous match was before first character.
int match_pos = -pattern->length();
@ -3787,14 +3610,6 @@ static Object* Runtime_RegExpExecMultiple(Arguments args) {
if (regexp->TypeTag() == JSRegExp::ATOM) {
Handle<String> pattern(
String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
int pattern_length = pattern->length();
if (pattern_length == 1) {
if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) {
return *builder.ToJSArray(result_array);
}
return Heap::null_value();
}
if (!pattern->IsFlat()) FlattenString(pattern);
if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) {
return *builder.ToJSArray(result_array);
@ -5396,23 +5211,6 @@ void FindStringIndices(Vector<const schar> subject,
}
}
// Appends to |indices|, in increasing order, the index of each occurrence of
// |pattern_char| in |subject|, stopping once |limit| indices have been added
// or no further occurrence exists. Nothing is appended for the end of the
// string; this function only records actual matches.
template <typename schar>
inline void FindCharIndices(Vector<const schar> subject,
                            const schar pattern_char,
                            ZoneList<int>* indices,
                            unsigned int limit) {
  int search_from = 0;
  for (unsigned int remaining = limit; remaining > 0; remaining--) {
    const int hit = SingleCharIndexOf(subject, pattern_char, search_from);
    if (hit < 0) break;
    indices->Add(hit);
    // Resume just past this match.
    search_from = hit + 1;
  }
}
static Object* Runtime_StringSplit(Arguments args) {
ASSERT(args.length() == 3);
@ -5438,49 +5236,33 @@ static Object* Runtime_StringSplit(Arguments args) {
// Find (up to limit) indices of separator and end-of-string in subject
int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
ZoneList<int> indices(initial_capacity);
if (pattern_length == 1) {
// Special case, go directly to fast single-character split.
AssertNoAllocation nogc;
uc16 pattern_char = pattern->Get(0);
if (subject->IsTwoByteRepresentation()) {
FindCharIndices(subject->ToUC16Vector(), pattern_char,
&indices,
limit);
} else if (pattern_char <= String::kMaxAsciiCharCode) {
FindCharIndices(subject->ToAsciiVector(),
static_cast<char>(pattern_char),
&indices,
limit);
if (!pattern->IsFlat()) FlattenString(pattern);
AssertNoAllocation nogc;
if (subject->IsAsciiRepresentation()) {
Vector<const char> subject_vector = subject->ToAsciiVector();
if (pattern->IsAsciiRepresentation()) {
FindStringIndices(subject_vector,
pattern->ToAsciiVector(),
&indices,
limit);
} else {
FindStringIndices(subject_vector,
pattern->ToUC16Vector(),
&indices,
limit);
}
} else {
if (!pattern->IsFlat()) FlattenString(pattern);
AssertNoAllocation nogc;
if (subject->IsAsciiRepresentation()) {
Vector<const char> subject_vector = subject->ToAsciiVector();
if (pattern->IsAsciiRepresentation()) {
FindStringIndices(subject_vector,
pattern->ToAsciiVector(),
&indices,
limit);
} else {
FindStringIndices(subject_vector,
pattern->ToUC16Vector(),
&indices,
limit);
}
Vector<const uc16> subject_vector = subject->ToUC16Vector();
if (pattern->IsAsciiRepresentation()) {
FindStringIndices(subject_vector,
pattern->ToAsciiVector(),
&indices,
limit);
} else {
Vector<const uc16> subject_vector = subject->ToUC16Vector();
if (pattern->IsAsciiRepresentation()) {
FindStringIndices(subject_vector,
pattern->ToAsciiVector(),
&indices,
limit);
} else {
FindStringIndices(subject_vector,
pattern->ToUC16Vector(),
&indices,
limit);
}
FindStringIndices(subject_vector,
pattern->ToUC16Vector(),
&indices,
limit);
}
}
if (static_cast<uint32_t>(indices.length()) < limit) {