Simplify code by removing special-casing for single-character patterns
Review URL: http://codereview.chromium.org/3276004 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5380 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
663f378da5
commit
97ccb64a06
284
src/runtime.cc
284
src/runtime.cc
@ -2818,40 +2818,6 @@ static int BoyerMooreIndexOf(Vector<const schar> subject,
|
||||
}
|
||||
|
||||
|
||||
template <typename schar>
|
||||
static inline int SingleCharIndexOf(Vector<const schar> string,
|
||||
schar pattern_char,
|
||||
int start_index) {
|
||||
if (sizeof(schar) == 1) {
|
||||
const schar* pos = reinterpret_cast<const schar*>(
|
||||
memchr(string.start() + start_index,
|
||||
pattern_char,
|
||||
string.length() - start_index));
|
||||
if (pos == NULL) return -1;
|
||||
return static_cast<int>(pos - string.start());
|
||||
}
|
||||
for (int i = start_index, n = string.length(); i < n; i++) {
|
||||
if (pattern_char == string[i]) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
template <typename schar>
|
||||
static int SingleCharLastIndexOf(Vector<const schar> string,
|
||||
schar pattern_char,
|
||||
int start_index) {
|
||||
for (int i = start_index; i >= 0; i--) {
|
||||
if (pattern_char == string[i]) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
// Trivial string search for shorter strings.
|
||||
// On return, if "complete" is set to true, the return value is the
|
||||
// final result of searching for the patter in the subject.
|
||||
@ -2863,6 +2829,7 @@ static int SimpleIndexOf(Vector<const schar> subject,
|
||||
Vector<const pchar> pattern,
|
||||
int idx,
|
||||
bool* complete) {
|
||||
ASSERT(pattern.length() > 1);
|
||||
// Badness is a count of how much work we have done. When we have
|
||||
// done enough work we decide it's probably worth switching to a better
|
||||
// algorithm.
|
||||
@ -2925,12 +2892,12 @@ static int SimpleIndexOf(Vector<const schar> subject,
|
||||
if (subject[i] != pattern_first_char) continue;
|
||||
}
|
||||
int j = 1;
|
||||
do {
|
||||
while (j < pattern.length()) {
|
||||
if (pattern[j] != subject[i+j]) {
|
||||
break;
|
||||
}
|
||||
j++;
|
||||
} while (j < pattern.length());
|
||||
}
|
||||
if (j == pattern.length()) {
|
||||
return i;
|
||||
}
|
||||
@ -3032,54 +2999,15 @@ int Runtime::StringMatch(Handle<String> sub,
|
||||
int subject_length = sub->length();
|
||||
if (start_index + pattern_length > subject_length) return -1;
|
||||
|
||||
if (!sub->IsFlat()) {
|
||||
FlattenString(sub);
|
||||
}
|
||||
|
||||
// Searching for one specific character is common. For one
|
||||
// character patterns linear search is necessary, so any smart
|
||||
// algorithm is unnecessary overhead.
|
||||
if (pattern_length == 1) {
|
||||
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
|
||||
String* seq_sub = *sub;
|
||||
if (seq_sub->IsConsString()) {
|
||||
seq_sub = ConsString::cast(seq_sub)->first();
|
||||
}
|
||||
if (seq_sub->IsAsciiRepresentation()) {
|
||||
uc16 pchar = pat->Get(0);
|
||||
if (pchar > String::kMaxAsciiCharCode) {
|
||||
return -1;
|
||||
}
|
||||
Vector<const char> ascii_vector =
|
||||
seq_sub->ToAsciiVector().SubVector(start_index, subject_length);
|
||||
const void* pos = memchr(ascii_vector.start(),
|
||||
static_cast<const char>(pchar),
|
||||
static_cast<size_t>(ascii_vector.length()));
|
||||
if (pos == NULL) {
|
||||
return -1;
|
||||
}
|
||||
return static_cast<int>(reinterpret_cast<const char*>(pos)
|
||||
- ascii_vector.start() + start_index);
|
||||
}
|
||||
return SingleCharIndexOf(seq_sub->ToUC16Vector(),
|
||||
pat->Get(0),
|
||||
start_index);
|
||||
}
|
||||
|
||||
if (!pat->IsFlat()) {
|
||||
FlattenString(pat);
|
||||
}
|
||||
if (!sub->IsFlat()) FlattenString(sub);
|
||||
if (!pat->IsFlat()) FlattenString(pat);
|
||||
|
||||
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
|
||||
// Extract flattened substrings of cons strings before determining asciiness.
|
||||
String* seq_sub = *sub;
|
||||
if (seq_sub->IsConsString()) {
|
||||
seq_sub = ConsString::cast(seq_sub)->first();
|
||||
}
|
||||
if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first();
|
||||
String* seq_pat = *pat;
|
||||
if (seq_pat->IsConsString()) {
|
||||
seq_pat = ConsString::cast(seq_pat)->first();
|
||||
}
|
||||
if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first();
|
||||
|
||||
// dispatch on type of strings
|
||||
if (seq_pat->IsAsciiRepresentation()) {
|
||||
@ -3169,30 +3097,8 @@ static Object* Runtime_StringLastIndexOf(Arguments args) {
|
||||
return Smi::FromInt(start_index);
|
||||
}
|
||||
|
||||
if (!sub->IsFlat()) {
|
||||
FlattenString(sub);
|
||||
}
|
||||
|
||||
if (pat_length == 1) {
|
||||
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
|
||||
if (sub->IsAsciiRepresentation()) {
|
||||
uc16 pchar = pat->Get(0);
|
||||
if (pchar > String::kMaxAsciiCharCode) {
|
||||
return Smi::FromInt(-1);
|
||||
}
|
||||
return Smi::FromInt(SingleCharLastIndexOf(sub->ToAsciiVector(),
|
||||
static_cast<char>(pat->Get(0)),
|
||||
start_index));
|
||||
} else {
|
||||
return Smi::FromInt(SingleCharLastIndexOf(sub->ToUC16Vector(),
|
||||
pat->Get(0),
|
||||
start_index));
|
||||
}
|
||||
}
|
||||
|
||||
if (!pat->IsFlat()) {
|
||||
FlattenString(pat);
|
||||
}
|
||||
if (!sub->IsFlat()) FlattenString(sub);
|
||||
if (!pat->IsFlat()) FlattenString(pat);
|
||||
|
||||
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
|
||||
|
||||
@ -3370,88 +3276,6 @@ static void SetLastMatchInfoNoCaptures(Handle<String> subject,
|
||||
}
|
||||
|
||||
|
||||
template <typename schar>
|
||||
static bool SearchCharMultiple(Vector<schar> subject,
|
||||
String* pattern,
|
||||
schar pattern_char,
|
||||
FixedArrayBuilder* builder,
|
||||
int* match_pos) {
|
||||
// Position of last match.
|
||||
int pos = *match_pos;
|
||||
int subject_length = subject.length();
|
||||
while (pos < subject_length) {
|
||||
int match_end = pos + 1;
|
||||
if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
|
||||
*match_pos = pos;
|
||||
return false;
|
||||
}
|
||||
int new_pos = SingleCharIndexOf(subject, pattern_char, match_end);
|
||||
if (new_pos >= 0) {
|
||||
// Match has been found.
|
||||
if (new_pos > match_end) {
|
||||
ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos);
|
||||
}
|
||||
pos = new_pos;
|
||||
builder->Add(pattern);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pos + 1 < subject_length) {
|
||||
ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length);
|
||||
}
|
||||
*match_pos = pos;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static bool SearchCharMultiple(Handle<String> subject,
|
||||
Handle<String> pattern,
|
||||
Handle<JSArray> last_match_info,
|
||||
FixedArrayBuilder* builder) {
|
||||
ASSERT(subject->IsFlat());
|
||||
ASSERT_EQ(1, pattern->length());
|
||||
uc16 pattern_char = pattern->Get(0);
|
||||
// Treating position before first as initial "previous match position".
|
||||
int match_pos = -1;
|
||||
|
||||
for (;;) { // Break when search complete.
|
||||
builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
|
||||
AssertNoAllocation no_gc;
|
||||
if (subject->IsAsciiRepresentation()) {
|
||||
if (pattern_char > String::kMaxAsciiCharCode) {
|
||||
break;
|
||||
}
|
||||
Vector<const char> subject_vector = subject->ToAsciiVector();
|
||||
char pattern_ascii_char = static_cast<char>(pattern_char);
|
||||
bool complete = SearchCharMultiple<const char>(subject_vector,
|
||||
*pattern,
|
||||
pattern_ascii_char,
|
||||
builder,
|
||||
&match_pos);
|
||||
if (complete) break;
|
||||
} else {
|
||||
Vector<const uc16> subject_vector = subject->ToUC16Vector();
|
||||
bool complete = SearchCharMultiple<const uc16>(subject_vector,
|
||||
*pattern,
|
||||
pattern_char,
|
||||
builder,
|
||||
&match_pos);
|
||||
if (complete) break;
|
||||
}
|
||||
}
|
||||
|
||||
if (match_pos >= 0) {
|
||||
SetLastMatchInfoNoCaptures(subject,
|
||||
last_match_info,
|
||||
match_pos,
|
||||
match_pos + 1);
|
||||
return true;
|
||||
}
|
||||
return false; // No matches at all.
|
||||
}
|
||||
|
||||
|
||||
template <typename schar, typename pchar>
|
||||
static bool SearchStringMultiple(Vector<schar> subject,
|
||||
String* pattern,
|
||||
@ -3529,7 +3353,6 @@ static bool SearchStringMultiple(Handle<String> subject,
|
||||
FixedArrayBuilder* builder) {
|
||||
ASSERT(subject->IsFlat());
|
||||
ASSERT(pattern->IsFlat());
|
||||
ASSERT(pattern->length() > 1);
|
||||
|
||||
// Treating as if a previous match was before first character.
|
||||
int match_pos = -pattern->length();
|
||||
@ -3787,14 +3610,6 @@ static Object* Runtime_RegExpExecMultiple(Arguments args) {
|
||||
if (regexp->TypeTag() == JSRegExp::ATOM) {
|
||||
Handle<String> pattern(
|
||||
String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
|
||||
int pattern_length = pattern->length();
|
||||
if (pattern_length == 1) {
|
||||
if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) {
|
||||
return *builder.ToJSArray(result_array);
|
||||
}
|
||||
return Heap::null_value();
|
||||
}
|
||||
|
||||
if (!pattern->IsFlat()) FlattenString(pattern);
|
||||
if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) {
|
||||
return *builder.ToJSArray(result_array);
|
||||
@ -5396,23 +5211,6 @@ void FindStringIndices(Vector<const schar> subject,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename schar>
|
||||
inline void FindCharIndices(Vector<const schar> subject,
|
||||
const schar pattern_char,
|
||||
ZoneList<int>* indices,
|
||||
unsigned int limit) {
|
||||
// Collect indices of pattern_char in subject, and the end-of-string index.
|
||||
// Stop after finding at most limit values.
|
||||
int index = 0;
|
||||
while (limit > 0) {
|
||||
index = SingleCharIndexOf(subject, pattern_char, index);
|
||||
if (index < 0) return;
|
||||
indices->Add(index);
|
||||
index++;
|
||||
limit--;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static Object* Runtime_StringSplit(Arguments args) {
|
||||
ASSERT(args.length() == 3);
|
||||
@ -5438,49 +5236,33 @@ static Object* Runtime_StringSplit(Arguments args) {
|
||||
// Find (up to limit) indices of separator and end-of-string in subject
|
||||
int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
|
||||
ZoneList<int> indices(initial_capacity);
|
||||
if (pattern_length == 1) {
|
||||
// Special case, go directly to fast single-character split.
|
||||
AssertNoAllocation nogc;
|
||||
uc16 pattern_char = pattern->Get(0);
|
||||
if (subject->IsTwoByteRepresentation()) {
|
||||
FindCharIndices(subject->ToUC16Vector(), pattern_char,
|
||||
&indices,
|
||||
limit);
|
||||
} else if (pattern_char <= String::kMaxAsciiCharCode) {
|
||||
FindCharIndices(subject->ToAsciiVector(),
|
||||
static_cast<char>(pattern_char),
|
||||
&indices,
|
||||
limit);
|
||||
if (!pattern->IsFlat()) FlattenString(pattern);
|
||||
AssertNoAllocation nogc;
|
||||
if (subject->IsAsciiRepresentation()) {
|
||||
Vector<const char> subject_vector = subject->ToAsciiVector();
|
||||
if (pattern->IsAsciiRepresentation()) {
|
||||
FindStringIndices(subject_vector,
|
||||
pattern->ToAsciiVector(),
|
||||
&indices,
|
||||
limit);
|
||||
} else {
|
||||
FindStringIndices(subject_vector,
|
||||
pattern->ToUC16Vector(),
|
||||
&indices,
|
||||
limit);
|
||||
}
|
||||
} else {
|
||||
if (!pattern->IsFlat()) FlattenString(pattern);
|
||||
AssertNoAllocation nogc;
|
||||
if (subject->IsAsciiRepresentation()) {
|
||||
Vector<const char> subject_vector = subject->ToAsciiVector();
|
||||
if (pattern->IsAsciiRepresentation()) {
|
||||
FindStringIndices(subject_vector,
|
||||
pattern->ToAsciiVector(),
|
||||
&indices,
|
||||
limit);
|
||||
} else {
|
||||
FindStringIndices(subject_vector,
|
||||
pattern->ToUC16Vector(),
|
||||
&indices,
|
||||
limit);
|
||||
}
|
||||
Vector<const uc16> subject_vector = subject->ToUC16Vector();
|
||||
if (pattern->IsAsciiRepresentation()) {
|
||||
FindStringIndices(subject_vector,
|
||||
pattern->ToAsciiVector(),
|
||||
&indices,
|
||||
limit);
|
||||
} else {
|
||||
Vector<const uc16> subject_vector = subject->ToUC16Vector();
|
||||
if (pattern->IsAsciiRepresentation()) {
|
||||
FindStringIndices(subject_vector,
|
||||
pattern->ToAsciiVector(),
|
||||
&indices,
|
||||
limit);
|
||||
} else {
|
||||
FindStringIndices(subject_vector,
|
||||
pattern->ToUC16Vector(),
|
||||
&indices,
|
||||
limit);
|
||||
}
|
||||
FindStringIndices(subject_vector,
|
||||
pattern->ToUC16Vector(),
|
||||
&indices,
|
||||
limit);
|
||||
}
|
||||
}
|
||||
if (static_cast<uint32_t>(indices.length()) < limit) {
|
||||
|
Loading…
Reference in New Issue
Block a user