Refactor implementation for String.prototype.replace.

R=ulan@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/12177015

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13761 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2013-02-27 14:14:45 +00:00
parent fb807eeb0d
commit 4cbe7100e6
5 changed files with 160 additions and 97 deletions

View File

@ -687,7 +687,7 @@ Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
Handle<String> subject,
int capture_count,
int32_t* match) {
CHECK(last_match_info->HasFastObjectElements());
ASSERT(last_match_info->HasFastObjectElements());
int capture_register_count = (capture_count + 1) * 2;
last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
AssertNoAllocation no_gc;

View File

@ -2898,7 +2898,7 @@ void FindStringIndicesDispatch(Isolate* isolate,
template<typename ResultSeqString>
MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
MUST_USE_RESULT static MaybeObject* StringReplaceGlobalAtomRegExpWithString(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> pattern_regexp,
@ -2921,9 +2921,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
isolate, *subject, pattern, &indices, 0xffffffff, zone);
int matches = indices.length();
if (matches == 0) {
return isolate->heap()->undefined_value();
}
if (matches == 0) return *subject;
// Detect integer overflow.
int64_t result_len_64 =
@ -2983,7 +2981,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
}
MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
MUST_USE_RESULT static MaybeObject* StringReplaceGlobalRegExpWithString(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> regexp,
@ -2992,7 +2990,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
ASSERT(subject->IsFlat());
ASSERT(replacement->IsFlat());
bool is_global = regexp->GetFlags().is_global();
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
@ -3005,33 +3002,30 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
subject_length);
// Shortcut for simple non-regexp global replacements
if (is_global &&
regexp->TypeTag() == JSRegExp::ATOM &&
simple_replace) {
if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
if (subject->IsOneByteConvertible() &&
replacement->IsOneByteConvertible()) {
return StringReplaceAtomRegExpWithString<SeqOneByteString>(
return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
isolate, subject, regexp, replacement, last_match_info);
} else {
return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
isolate, subject, regexp, replacement, last_match_info);
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
if (global_cache.HasException()) return Failure::Exception();
int32_t* current_match = global_cache.FetchNext();
if (current_match == NULL) {
if (global_cache.HasException()) return Failure::Exception();
return isolate->heap()->undefined_value();
return *subject;
}
// Guessing the number of parts that the final result string is built
// from. Global regexps can match any number of times, so we guess
// conservatively.
int expected_parts =
(compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
ReplacementStringBuilder builder(isolate->heap(),
subject,
expected_parts);
@ -3063,9 +3057,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
}
prev = end;
// Only continue checking for global regexps.
if (!is_global) break;
current_match = global_cache.FetchNext();
} while (current_match != NULL);
@ -3086,43 +3077,32 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
template <typename ResultSeqString>
MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
MUST_USE_RESULT static MaybeObject* StringReplaceGlobalRegExpWithEmptyString(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> regexp,
Handle<JSArray> last_match_info) {
ASSERT(subject->IsFlat());
bool is_global = regexp->GetFlags().is_global();
// Shortcut for simple non-regexp global replacements
if (is_global &&
regexp->TypeTag() == JSRegExp::ATOM) {
if (regexp->TypeTag() == JSRegExp::ATOM) {
Handle<String> empty_string = isolate->factory()->empty_string();
if (subject->IsOneByteRepresentation()) {
return StringReplaceAtomRegExpWithString<SeqOneByteString>(
isolate,
subject,
regexp,
empty_string,
last_match_info);
return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
isolate, subject, regexp, empty_string, last_match_info);
} else {
return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
isolate,
subject,
regexp,
empty_string,
last_match_info);
return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
isolate, subject, regexp, empty_string, last_match_info);
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
if (global_cache.HasException()) return Failure::Exception();
int32_t* current_match = global_cache.FetchNext();
if (current_match == NULL) {
if (global_cache.HasException()) return Failure::Exception();
return isolate->heap()->undefined_value();
return *subject;
}
int start = current_match[0];
@ -3142,23 +3122,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
isolate->factory()->NewRawTwoByteString(new_length));
}
if (!is_global) {
RegExpImpl::SetLastMatchInfo(
last_match_info, subject, capture_count, current_match);
if (start == end) {
return *subject;
} else {
if (start > 0) {
String::WriteToFlat(*subject, answer->GetChars(), 0, start);
}
if (end < subject_length) {
String::WriteToFlat(
*subject, answer->GetChars() + start, end, subject_length);
}
return *answer;
}
}
int prev = 0;
int position = 0;
@ -3167,8 +3130,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
end = current_match[1];
if (prev < start) {
// Add substring subject[prev;start] to answer string.
String::WriteToFlat(
*subject, answer->GetChars() + position, prev, start);
String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
position += start - prev;
}
prev = end;
@ -3210,7 +3172,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
}
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceGlobalRegExpWithString) {
ASSERT(args.length() == 4);
HandleScope scope(isolate);
@ -3220,21 +3182,23 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
if (!subject->IsFlat()) subject = FlattenGetString(subject);
ASSERT(regexp->GetFlags().is_global());
if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
if (!subject->IsFlat()) subject = FlattenGetString(subject);
if (replacement->length() == 0) {
if (subject->IsOneByteConvertible()) {
return StringReplaceRegExpWithEmptyString<SeqOneByteString>(
return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
isolate, subject, regexp, last_match_info);
} else {
return StringReplaceRegExpWithEmptyString<SeqTwoByteString>(
return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
isolate, subject, regexp, last_match_info);
}
}
return StringReplaceRegExpWithString(
if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
return StringReplaceGlobalRegExpWithString(
isolate, subject, regexp, replacement, last_match_info);
}

View File

@ -202,7 +202,7 @@ namespace internal {
F(StringLastIndexOf, 3, 1) \
F(StringLocaleCompare, 2, 1) \
F(SubString, 3, 1) \
F(StringReplaceRegExpWithString, 4, 1) \
F(StringReplaceGlobalRegExpWithString, 4, 1) \
F(StringReplaceOneCharWithString, 3, 1) \
F(StringMatch, 3, 1) \
F(StringTrim, 3, 1) \

View File

@ -219,60 +219,79 @@ function StringReplace(search, replace) {
}
var subject = TO_STRING_INLINE(this);
// Delegate to one of the regular expression variants if necessary.
// Decision tree for dispatch
// .. regexp search
// .... string replace
// ...... non-global search
// ........ empty string replace
// ........ non-empty string replace (with $-expansion)
// ...... global search
// ........ no need to circumvent last match info override
// ........ need to circument last match info override
// .... function replace
// ...... global search
// ...... non-global search
// .. string search
// .... special case that replaces with one single character
// ...... function replace
// ...... string replace (with $-expansion)
if (IS_REGEXP(search)) {
// Emulate RegExp.prototype.exec's side effect in step 5, even though
// Emulate RegExp.prototype.exec's side effect in step 5, even if
// value is discarded.
ToInteger(search.lastIndex);
%_Log('regexp', 'regexp-replace,%0r,%1S', [search, subject]);
if (IS_SPEC_FUNCTION(replace)) {
if (search.global) {
return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
} else {
return StringReplaceNonGlobalRegExpWithFunction(subject,
search,
replace);
}
} else {
if (lastMatchInfoOverride == null) {
var answer = %StringReplaceRegExpWithString(subject,
search,
TO_STRING_INLINE(replace),
lastMatchInfo);
if (IS_UNDEFINED(answer)) { // No match. Return subject string.
search.lastIndex = 0;
if (!IS_SPEC_FUNCTION(replace)) {
if (!search.global) {
// Non-global regexp search, string replace.
var match = DoRegExpExec(search, subject, 0);
if (match == null) {
search.lastIndex = 0
return subject;
}
if (search.global) search.lastIndex = 0;
return answer;
replace = TO_STRING_INLINE(replace);
if (replace.length == 0) {
return %_SubString(subject, 0, match[CAPTURE0]) +
%_SubString(subject, match[CAPTURE1], subject.length)
}
return ExpandReplacement(replace, subject, lastMatchInfo,
%_SubString(subject, 0, match[CAPTURE0])) +
%_SubString(subject, match[CAPTURE1], subject.length);
}
// Global regexp search, string replace.
search.lastIndex = 0;
if (lastMatchInfoOverride == null) {
return %StringReplaceGlobalRegExpWithString(
subject, search, replace, lastMatchInfo);
} else {
// We use this hack to detect whether StringReplaceRegExpWithString
// found at least one hit. In that case we need to remove any
// override.
var saved_subject = lastMatchInfo[LAST_SUBJECT_INDEX];
lastMatchInfo[LAST_SUBJECT_INDEX] = 0;
var answer = %StringReplaceRegExpWithString(subject,
search,
TO_STRING_INLINE(replace),
lastMatchInfo);
if (IS_UNDEFINED(answer)) { // No match. Return subject string.
search.lastIndex = 0;
lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
return subject;
}
var answer = %StringReplaceGlobalRegExpWithString(
subject, search, replace, lastMatchInfo);
if (%_IsSmi(lastMatchInfo[LAST_SUBJECT_INDEX])) {
lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
} else {
lastMatchInfoOverride = null;
}
if (search.global) search.lastIndex = 0;
return answer;
}
}
if (search.global) {
// Global regexp search, function replace.
return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
}
// Non-global regexp search, function replace.
return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace);
}
// Convert the search argument to a string and search for it.
search = TO_STRING_INLINE(search);
if (search.length == 1 &&
subject.length > 0xFF &&
IS_STRING(replace) &&
@ -295,8 +314,10 @@ function StringReplace(search, replace) {
} else {
reusableMatchInfo[CAPTURE0] = start;
reusableMatchInfo[CAPTURE1] = end;
replace = TO_STRING_INLINE(replace);
result = ExpandReplacement(replace, subject, reusableMatchInfo, result);
result = ExpandReplacement(TO_STRING_INLINE(replace),
subject,
reusableMatchInfo,
result);
}
return result + %_SubString(subject, end, subject.length);
@ -333,6 +354,31 @@ function ExpandReplacement(string, subject, matchInfo, result) {
} else if (peek == 39) { // $' - suffix
++position;
result += %_SubString(subject, matchInfo[CAPTURE1], subject.length);
} else if (peek >= 48 && peek <= 57) {
// Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
var scaled_index = (peek - 48) << 1;
var advance = 1;
var number_of_captures = NUMBER_OF_CAPTURES(matchInfo);
if (position + 1 < string.length) {
var next = %_StringCharCodeAt(string, position + 1);
if (next >= 48 && next <= 57) {
var new_scaled_index = scaled_index * 10 + ((next - 48) << 1);
if (new_scaled_index < number_of_captures) {
scaled_index = new_scaled_index;
advance = 2;
}
}
}
if (scaled_index != 0 && scaled_index < number_of_captures) {
var start = matchInfo[CAPTURE(scaled_index)];
if (start >= 0) {
result +=
%_SubString(subject, start, matchInfo[CAPTURE(scaled_index + 1)]);
}
position += advance;
} else {
result += '$';
}
} else {
result += '$';
}

View File

@ -212,3 +212,56 @@ var str = 'She sells seashells by the seashore.';
var re = /sh/g;
assertEquals('She sells sea$schells by the sea$schore.',
str.replace(re,"$$" + 'sch'))
var replace_obj = { length: 0, toString: function() { return "x"; }};
assertEquals("axc", "abc".replace(/b/, replace_obj));
var search_obj = { length: 1, toString: function() { return "b"; }};
assertEquals("axc", "abc".replace(search_obj, function() { return "x"; }));
var regexp99pattern = "";
var subject = "";
for (var i = 0; i < 99; i++) {
regexp99pattern += "(.)";
subject += String.fromCharCode(i + 24);
}
function testIndices99(re) {
// Test $1 .. $99
for (var i = 1; i < 100; i++) {
assertEquals(String.fromCharCode(i + 23),
subject.replace(re, "$" + i));
}
// Test $01 .. $09
for (var i = 1; i < 10; i++) {
assertEquals(String.fromCharCode(i + 23),
subject.replace(re, "$0" + i));
}
assertEquals("$0", subject.replace(re, "$0"));
assertEquals("$00", subject.replace(re, "$00"));
assertEquals(String.fromCharCode(10 + 23) + "0",
subject.replace(re, "$100"));
}
testIndices99(new RegExp(regexp99pattern));
testIndices99(new RegExp(regexp99pattern, "g"));
var regexp59pattern = "";
for (var i = 0; i < 59; i++) regexp59pattern += "(.)";
function testIndices59(re) {
// Test $60 .. $99. Captures reach up to 59. Per spec, how to deal
// with this is implementation-dependent. We interpret $60 as $6
// followed by "0", $61 as $6, followed by "1" and so on.
var tail = subject.substr(59);
for (var i = 60; i < 100; i++) {
assertEquals(String.fromCharCode(i / 10 + 23) + (i % 10) + tail,
subject.replace(re, "$" + i));
}
}
testIndices59(new RegExp(regexp59pattern));
testIndices59(new RegExp(regexp59pattern, "g"));