Refactor implementation for String.prototype.replace.
R=ulan@chromium.org BUG= Review URL: https://chromiumcodereview.appspot.com/12177015 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13761 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
fb807eeb0d
commit
4cbe7100e6
@ -687,7 +687,7 @@ Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
|
||||
Handle<String> subject,
|
||||
int capture_count,
|
||||
int32_t* match) {
|
||||
CHECK(last_match_info->HasFastObjectElements());
|
||||
ASSERT(last_match_info->HasFastObjectElements());
|
||||
int capture_register_count = (capture_count + 1) * 2;
|
||||
last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
|
||||
AssertNoAllocation no_gc;
|
||||
|
@ -2898,7 +2898,7 @@ void FindStringIndicesDispatch(Isolate* isolate,
|
||||
|
||||
|
||||
template<typename ResultSeqString>
|
||||
MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
|
||||
MUST_USE_RESULT static MaybeObject* StringReplaceGlobalAtomRegExpWithString(
|
||||
Isolate* isolate,
|
||||
Handle<String> subject,
|
||||
Handle<JSRegExp> pattern_regexp,
|
||||
@ -2921,9 +2921,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
|
||||
isolate, *subject, pattern, &indices, 0xffffffff, zone);
|
||||
|
||||
int matches = indices.length();
|
||||
if (matches == 0) {
|
||||
return isolate->heap()->undefined_value();
|
||||
}
|
||||
if (matches == 0) return *subject;
|
||||
|
||||
// Detect integer overflow.
|
||||
int64_t result_len_64 =
|
||||
@ -2983,7 +2981,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
|
||||
}
|
||||
|
||||
|
||||
MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
|
||||
MUST_USE_RESULT static MaybeObject* StringReplaceGlobalRegExpWithString(
|
||||
Isolate* isolate,
|
||||
Handle<String> subject,
|
||||
Handle<JSRegExp> regexp,
|
||||
@ -2992,7 +2990,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
|
||||
ASSERT(subject->IsFlat());
|
||||
ASSERT(replacement->IsFlat());
|
||||
|
||||
bool is_global = regexp->GetFlags().is_global();
|
||||
int capture_count = regexp->CaptureCount();
|
||||
int subject_length = subject->length();
|
||||
|
||||
@ -3005,33 +3002,30 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
|
||||
subject_length);
|
||||
|
||||
// Shortcut for simple non-regexp global replacements
|
||||
if (is_global &&
|
||||
regexp->TypeTag() == JSRegExp::ATOM &&
|
||||
simple_replace) {
|
||||
if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
|
||||
if (subject->IsOneByteConvertible() &&
|
||||
replacement->IsOneByteConvertible()) {
|
||||
return StringReplaceAtomRegExpWithString<SeqOneByteString>(
|
||||
return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
|
||||
isolate, subject, regexp, replacement, last_match_info);
|
||||
} else {
|
||||
return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
|
||||
return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
|
||||
isolate, subject, regexp, replacement, last_match_info);
|
||||
}
|
||||
}
|
||||
|
||||
RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
|
||||
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
|
||||
if (global_cache.HasException()) return Failure::Exception();
|
||||
|
||||
int32_t* current_match = global_cache.FetchNext();
|
||||
if (current_match == NULL) {
|
||||
if (global_cache.HasException()) return Failure::Exception();
|
||||
return isolate->heap()->undefined_value();
|
||||
return *subject;
|
||||
}
|
||||
|
||||
// Guessing the number of parts that the final result string is built
|
||||
// from. Global regexps can match any number of times, so we guess
|
||||
// conservatively.
|
||||
int expected_parts =
|
||||
(compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1;
|
||||
int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
|
||||
ReplacementStringBuilder builder(isolate->heap(),
|
||||
subject,
|
||||
expected_parts);
|
||||
@ -3063,9 +3057,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
|
||||
}
|
||||
prev = end;
|
||||
|
||||
// Only continue checking for global regexps.
|
||||
if (!is_global) break;
|
||||
|
||||
current_match = global_cache.FetchNext();
|
||||
} while (current_match != NULL);
|
||||
|
||||
@ -3086,43 +3077,32 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
|
||||
|
||||
|
||||
template <typename ResultSeqString>
|
||||
MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
|
||||
MUST_USE_RESULT static MaybeObject* StringReplaceGlobalRegExpWithEmptyString(
|
||||
Isolate* isolate,
|
||||
Handle<String> subject,
|
||||
Handle<JSRegExp> regexp,
|
||||
Handle<JSArray> last_match_info) {
|
||||
ASSERT(subject->IsFlat());
|
||||
|
||||
bool is_global = regexp->GetFlags().is_global();
|
||||
|
||||
// Shortcut for simple non-regexp global replacements
|
||||
if (is_global &&
|
||||
regexp->TypeTag() == JSRegExp::ATOM) {
|
||||
if (regexp->TypeTag() == JSRegExp::ATOM) {
|
||||
Handle<String> empty_string = isolate->factory()->empty_string();
|
||||
if (subject->IsOneByteRepresentation()) {
|
||||
return StringReplaceAtomRegExpWithString<SeqOneByteString>(
|
||||
isolate,
|
||||
subject,
|
||||
regexp,
|
||||
empty_string,
|
||||
last_match_info);
|
||||
return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
|
||||
isolate, subject, regexp, empty_string, last_match_info);
|
||||
} else {
|
||||
return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
|
||||
isolate,
|
||||
subject,
|
||||
regexp,
|
||||
empty_string,
|
||||
last_match_info);
|
||||
return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
|
||||
isolate, subject, regexp, empty_string, last_match_info);
|
||||
}
|
||||
}
|
||||
|
||||
RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate);
|
||||
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
|
||||
if (global_cache.HasException()) return Failure::Exception();
|
||||
|
||||
int32_t* current_match = global_cache.FetchNext();
|
||||
if (current_match == NULL) {
|
||||
if (global_cache.HasException()) return Failure::Exception();
|
||||
return isolate->heap()->undefined_value();
|
||||
return *subject;
|
||||
}
|
||||
|
||||
int start = current_match[0];
|
||||
@ -3142,23 +3122,6 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
|
||||
isolate->factory()->NewRawTwoByteString(new_length));
|
||||
}
|
||||
|
||||
if (!is_global) {
|
||||
RegExpImpl::SetLastMatchInfo(
|
||||
last_match_info, subject, capture_count, current_match);
|
||||
if (start == end) {
|
||||
return *subject;
|
||||
} else {
|
||||
if (start > 0) {
|
||||
String::WriteToFlat(*subject, answer->GetChars(), 0, start);
|
||||
}
|
||||
if (end < subject_length) {
|
||||
String::WriteToFlat(
|
||||
*subject, answer->GetChars() + start, end, subject_length);
|
||||
}
|
||||
return *answer;
|
||||
}
|
||||
}
|
||||
|
||||
int prev = 0;
|
||||
int position = 0;
|
||||
|
||||
@ -3167,8 +3130,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
|
||||
end = current_match[1];
|
||||
if (prev < start) {
|
||||
// Add substring subject[prev;start] to answer string.
|
||||
String::WriteToFlat(
|
||||
*subject, answer->GetChars() + position, prev, start);
|
||||
String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
|
||||
position += start - prev;
|
||||
}
|
||||
prev = end;
|
||||
@ -3210,7 +3172,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
|
||||
}
|
||||
|
||||
|
||||
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
|
||||
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceGlobalRegExpWithString) {
|
||||
ASSERT(args.length() == 4);
|
||||
|
||||
HandleScope scope(isolate);
|
||||
@ -3220,21 +3182,23 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) {
|
||||
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
|
||||
CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
|
||||
|
||||
if (!subject->IsFlat()) subject = FlattenGetString(subject);
|
||||
ASSERT(regexp->GetFlags().is_global());
|
||||
|
||||
if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
|
||||
if (!subject->IsFlat()) subject = FlattenGetString(subject);
|
||||
|
||||
if (replacement->length() == 0) {
|
||||
if (subject->IsOneByteConvertible()) {
|
||||
return StringReplaceRegExpWithEmptyString<SeqOneByteString>(
|
||||
return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
|
||||
isolate, subject, regexp, last_match_info);
|
||||
} else {
|
||||
return StringReplaceRegExpWithEmptyString<SeqTwoByteString>(
|
||||
return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
|
||||
isolate, subject, regexp, last_match_info);
|
||||
}
|
||||
}
|
||||
|
||||
return StringReplaceRegExpWithString(
|
||||
if (!replacement->IsFlat()) replacement = FlattenGetString(replacement);
|
||||
|
||||
return StringReplaceGlobalRegExpWithString(
|
||||
isolate, subject, regexp, replacement, last_match_info);
|
||||
}
|
||||
|
||||
|
@ -202,7 +202,7 @@ namespace internal {
|
||||
F(StringLastIndexOf, 3, 1) \
|
||||
F(StringLocaleCompare, 2, 1) \
|
||||
F(SubString, 3, 1) \
|
||||
F(StringReplaceRegExpWithString, 4, 1) \
|
||||
F(StringReplaceGlobalRegExpWithString, 4, 1) \
|
||||
F(StringReplaceOneCharWithString, 3, 1) \
|
||||
F(StringMatch, 3, 1) \
|
||||
F(StringTrim, 3, 1) \
|
||||
|
112
src/string.js
112
src/string.js
@ -219,60 +219,79 @@ function StringReplace(search, replace) {
|
||||
}
|
||||
var subject = TO_STRING_INLINE(this);
|
||||
|
||||
// Delegate to one of the regular expression variants if necessary.
|
||||
// Decision tree for dispatch
|
||||
// .. regexp search
|
||||
// .... string replace
|
||||
// ...... non-global search
|
||||
// ........ empty string replace
|
||||
// ........ non-empty string replace (with $-expansion)
|
||||
// ...... global search
|
||||
// ........ no need to circumvent last match info override
|
||||
// ........ need to circumvent last match info override
|
||||
// .... function replace
|
||||
// ...... global search
|
||||
// ...... non-global search
|
||||
// .. string search
|
||||
// .... special case that replaces with one single character
|
||||
// ...... function replace
|
||||
// ...... string replace (with $-expansion)
|
||||
|
||||
if (IS_REGEXP(search)) {
|
||||
// Emulate RegExp.prototype.exec's side effect in step 5, even though
|
||||
// Emulate RegExp.prototype.exec's side effect in step 5, even if
|
||||
// value is discarded.
|
||||
ToInteger(search.lastIndex);
|
||||
%_Log('regexp', 'regexp-replace,%0r,%1S', [search, subject]);
|
||||
if (IS_SPEC_FUNCTION(replace)) {
|
||||
if (search.global) {
|
||||
return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
|
||||
} else {
|
||||
return StringReplaceNonGlobalRegExpWithFunction(subject,
|
||||
search,
|
||||
replace);
|
||||
}
|
||||
} else {
|
||||
if (lastMatchInfoOverride == null) {
|
||||
var answer = %StringReplaceRegExpWithString(subject,
|
||||
search,
|
||||
TO_STRING_INLINE(replace),
|
||||
lastMatchInfo);
|
||||
if (IS_UNDEFINED(answer)) { // No match. Return subject string.
|
||||
search.lastIndex = 0;
|
||||
|
||||
if (!IS_SPEC_FUNCTION(replace)) {
|
||||
if (!search.global) {
|
||||
// Non-global regexp search, string replace.
|
||||
var match = DoRegExpExec(search, subject, 0);
|
||||
if (match == null) {
|
||||
search.lastIndex = 0
|
||||
return subject;
|
||||
}
|
||||
if (search.global) search.lastIndex = 0;
|
||||
return answer;
|
||||
replace = TO_STRING_INLINE(replace);
|
||||
if (replace.length == 0) {
|
||||
return %_SubString(subject, 0, match[CAPTURE0]) +
|
||||
%_SubString(subject, match[CAPTURE1], subject.length)
|
||||
}
|
||||
return ExpandReplacement(replace, subject, lastMatchInfo,
|
||||
%_SubString(subject, 0, match[CAPTURE0])) +
|
||||
%_SubString(subject, match[CAPTURE1], subject.length);
|
||||
}
|
||||
|
||||
// Global regexp search, string replace.
|
||||
search.lastIndex = 0;
|
||||
if (lastMatchInfoOverride == null) {
|
||||
return %StringReplaceGlobalRegExpWithString(
|
||||
subject, search, replace, lastMatchInfo);
|
||||
} else {
|
||||
// We use this hack to detect whether StringReplaceRegExpWithString
|
||||
// found at least one hit. In that case we need to remove any
|
||||
// override.
|
||||
var saved_subject = lastMatchInfo[LAST_SUBJECT_INDEX];
|
||||
lastMatchInfo[LAST_SUBJECT_INDEX] = 0;
|
||||
var answer = %StringReplaceRegExpWithString(subject,
|
||||
search,
|
||||
TO_STRING_INLINE(replace),
|
||||
lastMatchInfo);
|
||||
if (IS_UNDEFINED(answer)) { // No match. Return subject string.
|
||||
search.lastIndex = 0;
|
||||
lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
|
||||
return subject;
|
||||
}
|
||||
var answer = %StringReplaceGlobalRegExpWithString(
|
||||
subject, search, replace, lastMatchInfo);
|
||||
if (%_IsSmi(lastMatchInfo[LAST_SUBJECT_INDEX])) {
|
||||
lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
|
||||
} else {
|
||||
lastMatchInfoOverride = null;
|
||||
}
|
||||
if (search.global) search.lastIndex = 0;
|
||||
return answer;
|
||||
}
|
||||
}
|
||||
|
||||
if (search.global) {
|
||||
// Global regexp search, function replace.
|
||||
return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
|
||||
}
|
||||
// Non-global regexp search, function replace.
|
||||
return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace);
|
||||
}
|
||||
|
||||
// Convert the search argument to a string and search for it.
|
||||
search = TO_STRING_INLINE(search);
|
||||
|
||||
if (search.length == 1 &&
|
||||
subject.length > 0xFF &&
|
||||
IS_STRING(replace) &&
|
||||
@ -295,8 +314,10 @@ function StringReplace(search, replace) {
|
||||
} else {
|
||||
reusableMatchInfo[CAPTURE0] = start;
|
||||
reusableMatchInfo[CAPTURE1] = end;
|
||||
replace = TO_STRING_INLINE(replace);
|
||||
result = ExpandReplacement(replace, subject, reusableMatchInfo, result);
|
||||
result = ExpandReplacement(TO_STRING_INLINE(replace),
|
||||
subject,
|
||||
reusableMatchInfo,
|
||||
result);
|
||||
}
|
||||
|
||||
return result + %_SubString(subject, end, subject.length);
|
||||
@ -333,6 +354,31 @@ function ExpandReplacement(string, subject, matchInfo, result) {
|
||||
} else if (peek == 39) { // $' - suffix
|
||||
++position;
|
||||
result += %_SubString(subject, matchInfo[CAPTURE1], subject.length);
|
||||
} else if (peek >= 48 && peek <= 57) {
|
||||
// Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
|
||||
var scaled_index = (peek - 48) << 1;
|
||||
var advance = 1;
|
||||
var number_of_captures = NUMBER_OF_CAPTURES(matchInfo);
|
||||
if (position + 1 < string.length) {
|
||||
var next = %_StringCharCodeAt(string, position + 1);
|
||||
if (next >= 48 && next <= 57) {
|
||||
var new_scaled_index = scaled_index * 10 + ((next - 48) << 1);
|
||||
if (new_scaled_index < number_of_captures) {
|
||||
scaled_index = new_scaled_index;
|
||||
advance = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (scaled_index != 0 && scaled_index < number_of_captures) {
|
||||
var start = matchInfo[CAPTURE(scaled_index)];
|
||||
if (start >= 0) {
|
||||
result +=
|
||||
%_SubString(subject, start, matchInfo[CAPTURE(scaled_index + 1)]);
|
||||
}
|
||||
position += advance;
|
||||
} else {
|
||||
result += '$';
|
||||
}
|
||||
} else {
|
||||
result += '$';
|
||||
}
|
||||
|
@ -212,3 +212,56 @@ var str = 'She sells seashells by the seashore.';
|
||||
var re = /sh/g;
|
||||
assertEquals('She sells sea$schells by the sea$schore.',
|
||||
str.replace(re,"$$" + 'sch'))
|
||||
|
||||
|
||||
var replace_obj = { length: 0, toString: function() { return "x"; }};
|
||||
assertEquals("axc", "abc".replace(/b/, replace_obj));
|
||||
|
||||
var search_obj = { length: 1, toString: function() { return "b"; }};
|
||||
assertEquals("axc", "abc".replace(search_obj, function() { return "x"; }));
|
||||
|
||||
var regexp99pattern = "";
|
||||
var subject = "";
|
||||
for (var i = 0; i < 99; i++) {
|
||||
regexp99pattern += "(.)";
|
||||
subject += String.fromCharCode(i + 24);
|
||||
}
|
||||
|
||||
function testIndices99(re) {
|
||||
// Test $1 .. $99
|
||||
for (var i = 1; i < 100; i++) {
|
||||
assertEquals(String.fromCharCode(i + 23),
|
||||
subject.replace(re, "$" + i));
|
||||
}
|
||||
|
||||
// Test $01 .. $09
|
||||
for (var i = 1; i < 10; i++) {
|
||||
assertEquals(String.fromCharCode(i + 23),
|
||||
subject.replace(re, "$0" + i));
|
||||
}
|
||||
|
||||
assertEquals("$0", subject.replace(re, "$0"));
|
||||
assertEquals("$00", subject.replace(re, "$00"));
|
||||
assertEquals(String.fromCharCode(10 + 23) + "0",
|
||||
subject.replace(re, "$100"));
|
||||
}
|
||||
|
||||
testIndices99(new RegExp(regexp99pattern));
|
||||
testIndices99(new RegExp(regexp99pattern, "g"));
|
||||
|
||||
var regexp59pattern = "";
|
||||
for (var i = 0; i < 59; i++) regexp59pattern += "(.)";
|
||||
|
||||
function testIndices59(re) {
|
||||
// Test $60 .. $99. Captures reach up to 59. Per spec, how to deal
|
||||
// with this is implementation-dependent. We interpret $60 as $6
|
||||
// followed by "0", $61 as $6, followed by "1" and so on.
|
||||
var tail = subject.substr(59);
|
||||
for (var i = 60; i < 100; i++) {
|
||||
assertEquals(String.fromCharCode(i / 10 + 23) + (i % 10) + tail,
|
||||
subject.replace(re, "$" + i));
|
||||
}
|
||||
}
|
||||
|
||||
testIndices59(new RegExp(regexp59pattern));
|
||||
testIndices59(new RegExp(regexp59pattern, "g"));
|
||||
|
Loading…
Reference in New Issue
Block a user