Run string replace regexp with function in C++ code loop.

Reuses the result array to save on allocation.
Matches Safari's behavior.

Review URL: http://codereview.chromium.org/1109010

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4269 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2010-03-25 12:57:58 +00:00
parent 0737ec8dcb
commit 498b074bd0
4 changed files with 704 additions and 127 deletions

View File

@ -344,6 +344,7 @@ function RegExpToString() {
// on the captures array of the last successful match and the subject string // on the captures array of the last successful match and the subject string
// of the last successful match. // of the last successful match.
function RegExpGetLastMatch() { function RegExpGetLastMatch() {
if (lastMatchInfoOverride) { return lastMatchInfoOverride[0]; }
var regExpSubject = LAST_SUBJECT(lastMatchInfo); var regExpSubject = LAST_SUBJECT(lastMatchInfo);
return SubString(regExpSubject, return SubString(regExpSubject,
lastMatchInfo[CAPTURE0], lastMatchInfo[CAPTURE0],
@ -352,6 +353,11 @@ function RegExpGetLastMatch() {
function RegExpGetLastParen() { function RegExpGetLastParen() {
if (lastMatchInfoOverride) {
var override = lastMatchInfoOverride;
if (override.length <= 3) return '';
return override[override.length - 3];
}
var length = NUMBER_OF_CAPTURES(lastMatchInfo); var length = NUMBER_OF_CAPTURES(lastMatchInfo);
if (length <= 2) return ''; // There were no captures. if (length <= 2) return ''; // There were no captures.
// We match the SpiderMonkey behavior: return the substring defined by the // We match the SpiderMonkey behavior: return the substring defined by the
@ -368,17 +374,32 @@ function RegExpGetLastParen() {
function RegExpGetLeftContext() { function RegExpGetLeftContext() {
return SubString(LAST_SUBJECT(lastMatchInfo), var start_index;
0, var subject;
lastMatchInfo[CAPTURE0]); if (!lastMatchInfoOverride) {
start_index = lastMatchInfo[CAPTURE0];
subject = LAST_SUBJECT(lastMatchInfo);
} else {
var override = lastMatchInfoOverride;
start_index = override[override.length - 2];
subject = override[override.length - 1];
}
return SubString(subject, 0, start_index);
} }
function RegExpGetRightContext() { function RegExpGetRightContext() {
var subject = LAST_SUBJECT(lastMatchInfo); var start_index;
return SubString(subject, var subject;
lastMatchInfo[CAPTURE1], if (!lastMatchInfoOverride) {
subject.length); start_index = lastMatchInfo[CAPTURE1];
subject = LAST_SUBJECT(lastMatchInfo);
} else {
var override = lastMatchInfoOverride;
subject = override[override.length - 1];
start_index = override[override.length - 2] + subject.length;
}
return SubString(subject, start_index, subject.length);
} }
@ -387,6 +408,10 @@ function RegExpGetRightContext() {
// called with indices from 1 to 9. // called with indices from 1 to 9.
function RegExpMakeCaptureGetter(n) { function RegExpMakeCaptureGetter(n) {
return function() { return function() {
if (lastMatchInfoOverride) {
if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n];
return '';
}
var index = n * 2; var index = n * 2;
if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
var matchStart = lastMatchInfo[CAPTURE(index)]; var matchStart = lastMatchInfo[CAPTURE(index)];
@ -411,6 +436,12 @@ var lastMatchInfo = [
0, // REGEXP_FIRST_CAPTURE + 1 0, // REGEXP_FIRST_CAPTURE + 1
]; ];
// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function.
var lastMatchInfoOverride = null;
// ------------------------------------------------------------------- // -------------------------------------------------------------------
function SetupRegExp() { function SetupRegExp() {

View File

@ -1567,9 +1567,91 @@ static Object* Runtime_CharFromCode(Arguments args) {
return CharFromCode(args[0]); return CharFromCode(args[0]);
} }
class FixedArrayBuilder {
public:
explicit FixedArrayBuilder(int initial_capacity)
: array_(Factory::NewFixedArrayWithHoles(initial_capacity)),
length_(0) {
// Require a non-zero initial size. Ensures that doubling the size to
// extend the array will work.
ASSERT(initial_capacity > 0);
}
explicit FixedArrayBuilder(Handle<FixedArray> backing_store)
: array_(backing_store),
length_(0) {
// Require a non-zero initial size. Ensures that doubling the size to
// extend the array will work.
ASSERT(backing_store->length() > 0);
}
bool HasCapacity(int elements) {
int length = array_->length();
int required_length = length_ + elements;
return (length >= required_length);
}
void EnsureCapacity(int elements) {
int length = array_->length();
int required_length = length_ + elements;
if (length < required_length) {
int new_length = length;
do {
new_length *= 2;
} while (new_length < required_length);
Handle<FixedArray> extended_array =
Factory::NewFixedArrayWithHoles(new_length);
array_->CopyTo(0, *extended_array, 0, length_);
array_ = extended_array;
}
}
void Add(Object* value) {
ASSERT(length_ < capacity());
array_->set(length_, value);
length_++;
}
void Add(Smi* value) {
ASSERT(length_ < capacity());
array_->set(length_, value);
length_++;
}
Handle<FixedArray> array() {
return array_;
}
int length() {
return length_;
}
int capacity() {
return array_->length();
}
Handle<JSArray> ToJSArray() {
Handle<JSArray> result_array = Factory::NewJSArrayWithElements(array_);
result_array->set_length(Smi::FromInt(length_));
return result_array;
}
Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
target_array->set_elements(*array_);
target_array->set_length(Smi::FromInt(length_));
return target_array;
}
private:
Handle<FixedArray> array_;
int length_;
};
// Forward declarations. // Forward declarations.
static const int kStringBuilderConcatHelperLengthBits = 11; const int kStringBuilderConcatHelperLengthBits = 11;
static const int kStringBuilderConcatHelperPositionBits = 19; const int kStringBuilderConcatHelperPositionBits = 19;
template <typename schar> template <typename schar>
static inline void StringBuilderConcatHelper(String*, static inline void StringBuilderConcatHelper(String*,
@ -1577,15 +1659,19 @@ static inline void StringBuilderConcatHelper(String*,
FixedArray*, FixedArray*,
int); int);
typedef BitField<int, 0, 11> StringBuilderSubstringLength; typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
typedef BitField<int, 11, 19> StringBuilderSubstringPosition; StringBuilderSubstringLength;
typedef BitField<int,
kStringBuilderConcatHelperLengthBits,
kStringBuilderConcatHelperPositionBits>
StringBuilderSubstringPosition;
class ReplacementStringBuilder { class ReplacementStringBuilder {
public: public:
ReplacementStringBuilder(Handle<String> subject, int estimated_part_count) ReplacementStringBuilder(Handle<String> subject, int estimated_part_count)
: subject_(subject), : array_builder_(estimated_part_count),
parts_(Factory::NewFixedArray(estimated_part_count)), subject_(subject),
part_count_(0),
character_count_(0), character_count_(0),
is_ascii_(subject->IsAsciiRepresentation()) { is_ascii_(subject->IsAsciiRepresentation()) {
// Require a non-zero initial size. Ensures that doubling the size to // Require a non-zero initial size. Ensures that doubling the size to
@ -1593,38 +1679,35 @@ class ReplacementStringBuilder {
ASSERT(estimated_part_count > 0); ASSERT(estimated_part_count > 0);
} }
void EnsureCapacity(int elements) { static inline void AddSubjectSlice(FixedArrayBuilder* builder,
int length = parts_->length(); int from,
int required_length = part_count_ + elements; int to) {
if (length < required_length) {
int new_length = length;
do {
new_length *= 2;
} while (new_length < required_length);
Handle<FixedArray> extended_array =
Factory::NewFixedArray(new_length);
parts_->CopyTo(0, *extended_array, 0, part_count_);
parts_ = extended_array;
}
}
void AddSubjectSlice(int from, int to) {
ASSERT(from >= 0); ASSERT(from >= 0);
int length = to - from; int length = to - from;
ASSERT(length > 0); ASSERT(length > 0);
// Can we encode the slice in 11 bits for length and 19 bits for
// start position - as used by StringBuilderConcatHelper?
if (StringBuilderSubstringLength::is_valid(length) && if (StringBuilderSubstringLength::is_valid(length) &&
StringBuilderSubstringPosition::is_valid(from)) { StringBuilderSubstringPosition::is_valid(from)) {
int encoded_slice = StringBuilderSubstringLength::encode(length) | int encoded_slice = StringBuilderSubstringLength::encode(length) |
StringBuilderSubstringPosition::encode(from); StringBuilderSubstringPosition::encode(from);
AddElement(Smi::FromInt(encoded_slice)); builder->Add(Smi::FromInt(encoded_slice));
} else { } else {
// Otherwise encode as two smis. // Otherwise encode as two smis.
AddElement(Smi::FromInt(-length)); builder->Add(Smi::FromInt(-length));
AddElement(Smi::FromInt(from)); builder->Add(Smi::FromInt(from));
} }
IncrementCharacterCount(length); }
void EnsureCapacity(int elements) {
array_builder_.EnsureCapacity(elements);
}
void AddSubjectSlice(int from, int to) {
AddSubjectSlice(&array_builder_, from, to);
// Can we encode the slice in 11 bits for length and 19 bits for
// start position - as used by StringBuilderConcatHelper?
IncrementCharacterCount(to - from);
} }
@ -1640,7 +1723,7 @@ class ReplacementStringBuilder {
Handle<String> ToString() { Handle<String> ToString() {
if (part_count_ == 0) { if (array_builder_.length() == 0) {
return Factory::empty_string(); return Factory::empty_string();
} }
@ -1652,8 +1735,8 @@ class ReplacementStringBuilder {
char* char_buffer = seq->GetChars(); char* char_buffer = seq->GetChars();
StringBuilderConcatHelper(*subject_, StringBuilderConcatHelper(*subject_,
char_buffer, char_buffer,
*parts_, *array_builder_.array(),
part_count_); array_builder_.length());
} else { } else {
// Non-ASCII. // Non-ASCII.
joined_string = NewRawTwoByteString(character_count_); joined_string = NewRawTwoByteString(character_count_);
@ -1662,8 +1745,8 @@ class ReplacementStringBuilder {
uc16* char_buffer = seq->GetChars(); uc16* char_buffer = seq->GetChars();
StringBuilderConcatHelper(*subject_, StringBuilderConcatHelper(*subject_,
char_buffer, char_buffer,
*parts_, *array_builder_.array(),
part_count_); array_builder_.length());
} }
return joined_string; return joined_string;
} }
@ -1676,8 +1759,14 @@ class ReplacementStringBuilder {
character_count_ += by; character_count_ += by;
} }
private: Handle<JSArray> GetParts() {
Handle<JSArray> result =
Factory::NewJSArrayWithElements(array_builder_.array());
result->set_length(Smi::FromInt(array_builder_.length()));
return result;
}
private:
Handle<String> NewRawAsciiString(int size) { Handle<String> NewRawAsciiString(int size) {
CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(size), String); CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(size), String);
} }
@ -1690,14 +1779,12 @@ class ReplacementStringBuilder {
void AddElement(Object* element) { void AddElement(Object* element) {
ASSERT(element->IsSmi() || element->IsString()); ASSERT(element->IsSmi() || element->IsString());
ASSERT(parts_->length() > part_count_); ASSERT(array_builder_.capacity() > array_builder_.length());
parts_->set(part_count_, element); array_builder_.Add(element);
part_count_++;
} }
FixedArrayBuilder array_builder_;
Handle<String> subject_; Handle<String> subject_;
Handle<FixedArray> parts_;
int part_count_;
int character_count_; int character_count_;
bool is_ascii_; bool is_ascii_;
}; };
@ -2105,7 +2192,6 @@ static Object* Runtime_StringReplaceRegExpWithString(Arguments args) {
} }
// Cap on the maximal shift in the Boyer-Moore implementation. By setting a // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
// limit, we can fix the size of tables. // limit, we can fix the size of tables.
static const int kBMMaxShift = 0xff; static const int kBMMaxShift = 0xff;
@ -2869,6 +2955,468 @@ static Object* Runtime_StringMatch(Arguments args) {
} }
// Upper bound on the builder entries one match can add: the match entry
// itself plus a subject slice on either side of it. A slice whose length or
// position does not fit the packed single-smi encoding takes two smis, hence:
// Two smis before and after the match, for very long strings.
const int kMaxBuilderEntriesPerRegExpMatch = 5;
// Records the match [match_start, match_end) of `subject` in
// `last_match_info` for a pattern that has no capture groups, so the only
// capture stored is the whole match.
static void SetLastMatchInfoNoCaptures(Handle<String> subject,
                                       Handle<JSArray> last_match_info,
                                       int match_start,
                                       int match_end) {
  // The whole match occupies one start/end register pair.
  const int kWholeMatchRegisters = 2;

  // Grow the array before entering the no-allocation section below.
  last_match_info->EnsureSize(kWholeMatchRegisters +
                              RegExpImpl::kLastMatchOverhead);

  AssertNoAllocation no_gc;
  FixedArray* match_info = FixedArray::cast(last_match_info->elements());
  RegExpImpl::SetLastCaptureCount(match_info, kWholeMatchRegisters);
  RegExpImpl::SetLastInput(match_info, *subject);
  RegExpImpl::SetLastSubject(match_info, *subject);
  RegExpImpl::SetCapture(match_info, 0, match_start);
  RegExpImpl::SetCapture(match_info, 1, match_end);
}
// Finds every occurrence of `pattern_char` in `subject` after position
// *match_pos. For each occurrence, the unmatched text in front of it (if any)
// is added to `builder` as a subject slice, followed by `pattern` itself.
//
// Returns false when `builder` can no longer guarantee room for another
// match; *match_pos then holds the position of the last match handled so the
// caller can grow the builder and call again. Returns true once the scan is
// complete, after adding any text that follows the last match; *match_pos is
// the position of the last match, or unchanged if there was none.
template <typename schar>
static bool SearchCharMultiple(Vector<schar> subject,
                               String* pattern,
                               schar pattern_char,
                               FixedArrayBuilder* builder,
                               int* match_pos) {
  const int subject_length = subject.length();
  // Position of the most recent match.
  int last_match = *match_pos;

  while (last_match < subject_length) {
    if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
      *match_pos = last_match;
      return false;
    }
    // The next search starts right behind the previous one-character match.
    const int search_from = last_match + 1;
    const int found = SingleCharIndexOf(subject, pattern_char, search_from);
    if (found < 0) break;  // No further occurrences.

    if (found > search_from) {
      ReplacementStringBuilder::AddSubjectSlice(builder, search_from, found);
    }
    last_match = found;
    builder->Add(pattern);
  }

  // Whatever follows the last match is passed through as a slice.
  const int tail_start = last_match + 1;
  if (tail_start < subject_length) {
    ReplacementStringBuilder::AddSubjectSlice(builder,
                                              tail_start,
                                              subject_length);
  }
  *match_pos = last_match;
  return true;
}
// Collects all occurrences of a one-character `pattern` in the flat string
// `subject` into `builder`, interleaved with the unmatched parts of the
// subject. On success the last occurrence is recorded in `last_match_info`
// and true is returned; returns false if the character does not occur.
static bool SearchCharMultiple(Handle<String> subject,
                               Handle<String> pattern,
                               Handle<JSArray> last_match_info,
                               FixedArrayBuilder* builder) {
  ASSERT(subject->IsFlat());
  ASSERT_EQ(1, pattern->length());
  uc16 pattern_char = pattern->Get(0);
  // Pretend the previous match sat just in front of the first character.
  int match_pos = -1;

  // Each round guarantees builder capacity up front and then scans without
  // allocating until the scan either finishes or needs more room.
  bool search_done = false;
  while (!search_done) {
    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
    AssertNoAllocation no_gc;
    if (!subject->IsAsciiRepresentation()) {
      Vector<const uc16> two_byte_subject = subject->ToUC16Vector();
      search_done = SearchCharMultiple<const uc16>(two_byte_subject,
                                                   *pattern,
                                                   pattern_char,
                                                   builder,
                                                   &match_pos);
    } else if (pattern_char > String::kMaxAsciiCharCode) {
      // A character outside the ASCII range cannot occur in an ASCII subject.
      search_done = true;
    } else {
      Vector<const char> ascii_subject = subject->ToAsciiVector();
      char ascii_pattern_char = static_cast<char>(pattern_char);
      search_done = SearchCharMultiple<const char>(ascii_subject,
                                                   *pattern,
                                                   ascii_pattern_char,
                                                   builder,
                                                   &match_pos);
    }
  }

  if (match_pos < 0) return false;  // No matches at all.

  SetLastMatchInfoNoCaptures(subject,
                             last_match_info,
                             match_pos,
                             match_pos + 1);
  return true;
}
// Finds every occurrence of `pattern_string` in `subject` after the match at
// *match_pos. For each occurrence, the unmatched text between the end of the
// previous match and the new one (if any) is added to `builder` as a subject
// slice, followed by `pattern` itself.
//
// Return value protocol (the caller loops until this returns true):
//   false - `builder` can no longer guarantee room for another match;
//           *match_pos holds the position of the last match handled, so the
//           caller can grow the builder and call again.
//   true  - the search is complete. Any text following the last match has
//           been added and *match_pos is the position of the last match
//           (unchanged if nothing was found in this call).
template <typename schar, typename pchar>
static bool SearchStringMultiple(Vector<schar> subject,
                                 String* pattern,
                                 Vector<pchar> pattern_string,
                                 FixedArrayBuilder* builder,
                                 int* match_pos) {
  int pos = *match_pos;
  int subject_length = subject.length();
  int pattern_length = pattern_string.length();
  int max_search_start = subject_length - pattern_length;
  bool is_ascii = (sizeof(schar) == 1);
  StringSearchStrategy strategy =
      InitializeStringSearch(pattern_string, is_ascii);
  switch (strategy) {
    case SEARCH_FAIL:
      // The pattern cannot occur in this subject, so the search is complete
      // with no new matches. This must report "complete" (true): reporting
      // false would make the caller grow the builder and retry forever,
      // since the strategy is the same on every call.
      return true;
    case SEARCH_SHORT:
      while (pos <= max_search_start) {
        if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
          *match_pos = pos;
          return false;
        }
        // Position of end of previous match.
        int match_end = pos + pattern_length;
        int new_pos = SimpleIndexOf(subject, pattern_string, match_end);
        if (new_pos >= 0) {
          // A match.
          if (new_pos > match_end) {
            ReplacementStringBuilder::AddSubjectSlice(builder,
                                                      match_end,
                                                      new_pos);
          }
          pos = new_pos;
          builder->Add(pattern);
        } else {
          break;
        }
      }
      break;
    case SEARCH_LONG:
      while (pos <= max_search_start) {
        if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
          *match_pos = pos;
          return false;
        }
        // Position of end of previous match. The unmatched text to pass
        // through starts here, not at `pos`: `pos` is the start of the
        // previous match (and is negative before the first match).
        int match_end = pos + pattern_length;
        int new_pos = ComplexIndexOf(subject, pattern_string, match_end);
        if (new_pos >= 0) {
          // A match has been found.
          if (new_pos > match_end) {
            ReplacementStringBuilder::AddSubjectSlice(builder,
                                                      match_end,
                                                      new_pos);
          }
          pos = new_pos;
          builder->Add(pattern);
        } else {
          break;
        }
      }
      break;
  }
  // Pass through whatever follows the last match. The condition is
  // equivalent to pos + pattern_length < subject_length, i.e. a non-empty
  // tail.
  if (pos < max_search_start) {
    ReplacementStringBuilder::AddSubjectSlice(builder,
                                              pos + pattern_length,
                                              subject_length);
  }
  *match_pos = pos;
  return true;
}
// Collects all occurrences of the multi-character `pattern` in the flat
// string `subject` into `builder`, interleaved with the unmatched parts of
// the subject. On success the last occurrence is recorded in
// `last_match_info` and true is returned; returns false if the pattern does
// not occur.
static bool SearchStringMultiple(Handle<String> subject,
                                 Handle<String> pattern,
                                 Handle<JSArray> last_match_info,
                                 FixedArrayBuilder* builder) {
  ASSERT(subject->IsFlat());
  ASSERT(pattern->IsFlat());
  ASSERT(pattern->length() > 1);

  // Pretend a previous match ended right in front of the first character.
  int match_pos = -pattern->length();

  // Each round guarantees builder capacity up front and then searches
  // without allocating until the search either finishes or needs more room.
  // The four calls differ only in the character widths of subject and
  // pattern.
  bool search_done = false;
  while (!search_done) {
    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
    AssertNoAllocation no_gc;
    if (subject->IsAsciiRepresentation()) {
      Vector<const char> ascii_subject = subject->ToAsciiVector();
      if (pattern->IsAsciiRepresentation()) {
        search_done = SearchStringMultiple(ascii_subject,
                                           *pattern,
                                           pattern->ToAsciiVector(),
                                           builder,
                                           &match_pos);
      } else {
        search_done = SearchStringMultiple(ascii_subject,
                                           *pattern,
                                           pattern->ToUC16Vector(),
                                           builder,
                                           &match_pos);
      }
    } else {
      Vector<const uc16> two_byte_subject = subject->ToUC16Vector();
      if (pattern->IsAsciiRepresentation()) {
        search_done = SearchStringMultiple(two_byte_subject,
                                           *pattern,
                                           pattern->ToAsciiVector(),
                                           builder,
                                           &match_pos);
      } else {
        search_done = SearchStringMultiple(two_byte_subject,
                                           *pattern,
                                           pattern->ToUC16Vector(),
                                           builder,
                                           &match_pos);
      }
    }
  }

  if (match_pos < 0) return false;  // No matches at all.

  SetLastMatchInfoNoCaptures(subject,
                             last_match_info,
                             match_pos,
                             match_pos + pattern->length());
  return true;
}
// Repeatedly executes `regexp` (a pattern without capture groups) over the
// flat string `subject`. For every match, `builder` receives the unmatched
// text in front of it as a subject slice (if any) followed by the matched
// substring; after the last match, the rest of the subject is added as a
// final slice.
//
// Returns RE_SUCCESS if there was at least one match (the last one is
// recorded in `last_match_array`), RE_FAILURE if there was none, and
// RE_EXCEPTION if preparing or executing the regexp failed.
static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
    Handle<String> subject,
    Handle<JSRegExp> regexp,
    Handle<JSArray> last_match_array,
    FixedArrayBuilder* builder) {
  ASSERT(subject->IsFlat());
  int last_match_start = -1;  // Stays negative until something matches.
  int last_match_end = 0;
  int search_from = 0;

  int register_count = RegExpImpl::IrregexpPrepare(regexp, subject);
  if (register_count < 0) return RegExpImpl::RE_EXCEPTION;

  OffsetsVector registers(register_count);
  Vector<int> register_vector(registers.vector(), registers.length());
  int subject_length = subject->length();

  for (;;) {  // Break on failure, return on exception.
    RegExpImpl::IrregexpResult result =
        RegExpImpl::IrregexpExecOnce(regexp,
                                     subject,
                                     search_from,
                                     register_vector);
    if (result == RegExpImpl::RE_FAILURE) break;
    if (result != RegExpImpl::RE_SUCCESS) {
      ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
      return result;
    }

    last_match_start = register_vector[0];
    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
    // Unmatched text between the previous match and this one.
    if (last_match_end < last_match_start) {
      ReplacementStringBuilder::AddSubjectSlice(builder,
                                                last_match_end,
                                                last_match_start);
    }
    last_match_end = register_vector[1];

    // Keep the substring handle from piling up across iterations.
    HandleScope loop_scope;
    builder->Add(*Factory::NewSubString(subject,
                                        last_match_start,
                                        last_match_end));

    if (last_match_start != last_match_end) {
      search_from = last_match_end;
    } else {
      // Empty match: step one character ahead so the search makes progress.
      search_from = last_match_end + 1;
      if (search_from > subject_length) break;
    }
  }

  if (last_match_start < 0) {
    return RegExpImpl::RE_FAILURE;  // No matches at all.
  }

  // Text following the last match.
  if (last_match_end < subject_length) {
    ReplacementStringBuilder::AddSubjectSlice(builder,
                                              last_match_end,
                                              subject_length);
  }
  SetLastMatchInfoNoCaptures(subject,
                             last_match_array,
                             last_match_start,
                             last_match_end);
  return RegExpImpl::RE_SUCCESS;
}
// Repeatedly executes `regexp` (a pattern with capture groups) over the flat
// string `subject`. For every match, `builder` receives the unmatched text
// in front of it as a subject slice (if any), followed by a JSArray laid out
// as the argument list of a replacement function: the matched substring, one
// entry per capture group, the match start index and the subject. After the
// last match, the rest of the subject is added as a final slice.
//
// Returns RE_SUCCESS if there was at least one match (the registers of the
// last one are copied into `last_match_array`), RE_FAILURE if there was
// none, and RE_EXCEPTION if preparing or executing the regexp failed.
static RegExpImpl::IrregexpResult SearchRegExpMultiple(
    Handle<String> subject,
    Handle<JSRegExp> regexp,
    Handle<JSArray> last_match_array,
    FixedArrayBuilder* builder) {
  ASSERT(subject->IsFlat());
  int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
  if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;

  OffsetsVector registers(required_registers);
  Vector<int> register_vector(registers.vector(), registers.length());

  RegExpImpl::IrregexpResult result =
      RegExpImpl::IrregexpExecOnce(regexp,
                                   subject,
                                   0,
                                   register_vector);

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  // Position to search from.
  int pos = 0;
  // End of previous match. Differs from pos if match was empty.
  int match_end = 0;
  if (result == RegExpImpl::RE_SUCCESS) {
    // Need to keep a copy of the previous match for creating last_match_info
    // at the end, so we have two vectors that we swap between.
    OffsetsVector registers2(required_registers);
    Vector<int> prev_register_vector(registers2.vector(), registers2.length());

    do {
      int match_start = register_vector[0];
      builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
      if (match_end < match_start) {
        ReplacementStringBuilder::AddSubjectSlice(builder,
                                                  match_end,
                                                  match_start);
      }
      match_end = register_vector[1];

      {
        // Avoid accumulating new handles inside loop.
        HandleScope temp_scope;
        // Arguments array to replace function is match, captures, index and
        // subject, i.e., 3 + capture count in total.
        Handle<FixedArray> elements = Factory::NewFixedArray(3 + capture_count);
        elements->set(0, *Factory::NewSubString(subject,
                                                match_start,
                                                match_end));
        for (int i = 1; i <= capture_count; i++) {
          int capture_start = register_vector[i * 2];
          if (capture_start >= 0) {
            int capture_end = register_vector[i * 2 + 1];
            ASSERT(capture_start <= capture_end);
            Handle<String> substring =
                Factory::NewSubString(subject, capture_start, capture_end);
            elements->set(i, *substring);
          } else {
            // The group did not take part in this match (e.g. the unused
            // side of an alternation), so there is no substring to extract.
            // The replacement function sees undefined for such a capture.
            // NOTE(review): relies on unmatched captures being reported
            // with a negative start register - confirm against irregexp.
            elements->set(i, Heap::undefined_value());
          }
        }
        elements->set(capture_count + 1, Smi::FromInt(match_start));
        elements->set(capture_count + 2, *subject);
        builder->Add(*Factory::NewJSArrayWithElements(elements));
      }
      // Swap register vectors, so the last successful match is in
      // prev_register_vector.
      Vector<int> tmp = prev_register_vector;
      prev_register_vector = register_vector;
      register_vector = tmp;

      if (match_end > match_start) {
        pos = match_end;
      } else {
        // Empty match: step one character ahead so the search makes
        // progress, and stop once that runs past the end of the subject.
        pos = match_end + 1;
        if (pos > subject_length) {
          break;
        }
      }

      result = RegExpImpl::IrregexpExecOnce(regexp,
                                            subject,
                                            pos,
                                            register_vector);
    } while (result == RegExpImpl::RE_SUCCESS);

    if (result != RegExpImpl::RE_EXCEPTION) {
      // Finished matching, with at least one match.
      if (match_end < subject_length) {
        ReplacementStringBuilder::AddSubjectSlice(builder,
                                                  match_end,
                                                  subject_length);
      }

      // One start/end register pair for the whole match and for each
      // capture group.
      int last_match_capture_count = (capture_count + 1) * 2;
      int last_match_array_size =
          last_match_capture_count + RegExpImpl::kLastMatchOverhead;
      // Grow the array before entering the no-allocation section below.
      last_match_array->EnsureSize(last_match_array_size);
      AssertNoAllocation no_gc;
      FixedArray* elements = FixedArray::cast(last_match_array->elements());
      RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
      RegExpImpl::SetLastSubject(elements, *subject);
      RegExpImpl::SetLastInput(elements, *subject);
      for (int i = 0; i < last_match_capture_count; i++) {
        RegExpImpl::SetCapture(elements, i, prev_register_vector[i]);
      }
      return RegExpImpl::RE_SUCCESS;
    }
  }
  // No matches at all, return failure or exception result directly.
  return result;
}
// Runtime entry that finds all matches of a global regexp in a string.
//
// Arguments:
//   args[0] - the JSRegExp (must be global).
//   args[1] - the subject string; flattened here if necessary.
//   args[2] - the last-match-info JSArray, updated to describe the last
//             match found.
//   args[3] - a JSArray whose backing store is reused for the result when
//             possible.
//
// Returns args[3], filled with the matches interleaved with encoded slices
// of the unmatched subject text, if there was at least one match; the null
// value if there was none; Failure::Exception() if regexp execution failed.
static Object* Runtime_RegExpExecMultiple(Arguments args) {
  ASSERT(args.length() == 4);
  HandleScope handles;

  CONVERT_ARG_CHECKED(String, subject, 1);
  if (!subject->IsFlat()) { FlattenString(subject); }
  CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_CHECKED(JSArray, last_match_info, 2);
  CONVERT_ARG_CHECKED(JSArray, result_array, 3);

  ASSERT(last_match_info->HasFastElements());
  ASSERT(regexp->GetFlags().is_global());

  // Reuse the result array's backing store when it is usable.
  Handle<FixedArray> result_elements;
  if (result_array->HasFastElements()) {
    result_elements =
        Handle<FixedArray>(FixedArray::cast(result_array->elements()));
  }
  // The builder grows by doubling and therefore needs a non-empty backing
  // store; from a zero-length one its growth loop would never terminate.
  // Fall back to a fresh store if the array has none to offer.
  if (result_elements.is_null() || result_elements->length() == 0) {
    result_elements = Factory::NewFixedArrayWithHoles(16);
  }
  FixedArrayBuilder builder(result_elements);

  if (regexp->TypeTag() == JSRegExp::ATOM) {
    // Plain-string patterns are searched for directly.
    Handle<String> pattern(
        String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
    int pattern_length = pattern->length();
    if (pattern_length == 1) {
      if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) {
        return *builder.ToJSArray(result_array);
      }
      return Heap::null_value();
    }

    if (!pattern->IsFlat()) FlattenString(pattern);
    if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) {
      return *builder.ToJSArray(result_array);
    }
    return Heap::null_value();
  }

  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

  // Patterns without capture groups produce plain substrings per match;
  // patterns with captures produce one argument array per match.
  RegExpImpl::IrregexpResult result;
  if (regexp->CaptureCount() == 0) {
    result = SearchRegExpNoCaptureMultiple(subject,
                                           regexp,
                                           last_match_info,
                                           &builder);
  } else {
    result = SearchRegExpMultiple(subject, regexp, last_match_info, &builder);
  }
  if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
  if (result == RegExpImpl::RE_FAILURE) return Heap::null_value();
  ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
  return Failure::Exception();
}
static Object* Runtime_NumberToRadixString(Arguments args) { static Object* Runtime_NumberToRadixString(Arguments args) {
NoHandleAllocation ha; NoHandleAllocation ha;
ASSERT(args.length() == 2); ASSERT(args.length() == 2);

View File

@ -153,6 +153,7 @@ namespace internal {
/* Regular expressions */ \ /* Regular expressions */ \
F(RegExpCompile, 3, 1) \ F(RegExpCompile, 3, 1) \
F(RegExpExec, 4, 1) \ F(RegExpExec, 4, 1) \
F(RegExpExecMultiple, 4, 1) \
\ \
/* Strings */ \ /* Strings */ \
F(StringCharCodeAt, 2, 1) \ F(StringCharCodeAt, 2, 1) \

View File

@ -405,97 +405,91 @@ function addCaptureString(builder, matchInfo, index) {
builder.addSpecialSlice(start, end); builder.addSpecialSlice(start, end);
}; };
// TODO(lrn): This array will survive indefinitely if replace is never
// called again. However, it will be empty, since the contents are cleared
// in the finally block.
var reusableReplaceArray = $Array(16);
// Helper function for replacing regular expressions with the result of a // Helper function for replacing regular expressions with the result of a
// function application in String.prototype.replace. The function application // function application in String.prototype.replace.
// must be interleaved with the regexp matching (contrary to ECMA-262
// 15.5.4.11) to mimic SpiderMonkey and KJS behavior when the function uses
// the static properties of the RegExp constructor. Example:
// 'abcd'.replace(/(.)/g, function() { return RegExp.$1; }
// should be 'abcd' and not 'dddd' (or anything else).
function StringReplaceRegExpWithFunction(subject, regexp, replace) { function StringReplaceRegExpWithFunction(subject, regexp, replace) {
var matchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(matchInfo)) return subject;
var result = new ReplaceResultBuilder(subject);
// There's at least one match. If the regexp is global, we have to loop
// over all matches. The loop is not in C++ code here like the one in
// RegExp.prototype.exec, because of the interleaved function application.
// Unfortunately, that means this code is nearly duplicated, here and in
// jsregexp.cc.
if (regexp.global) { if (regexp.global) {
var previous = 0; var resultArray = reusableReplaceArray;
var startOfMatch; if (resultArray) {
if (NUMBER_OF_CAPTURES(matchInfo) == 2) { reusableReplaceArray = null;
// Both branches contain essentially the same loop except for the call
// to the replace function. The branch is put outside of the loop for
// speed
do {
startOfMatch = matchInfo[CAPTURE0];
result.addSpecialSlice(previous, startOfMatch);
previous = matchInfo[CAPTURE1];
var match = SubString(subject, startOfMatch, previous);
// Don't call directly to avoid exposing the built-in global object.
result.add(replace.call(null, match, startOfMatch, subject));
// Can't use matchInfo any more from here, since the function could
// overwrite it.
// Continue with the next match.
// Increment previous if we matched an empty string, as per ECMA-262
// 15.5.4.10.
if (previous == startOfMatch) {
// Add the skipped character to the output, if any.
if (previous < subject.length) {
result.addSpecialSlice(previous, previous + 1);
}
previous++;
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match
if (previous > subject.length) {
return result.generate();
}
}
matchInfo = DoRegExpExec(regexp, subject, previous);
} while (!IS_NULL(matchInfo));
} else { } else {
do { // Inside a nested replace (replace called from the replacement function
startOfMatch = matchInfo[CAPTURE0]; // of another replace) or we have failed to set the reusable array
result.addSpecialSlice(previous, startOfMatch); // back due to an exception in a replacement function. Create a new
previous = matchInfo[CAPTURE1]; // array to use in the future, or until the original is written back.
result.add(ApplyReplacementFunction(replace, matchInfo, subject)); resultArray = $Array(16);
// Can't use matchInfo any more from here, since the function could }
// overwrite it. try {
// Continue with the next match. // Must handle exceptions thrown by the replace functions correctly,
// Increment previous if we matched an empty string, as per ECMA-262 // including unregistering global regexps.
// 15.5.4.10. var res = %RegExpExecMultiple(regexp,
if (previous == startOfMatch) { subject,
// Add the skipped character to the output, if any. lastMatchInfo,
if (previous < subject.length) { resultArray);
result.addSpecialSlice(previous, previous + 1); regexp.lastIndex = 0;
} if (IS_NULL(res)) {
previous++; // No matches at all.
// Per ECMA-262 15.10.6.2, if the previous index is greater than the return subject;
// string length, there is no match }
if (previous > subject.length) { var len = res.length;
return result.generate(); var i = 0;
} if (NUMBER_OF_CAPTURES(lastMatchInfo) == 2) {
} var match_start = 0;
matchInfo = DoRegExpExec(regexp, subject, previous); while (i < len) {
} while (!IS_NULL(matchInfo)); var elem = res[i];
if (%_IsSmi(elem)) {
if (elem > 0) {
match_start = (elem >> 11) + (elem & 0x7ff);
} else {
match_start = res[++i] - elem;
}
} else {
var func_result = replace.call(null, elem, match_start, subject);
if (!IS_STRING(func_result)) func_result = TO_STRING(func_result);
res[i] = func_result;
match_start += elem.length;
}
i++;
}
} else {
while (i < len) {
var elem = res[i];
if (!%_IsSmi(elem)) {
// elem must be an Array.
// Use the apply argument as backing for global RegExp properties.
lastMatchInfoOverride = elem;
var func_result = replace.apply(null, elem);
if (!IS_STRING(func_result)) func_result = TO_STRING(func_result);
res[i] = func_result;
}
i++;
}
}
var result = new ReplaceResultBuilder(subject, res);
return result.generate();
} finally {
lastMatchInfoOverride = null;
resultArray.length = 0;
reusableReplaceArray = resultArray;
} }
// Tack on the final right substring after the last match.
result.addSpecialSlice(previous, subject.length);
} else { // Not a global regexp, no need to loop. } else { // Not a global regexp, no need to loop.
var matchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(matchInfo)) return subject;
var result = new ReplaceResultBuilder(subject);
result.addSpecialSlice(0, matchInfo[CAPTURE0]); result.addSpecialSlice(0, matchInfo[CAPTURE0]);
var endOfMatch = matchInfo[CAPTURE1]; var endOfMatch = matchInfo[CAPTURE1];
result.add(ApplyReplacementFunction(replace, matchInfo, subject)); result.add(ApplyReplacementFunction(replace, matchInfo, subject));
// Can't use matchInfo any more from here, since the function could // Can't use matchInfo any more from here, since the function could
// overwrite it. // overwrite it.
result.addSpecialSlice(endOfMatch, subject.length); result.addSpecialSlice(endOfMatch, subject.length);
return result.generate();
} }
return result.generate();
} }
@ -894,8 +888,11 @@ function StringSup() {
// ReplaceResultBuilder support. // ReplaceResultBuilder support.
function ReplaceResultBuilder(str) { function ReplaceResultBuilder(str) {
this.__proto__ = void 0; if (%_ArgumentsLength() > 1) {
this.elements = new $Array(); this.elements = %_Arguments(1);
} else {
this.elements = new $Array();
}
this.special_string = str; this.special_string = str;
} }