diff --git a/src/regexp.js b/src/regexp.js
index e2492f7245..dc1b0429f7 100644
--- a/src/regexp.js
+++ b/src/regexp.js
@@ -344,6 +344,7 @@ function RegExpToString() {
 // on the captures array of the last successful match and the subject string
 // of the last successful match.
 function RegExpGetLastMatch() {
+  if (lastMatchInfoOverride) { return lastMatchInfoOverride[0]; }
   var regExpSubject = LAST_SUBJECT(lastMatchInfo);
   return SubString(regExpSubject,
                    lastMatchInfo[CAPTURE0],
@@ -352,6 +353,11 @@ function RegExpGetLastMatch() {
 
 
 function RegExpGetLastParen() {
+  if (lastMatchInfoOverride) {
+    var override = lastMatchInfoOverride;
+    if (override.length <= 3) return '';
+    return override[override.length - 3];
+  }
   var length = NUMBER_OF_CAPTURES(lastMatchInfo);
   if (length <= 2) return '';  // There were no captures.
   // We match the SpiderMonkey behavior: return the substring defined by the
@@ -368,17 +374,34 @@ function RegExpGetLastParen() {
 
 
 function RegExpGetLeftContext() {
-  return SubString(LAST_SUBJECT(lastMatchInfo),
-                   0,
-                   lastMatchInfo[CAPTURE0]);
+  var start_index;
+  var subject;
+  if (!lastMatchInfoOverride) {
+    start_index = lastMatchInfo[CAPTURE0];
+    subject = LAST_SUBJECT(lastMatchInfo);
+  } else {
+    var override = lastMatchInfoOverride;
+    start_index = override[override.length - 2];
+    subject = override[override.length - 1];
+  }
+  return SubString(subject, 0, start_index);
 }
 
 
 function RegExpGetRightContext() {
-  var subject = LAST_SUBJECT(lastMatchInfo);
-  return SubString(subject,
-                   lastMatchInfo[CAPTURE1],
-                   subject.length);
+  var start_index;
+  var subject;
+  if (!lastMatchInfoOverride) {
+    start_index = lastMatchInfo[CAPTURE1];
+    subject = LAST_SUBJECT(lastMatchInfo);
+  } else {
+    var override = lastMatchInfoOverride;
+    subject = override[override.length - 1];
+    // The right context starts where the match ends: the match index plus
+    // the length of the matched string (override[0]), not of the subject.
+    start_index = override[override.length - 2] + override[0].length;
+  }
+  return SubString(subject, start_index, subject.length);
 }
 
 
@@ -387,6 +410,10 @@ function RegExpGetRightContext() {
 // called with indices from 1 to 9.
 function RegExpMakeCaptureGetter(n) {
   return function() {
+    if (lastMatchInfoOverride) {
+      if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n];
+      return '';
+    }
     var index = n * 2;
     if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
     var matchStart = lastMatchInfo[CAPTURE(index)];
@@ -411,6 +438,12 @@ var lastMatchInfo = [
     0,                 // REGEXP_FIRST_CAPTURE + 1
 ];
 
+// Override last match info with an array of actual substrings.
+// Used internally by replace regexp with function.
+// The array has the format of an "apply" argument for a replacement
+// function.
+var lastMatchInfoOverride = null;
+
 // -------------------------------------------------------------------
 
 function SetupRegExp() {
diff --git a/src/runtime.cc b/src/runtime.cc
index b349815219..c77d518371 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -1567,9 +1567,91 @@ static Object* Runtime_CharFromCode(Arguments args) {
   return CharFromCode(args[0]);
 }
 
+
+// Accumulates heap values in a FixedArray that is grown on demand.  The
+// backing store must never be empty, because EnsureCapacity() grows it by
+// doubling its length.  Add() never grows the array: callers reserve room
+// first with EnsureCapacity() or test for it with HasCapacity().
+class FixedArrayBuilder {
+ public:
+  explicit FixedArrayBuilder(int initial_capacity)
+      : array_(Factory::NewFixedArrayWithHoles(initial_capacity)),
+        length_(0) {
+    ASSERT(initial_capacity > 0);
+  }
+
+  explicit FixedArrayBuilder(Handle<FixedArray> backing_store)
+      : array_(backing_store),
+        length_(0) {
+    ASSERT(backing_store->length() > 0);
+  }
+
+  bool HasCapacity(int elements) {
+    int length = array_->length();
+    int required_length = length_ + elements;
+    return (length >= required_length);
+  }
+
+  void EnsureCapacity(int elements) {
+    int length = array_->length();
+    int required_length = length_ + elements;
+    if (length < required_length) {
+      int new_length = length;
+      do {
+        new_length *= 2;
+      } while (new_length < required_length);
+      Handle<FixedArray> extended_array =
+          Factory::NewFixedArrayWithHoles(new_length);
+      array_->CopyTo(0, *extended_array, 0, length_);
+      array_ = extended_array;
+    }
+  }
+
+  void Add(Object* value) {
+    ASSERT(length_ < capacity());
+    array_->set(length_, value);
+    length_++;
+  }
+
+  void Add(Smi* value) {
+    ASSERT(length_ < capacity());
+    array_->set(length_, value);
+    length_++;
+  }
+
+  Handle<FixedArray> array() {
+    return array_;
+  }
+
+  int length() {
+    return length_;
+  }
+
+  int capacity() {
+    return array_->length();
+  }
+
+  Handle<JSArray> ToJSArray() {
+    Handle<JSArray> result_array = Factory::NewJSArrayWithElements(array_);
+    result_array->set_length(Smi::FromInt(length_));
+    return result_array;
+  }
+
+  Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
+    target_array->set_elements(*array_);
+    target_array->set_length(Smi::FromInt(length_));
+    return target_array;
+  }
+
+ private:
+  Handle<FixedArray> array_;
+  int length_;
+};
+
+
 // Forward declarations.
-static const int kStringBuilderConcatHelperLengthBits = 11;
-static const int kStringBuilderConcatHelperPositionBits = 19;
+const int kStringBuilderConcatHelperLengthBits = 11;
+const int kStringBuilderConcatHelperPositionBits = 19;
 
 template <typename sinkchar>
 static inline void StringBuilderConcatHelper(String*,
@@ -1577,15 +1659,19 @@ static inline void StringBuilderConcatHelper(String*,
                                              FixedArray*,
                                              int);
 
-typedef BitField<int, 0, 11> StringBuilderSubstringLength;
-typedef BitField<int, 11, 19> StringBuilderSubstringPosition;
+typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
+    StringBuilderSubstringLength;
+typedef BitField<int,
+                 kStringBuilderConcatHelperLengthBits,
+                 kStringBuilderConcatHelperPositionBits>
+    StringBuilderSubstringPosition;
+
 
 class ReplacementStringBuilder {
  public:
   ReplacementStringBuilder(Handle<String> subject, int estimated_part_count)
-      : subject_(subject),
-        parts_(Factory::NewFixedArray(estimated_part_count)),
-        part_count_(0),
+      : array_builder_(estimated_part_count),
+        subject_(subject),
         character_count_(0),
         is_ascii_(subject->IsAsciiRepresentation()) {
     // Require a non-zero initial size. Ensures that doubling the size to
@@ -1593,38 +1679,35 @@ class ReplacementStringBuilder {
     ASSERT(estimated_part_count > 0);
   }
 
-  void EnsureCapacity(int elements) {
-    int length = parts_->length();
-    int required_length = part_count_ + elements;
-    if (length < required_length) {
-      int new_length = length;
-      do {
-        new_length *= 2;
-      } while (new_length < required_length);
-      Handle<FixedArray> extended_array =
-          Factory::NewFixedArray(new_length);
-      parts_->CopyTo(0, *extended_array, 0, part_count_);
-      parts_ = extended_array;
-    }
-  }
-
-  void AddSubjectSlice(int from, int to) {
+  static inline void AddSubjectSlice(FixedArrayBuilder* builder,
+                                     int from,
+                                     int to) {
     ASSERT(from >= 0);
     int length = to - from;
     ASSERT(length > 0);
     // Can we encode the slice in 11 bits for length and 19 bits for
     // start position - as used by StringBuilderConcatHelper?
     if (StringBuilderSubstringLength::is_valid(length) &&
         StringBuilderSubstringPosition::is_valid(from)) {
       int encoded_slice = StringBuilderSubstringLength::encode(length) |
           StringBuilderSubstringPosition::encode(from);
-      AddElement(Smi::FromInt(encoded_slice));
+      builder->Add(Smi::FromInt(encoded_slice));
     } else {
       // Otherwise encode as two smis.
-      AddElement(Smi::FromInt(-length));
-      AddElement(Smi::FromInt(from));
+      builder->Add(Smi::FromInt(-length));
+      builder->Add(Smi::FromInt(from));
     }
-    IncrementCharacterCount(length);
+  }
+
+
+  void EnsureCapacity(int elements) {
+    array_builder_.EnsureCapacity(elements);
+  }
+
+
+  void AddSubjectSlice(int from, int to) {
+    AddSubjectSlice(&array_builder_, from, to);
+    IncrementCharacterCount(to - from);
   }
 
 
@@ -1640,7 +1723,7 @@ class ReplacementStringBuilder {
 
 
   Handle<String> ToString() {
-    if (part_count_ == 0) {
+    if (array_builder_.length() == 0) {
       return Factory::empty_string();
     }
 
@@ -1652,8 +1735,8 @@ class ReplacementStringBuilder {
       char* char_buffer = seq->GetChars();
       StringBuilderConcatHelper(*subject_,
                                 char_buffer,
-                                *parts_,
-                                part_count_);
+                                *array_builder_.array(),
+                                array_builder_.length());
     } else {
       // Non-ASCII.
       joined_string = NewRawTwoByteString(character_count_);
@@ -1662,8 +1745,8 @@ class ReplacementStringBuilder {
       uc16* char_buffer = seq->GetChars();
       StringBuilderConcatHelper(*subject_,
                                 char_buffer,
-                                *parts_,
-                                part_count_);
+                                *array_builder_.array(),
+                                array_builder_.length());
     }
     return joined_string;
   }
@@ -1676,8 +1759,14 @@ class ReplacementStringBuilder {
     character_count_ += by;
   }
 
-  private:
+  Handle<JSArray> GetParts() {
+    Handle<JSArray> result =
+        Factory::NewJSArrayWithElements(array_builder_.array());
+    result->set_length(Smi::FromInt(array_builder_.length()));
+    return result;
+  }
 
+ private:
   Handle<String> NewRawAsciiString(int size) {
     CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(size), String);
   }
@@ -1690,14 +1779,12 @@ class ReplacementStringBuilder {
 
   void AddElement(Object* element) {
     ASSERT(element->IsSmi() || element->IsString());
-    ASSERT(parts_->length() > part_count_);
-    parts_->set(part_count_, element);
-    part_count_++;
+    ASSERT(array_builder_.capacity() > array_builder_.length());
+    array_builder_.Add(element);
   }
 
+  FixedArrayBuilder array_builder_;
   Handle<String> subject_;
-  Handle<FixedArray> parts_;
-  int part_count_;
   int character_count_;
   bool is_ascii_;
 };
@@ -2105,7 +2192,6 @@ static Object* Runtime_StringReplaceRegExpWithString(Arguments args) {
 }
 
 
-
 // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
 // limit, we can fix the size of tables.
 static const int kBMMaxShift = 0xff;
@@ -2869,6 +2955,494 @@ static Object* Runtime_StringMatch(Arguments args) {
 }
 
 
+// Two smis before and after the match, for very long strings.
+const int kMaxBuilderEntriesPerRegExpMatch = 5;
+
+
+static void SetLastMatchInfoNoCaptures(Handle<String> subject,
+                                       Handle<JSArray> last_match_info,
+                                       int match_start,
+                                       int match_end) {
+  // Fill last_match_info with a single capture.
+  last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
+  AssertNoAllocation no_gc;
+  FixedArray* elements = FixedArray::cast(last_match_info->elements());
+  RegExpImpl::SetLastCaptureCount(elements, 2);
+  RegExpImpl::SetLastInput(elements, *subject);
+  RegExpImpl::SetLastSubject(elements, *subject);
+  RegExpImpl::SetCapture(elements, 0, match_start);
+  RegExpImpl::SetCapture(elements, 1, match_end);
+}
+
+
+// Finds all occurrences of pattern_char in subject after *match_pos, adding
+// the skipped subject slices and the pattern to builder.  Returns false
+// (with *match_pos updated) if builder ran out of capacity and the search
+// must be resumed; returns true when the whole subject has been searched.
+template <typename schar>
+static bool SearchCharMultiple(Vector<schar> subject,
+                               String* pattern,
+                               schar pattern_char,
+                               FixedArrayBuilder* builder,
+                               int* match_pos) {
+  // Position of last match.
+  int pos = *match_pos;
+  int subject_length = subject.length();
+  while (pos < subject_length) {
+    int match_end = pos + 1;
+    if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
+      *match_pos = pos;
+      return false;
+    }
+    int new_pos = SingleCharIndexOf(subject, pattern_char, match_end);
+    if (new_pos >= 0) {
+      // Match has been found.
+      if (new_pos > match_end) {
+        ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos);
+      }
+      pos = new_pos;
+      builder->Add(pattern);
+    } else {
+      break;
+    }
+  }
+  if (pos + 1 < subject_length) {
+    ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length);
+  }
+  *match_pos = pos;
+  return true;
+}
+
+
+static bool SearchCharMultiple(Handle<String> subject,
+                               Handle<String> pattern,
+                               Handle<JSArray> last_match_info,
+                               FixedArrayBuilder* builder) {
+  ASSERT(subject->IsFlat());
+  ASSERT_EQ(1, pattern->length());
+  uc16 pattern_char = pattern->Get(0);
+  // Treating position before first as initial "previous match position".
+  int match_pos = -1;
+
+  for (;;) {  // Break when search complete.
+    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+    AssertNoAllocation no_gc;
+    if (subject->IsAsciiRepresentation()) {
+      if (pattern_char > String::kMaxAsciiCharCode) {
+        break;
+      }
+      Vector<const char> subject_vector = subject->ToAsciiVector();
+      char pattern_ascii_char = static_cast<char>(pattern_char);
+      bool complete = SearchCharMultiple<const char>(subject_vector,
+                                                     *pattern,
+                                                     pattern_ascii_char,
+                                                     builder,
+                                                     &match_pos);
+      if (complete) break;
+    } else {
+      Vector<const uc16> subject_vector = subject->ToUC16Vector();
+      bool complete = SearchCharMultiple<const uc16>(subject_vector,
+                                                     *pattern,
+                                                     pattern_char,
+                                                     builder,
+                                                     &match_pos);
+      if (complete) break;
+    }
+  }
+
+  if (match_pos >= 0) {
+    SetLastMatchInfoNoCaptures(subject,
+                               last_match_info,
+                               match_pos,
+                               match_pos + 1);
+    return true;
+  }
+  return false;  // No matches at all.
+}
+
+
+// Same protocol as the single-character search above, for patterns longer
+// than one character.
+template <typename schar, typename pchar>
+static bool SearchStringMultiple(Vector<schar> subject,
+                                 String* pattern,
+                                 Vector<pchar> pattern_string,
+                                 FixedArrayBuilder* builder,
+                                 int* match_pos) {
+  int pos = *match_pos;
+  int subject_length = subject.length();
+  int pattern_length = pattern_string.length();
+  int max_search_start = subject_length - pattern_length;
+  bool is_ascii = (sizeof(schar) == 1);
+  StringSearchStrategy strategy =
+      InitializeStringSearch(pattern_string, is_ascii);
+  switch (strategy) {
+    case SEARCH_FAIL:
+      // The search failed up front, so it is complete.  Returning false here
+      // would make the caller grow the builder and call again forever.
+      return true;
+    case SEARCH_SHORT:
+      while (pos <= max_search_start) {
+        if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
+          *match_pos = pos;
+          return false;
+        }
+        // Position of end of previous match.
+        int match_end = pos + pattern_length;
+        int new_pos = SimpleIndexOf(subject, pattern_string, match_end);
+        if (new_pos >= 0) {
+          // A match.
+          if (new_pos > match_end) {
+            ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                      match_end,
+                                                      new_pos);
+          }
+          pos = new_pos;
+          builder->Add(pattern);
+        } else {
+          break;
+        }
+      }
+      break;
+    case SEARCH_LONG:
+      while (pos <= max_search_start) {
+        if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
+          *match_pos = pos;
+          return false;
+        }
+        // Position of end of previous match.
+        int match_end = pos + pattern_length;
+        int new_pos = ComplexIndexOf(subject, pattern_string, match_end);
+        if (new_pos >= 0) {
+          // A match has been found.
+          if (new_pos > match_end) {
+            ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                      match_end,
+                                                      new_pos);
+          }
+          pos = new_pos;
+          builder->Add(pattern);
+        } else {
+          break;
+        }
+      }
+      break;
+  }
+  if (pos < max_search_start) {
+    ReplacementStringBuilder::AddSubjectSlice(builder,
+                                              pos + pattern_length,
+                                              subject_length);
+  }
+  *match_pos = pos;
+  return true;
+}
+
+
+static bool SearchStringMultiple(Handle<String> subject,
+                                 Handle<String> pattern,
+                                 Handle<JSArray> last_match_info,
+                                 FixedArrayBuilder* builder) {
+  ASSERT(subject->IsFlat());
+  ASSERT(pattern->IsFlat());
+  ASSERT(pattern->length() > 1);
+
+  // Treating as if a previous match was before first character.
+  int match_pos = -pattern->length();
+
+  for (;;) {  // Break when search complete.
+    builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+    AssertNoAllocation no_gc;
+    if (subject->IsAsciiRepresentation()) {
+      Vector<const char> subject_vector = subject->ToAsciiVector();
+      if (pattern->IsAsciiRepresentation()) {
+        if (SearchStringMultiple(subject_vector,
+                                 *pattern,
+                                 pattern->ToAsciiVector(),
+                                 builder,
+                                 &match_pos)) break;
+      } else {
+        if (SearchStringMultiple(subject_vector,
+                                 *pattern,
+                                 pattern->ToUC16Vector(),
+                                 builder,
+                                 &match_pos)) break;
+      }
+    } else {
+      Vector<const uc16> subject_vector = subject->ToUC16Vector();
+      if (pattern->IsAsciiRepresentation()) {
+        if (SearchStringMultiple(subject_vector,
+                                 *pattern,
+                                 pattern->ToAsciiVector(),
+                                 builder,
+                                 &match_pos)) break;
+      } else {
+        if (SearchStringMultiple(subject_vector,
+                                 *pattern,
+                                 pattern->ToUC16Vector(),
+                                 builder,
+                                 &match_pos)) break;
+      }
+    }
+  }
+
+  if (match_pos >= 0) {
+    SetLastMatchInfoNoCaptures(subject,
+                               last_match_info,
+                               match_pos,
+                               match_pos + pattern->length());
+    return true;
+  }
+  return false;  // No matches at all.
+}
+
+
+static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
+    Handle<String> subject,
+    Handle<JSRegExp> regexp,
+    Handle<JSArray> last_match_array,
+    FixedArrayBuilder* builder) {
+  ASSERT(subject->IsFlat());
+  int match_start = -1;
+  int match_end = 0;
+  int pos = 0;
+  int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
+  if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
+
+  OffsetsVector registers(required_registers);
+  Vector<int> register_vector(registers.vector(), registers.length());
+  int subject_length = subject->length();
+
+  for (;;) {  // Break on failure, return on exception.
+    RegExpImpl::IrregexpResult result =
+        RegExpImpl::IrregexpExecOnce(regexp,
+                                     subject,
+                                     pos,
+                                     register_vector);
+    if (result == RegExpImpl::RE_SUCCESS) {
+      match_start = register_vector[0];
+      builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+      if (match_end < match_start) {
+        ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                  match_end,
+                                                  match_start);
+      }
+      match_end = register_vector[1];
+      HandleScope loop_scope;
+      builder->Add(*Factory::NewSubString(subject, match_start, match_end));
+      if (match_start != match_end) {
+        pos = match_end;
+      } else {
+        pos = match_end + 1;
+        if (pos > subject_length) break;
+      }
+    } else if (result == RegExpImpl::RE_FAILURE) {
+      break;
+    } else {
+      ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
+      return result;
+    }
+  }
+
+  if (match_start >= 0) {
+    if (match_end < subject_length) {
+      ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                match_end,
+                                                subject_length);
+    }
+    SetLastMatchInfoNoCaptures(subject,
+                               last_match_array,
+                               match_start,
+                               match_end);
+    return RegExpImpl::RE_SUCCESS;
+  } else {
+    return RegExpImpl::RE_FAILURE;  // No matches at all.
+  }
+}
+
+
+static RegExpImpl::IrregexpResult SearchRegExpMultiple(
+    Handle<String> subject,
+    Handle<JSRegExp> regexp,
+    Handle<JSArray> last_match_array,
+    FixedArrayBuilder* builder) {
+
+  ASSERT(subject->IsFlat());
+  int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
+  if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
+
+  OffsetsVector registers(required_registers);
+  Vector<int> register_vector(registers.vector(), registers.length());
+
+  RegExpImpl::IrregexpResult result =
+      RegExpImpl::IrregexpExecOnce(regexp,
+                                   subject,
+                                   0,
+                                   register_vector);
+
+  int capture_count = regexp->CaptureCount();
+  int subject_length = subject->length();
+
+  // Position to search from.
+  int pos = 0;
+  // End of previous match. Differs from pos if match was empty.
+  int match_end = 0;
+  if (result == RegExpImpl::RE_SUCCESS) {
+    // Need to keep a copy of the previous match for creating last_match_info
+    // at the end, so we have two vectors that we swap between.
+    OffsetsVector registers2(required_registers);
+    Vector<int> prev_register_vector(registers2.vector(), registers2.length());
+
+    do {
+      int match_start = register_vector[0];
+      builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+      if (match_end < match_start) {
+        ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                  match_end,
+                                                  match_start);
+      }
+      match_end = register_vector[1];
+
+      {
+        // Avoid accumulating new handles inside loop.
+        HandleScope temp_scope;
+        // Arguments array to replace function is match, captures, index and
+        // subject, i.e., 3 + capture count in total.
+        Handle<FixedArray> elements = Factory::NewFixedArray(3 + capture_count);
+        elements->set(0, *Factory::NewSubString(subject,
+                                                match_start,
+                                                match_end));
+        for (int i = 1; i <= capture_count; i++) {
+          int start = register_vector[i * 2];
+          if (start >= 0) {
+            int end = register_vector[i * 2 + 1];
+            ASSERT(start <= end);
+            Handle<String> substring =
+                Factory::NewSubString(subject, start, end);
+            elements->set(i, *substring);
+          } else {
+            // The capture did not participate in the match; the replace
+            // function must see undefined for it, not a bogus substring.
+            ASSERT(register_vector[i * 2 + 1] < 0);
+            elements->set(i, Heap::undefined_value());
+          }
+        }
+        elements->set(capture_count + 1, Smi::FromInt(match_start));
+        elements->set(capture_count + 2, *subject);
+        builder->Add(*Factory::NewJSArrayWithElements(elements));
+      }
+      // Swap register vectors, so the last successful match is in
+      // prev_register_vector.
+      Vector<int> tmp = prev_register_vector;
+      prev_register_vector = register_vector;
+      register_vector = tmp;
+
+      if (match_end > match_start) {
+        pos = match_end;
+      } else {
+        pos = match_end + 1;
+        if (pos > subject_length) {
+          break;
+        }
+      }
+
+      result = RegExpImpl::IrregexpExecOnce(regexp,
+                                            subject,
+                                            pos,
+                                            register_vector);
+    } while (result == RegExpImpl::RE_SUCCESS);
+
+    if (result != RegExpImpl::RE_EXCEPTION) {
+      // Finished matching, with at least one match.
+      if (match_end < subject_length) {
+        ReplacementStringBuilder::AddSubjectSlice(builder,
+                                                  match_end,
+                                                  subject_length);
+      }
+
+      int last_match_capture_count = (capture_count + 1) * 2;
+      int last_match_array_size =
+          last_match_capture_count + RegExpImpl::kLastMatchOverhead;
+      last_match_array->EnsureSize(last_match_array_size);
+      AssertNoAllocation no_gc;
+      FixedArray* elements = FixedArray::cast(last_match_array->elements());
+      RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
+      RegExpImpl::SetLastSubject(elements, *subject);
+      RegExpImpl::SetLastInput(elements, *subject);
+      for (int i = 0; i < last_match_capture_count; i++) {
+        RegExpImpl::SetCapture(elements, i, prev_register_vector[i]);
+      }
+      return RegExpImpl::RE_SUCCESS;
+    }
+  }
+  // No matches at all, return failure or exception result directly.
+  return result;
+}
+
+
+// Runs a global regexp over the whole subject in one go.  Arguments are
+// regexp, subject, last_match_info and a result array to fill.  Returns the
+// result array (subject slices encoded as smis, interleaved with one entry
+// per match), null if nothing matched, or an exception.
+static Object* Runtime_RegExpExecMultiple(Arguments args) {
+  ASSERT(args.length() == 4);
+  HandleScope handles;
+
+  CONVERT_ARG_CHECKED(String, subject, 1);
+  if (!subject->IsFlat()) { FlattenString(subject); }
+  CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
+  CONVERT_ARG_CHECKED(JSArray, last_match_info, 2);
+  CONVERT_ARG_CHECKED(JSArray, result_array, 3);
+
+  ASSERT(last_match_info->HasFastElements());
+  ASSERT(regexp->GetFlags().is_global());
+  Handle<FixedArray> result_elements;
+  if (result_array->HasFastElements()) {
+    result_elements =
+        Handle<FixedArray>(FixedArray::cast(result_array->elements()));
+  }
+  // FixedArrayBuilder grows by doubling, so it must never be handed an
+  // empty backing store (e.g. after the caller truncated the array).
+  if (result_elements.is_null() || result_elements->length() == 0) {
+    result_elements = Factory::NewFixedArrayWithHoles(16);
+  }
+  FixedArrayBuilder builder(result_elements);
+
+  if (regexp->TypeTag() == JSRegExp::ATOM) {
+    Handle<String> pattern(
+        String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)));
+    int pattern_length = pattern->length();
+    if (pattern_length == 1) {
+      if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) {
+        return *builder.ToJSArray(result_array);
+      }
+      return Heap::null_value();
+    }
+
+    if (!pattern->IsFlat()) FlattenString(pattern);
+    if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) {
+      return *builder.ToJSArray(result_array);
+    }
+    return Heap::null_value();
+  }
+
+  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
+
+  RegExpImpl::IrregexpResult result;
+  if (regexp->CaptureCount() == 0) {
+    result = SearchRegExpNoCaptureMultiple(subject,
+                                           regexp,
+                                           last_match_info,
+                                           &builder);
+  } else {
+    result = SearchRegExpMultiple(subject, regexp, last_match_info, &builder);
+  }
+  if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
+  if (result == RegExpImpl::RE_FAILURE) return Heap::null_value();
+  ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
+  return Failure::Exception();
+}
+
+
 static Object* Runtime_NumberToRadixString(Arguments args) {
   NoHandleAllocation ha;
   ASSERT(args.length() == 2);
diff --git a/src/runtime.h b/src/runtime.h
index 4175902c45..42af3df88a 100644
--- a/src/runtime.h
+++ b/src/runtime.h
@@ -153,6 +153,7 @@ namespace internal {
   /* Regular expressions */ \
   F(RegExpCompile, 3, 1) \
   F(RegExpExec, 4, 1) \
+  F(RegExpExecMultiple, 4, 1) \
   \
   /* Strings */ \
   F(StringCharCodeAt, 2, 1) \
diff --git a/src/string.js b/src/string.js
index ca438fdde5..f4489efa12 100644
--- a/src/string.js
+++ b/src/string.js
@@ -405,97 +405,105 @@ function addCaptureString(builder, matchInfo, index) {
   builder.addSpecialSlice(start, end);
 };
 
+// TODO(lrn): This array will survive indefinitely if replace is never
+// called again. However, it will be empty, since the contents are cleared
+// in the finally block.
+var reusableReplaceArray = $Array(16);
 
 // Helper function for replacing regular expressions with the result of a
-// function application in String.prototype.replace. The function application
-// must be interleaved with the regexp matching (contrary to ECMA-262
-// 15.5.4.11) to mimic SpiderMonkey and KJS behavior when the function uses
-// the static properties of the RegExp constructor. Example:
+// function application in String.prototype.replace. For a global regexp all
+// matches are found up front by %RegExpExecMultiple; lastMatchInfoOverride
+// then makes the static properties of the RegExp constructor describe the
+// match being replaced while the replace function runs. Example:
 //   'abcd'.replace(/(.)/g, function() { return RegExp.$1; }
 // should be 'abcd' and not 'dddd' (or anything else).
 function StringReplaceRegExpWithFunction(subject, regexp, replace) {
-  var matchInfo = DoRegExpExec(regexp, subject, 0);
-  if (IS_NULL(matchInfo)) return subject;
-
-  var result = new ReplaceResultBuilder(subject);
-  // There's at least one match. If the regexp is global, we have to loop
-  // over all matches. The loop is not in C++ code here like the one in
-  // RegExp.prototype.exec, because of the interleaved function application.
-  // Unfortunately, that means this code is nearly duplicated, here and in
-  // jsregexp.cc.
   if (regexp.global) {
-    var previous = 0;
-    var startOfMatch;
-    if (NUMBER_OF_CAPTURES(matchInfo) == 2) {
-      // Both branches contain essentially the same loop except for the call
-      // to the replace function. The branch is put outside of the loop for
-      // speed
-      do {
-        startOfMatch = matchInfo[CAPTURE0];
-        result.addSpecialSlice(previous, startOfMatch);
-        previous = matchInfo[CAPTURE1];
-        var match = SubString(subject, startOfMatch, previous);
-        // Don't call directly to avoid exposing the built-in global object.
-        result.add(replace.call(null, match, startOfMatch, subject));
-        // Can't use matchInfo any more from here, since the function could
-        // overwrite it.
-        // Continue with the next match.
-        // Increment previous if we matched an empty string, as per ECMA-262
-        // 15.5.4.10.
-        if (previous == startOfMatch) {
-          // Add the skipped character to the output, if any.
-          if (previous < subject.length) {
-            result.addSpecialSlice(previous, previous + 1);
-          }
-          previous++;
-          // Per ECMA-262 15.10.6.2, if the previous index is greater than the
-          // string length, there is no match
-          if (previous > subject.length) {
-            return result.generate();
-          }
-        }
-        matchInfo = DoRegExpExec(regexp, subject, previous);
-      } while (!IS_NULL(matchInfo));
+    var resultArray = reusableReplaceArray;
+    if (resultArray) {
+      reusableReplaceArray = null;
     } else {
-      do {
-        startOfMatch = matchInfo[CAPTURE0];
-        result.addSpecialSlice(previous, startOfMatch);
-        previous = matchInfo[CAPTURE1];
-        result.add(ApplyReplacementFunction(replace, matchInfo, subject));
-        // Can't use matchInfo any more from here, since the function could
-        // overwrite it.
-        // Continue with the next match.
-        // Increment previous if we matched an empty string, as per ECMA-262
-        // 15.5.4.10.
-        if (previous == startOfMatch) {
-          // Add the skipped character to the output, if any.
-          if (previous < subject.length) {
-            result.addSpecialSlice(previous, previous + 1);
-          }
-          previous++;
-          // Per ECMA-262 15.10.6.2, if the previous index is greater than the
-          // string length, there is no match
-          if (previous > subject.length) {
-            return result.generate();
-          }
-        }
-        matchInfo = DoRegExpExec(regexp, subject, previous);
-      } while (!IS_NULL(matchInfo));
+      // Inside a nested replace (replace called from the replacement function
+      // of another replace) or we have failed to set the reusable array
+      // back due to an exception in a replacement function. Create a new
+      // array to use in the future, or until the original is written back.
+      resultArray = $Array(16);
+    }
+    try {
+      // Must handle exceptions thrown by the replace functions correctly,
+      // including unregistering global regexps.
+      var res = %RegExpExecMultiple(regexp,
+                                    subject,
+                                    lastMatchInfo,
+                                    resultArray);
+      regexp.lastIndex = 0;
+      if (IS_NULL(res)) {
+        // No matches at all.
+        return subject;
+      }
+      var len = res.length;
+      var i = 0;
+      if (NUMBER_OF_CAPTURES(lastMatchInfo) == 2) {
+        var match_start = 0;
+        // Stand-in "apply" array (match, index, subject) so that the static
+        // RegExp properties describe the current match inside the callback.
+        var override = [null, 0, subject];
+        while (i < len) {
+          var elem = res[i];
+          if (%_IsSmi(elem)) {
+            // A subject slice: one positive smi packing position (bits 11
+            // and up) and length (low 11 bits), or the pair (-length,
+            // position).  The next match starts where the slice ends.
+            if (elem > 0) {
+              match_start = (elem >> 11) + (elem & 0x7ff);
+            } else {
+              match_start = res[++i] - elem;
+            }
+          } else {
+            override[0] = elem;
+            override[1] = match_start;
+            lastMatchInfoOverride = override;
+            var func_result = replace.call(null, elem, match_start, subject);
+            if (!IS_STRING(func_result)) func_result = TO_STRING(func_result);
+            res[i] = func_result;
+            match_start += elem.length;
+          }
+          i++;
+        }
+      } else {
+        while (i < len) {
+          var elem = res[i];
+          if (!%_IsSmi(elem)) {
+            // elem must be an Array.
+            // Use the apply argument as backing for global RegExp properties.
+            lastMatchInfoOverride = elem;
+            var func_result = replace.apply(null, elem);
+            if (!IS_STRING(func_result)) func_result = TO_STRING(func_result);
+            res[i] = func_result;
+          }
+          i++;
+        }
+      }
+      var result = new ReplaceResultBuilder(subject, res);
+      return result.generate();
+    } finally {
+      lastMatchInfoOverride = null;
+      resultArray.length = 0;
+      reusableReplaceArray = resultArray;
     }
-
-    // Tack on the final right substring after the last match.
-    result.addSpecialSlice(previous, subject.length);
-
   } else {  // Not a global regexp, no need to loop.
+    var matchInfo = DoRegExpExec(regexp, subject, 0);
+    if (IS_NULL(matchInfo)) return subject;
+
+    var result = new ReplaceResultBuilder(subject);
     result.addSpecialSlice(0, matchInfo[CAPTURE0]);
     var endOfMatch = matchInfo[CAPTURE1];
     result.add(ApplyReplacementFunction(replace, matchInfo, subject));
     // Can't use matchInfo any more from here, since the function could
     // overwrite it.
     result.addSpecialSlice(endOfMatch, subject.length);
+    return result.generate();
   }
-
-  return result.generate();
 }
 
 
@@ -894,8 +902,13 @@ function StringSup() {
 
 
 // ReplaceResultBuilder support.
 function ReplaceResultBuilder(str) {
-  this.__proto__ = void 0;
-  this.elements = new $Array();
+  // An optional second argument supplies a pre-filled elements array
+  // (as produced by %RegExpExecMultiple) to use instead of an empty one.
+  if (%_ArgumentsLength() > 1) {
+    this.elements = %_Arguments(1);
+  } else {
+    this.elements = new $Array();
+  }
   this.special_string = str;
 }