Fix some bugs in accessing details of the latest regexp

match.  Sometimes we were not updating it when we should
and sometimes we were leaving the lastMatchInfoOverride in
place when we should be using the updated regular last match
info.  Small optimization for zero length match in
String.prototype.replace.
Review URL: https://chromiumcodereview.appspot.com/10184004

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11422 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2012-04-23 18:56:07 +00:00
parent 09657d94eb
commit c436c70f8b
4 changed files with 219 additions and 97 deletions

View File

@ -196,6 +196,7 @@ macro SET_UTC_DATE_VALUE(arg, value) = (%DateSetValue(arg, value, 1));
macro SET_LOCAL_DATE_VALUE(arg, value) = (%DateSetValue(arg, value, 0));
# Last input and last subject of regexp matches.
const LAST_SUBJECT_INDEX = 1;
macro LAST_SUBJECT(array) = ((array)[1]);
macro LAST_INPUT(array) = ((array)[2]);

View File

@ -2885,12 +2885,79 @@ void FindStringIndicesDispatch(Isolate* isolate,
}
// Two smis before and after the match, for very long strings.
const int kMaxBuilderEntriesPerRegExpMatch = 5;
// Stores a match that has no explicit capture groups into |last_match_info|.
// |subject| is recorded as both the last subject and the last input, and the
// only capture registers written are the start and end offsets of the whole
// match (|match_start|, |match_end|).
static void SetLastMatchInfoNoCaptures(Handle<String> subject,
                                       Handle<JSArray> last_match_info,
                                       int match_start,
                                       int match_end) {
  // The implicit whole-match capture uses two registers: start and end.
  const int kWholeMatchRegisters = 2;

  // Resize the array before entering the no-allocation scope declared below.
  last_match_info->EnsureSize(RegExpImpl::kLastMatchOverhead +
                              kWholeMatchRegisters);

  AssertNoAllocation no_gc;
  FixedArray* elements = FixedArray::cast(last_match_info->elements());

  // Subject and input both refer to the string that was searched.
  RegExpImpl::SetLastSubject(elements, *subject);
  RegExpImpl::SetLastInput(elements, *subject);

  // Capture count followed by the two whole-match offsets.
  RegExpImpl::SetLastCaptureCount(elements, kWholeMatchRegisters);
  RegExpImpl::SetCapture(elements, 1, match_end);
  RegExpImpl::SetCapture(elements, 0, match_start);
}
// Finds successive occurrences of |pattern| in |subject| and records them in
// |builder|: each gap between two matches is added as a subject slice and each
// match is added as |pattern_string|.
//
// |*match_pos| is read as the position of the previous match; every search
// starts at that position plus the pattern length.
// NOTE(review): callers presumably seed |*match_pos| so that the first search
// starts where they want it (e.g. a negative pattern length) -- confirm at the
// call site.
//
// Returns false, with |*match_pos| set to the last position reached, when
// |builder| does not have room for another match's worth of entries.
// Returns true once no further match is found; in that case the tail of the
// subject after the last match (if any) has been added and |*match_pos| holds
// the position of the last match.
template <typename SubjectChar, typename PatternChar>
static bool SearchStringMultiple(Isolate* isolate,
                                 Vector<const SubjectChar> subject,
                                 Vector<const PatternChar> pattern,
                                 String* pattern_string,
                                 FixedArrayBuilder* builder,
                                 int* match_pos) {
  const int subject_length = subject.length();
  const int pattern_length = pattern.length();
  // Highest index at which the whole pattern still fits inside the subject.
  const int last_start = subject_length - pattern_length;
  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);

  int current = *match_pos;
  while (current <= last_start) {
    if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
      // Not enough room for another match: hand the position back and stop.
      *match_pos = current;
      return false;
    }

    // End of the previous match; the next search begins here.
    const int previous_end = current + pattern_length;
    const int found = search.Search(subject, previous_end);
    if (found < 0) break;  // No further occurrence.

    // Emit the unmatched text between the previous match and this one.
    if (found > previous_end) {
      ReplacementStringBuilder::AddSubjectSlice(builder, previous_end, found);
    }
    current = found;
    builder->Add(pattern_string);
  }

  // Emit whatever follows the last match, if anything does.
  if (current < last_start) {
    ReplacementStringBuilder::AddSubjectSlice(builder,
                                              current + pattern_length,
                                              subject_length);
  }

  *match_pos = current;
  return true;
}
template<typename ResultSeqString>
MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString(
MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> pattern_regexp,
Handle<String> replacement) {
Handle<String> replacement,
Handle<JSArray> last_match_info) {
ASSERT(subject->IsFlat());
ASSERT(replacement->IsFlat());
@ -2949,6 +3016,12 @@ MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString(
subject_pos,
subject_len);
}
SetLastMatchInfoNoCaptures(subject,
last_match_info,
indices.at(matches - 1),
indices.at(matches - 1) + pattern_len);
return *result;
}
@ -2997,11 +3070,19 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString(
compiled_replacement.simple_hint()) {
if (subject_handle->HasOnlyAsciiChars() &&
replacement_handle->HasOnlyAsciiChars()) {
return StringReplaceStringWithString<SeqAsciiString>(
isolate, subject_handle, regexp_handle, replacement_handle);
return StringReplaceAtomRegExpWithString<SeqAsciiString>(
isolate,
subject_handle,
regexp_handle,
replacement_handle,
last_match_info_handle);
} else {
return StringReplaceStringWithString<SeqTwoByteString>(
isolate, subject_handle, regexp_handle, replacement_handle);
return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
isolate,
subject_handle,
regexp_handle,
replacement_handle,
last_match_info_handle);
}
}
@ -3090,21 +3171,29 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
Handle<String> subject_handle(subject);
Handle<JSRegExp> regexp_handle(regexp);
Handle<JSArray> last_match_info_handle(last_match_info);
// Shortcut for simple non-regexp global replacements
if (regexp_handle->GetFlags().is_global() &&
regexp_handle->TypeTag() == JSRegExp::ATOM) {
Handle<String> empty_string_handle(HEAP->empty_string());
if (subject_handle->HasOnlyAsciiChars()) {
return StringReplaceStringWithString<SeqAsciiString>(
isolate, subject_handle, regexp_handle, empty_string_handle);
return StringReplaceAtomRegExpWithString<SeqAsciiString>(
isolate,
subject_handle,
regexp_handle,
empty_string_handle,
last_match_info_handle);
} else {
return StringReplaceStringWithString<SeqTwoByteString>(
isolate, subject_handle, regexp_handle, empty_string_handle);
return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
isolate,
subject_handle,
regexp_handle,
empty_string_handle,
last_match_info_handle);
}
}
Handle<JSArray> last_match_info_handle(last_match_info);
Handle<Object> match = RegExpImpl::Exec(regexp_handle,
subject_handle,
0,
@ -3124,6 +3213,10 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
end = RegExpImpl::GetCapture(match_info_array, 1);
}
bool global = regexp_handle->GetFlags().is_global();
if (start == end && !global) return *subject_handle;
int length = subject_handle->length();
int new_length = length - (end - start);
if (new_length == 0) {
@ -3139,7 +3232,7 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString(
}
// If the regexp isn't global, only match once.
if (!regexp_handle->GetFlags().is_global()) {
if (!global) {
if (start > 0) {
String::WriteToFlat(*subject_handle,
answer->GetChars(),
@ -3638,70 +3731,6 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
}
// Two smis before and after the match, for very long strings.
const int kMaxBuilderEntriesPerRegExpMatch = 5;
// Stores a match that has no explicit capture groups into |last_match_info|.
// |subject| is recorded as both the last subject and the last input, and the
// only capture registers written are the start and end offsets of the whole
// match (|match_start|, |match_end|).
static void SetLastMatchInfoNoCaptures(Handle<String> subject,
                                       Handle<JSArray> last_match_info,
                                       int match_start,
                                       int match_end) {
  // The implicit whole-match capture uses two registers: start and end.
  const int kWholeMatchRegisters = 2;

  // Resize the array before entering the no-allocation scope declared below.
  last_match_info->EnsureSize(RegExpImpl::kLastMatchOverhead +
                              kWholeMatchRegisters);

  AssertNoAllocation no_gc;
  FixedArray* elements = FixedArray::cast(last_match_info->elements());

  // Subject and input both refer to the string that was searched.
  RegExpImpl::SetLastSubject(elements, *subject);
  RegExpImpl::SetLastInput(elements, *subject);

  // Capture count followed by the two whole-match offsets.
  RegExpImpl::SetLastCaptureCount(elements, kWholeMatchRegisters);
  RegExpImpl::SetCapture(elements, 1, match_end);
  RegExpImpl::SetCapture(elements, 0, match_start);
}
// Finds successive occurrences of |pattern| in |subject| and records them in
// |builder|: each gap between two matches is added as a subject slice and each
// match is added as |pattern_string|.
//
// |*match_pos| is read as the position of the previous match; every search
// starts at that position plus the pattern length.
// NOTE(review): callers presumably seed |*match_pos| so that the first search
// starts where they want it (e.g. a negative pattern length) -- confirm at the
// call site.
//
// Returns false, with |*match_pos| set to the last position reached, when
// |builder| does not have room for another match's worth of entries.
// Returns true once no further match is found; in that case the tail of the
// subject after the last match (if any) has been added and |*match_pos| holds
// the position of the last match.
template <typename SubjectChar, typename PatternChar>
static bool SearchStringMultiple(Isolate* isolate,
                                 Vector<const SubjectChar> subject,
                                 Vector<const PatternChar> pattern,
                                 String* pattern_string,
                                 FixedArrayBuilder* builder,
                                 int* match_pos) {
  const int subject_length = subject.length();
  const int pattern_length = pattern.length();
  // Highest index at which the whole pattern still fits inside the subject.
  const int last_start = subject_length - pattern_length;
  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);

  int current = *match_pos;
  while (current <= last_start) {
    if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
      // Not enough room for another match: hand the position back and stop.
      *match_pos = current;
      return false;
    }

    // End of the previous match; the next search begins here.
    const int previous_end = current + pattern_length;
    const int found = search.Search(subject, previous_end);
    if (found < 0) break;  // No further occurrence.

    // Emit the unmatched text between the previous match and this one.
    if (found > previous_end) {
      ReplacementStringBuilder::AddSubjectSlice(builder, previous_end, found);
    }
    current = found;
    builder->Add(pattern_string);
  }

  // Emit whatever follows the last match, if anything does.
  if (current < last_start) {
    ReplacementStringBuilder::AddSubjectSlice(builder,
                                              current + pattern_length,
                                              subject_length);
  }

  *match_pos = current;
  return true;
}
static bool SearchStringMultiple(Isolate* isolate,
Handle<String> subject,
Handle<String> pattern,
@ -3841,6 +3870,8 @@ static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
}
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info. See comment on that function.
static RegExpImpl::IrregexpResult SearchRegExpMultiple(
Isolate* isolate,
Handle<String> subject,
@ -3869,10 +3900,6 @@ static RegExpImpl::IrregexpResult SearchRegExpMultiple(
// End of previous match. Differs from pos if match was empty.
int match_end = 0;
if (result == RegExpImpl::RE_SUCCESS) {
// Need to keep a copy of the previous match for creating last_match_info
// at the end, so we have two vectors that we swap between.
OffsetsVector registers2(required_registers, isolate);
Vector<int> prev_register_vector(registers2.vector(), registers2.length());
bool first = true;
do {
int match_start = register_vector[0];
@ -3925,11 +3952,6 @@ static RegExpImpl::IrregexpResult SearchRegExpMultiple(
elements->set(capture_count + 2, *subject);
builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
}
// Swap register vectors, so the last successful match is in
// prev_register_vector.
Vector<int32_t> tmp = prev_register_vector;
prev_register_vector = register_vector;
register_vector = tmp;
if (match_end > match_start) {
pos = match_end;
@ -3961,12 +3983,12 @@ static RegExpImpl::IrregexpResult SearchRegExpMultiple(
last_match_array->EnsureSize(last_match_array_size);
AssertNoAllocation no_gc;
FixedArray* elements = FixedArray::cast(last_match_array->elements());
// We have to set this even though the rest of the last match array is
// ignored.
RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
// These are also read without consulting the override.
RegExpImpl::SetLastSubject(elements, *subject);
RegExpImpl::SetLastInput(elements, *subject);
for (int i = 0; i < last_match_capture_count; i++) {
RegExpImpl::SetCapture(elements, i, prev_register_vector[i]);
}
return RegExpImpl::RE_SUCCESS;
}
}
@ -3975,6 +3997,9 @@ static RegExpImpl::IrregexpResult SearchRegExpMultiple(
}
// This is only called for StringReplaceGlobalRegExpWithFunction. This sets
// lastMatchInfoOverride to maintain the last match info, so we don't need to
// set any other last match array info.
RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
ASSERT(args.length() == 4);
HandleScope handles(isolate);

View File

@ -237,10 +237,28 @@ function StringReplace(search, replace) {
replace);
}
} else {
return %StringReplaceRegExpWithString(subject,
search,
TO_STRING_INLINE(replace),
lastMatchInfo);
if (lastMatchInfoOverride == null) {
return %StringReplaceRegExpWithString(subject,
search,
TO_STRING_INLINE(replace),
lastMatchInfo);
} else {
// We use this hack to detect whether StringReplaceRegExpWithString
// found at least one hit. In that case we need to remove any
// override.
var saved_subject = lastMatchInfo[LAST_SUBJECT_INDEX];
lastMatchInfo[LAST_SUBJECT_INDEX] = 0;
var answer = %StringReplaceRegExpWithString(subject,
search,
TO_STRING_INLINE(replace),
lastMatchInfo);
if (%_IsSmi(lastMatchInfo[LAST_SUBJECT_INDEX])) {
lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
} else {
lastMatchInfoOverride = null;
}
return answer;
}
}
}
@ -429,14 +447,22 @@ function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) {
return subject;
}
var len = res.length;
var i = 0;
if (NUMBER_OF_CAPTURES(lastMatchInfo) == 2) {
// If the number of captures is two then there are no explicit captures in
// the regexp, just the implicit capture that captures the whole match. In
// this case we can simplify quite a bit and end up with something faster.
// The builder will consist of some integers that indicate slices of the
// input string and some replacements that were returned from the replace
// function.
var match_start = 0;
var override = new InternalArray(null, 0, subject);
var receiver = %GetDefaultReceiver(replace);
while (i < len) {
for (var i = 0; i < len; i++) {
var elem = res[i];
if (%_IsSmi(elem)) {
// Integers represent slices of the original string. Use these to
// get the offsets we need for the override array (so things like
// RegExp.leftContext work during the callback function).
if (elem > 0) {
match_start = (elem >> 11) + (elem & 0x7ff);
} else {
@ -448,23 +474,25 @@ function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) {
lastMatchInfoOverride = override;
var func_result =
%_CallFunction(receiver, elem, match_start, subject, replace);
// Overwrite the i'th element in the results with the string we got
// back from the callback function.
res[i] = TO_STRING_INLINE(func_result);
match_start += elem.length;
}
i++;
}
} else {
var receiver = %GetDefaultReceiver(replace);
while (i < len) {
for (var i = 0; i < len; i++) {
var elem = res[i];
if (!%_IsSmi(elem)) {
// elem must be an Array.
// Use the apply argument as backing for global RegExp properties.
lastMatchInfoOverride = elem;
var func_result = %Apply(replace, receiver, elem, 0, elem.length);
// Overwrite the i'th element in the results with the string we got
// back from the callback function.
res[i] = TO_STRING_INLINE(func_result);
}
i++;
}
}
var resultBuilder = new ReplaceResultBuilder(subject, res);

View File

@ -86,3 +86,71 @@ assertEquals('c', RegExp.$2);
for (var i = 3; i < 10; i++) {
assertEquals("", RegExp['$' + i]);
}
// Test helper: runs a global regexp with two capture groups through
// String.prototype.replace with a function as the replacement. The result of
// the replace is discarded; the call is made only for its effect on the
// RegExp last-match state.
function Override() {
// Set the internal lastMatchInfoOverride. After calling this we do a normal
// match and verify the override was cleared and that we record the new
// captures.
"abcdabcd".replace(/(b)(c)/g, function() { });
}
// Checks that RegExp[property] equals |expect| after each of several regexp
// operations on |input|, where every operation is preceded by a call to
// Override(). The operations use a regexp built from |re_src|, once without
// flags and once with the "g" flag: replace with "x", replace with "", match,
// and test.
function TestOverride(input, expect, property, re_src) {
  var plain = new RegExp(re_src);
  var global = new RegExp(re_src, "g");

  // Operations are listed in the order in which they must run.
  var operations = [
    function() { return input.replace(plain, "x"); },
    function() { return input.replace(global, "x"); },
    function() { return input.replace(plain, ""); },
    function() { return input.replace(global, ""); },
    function() { return input.match(plain); },
    function() { return input.match(global); },
    function() { return plain.test(input); },
    function() { return global.test(input); }
  ];

  for (var k = 0; k < operations.length; k++) {
    // Install the override, run the operation, then inspect the static
    // RegExp property.
    Override();
    operations[k]();
    assertEquals(expect, RegExp[property]);
  }
}
// Case 1: a regexp with an explicit capture group; check the first capture
// through RegExp.$1.
var input = "bar.foo baz......";
var re_str = "(ba.).*?f";
TestOverride(input, "bar", "$1", re_str);
// Case 2: a regexp without explicit captures; check the whole match through
// RegExp["$&"].
input = "foo bar baz";
var re_str = "bar";
TestOverride(input, "bar", "$&", re_str);
// Runs |fn| and then asserts that RegExp.$1 still holds "hestfisk", i.e. that
// whatever |fn| did left the first capture of the last match unchanged.
function no_last_match(fn) {
  var expected_capture = "hestfisk";
  fn();
  assertEquals(expected_capture, RegExp.$1);
}
// Seed the last match info so that RegExp.$1 is "hestfisk"; no_last_match
// asserts that this value is still in place after each operation below.
/(hestfisk)/.test("There's no such thing as a hestfisk!");
// Operations whose pattern is a plain string rather than a regexp.
no_last_match(function() { "foo".replace("f", ""); });
no_last_match(function() { "foo".replace("f", "f"); });
no_last_match(function() { "foo".split("o"); });
// Same check with a longer subject built by string concatenation, once with a
// pattern that does not occur ("x") and once with one that does ("e").
var base = "In the music. In the music. ";
var cons = base + base + base + base;
no_last_match(function() { cons.replace("x", "y"); });
no_last_match(function() { cons.replace("e", "E"); });
// Here's one that matches once, then tries to match again, but fails.
// Verify that the last match info is from the last match, not from the
// failure that came after.
"bar.foo baz......".replace(/(ba.).*?f/g, function() { return "x";});
assertEquals("bar", RegExp.$1);
// Global replace with a regexp that has a zero-length alternative (^) as well
// as a non-empty one ("bar"), first with an empty replacement and then with a
// non-empty one.
var a = "foo bar baz".replace(/^|bar/g, "");
assertEquals("foo baz", a);
a = "foo bar baz".replace(/^|bar/g, "*");
assertEquals("*foo * baz", a);