Cache results in SearchRegExpMultiple.

BUG=

Review URL: https://chromiumcodereview.appspot.com/10837290

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12416 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2012-08-31 09:28:01 +00:00
parent 40f44b1d4f
commit 5dd51bafef
5 changed files with 247 additions and 98 deletions

View File

@ -4042,7 +4042,7 @@ class Internals {
static const int kNullValueRootIndex = 7;
static const int kTrueValueRootIndex = 8;
static const int kFalseValueRootIndex = 9;
static const int kEmptySymbolRootIndex = 114;
static const int kEmptySymbolRootIndex = 115;
static const int kJSObjectType = 0xaa;
static const int kFirstNonstringType = 0x80;

View File

@ -1002,7 +1002,8 @@ void Heap::MarkCompactPrologue() {
isolate_->keyed_lookup_cache()->Clear();
isolate_->context_slot_cache()->Clear();
isolate_->descriptor_lookup_cache()->Clear();
StringSplitCache::Clear(string_split_cache());
RegExpResultsCache::Clear(string_split_cache());
RegExpResultsCache::Clear(regexp_multiple_cache());
isolate_->compilation_cache()->MarkCompactPrologue();
@ -2761,12 +2762,18 @@ bool Heap::CreateInitialObjects() {
set_single_character_string_cache(FixedArray::cast(obj));
// Allocate cache for string split.
{ MaybeObject* maybe_obj =
AllocateFixedArray(StringSplitCache::kStringSplitCacheSize, TENURED);
{ MaybeObject* maybe_obj = AllocateFixedArray(
RegExpResultsCache::kRegExpResultsCacheSize, TENURED);
if (!maybe_obj->ToObject(&obj)) return false;
}
set_string_split_cache(FixedArray::cast(obj));
{ MaybeObject* maybe_obj = AllocateFixedArray(
RegExpResultsCache::kRegExpResultsCacheSize, TENURED);
if (!maybe_obj->ToObject(&obj)) return false;
}
set_regexp_multiple_cache(FixedArray::cast(obj));
// Allocate cache for external strings pointing to native source code.
{ MaybeObject* maybe_obj = AllocateFixedArray(Natives::GetBuiltinsCount());
if (!maybe_obj->ToObject(&obj)) return false;
@ -2792,70 +2799,98 @@ bool Heap::CreateInitialObjects() {
}
Object* StringSplitCache::Lookup(
FixedArray* cache, String* string, String* pattern) {
if (!string->IsSymbol() || !pattern->IsSymbol()) return Smi::FromInt(0);
uint32_t hash = string->Hash();
uint32_t index = ((hash & (kStringSplitCacheSize - 1)) &
Object* RegExpResultsCache::Lookup(Heap* heap,
String* key_string,
Object* key_pattern,
ResultsCacheType type) {
FixedArray* cache;
if (!key_string->IsSymbol()) return Smi::FromInt(0);
if (type == STRING_SPLIT_SUBSTRINGS) {
ASSERT(key_pattern->IsString());
if (!key_pattern->IsSymbol()) return Smi::FromInt(0);
cache = heap->string_split_cache();
} else {
ASSERT(type == REGEXP_MULTIPLE_INDICES);
ASSERT(key_pattern->IsFixedArray());
cache = heap->regexp_multiple_cache();
}
uint32_t hash = key_string->Hash();
uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
~(kArrayEntriesPerCacheEntry - 1));
if (cache->get(index + kStringOffset) == string &&
cache->get(index + kPatternOffset) == pattern) {
if (cache->get(index + kStringOffset) == key_string &&
cache->get(index + kPatternOffset) == key_pattern) {
return cache->get(index + kArrayOffset);
}
index = ((index + kArrayEntriesPerCacheEntry) & (kStringSplitCacheSize - 1));
if (cache->get(index + kStringOffset) == string &&
cache->get(index + kPatternOffset) == pattern) {
index =
((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
if (cache->get(index + kStringOffset) == key_string &&
cache->get(index + kPatternOffset) == key_pattern) {
return cache->get(index + kArrayOffset);
}
return Smi::FromInt(0);
}
void StringSplitCache::Enter(Heap* heap,
FixedArray* cache,
String* string,
String* pattern,
FixedArray* array) {
if (!string->IsSymbol() || !pattern->IsSymbol()) return;
uint32_t hash = string->Hash();
uint32_t index = ((hash & (kStringSplitCacheSize - 1)) &
void RegExpResultsCache::Enter(Heap* heap,
String* key_string,
Object* key_pattern,
FixedArray* value_array,
ResultsCacheType type) {
FixedArray* cache;
if (!key_string->IsSymbol()) return;
if (type == STRING_SPLIT_SUBSTRINGS) {
ASSERT(key_pattern->IsString());
if (!key_pattern->IsSymbol()) return;
cache = heap->string_split_cache();
} else {
ASSERT(type == REGEXP_MULTIPLE_INDICES);
ASSERT(key_pattern->IsFixedArray());
cache = heap->regexp_multiple_cache();
}
uint32_t hash = key_string->Hash();
uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
~(kArrayEntriesPerCacheEntry - 1));
if (cache->get(index + kStringOffset) == Smi::FromInt(0)) {
cache->set(index + kStringOffset, string);
cache->set(index + kPatternOffset, pattern);
cache->set(index + kArrayOffset, array);
cache->set(index + kStringOffset, key_string);
cache->set(index + kPatternOffset, key_pattern);
cache->set(index + kArrayOffset, value_array);
} else {
uint32_t index2 =
((index + kArrayEntriesPerCacheEntry) & (kStringSplitCacheSize - 1));
((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
if (cache->get(index2 + kStringOffset) == Smi::FromInt(0)) {
cache->set(index2 + kStringOffset, string);
cache->set(index2 + kPatternOffset, pattern);
cache->set(index2 + kArrayOffset, array);
cache->set(index2 + kStringOffset, key_string);
cache->set(index2 + kPatternOffset, key_pattern);
cache->set(index2 + kArrayOffset, value_array);
} else {
cache->set(index2 + kStringOffset, Smi::FromInt(0));
cache->set(index2 + kPatternOffset, Smi::FromInt(0));
cache->set(index2 + kArrayOffset, Smi::FromInt(0));
cache->set(index + kStringOffset, string);
cache->set(index + kPatternOffset, pattern);
cache->set(index + kArrayOffset, array);
cache->set(index + kStringOffset, key_string);
cache->set(index + kPatternOffset, key_pattern);
cache->set(index + kArrayOffset, value_array);
}
}
if (array->length() < 100) { // Limit how many new symbols we want to make.
for (int i = 0; i < array->length(); i++) {
String* str = String::cast(array->get(i));
// If the array is a reasonably short list of substrings, convert it into a
// list of symbols.
if (type == STRING_SPLIT_SUBSTRINGS && value_array->length() < 100) {
for (int i = 0; i < value_array->length(); i++) {
String* str = String::cast(value_array->get(i));
Object* symbol;
MaybeObject* maybe_symbol = heap->LookupSymbol(str);
if (maybe_symbol->ToObject(&symbol)) {
array->set(i, symbol);
value_array->set(i, symbol);
}
}
}
array->set_map_no_write_barrier(heap->fixed_cow_array_map());
// Convert backing store to a copy-on-write array.
value_array->set_map_no_write_barrier(heap->fixed_cow_array_map());
}
void StringSplitCache::Clear(FixedArray* cache) {
for (int i = 0; i < kStringSplitCacheSize; i++) {
void RegExpResultsCache::Clear(FixedArray* cache) {
for (int i = 0; i < kRegExpResultsCacheSize; i++) {
cache->set(i, Smi::FromInt(0));
}
}

View File

@ -87,6 +87,7 @@ namespace internal {
V(Object, instanceof_cache_answer, InstanceofCacheAnswer) \
V(FixedArray, single_character_string_cache, SingleCharacterStringCache) \
V(FixedArray, string_split_cache, StringSplitCache) \
V(FixedArray, regexp_multiple_cache, RegExpMultipleCache) \
V(Object, termination_exception, TerminationException) \
V(Smi, hash_seed, HashSeed) \
V(Map, string_map, StringMap) \
@ -2582,24 +2583,31 @@ class GCTracer BASE_EMBEDDED {
};
class StringSplitCache {
class RegExpResultsCache {
public:
static Object* Lookup(FixedArray* cache, String* string, String* pattern);
enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
// Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
// On success, the returned result is guaranteed to be a COW-array.
static Object* Lookup(Heap* heap,
String* key_string,
Object* key_pattern,
ResultsCacheType type);
// Attempt to add value_array to the cache specified by type. On success,
// value_array is turned into a COW-array.
static void Enter(Heap* heap,
FixedArray* cache,
String* string,
String* pattern,
FixedArray* array);
String* key_string,
Object* key_pattern,
FixedArray* value_array,
ResultsCacheType type);
static void Clear(FixedArray* cache);
static const int kStringSplitCacheSize = 0x100;
static const int kRegExpResultsCacheSize = 0x100;
private:
static const int kArrayEntriesPerCacheEntry = 4;
static const int kStringOffset = 0;
static const int kPatternOffset = 1;
static const int kArrayOffset = 2;
static MaybeObject* WrapFixedArrayInJSArray(Object* fixed_array);
};

View File

@ -2413,18 +2413,13 @@ class FixedArrayBuilder {
return array_->length();
}
Handle<JSArray> ToJSArray() {
Handle<JSArray> result_array = FACTORY->NewJSArrayWithElements(array_);
result_array->set_length(Smi::FromInt(length_));
return result_array;
}
Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
FACTORY->SetContent(target_array, array_);
target_array->set_length(Smi::FromInt(length_));
return target_array;
}
private:
Handle<FixedArray> array_;
int length_;
@ -2543,10 +2538,6 @@ class ReplacementStringBuilder {
character_count_ += by;
}
Handle<JSArray> GetParts() {
return array_builder_.ToJSArray();
}
private:
Handle<SeqAsciiString> NewRawAsciiString(int length) {
return heap_->isolate()->factory()->NewRawAsciiString(length);
@ -3667,21 +3658,57 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) {
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info. See comment on that function.
template<bool has_capture>
static int SearchRegExpMultiple(
static MaybeObject* SearchRegExpMultiple(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> regexp,
Handle<JSArray> last_match_array,
FixedArrayBuilder* builder) {
Handle<JSArray> result_array) {
ASSERT(subject->IsFlat());
ASSERT_NE(has_capture, regexp->CaptureCount() == 0);
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
static const int kMinLengthToCache = 0x1000;
if (subject_length > kMinLengthToCache) {
Handle<Object> cached_answer(RegExpResultsCache::Lookup(
isolate->heap(),
*subject,
regexp->data(),
RegExpResultsCache::REGEXP_MULTIPLE_INDICES));
if (*cached_answer != Smi::FromInt(0)) {
Handle<FixedArray> cached_fixed_array =
Handle<FixedArray>(FixedArray::cast(*cached_answer));
// The cache FixedArray is a COW-array and can therefore be reused.
isolate->factory()->SetContent(result_array, cached_fixed_array);
// The actual length of the result array is stored in the last element of
// the backing store (the backing FixedArray may have a larger capacity).
Object* cached_fixed_array_last_element =
cached_fixed_array->get(cached_fixed_array->length() - 1);
Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);
result_array->set_length(js_array_length);
RegExpImpl::SetLastMatchInfo(
last_match_array, subject, capture_count, NULL);
return *result_array;
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
if (global_cache.HasException()) return Failure::Exception();
Handle<FixedArray> result_elements;
if (result_array->HasFastObjectElements()) {
result_elements =
Handle<FixedArray>(FixedArray::cast(result_array->elements()));
}
if (result_elements.is_null() || result_elements->length() < 16) {
result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
}
FixedArrayBuilder builder(result_elements);
// Position to search from.
int match_start = -1;
int match_end = 0;
@ -3694,9 +3721,9 @@ static int SearchRegExpMultiple(
int32_t* current_match = global_cache.FetchNext();
if (current_match == NULL) break;
match_start = current_match[0];
builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
if (match_end < match_start) {
ReplacementStringBuilder::AddSubjectSlice(builder,
ReplacementStringBuilder::AddSubjectSlice(&builder,
match_end,
match_start);
}
@ -3738,19 +3765,19 @@ static int SearchRegExpMultiple(
}
elements->set(capture_count + 1, Smi::FromInt(match_start));
elements->set(capture_count + 2, *subject);
builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
} else {
builder->Add(*match);
builder.Add(*match);
}
}
}
if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION;
if (global_cache.HasException()) return Failure::Exception();
if (match_start >= 0) {
// Finished matching, with at least one match.
if (match_end < subject_length) {
ReplacementStringBuilder::AddSubjectSlice(builder,
ReplacementStringBuilder::AddSubjectSlice(&builder,
match_end,
subject_length);
}
@ -3758,9 +3785,23 @@ static int SearchRegExpMultiple(
RegExpImpl::SetLastMatchInfo(
last_match_array, subject, capture_count, NULL);
return RegExpImpl::RE_SUCCESS;
if (subject_length > kMinLengthToCache) {
// Store the length of the result array into the last element of the
// backing FixedArray.
builder.EnsureCapacity(1);
Handle<FixedArray> fixed_array = builder.array();
fixed_array->set(fixed_array->length() - 1,
Smi::FromInt(builder.length()));
// Cache the result and turn the FixedArray into a COW array.
RegExpResultsCache::Enter(isolate->heap(),
*subject,
regexp->data(),
*fixed_array,
RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
}
return *builder.ToJSArray(result_array);
} else {
return RegExpImpl::RE_FAILURE; // No matches at all.
return isolate->heap()->null_value(); // No matches at all.
}
}
@ -3780,29 +3821,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
ASSERT(last_match_info->HasFastObjectElements());
ASSERT(regexp->GetFlags().is_global());
Handle<FixedArray> result_elements;
if (result_array->HasFastObjectElements()) {
result_elements =
Handle<FixedArray>(FixedArray::cast(result_array->elements()));
}
if (result_elements.is_null() || result_elements->length() < 16) {
result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
}
FixedArrayBuilder builder(result_elements);
int result;
if (regexp->CaptureCount() == 0) {
result = SearchRegExpMultiple<false>(
isolate, subject, regexp, last_match_info, &builder);
return SearchRegExpMultiple<false>(
isolate, subject, regexp, last_match_info, result_array);
} else {
result = SearchRegExpMultiple<true>(
isolate, subject, regexp, last_match_info, &builder);
return SearchRegExpMultiple<true>(
isolate, subject, regexp, last_match_info, result_array);
}
if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array);
if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value();
ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
return Failure::Exception();
}
@ -6119,11 +6145,13 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
RUNTIME_ASSERT(pattern_length > 0);
if (limit == 0xffffffffu) {
Handle<Object> cached_answer(StringSplitCache::Lookup(
isolate->heap()->string_split_cache(),
Handle<Object> cached_answer(RegExpResultsCache::Lookup(
isolate->heap(),
*subject,
*pattern));
*pattern,
RegExpResultsCache::STRING_SPLIT_SUBSTRINGS));
if (*cached_answer != Smi::FromInt(0)) {
// The cache FixedArray is a COW-array and can therefore be reused.
Handle<JSArray> result =
isolate->factory()->NewJSArrayWithElements(
Handle<FixedArray>::cast(cached_answer));
@ -6183,11 +6211,11 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
if (limit == 0xffffffffu) {
if (result->HasFastObjectElements()) {
StringSplitCache::Enter(isolate->heap(),
isolate->heap()->string_split_cache(),
*subject,
*pattern,
*elements);
RegExpResultsCache::Enter(isolate->heap(),
*subject,
*pattern,
*elements,
RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
}
}

View File

@ -0,0 +1,78 @@
// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Long subject string shared by the results-cache checks that follow.
// NOTE(review): the runtime change accompanying this test only consults the
// regexp results cache when the subject is longer than kMinLengthToCache
// (0x1000 characters) -- confirm this literal actually exceeds that
// threshold, otherwise the replace() checks below never reach the cached path.
// NOTE(review): the cache lookup bails out unless the subject is a symbol;
// presumably that is why the text is a single literal rather than a runtime
// concatenation -- verify before restructuring it.
var string =
"Friends, Romans, countrymen, lend me your ears! \
I come to bury Caesar, not to praise him. \
The evil that men do lives after them, \
The good is oft interred with their bones; \
So let it be with Caesar. The noble Brutus \
Hath told you Caesar was ambitious; \
If it were so, it was a grievous fault, \
And grievously hath Caesar answer'd it. \
Here, under leave of Brutus and the rest- \
For Brutus is an honorable man; \
So are they all, all honorable men- \
Come I to speak in Caesar's funeral. \
He was my friend, faithful and just to me; \
But Brutus says he was ambitious, \
And Brutus is an honorable man. \
He hath brought many captives home to Rome, \
Whose ransoms did the general coffers fill. \
Did this in Caesar seem ambitious? \
When that the poor have cried, Caesar hath wept; \
Ambition should be made of sterner stuff: \
Yet Brutus says he was ambitious, \
And Brutus is an honorable man. \
You all did see that on the Lupercal \
I thrice presented him a kingly crown, \
Which he did thrice refuse. Was this ambition? \
Yet Brutus says he was ambitious, \
And sure he is an honorable man. \
I speak not to disprove what Brutus spoke, \
But here I am to speak what I do know. \
You all did love him once, not without cause; \
What cause withholds you then to mourn for him? \
O judgement, thou art fled to brutish beasts, \
And men have lost their reason. Bear with me; \
My heart is in the coffin there with Caesar, \
And I must pause till it come back to me.";
// Run the same global replace with a callback several times; every later run
// must produce exactly what the first run produced.
var expected = string.replace(/\b\w+\b/g, function() { return "foo"; });
var runsLeft = 3;
while (runsLeft > 0) {
  var actual = string.replace(/\b\w+\b/g, function() { return "foo"; });
  assertEquals(expected, actual);
  runsLeft--;
}
// The split result must live in a COW array: writing into the array returned
// by one split() call must not show up in the array returned by the next call.
var firstSplit = string.split(" ");
assertEquals("Friends,", firstSplit[0]);
firstSplit[0] = "Enemies,";
var secondSplit = string.split(" ");
assertEquals("Friends,", secondSplit[0]);