[string] Widen StringIndexOf fast path

The StringIndexOf fast path used to be very narrow, only allowing
one-byte single-char search strings (and a one-byte subject string).

This changes the CSA fast path to call into our internal SearchString C++
function instead (after attempting to unpack both strings), so it can now
handle strings of arbitrary length and encoding. The only remaining runtime
call is when either string needs to be flattened.

BUG=

Review-Url: https://codereview.chromium.org/2814373002
Cr-Commit-Position: refs/heads/master@{#44718}
This commit is contained in:
jgruber 2017-04-19 03:47:03 -07:00 committed by Commit bot
parent 06d2e8128a
commit 4cb011885b
9 changed files with 274 additions and 110 deletions

View File

@ -63,6 +63,7 @@
#include "src/runtime/runtime.h"
#include "src/simulator.h" // For flushing instruction cache.
#include "src/snapshot/serializer-common.h"
#include "src/string-search.h"
#include "src/wasm/wasm-external-refs.h"
// Include native regexp-macro-assembler.
@ -1562,12 +1563,28 @@ ExternalReference ExternalReference::libc_memset_function(Isolate* isolate) {
return ExternalReference(Redirect(isolate, FUNCTION_ADDR(libc_memset)));
}
// Returns an external reference to the SearchStringRaw instantiation for the
// given subject/pattern character types.
template <typename SubjectChar, typename PatternChar>
ExternalReference ExternalReference::search_string_raw(Isolate* isolate) {
  // The instantiation is bound to a local before being handed to
  // FUNCTION_ADDR (presumably because the comma in the template argument list
  // cannot be passed through FUNCTION_ADDR directly -- confirm).
  auto search_fn = SearchStringRaw<SubjectChar, PatternChar>;
  const auto redirected = Redirect(isolate, FUNCTION_ADDR(search_fn));
  return ExternalReference(redirected);
}
// Returns an external reference to StringTable::LookupStringIfExists_NoAllocate.
ExternalReference ExternalReference::try_internalize_string_function(
    Isolate* isolate) {
  const auto lookup_address =
      FUNCTION_ADDR(StringTable::LookupStringIfExists_NoAllocate);
  return ExternalReference(Redirect(isolate, lookup_address));
}
// Explicit instantiations for all combinations of 1- and 2-byte strings.
// The first template argument is the subject character type, the second the
// pattern character type; uint8_t stands for 1-byte and uc16 for 2-byte
// strings.
template ExternalReference
ExternalReference::search_string_raw<const uint8_t, const uint8_t>(Isolate*);
template ExternalReference
ExternalReference::search_string_raw<const uint8_t, const uc16>(Isolate*);
template ExternalReference
ExternalReference::search_string_raw<const uc16, const uint8_t>(Isolate*);
template ExternalReference
ExternalReference::search_string_raw<const uc16, const uc16>(Isolate*);
ExternalReference ExternalReference::page_flags(Page* page) {
return ExternalReference(reinterpret_cast<Address>(page) +
MemoryChunk::kFlagsOffset);

View File

@ -994,6 +994,9 @@ class ExternalReference BASE_EMBEDDED {
static ExternalReference try_internalize_string_function(Isolate* isolate);
// Reference to the SearchStringRaw<SubjectChar, PatternChar> C++ function.
template <typename SubjectChar, typename PatternChar>
static ExternalReference search_string_raw(Isolate* isolate);
static ExternalReference page_flags(Page* page);
static ExternalReference ForDeoptEntry(Address entry);

View File

@ -59,48 +59,74 @@ class StringBuiltinsAssembler : public CodeStubAssembler {
return var_data.value();
}
Node* LoadOneByteChar(Node* string, Node* index) {
return Load(MachineType::Uint8(), string, OneByteCharOffset(index));
}
void DispatchOnStringEncodings(Node* const lhs_instance_type,
Node* const rhs_instance_type,
Label* if_one_one, Label* if_one_two,
Label* if_two_one, Label* if_two_two) {
STATIC_ASSERT(kStringEncodingMask == 0x8);
STATIC_ASSERT(kTwoByteStringTag == 0x0);
STATIC_ASSERT(kOneByteStringTag == 0x8);
Node* OneByteCharAddress(Node* string, Node* index) {
Node* offset = OneByteCharOffset(index);
return IntPtrAdd(string, offset);
}
// First combine the encodings.
Node* OneByteCharOffset(Node* index) {
return CharOffset(String::ONE_BYTE_ENCODING, index);
}
Node* const encoding_mask = Int32Constant(kStringEncodingMask);
Node* const lhs_encoding = Word32And(lhs_instance_type, encoding_mask);
Node* const rhs_encoding = Word32And(rhs_instance_type, encoding_mask);
Node* CharOffset(String::Encoding encoding, Node* index) {
const int header = SeqOneByteString::kHeaderSize - kHeapObjectTag;
Node* offset = index;
if (encoding == String::TWO_BYTE_ENCODING) {
offset = IntPtrAdd(offset, offset);
}
offset = IntPtrAdd(offset, IntPtrConstant(header));
return offset;
}
Node* const combined_encodings =
Word32Or(lhs_encoding, Word32Shr(rhs_encoding, 1));
void DispatchOnStringInstanceType(Node* const instance_type,
Label* if_onebyte_sequential,
Label* if_onebyte_external,
Label* if_otherwise) {
const int kMask = kStringRepresentationMask | kStringEncodingMask;
Node* const encoding_and_representation =
Word32And(instance_type, Int32Constant(kMask));
// Then dispatch on the combined encoding.
Label unreachable(this, Label::kDeferred);
int32_t values[] = {
kOneByteStringTag | kSeqStringTag,
kOneByteStringTag | kExternalStringTag,
kOneByteStringTag | (kOneByteStringTag >> 1),
kOneByteStringTag | (kTwoByteStringTag >> 1),
kTwoByteStringTag | (kOneByteStringTag >> 1),
kTwoByteStringTag | (kTwoByteStringTag >> 1),
};
Label* labels[] = {
if_onebyte_sequential, if_onebyte_external,
if_one_one, if_one_two, if_two_one, if_two_two,
};
STATIC_ASSERT(arraysize(values) == arraysize(labels));
Switch(encoding_and_representation, if_otherwise, values, labels,
arraysize(values));
STATIC_ASSERT(arraysize(values) == arraysize(labels));
Switch(combined_encodings, &unreachable, values, labels, arraysize(values));
BIND(&unreachable);
Unreachable();
}
// Emits a C call to SearchStringRaw<SubjectChar, PatternChar> and returns the
// node holding its result. The isolate address is passed as the first C
// argument, followed by the subject pointer and length, the search pointer and
// length, and the start position.
template <typename SubjectChar, typename PatternChar>
Node* CallSearchStringRaw(Node* const subject_ptr, Node* const subject_length,
                          Node* const search_ptr, Node* const search_length,
                          Node* const start_position) {
  const MachineType pointer_type = MachineType::Pointer();
  const MachineType intptr_type = MachineType::IntPtr();

  Node* const search_function = ExternalConstant(
      ExternalReference::search_string_raw<SubjectChar, PatternChar>(
          isolate()));
  Node* const isolate_address =
      ExternalConstant(ExternalReference::isolate_address(isolate()));

  // Machine types, in order: return value, isolate, subject pointer, subject
  // length, search pointer, search length, start position.
  return CallCFunction6(intptr_type, pointer_type, pointer_type, intptr_type,
                        pointer_type, intptr_type, intptr_type,
                        search_function, isolate_address, subject_ptr,
                        subject_length, search_ptr, search_length,
                        start_position);
}
// Returns |string_data| advanced by |index| characters, where the character
// width follows from |encoding| (UINT8_ELEMENTS for one-byte strings,
// UINT16_ELEMENTS otherwise).
Node* PointerToStringDataAtIndex(Node* const string_data, Node* const index,
                                 String::Encoding encoding) {
  ElementsKind elements_kind = UINT16_ELEMENTS;
  if (encoding == String::ONE_BYTE_ENCODING) {
    elements_kind = UINT8_ELEMENTS;
  }
  Node* const byte_offset =
      ElementOffsetFromIndex(index, elements_kind, INTPTR_PARAMETERS);
  return IntPtrAdd(string_data, byte_offset);
}
void GenerateStringEqual(Node* context, Node* left, Node* right);
@ -113,8 +139,10 @@ class StringBuiltinsAssembler : public CodeStubAssembler {
Node* LoadSurrogatePairAt(Node* string, Node* length, Node* index,
UnicodeEncoding encoding);
void StringIndexOf(Node* receiver, Node* instance_type, Node* search_string,
Node* search_string_instance_type, Node* position,
void StringIndexOf(Node* const subject_string,
Node* const subject_instance_type,
Node* const search_string,
Node* const search_instance_type, Node* const position,
std::function<void(Node*)> f_return);
Node* IndexOfDollarChar(Node* const context, Node* const string);
@ -712,112 +740,157 @@ TF_BUILTIN(StringPrototypeConcat, CodeStubAssembler) {
}
void StringBuiltinsAssembler::StringIndexOf(
Node* receiver, Node* instance_type, Node* search_string,
Node* search_string_instance_type, Node* position,
std::function<void(Node*)> f_return) {
CSA_ASSERT(this, IsString(receiver));
Node* const subject_string, Node* const subject_instance_type,
Node* const search_string, Node* const search_instance_type,
Node* const position, std::function<void(Node*)> f_return) {
CSA_ASSERT(this, IsString(subject_string));
CSA_ASSERT(this, IsString(search_string));
CSA_ASSERT(this, TaggedIsSmi(position));
Label zero_length_needle(this),
call_runtime_unchecked(this, Label::kDeferred), return_minus_1(this),
check_search_string(this), continue_fast_path(this);
Node* const int_zero = IntPtrConstant(0);
VARIABLE(var_needle_byte, MachineType::PointerRepresentation(), int_zero);
VARIABLE(var_string_addr, MachineType::PointerRepresentation(), int_zero);
Node* needle_length = SmiUntag(LoadStringLength(search_string));
// Use faster/complex runtime fallback for long search strings.
GotoIf(IntPtrLessThan(IntPtrConstant(1), needle_length),
&call_runtime_unchecked);
Node* string_length = SmiUntag(LoadStringLength(receiver));
Node* start_position = IntPtrMax(SmiUntag(position), int_zero);
Node* const search_length = SmiUntag(LoadStringLength(search_string));
Node* const subject_length = SmiUntag(LoadStringLength(subject_string));
Node* const start_position = IntPtrMax(SmiUntag(position), int_zero);
Label zero_length_needle(this), return_minus_1(this);
{
GotoIf(IntPtrEqual(int_zero, search_length), &zero_length_needle);
GotoIf(IntPtrEqual(int_zero, needle_length), &zero_length_needle);
// Check that the needle fits in the start position.
GotoIfNot(IntPtrLessThanOrEqual(needle_length,
IntPtrSub(string_length, start_position)),
GotoIfNot(IntPtrLessThanOrEqual(search_length,
IntPtrSub(subject_length, start_position)),
&return_minus_1);
// Load the string address.
{
Label if_onebyte_sequential(this);
Label if_onebyte_external(this, Label::kDeferred);
// Only support one-byte strings on the fast path.
DispatchOnStringInstanceType(instance_type, &if_onebyte_sequential,
&if_onebyte_external, &call_runtime_unchecked);
BIND(&if_onebyte_sequential);
{
var_string_addr.Bind(
OneByteCharAddress(BitcastTaggedToWord(receiver), start_position));
Goto(&check_search_string);
}
BIND(&if_onebyte_external);
{
Node* const unpacked = TryDerefExternalString(receiver, instance_type,
&call_runtime_unchecked);
var_string_addr.Bind(OneByteCharAddress(unpacked, start_position));
Goto(&check_search_string);
}
}
// Try to unpack subject and search strings. Bail to runtime if either needs
// to be flattened.
ToDirectStringAssembler subject_to_direct(state(), subject_string);
ToDirectStringAssembler search_to_direct(state(), search_string);
// Load the needle character.
BIND(&check_search_string);
{
Label if_onebyte_sequential(this);
Label if_onebyte_external(this, Label::kDeferred);
Label call_runtime_unchecked(this, Label::kDeferred);
DispatchOnStringInstanceType(search_string_instance_type,
&if_onebyte_sequential, &if_onebyte_external,
&call_runtime_unchecked);
subject_to_direct.TryToDirect(&call_runtime_unchecked);
search_to_direct.TryToDirect(&call_runtime_unchecked);
BIND(&if_onebyte_sequential);
{
var_needle_byte.Bind(
ChangeInt32ToIntPtr(LoadOneByteChar(search_string, int_zero)));
Goto(&continue_fast_path);
}
// Load pointers to string data.
Node* const subject_ptr =
subject_to_direct.PointerToData(&call_runtime_unchecked);
Node* const search_ptr =
search_to_direct.PointerToData(&call_runtime_unchecked);
BIND(&if_onebyte_external);
{
Node* const unpacked = TryDerefExternalString(
search_string, search_string_instance_type, &call_runtime_unchecked);
var_needle_byte.Bind(
ChangeInt32ToIntPtr(LoadOneByteChar(unpacked, int_zero)));
Goto(&continue_fast_path);
}
}
Node* const subject_offset = subject_to_direct.offset();
Node* const search_offset = search_to_direct.offset();
BIND(&continue_fast_path);
// Like String::IndexOf, the actual matching is done by the optimized
// SearchString method in string-search.h. Dispatch based on string instance
// types, then call straight into C++ for matching.
CSA_ASSERT(this, IntPtrGreaterThan(search_length, int_zero));
CSA_ASSERT(this, IntPtrGreaterThanOrEqual(start_position, int_zero));
CSA_ASSERT(this, IntPtrGreaterThanOrEqual(subject_length, start_position));
CSA_ASSERT(this,
IntPtrLessThanOrEqual(search_length,
IntPtrSub(subject_length, start_position)));
Label one_one(this), one_two(this), two_one(this), two_two(this);
DispatchOnStringEncodings(subject_to_direct.instance_type(),
search_to_direct.instance_type(), &one_one,
&one_two, &two_one, &two_two);
typedef const uint8_t onebyte_t;
typedef const uc16 twobyte_t;
BIND(&one_one);
{
Node* needle_byte = var_needle_byte.value();
Node* string_addr = var_string_addr.value();
Node* search_length = IntPtrSub(string_length, start_position);
// Call out to the highly optimized memchr to perform the actual byte
// search.
Node* memchr =
Node* const adjusted_subject_ptr = PointerToStringDataAtIndex(
subject_ptr, subject_offset, String::ONE_BYTE_ENCODING);
Node* const adjusted_search_ptr = PointerToStringDataAtIndex(
search_ptr, search_offset, String::ONE_BYTE_ENCODING);
Label direct_memchr_call(this), generic_fast_path(this);
Branch(IntPtrEqual(search_length, IntPtrConstant(1)), &direct_memchr_call,
&generic_fast_path);
// An additional fast path that calls directly into memchr for 1-length
// search strings.
BIND(&direct_memchr_call);
{
Node* const string_addr = IntPtrAdd(adjusted_subject_ptr, start_position);
Node* const search_length = IntPtrSub(subject_length, start_position);
Node* const search_byte =
ChangeInt32ToIntPtr(Load(MachineType::Uint8(), adjusted_search_ptr));
Node* const memchr =
ExternalConstant(ExternalReference::libc_memchr_function(isolate()));
Node* result_address =
Node* const result_address =
CallCFunction3(MachineType::Pointer(), MachineType::Pointer(),
MachineType::IntPtr(), MachineType::UintPtr(), memchr,
string_addr, needle_byte, search_length);
string_addr, search_byte, search_length);
GotoIf(WordEqual(result_address, int_zero), &return_minus_1);
Node* result_index =
Node* const result_index =
IntPtrAdd(IntPtrSub(result_address, string_addr), start_position);
f_return(SmiTag(result_index));
}
BIND(&generic_fast_path);
{
Node* const result = CallSearchStringRaw<onebyte_t, onebyte_t>(
adjusted_subject_ptr, subject_length, adjusted_search_ptr,
search_length, start_position);
f_return(SmiTag(result));
}
}
BIND(&one_two);
{
Node* const adjusted_subject_ptr = PointerToStringDataAtIndex(
subject_ptr, subject_offset, String::ONE_BYTE_ENCODING);
Node* const adjusted_search_ptr = PointerToStringDataAtIndex(
search_ptr, search_offset, String::TWO_BYTE_ENCODING);
Node* const result = CallSearchStringRaw<onebyte_t, twobyte_t>(
adjusted_subject_ptr, subject_length, adjusted_search_ptr,
search_length, start_position);
f_return(SmiTag(result));
}
BIND(&two_one);
{
Node* const adjusted_subject_ptr = PointerToStringDataAtIndex(
subject_ptr, subject_offset, String::TWO_BYTE_ENCODING);
Node* const adjusted_search_ptr = PointerToStringDataAtIndex(
search_ptr, search_offset, String::ONE_BYTE_ENCODING);
Node* const result = CallSearchStringRaw<twobyte_t, onebyte_t>(
adjusted_subject_ptr, subject_length, adjusted_search_ptr,
search_length, start_position);
f_return(SmiTag(result));
}
BIND(&two_two);
{
Node* const adjusted_subject_ptr = PointerToStringDataAtIndex(
subject_ptr, subject_offset, String::TWO_BYTE_ENCODING);
Node* const adjusted_search_ptr = PointerToStringDataAtIndex(
search_ptr, search_offset, String::TWO_BYTE_ENCODING);
Node* const result = CallSearchStringRaw<twobyte_t, twobyte_t>(
adjusted_subject_ptr, subject_length, adjusted_search_ptr,
search_length, start_position);
f_return(SmiTag(result));
}
BIND(&return_minus_1);
f_return(SmiConstant(-1));
BIND(&zero_length_needle);
{
Comment("0-length search_string");
f_return(SmiTag(IntPtrMin(string_length, start_position)));
f_return(SmiTag(IntPtrMin(subject_length, start_position)));
}
BIND(&call_runtime_unchecked);
@ -826,7 +899,7 @@ void StringBuiltinsAssembler::StringIndexOf(
// are already known due to type checks in this stub.
Comment("Call Runtime Unchecked");
Node* result = CallRuntime(Runtime::kStringIndexOfUnchecked, SmiConstant(0),
receiver, search_string, position);
subject_string, search_string, position);
f_return(result);
}
}

View File

@ -733,6 +733,16 @@ Node* CodeAssembler::CallCFunction3(MachineType return_type,
arg2_type, function, arg0, arg1, arg2);
}
// Emits a call to a C function with six arguments by forwarding every machine
// type and node unchanged to the underlying raw assembler.
Node* CodeAssembler::CallCFunction6(
    MachineType return_type, MachineType arg0_type, MachineType arg1_type,
    MachineType arg2_type, MachineType arg3_type, MachineType arg4_type,
    MachineType arg5_type, Node* function, Node* arg0, Node* arg1, Node* arg2,
    Node* arg3, Node* arg4, Node* arg5) {
  auto* const assembler = raw_assembler();
  Node* const call = assembler->CallCFunction6(
      return_type, arg0_type, arg1_type, arg2_type, arg3_type, arg4_type,
      arg5_type, function, arg0, arg1, arg2, arg3, arg4, arg5);
  return call;
}
void CodeAssembler::Goto(Label* label) {
label->MergeVariables();
raw_assembler()->Goto(label->label_);

View File

@ -421,6 +421,14 @@ class V8_EXPORT_PRIVATE CodeAssembler {
MachineType arg1_type, MachineType arg2_type,
Node* function, Node* arg0, Node* arg1, Node* arg2);
// Call to a C function with six arguments. |return_type| is the machine type
// of the result and |argN_type| the machine type of |argN|; |function| is the
// node holding the callee.
Node* CallCFunction6(MachineType return_type, MachineType arg0_type,
MachineType arg1_type, MachineType arg2_type,
MachineType arg3_type, MachineType arg4_type,
MachineType arg5_type, Node* function, Node* arg0,
Node* arg1, Node* arg2, Node* arg3, Node* arg4,
Node* arg5);
// Exception handling support.
void GotoIfException(Node* node, Label* if_exception,
Variable* exception_var = nullptr);

View File

@ -257,6 +257,26 @@ Node* RawMachineAssembler::CallCFunction3(MachineType return_type,
return AddNode(common()->Call(descriptor), function, arg0, arg1, arg2);
}
// Emits a call to a C function with six arguments. Builds a machine signature
// with one return value and six parameters from the given machine types,
// derives a simplified C call descriptor from it, and adds the call node.
Node* RawMachineAssembler::CallCFunction6(
    MachineType return_type, MachineType arg0_type, MachineType arg1_type,
    MachineType arg2_type, MachineType arg3_type, MachineType arg4_type,
    MachineType arg5_type, Node* function, Node* arg0, Node* arg1, Node* arg2,
    Node* arg3, Node* arg4, Node* arg5) {
  const size_t kReturnCount = 1;
  const size_t kParameterCount = 6;
  const MachineType parameter_types[kParameterCount] = {
      arg0_type, arg1_type, arg2_type, arg3_type, arg4_type, arg5_type};

  MachineSignature::Builder signature(zone(), kReturnCount, kParameterCount);
  signature.AddReturn(return_type);
  // Parameters are registered in argument order.
  for (const MachineType& parameter_type : parameter_types) {
    signature.AddParam(parameter_type);
  }

  const CallDescriptor* call_descriptor =
      Linkage::GetSimplifiedCDescriptor(zone(), signature.Build());
  return AddNode(common()->Call(call_descriptor), function, arg0, arg1, arg2,
                 arg3, arg4, arg5);
}
Node* RawMachineAssembler::CallCFunction8(
MachineType return_type, MachineType arg0_type, MachineType arg1_type,
MachineType arg2_type, MachineType arg3_type, MachineType arg4_type,

View File

@ -773,6 +773,13 @@ class V8_EXPORT_PRIVATE RawMachineAssembler {
Node* CallCFunction3(MachineType return_type, MachineType arg0_type,
MachineType arg1_type, MachineType arg2_type,
Node* function, Node* arg0, Node* arg1, Node* arg2);
// Call to a C function with six arguments. |return_type| is the machine type
// of the result and |argN_type| the machine type of |argN|; |function| is the
// node holding the callee.
Node* CallCFunction6(MachineType return_type, MachineType arg0_type,
MachineType arg1_type, MachineType arg2_type,
MachineType arg3_type, MachineType arg4_type,
MachineType arg5_type, Node* function, Node* arg0,
Node* arg1, Node* arg2, Node* arg3, Node* arg4,
Node* arg5);
// Call to a C function with eight arguments.
Node* CallCFunction8(MachineType return_type, MachineType arg0_type,
MachineType arg1_type, MachineType arg2_type,

View File

@ -239,6 +239,19 @@ void ExternalReferenceTable::AddReferences(Isolate* isolate) {
"libc_memset");
Add(ExternalReference::try_internalize_string_function(isolate).address(),
"try_internalize_string_function");
// Register every search_string_raw instantiation. Each label names the
// subject encoding first and the pattern encoding second, matching the order
// of the template arguments.
Add(ExternalReference::search_string_raw<const uint8_t, const uint8_t>(
        isolate)
        .address(),
    "search_string_raw<1-byte, 1-byte>");
Add(ExternalReference::search_string_raw<const uint8_t, const uc16>(isolate)
        .address(),
    "search_string_raw<1-byte, 2-byte>");
Add(ExternalReference::search_string_raw<const uc16, const uint8_t>(isolate)
        .address(),
    "search_string_raw<2-byte, 1-byte>");
// The uc16/uc16 instantiation previously reused the "<1-byte, 2-byte>" label.
Add(ExternalReference::search_string_raw<const uc16, const uc16>(isolate)
        .address(),
    "search_string_raw<2-byte, 2-byte>");
Add(ExternalReference::log_enter_external_function(isolate).address(),
"Logger::EnterExternal");
Add(ExternalReference::log_leave_external_function(isolate).address(),

View File

@ -563,6 +563,19 @@ int SearchString(Isolate* isolate,
return search.Search(subject, start_index);
}
// A wrapper function around SearchString that wraps raw pointers to the subject
// and pattern as vectors before calling SearchString. Used from the
// StringIndexOf builtin.
//
// |subject_ptr| / |subject_length| and |pattern_ptr| / |pattern_length|
// describe the character data to wrap; |start_index| is forwarded to
// SearchString unchanged, and SearchString's result is returned.
//
// The result is returned as intptr_t rather than int because the builtin
// declares the return value of this C call as a pointer-sized integer and
// consumes the whole machine word; a plain int return would not define the
// upper half of that word on 64-bit targets.
template <typename SubjectChar, typename PatternChar>
intptr_t SearchStringRaw(Isolate* isolate, const SubjectChar* subject_ptr,
                         int subject_length, const PatternChar* pattern_ptr,
                         int pattern_length, int start_index) {
  DisallowHeapAllocation no_gc;
  Vector<const SubjectChar> subject(subject_ptr, subject_length);
  Vector<const PatternChar> pattern(pattern_ptr, pattern_length);
  return SearchString(isolate, subject, pattern, start_index);
}
} // namespace internal
} // namespace v8