[regexp] Add a RegExp.p.exec fast path for ATOM regexps

Until now, ATOM regexps (i.e. simple patterns don't require regexp matching
logic but can use generic string matching algorithms instead) have always gone
through the slow runtime.

This CL implements a fast path in CSA which simply calls StringIndexOf
internally and then sets up the last-match-info as required.

Local microbenchmarks show a 30% improvement for RE.p.exec on ATOM regexps,
and a 5% improvement on Octane/RegExp.

Bug: v8:6462
Change-Id: I35b4c5caf416fa35fe388dd58e34dea55b098d09
Reviewed-on: https://chromium-review.googlesource.com/535455
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Cr-Commit-Position: refs/heads/master@{#45947}
This commit is contained in:
jgruber 2017-06-14 14:58:38 +02:00 committed by Commit Bot
parent 8ada753888
commit b1ee17e588
2 changed files with 107 additions and 22 deletions

View File

@ -815,7 +815,9 @@ namespace internal {
CPP(ReflectSetPrototypeOf) \
\
/* RegExp */ \
TFS(RegExpExecAtom, kRegExp, kString, kLastIndex, kMatchInfo) \
TFS(RegExpPrototypeExecSlow, kReceiver, kString) \
\
CPP(RegExpCapture1Getter) \
CPP(RegExpCapture2Getter) \
CPP(RegExpCapture3Getter) \

View File

@ -257,7 +257,7 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context,
ToDirectStringAssembler to_direct(state(), string);
VARIABLE(var_result, MachineRepresentation::kTagged);
Label out(this), runtime(this, Label::kDeferred);
Label out(this), atom(this), runtime(this, Label::kDeferred);
// External constants.
Node* const isolate_address =
@ -269,11 +269,20 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context,
Node* const static_offsets_vector_address = ExternalConstant(
ExternalReference::address_of_static_offsets_vector(isolate()));
// Ensure that a RegExp stack is allocated.
// At this point, last_index is definitely a canonicalized non-negative
// number, which implies that any non-Smi last_index is greater than
// the maximal string length. If lastIndex > string.length then the matcher
// must fail.
Label if_failure(this);
Node* const smi_string_length = LoadStringLength(string);
{
Node* const stack_size =
Load(MachineType::IntPtr(), regexp_stack_memory_size_address);
GotoIf(IntPtrEqual(stack_size, int_zero), &runtime);
CSA_ASSERT(this, IsNumberNormalized(last_index));
CSA_ASSERT(this, IsNumberPositive(last_index));
Node* const last_index_is_not_smi = TaggedIsNotSmi(last_index);
Node* const last_index_is_oob =
SmiGreaterThan(last_index, smi_string_length);
GotoIf(Word32Or(last_index_is_not_smi, last_index_is_oob), &if_failure);
}
Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset);
@ -282,10 +291,17 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context,
CSA_ASSERT(this, TaggedIsNotSmi(data));
CSA_ASSERT(this, HasInstanceType(data, FIXED_ARRAY_TYPE));
// Check the type of the RegExp. Only continue if type is
// JSRegExp::IRREGEXP.
Node* const tag = LoadFixedArrayElement(data, JSRegExp::kTagIndex);
GotoIfNot(SmiEqual(tag, SmiConstant(JSRegExp::IRREGEXP)), &runtime);
// Dispatch on the type of the RegExp. The common case (IRREGEXP) is first.
// NOT_COMPILED is last as it requires a slow runtime call anyway.
{
Label next(this);
Node* const tag = LoadFixedArrayElement(data, JSRegExp::kTagIndex);
GotoIf(SmiEqual(tag, SmiConstant(JSRegExp::IRREGEXP)), &next);
GotoIf(SmiEqual(tag, SmiConstant(JSRegExp::ATOM)), &atom);
CSA_ASSERT(this, SmiEqual(tag, SmiConstant(JSRegExp::NOT_COMPILED)));
Goto(&runtime);
BIND(&next);
}
// Check (number_of_captures + 1) * 2 <= offsets vector size
// Or number_of_captures <= offsets vector size / 2 - 1
@ -300,23 +316,18 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context,
&runtime);
}
// Ensure that a RegExp stack is allocated. This check is after branching off
// for ATOM regexps to avoid unnecessary trips to runtime.
{
Node* const stack_size =
Load(MachineType::IntPtr(), regexp_stack_memory_size_address);
GotoIf(IntPtrEqual(stack_size, int_zero), &runtime);
}
// Unpack the string if possible.
to_direct.TryToDirect(&runtime);
Node* const smi_string_length = LoadStringLength(string);
// At this point, last_index is definitely a canonicalized non-negative
// number, which implies that any non-Smi last_index is greater than
// the maximal string length. If lastIndex > string.length then the matcher
// must fail.
Label if_failure(this);
CSA_ASSERT(this, IsNumberNormalized(last_index));
CSA_ASSERT(this, IsNumberPositive(last_index));
GotoIfNot(TaggedIsSmi(last_index), &if_failure); // Outside Smi range.
GotoIf(SmiGreaterThan(last_index, smi_string_length), &if_failure);
// Load the irregexp code object and offsets into the subject string. Both
// depend on whether the string is one- or two-byte.
@ -530,6 +541,14 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context,
Goto(&out);
}
BIND(&atom);
{
Node* const result = CallBuiltin(Builtins::kRegExpExecAtom, context, regexp,
string, last_index, match_info);
var_result.Bind(result);
Goto(&out);
}
BIND(&out);
return var_result.value();
#endif // V8_INTERPRETED_REGEXP
@ -851,6 +870,70 @@ TF_BUILTIN(RegExpPrototypeExecSlow, RegExpBuiltinsAssembler) {
Return(RegExpPrototypeExecBody(context, regexp, string, false));
}
// Fast path stub for ATOM regexps. String matching is done by StringIndexOf,
// and {match_info} is updated on success.
// The slow path is implemented in RegExpImpl::AtomExec.
TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) {
Node* const regexp = Parameter(Descriptor::kRegExp);
Node* const subject_string = Parameter(Descriptor::kString);
Node* const last_index = Parameter(Descriptor::kLastIndex);
Node* const match_info = Parameter(Descriptor::kMatchInfo);
Node* const context = Parameter(Descriptor::kContext);
CSA_ASSERT(this, IsJSRegExp(regexp));
CSA_ASSERT(this, IsString(subject_string));
CSA_ASSERT(this, TaggedIsPositiveSmi(last_index));
CSA_ASSERT(this, IsFixedArray(match_info));
Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset);
CSA_ASSERT(this, IsFixedArray(data));
CSA_ASSERT(this, SmiEqual(LoadFixedArrayElement(data, JSRegExp::kTagIndex),
SmiConstant(JSRegExp::ATOM)));
// Callers ensure that last_index is in-bounds.
CSA_ASSERT(this,
SmiLessThanOrEqual(last_index, LoadStringLength(subject_string)));
Node* const needle_string =
LoadFixedArrayElement(data, JSRegExp::kAtomPatternIndex);
CSA_ASSERT(this, IsString(needle_string));
Node* const match_from =
CallBuiltin(Builtins::kStringIndexOf, context, subject_string,
needle_string, last_index);
CSA_ASSERT(this, TaggedIsSmi(match_from));
Label if_failure(this), if_success(this);
Branch(SmiEqual(match_from, SmiConstant(-1)), &if_failure, &if_success);
BIND(&if_success);
{
CSA_ASSERT(this, TaggedIsPositiveSmi(match_from));
CSA_ASSERT(this, SmiLessThan(match_from, LoadStringLength(subject_string)));
const int kNumRegisters = 2;
STATIC_ASSERT(RegExpMatchInfo::kInitialCaptureIndices >= kNumRegisters);
Node* const match_to = SmiAdd(match_from, LoadStringLength(needle_string));
StoreFixedArrayElement(match_info, RegExpMatchInfo::kNumberOfCapturesIndex,
SmiConstant(kNumRegisters), SKIP_WRITE_BARRIER);
StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
subject_string);
StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
subject_string);
StoreFixedArrayElement(match_info, RegExpMatchInfo::kFirstCaptureIndex,
match_from, SKIP_WRITE_BARRIER);
StoreFixedArrayElement(match_info, RegExpMatchInfo::kFirstCaptureIndex + 1,
match_to, SKIP_WRITE_BARRIER);
Return(match_info);
}
BIND(&if_failure);
Return(NullConstant());
}
// ES#sec-regexp.prototype.exec
// RegExp.prototype.exec ( string )
TF_BUILTIN(RegExpPrototypeExec, RegExpBuiltinsAssembler) {