diff --git a/src/builtins/builtins-definitions.h b/src/builtins/builtins-definitions.h index 375469a70e..1d0f7c66e3 100644 --- a/src/builtins/builtins-definitions.h +++ b/src/builtins/builtins-definitions.h @@ -815,7 +815,9 @@ namespace internal { CPP(ReflectSetPrototypeOf) \ \ /* RegExp */ \ + TFS(RegExpExecAtom, kRegExp, kString, kLastIndex, kMatchInfo) \ TFS(RegExpPrototypeExecSlow, kReceiver, kString) \ + \ CPP(RegExpCapture1Getter) \ CPP(RegExpCapture2Getter) \ CPP(RegExpCapture3Getter) \ diff --git a/src/builtins/builtins-regexp-gen.cc b/src/builtins/builtins-regexp-gen.cc index 428c542290..cf462cf8fa 100644 --- a/src/builtins/builtins-regexp-gen.cc +++ b/src/builtins/builtins-regexp-gen.cc @@ -257,7 +257,7 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context, ToDirectStringAssembler to_direct(state(), string); VARIABLE(var_result, MachineRepresentation::kTagged); - Label out(this), runtime(this, Label::kDeferred); + Label out(this), atom(this), runtime(this, Label::kDeferred); // External constants. Node* const isolate_address = @@ -269,11 +269,20 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context, Node* const static_offsets_vector_address = ExternalConstant( ExternalReference::address_of_static_offsets_vector(isolate())); - // Ensure that a RegExp stack is allocated. + // At this point, last_index is definitely a canonicalized non-negative + // number, which implies that any non-Smi last_index is greater than + // the maximal string length. If lastIndex > string.length then the matcher + // must fail. + + Label if_failure(this); + Node* const smi_string_length = LoadStringLength(string); { - Node* const stack_size = - Load(MachineType::IntPtr(), regexp_stack_memory_size_address); - GotoIf(IntPtrEqual(stack_size, int_zero), &runtime); + CSA_ASSERT(this, IsNumberNormalized(last_index)); + CSA_ASSERT(this, IsNumberPositive(last_index)); + Node* const last_index_is_not_smi = TaggedIsNotSmi(last_index); + Node* const last_index_is_oob = + SmiGreaterThan(last_index, smi_string_length); + GotoIf(Word32Or(last_index_is_not_smi, last_index_is_oob), &if_failure); } Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset); @@ -282,10 +291,17 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context, CSA_ASSERT(this, TaggedIsNotSmi(data)); CSA_ASSERT(this, HasInstanceType(data, FIXED_ARRAY_TYPE)); - // Check the type of the RegExp. Only continue if type is - // JSRegExp::IRREGEXP. - Node* const tag = LoadFixedArrayElement(data, JSRegExp::kTagIndex); - GotoIfNot(SmiEqual(tag, SmiConstant(JSRegExp::IRREGEXP)), &runtime); + // Dispatch on the type of the RegExp. The common case (IRREGEXP) is first. + // NOT_COMPILED is last as it requires a slow runtime call anyway. + { + Label next(this); + Node* const tag = LoadFixedArrayElement(data, JSRegExp::kTagIndex); + GotoIf(SmiEqual(tag, SmiConstant(JSRegExp::IRREGEXP)), &next); + GotoIf(SmiEqual(tag, SmiConstant(JSRegExp::ATOM)), &atom); + CSA_ASSERT(this, SmiEqual(tag, SmiConstant(JSRegExp::NOT_COMPILED))); + Goto(&runtime); + BIND(&next); + } // Check (number_of_captures + 1) * 2 <= offsets vector size // Or number_of_captures <= offsets vector size / 2 - 1 @@ -300,23 +316,18 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context, &runtime); } + // Ensure that a RegExp stack is allocated. This check is after branching off + // for ATOM regexps to avoid unnecessary trips to runtime. + { + Node* const stack_size = + Load(MachineType::IntPtr(), regexp_stack_memory_size_address); + GotoIf(IntPtrEqual(stack_size, int_zero), &runtime); + } + // Unpack the string if possible. to_direct.TryToDirect(&runtime); - Node* const smi_string_length = LoadStringLength(string); - - // At this point, last_index is definitely a canonicalized non-negative - // number, which implies that any non-Smi last_index is greater than - // the maximal string length. If lastIndex > string.length then the matcher - // must fail. - - Label if_failure(this); - CSA_ASSERT(this, IsNumberNormalized(last_index)); - CSA_ASSERT(this, IsNumberPositive(last_index)); - GotoIfNot(TaggedIsSmi(last_index), &if_failure); // Outside Smi range. - GotoIf(SmiGreaterThan(last_index, smi_string_length), &if_failure); - // Load the irregexp code object and offsets into the subject string. Both // depend on whether the string is one- or two-byte. @@ -530,6 +541,14 @@ Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context, Goto(&out); } + BIND(&atom); + { + Node* const result = CallBuiltin(Builtins::kRegExpExecAtom, context, regexp, + string, last_index, match_info); + var_result.Bind(result); + Goto(&out); + } + BIND(&out); return var_result.value(); #endif // V8_INTERPRETED_REGEXP @@ -851,6 +870,70 @@ TF_BUILTIN(RegExpPrototypeExecSlow, RegExpBuiltinsAssembler) { Return(RegExpPrototypeExecBody(context, regexp, string, false)); } +// Fast path stub for ATOM regexps. String matching is done by StringIndexOf, +// and {match_info} is updated on success. +// The slow path is implemented in RegExpImpl::AtomExec. +TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) { + Node* const regexp = Parameter(Descriptor::kRegExp); + Node* const subject_string = Parameter(Descriptor::kString); + Node* const last_index = Parameter(Descriptor::kLastIndex); + Node* const match_info = Parameter(Descriptor::kMatchInfo); + Node* const context = Parameter(Descriptor::kContext); + + CSA_ASSERT(this, IsJSRegExp(regexp)); + CSA_ASSERT(this, IsString(subject_string)); + CSA_ASSERT(this, TaggedIsPositiveSmi(last_index)); + CSA_ASSERT(this, IsFixedArray(match_info)); + + Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset); + CSA_ASSERT(this, IsFixedArray(data)); + CSA_ASSERT(this, SmiEqual(LoadFixedArrayElement(data, JSRegExp::kTagIndex), + SmiConstant(JSRegExp::ATOM))); + + // Callers ensure that last_index is in-bounds. + CSA_ASSERT(this, + SmiLessThanOrEqual(last_index, LoadStringLength(subject_string))); + + Node* const needle_string = + LoadFixedArrayElement(data, JSRegExp::kAtomPatternIndex); + CSA_ASSERT(this, IsString(needle_string)); + + Node* const match_from = + CallBuiltin(Builtins::kStringIndexOf, context, subject_string, + needle_string, last_index); + CSA_ASSERT(this, TaggedIsSmi(match_from)); + + Label if_failure(this), if_success(this); + Branch(SmiEqual(match_from, SmiConstant(-1)), &if_failure, &if_success); + + BIND(&if_success); + { + CSA_ASSERT(this, TaggedIsPositiveSmi(match_from)); + CSA_ASSERT(this, SmiLessThan(match_from, LoadStringLength(subject_string))); + + const int kNumRegisters = 2; + STATIC_ASSERT(RegExpMatchInfo::kInitialCaptureIndices >= kNumRegisters); + + Node* const match_to = SmiAdd(match_from, LoadStringLength(needle_string)); + + StoreFixedArrayElement(match_info, RegExpMatchInfo::kNumberOfCapturesIndex, + SmiConstant(kNumRegisters), SKIP_WRITE_BARRIER); + StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex, + subject_string); + StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex, + subject_string); + StoreFixedArrayElement(match_info, RegExpMatchInfo::kFirstCaptureIndex, + match_from, SKIP_WRITE_BARRIER); + StoreFixedArrayElement(match_info, RegExpMatchInfo::kFirstCaptureIndex + 1, + match_to, SKIP_WRITE_BARRIER); + + Return(match_info); + } + + BIND(&if_failure); + Return(NullConstant()); +} + // ES#sec-regexp.prototype.exec // RegExp.prototype.exec ( string ) TF_BUILTIN(RegExpPrototypeExec, RegExpBuiltinsAssembler) {