[regexp] Send sticky @@splits to the slow path

Due to shortcuts we take on the RegExp.p[@@split] fast path (we don't allocate
a new instance), we need to send sticky regexps to the slow path.

The problem is a slight impedance mismatch between the spec and our fast-path
implementation.

Spec: Creates a new regexp instance `splitter` that is guaranteed to be sticky,
uses `splitter.lastIndex` to advance the search range, advances by itself using
AdvanceStringIndex if `splitter` did not match at the current position.

Our fast path: Uses the given regexp instance and does not modify stickyness,
uses last_match_info to advance search range, returns (and assumes no more
matches) once RegExpExecInternal fails to match.

This is fine if the given regexp is non-sticky, since 1. the value of lastIndex
is ignored, and 2. non-sticky regexps match if a match is found anywhere in the
string, not just exactly at the current lastIndex.

Sticky regexps though are a problem. If no match is found exactly at the current
position, @@split assumes no more matches and exits.

In a follow-up, we could explore other options, such as allocating a new
instance or saving/restoring flags and lastIndex.

Bug: v8:6706
Change-Id: I6da2266df72b2f80f00c1ce3cd7c8655de91f680
Reviewed-on: https://chromium-review.googlesource.com/626065
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47543}
This commit is contained in:
jgruber 2017-08-23 10:00:32 +02:00 committed by Commit Bot
parent baa7bc0fbc
commit 27fd52abad
2 changed files with 81 additions and 34 deletions

View File

@ -1486,12 +1486,9 @@ TF_BUILTIN(RegExpPrototypeSourceGetter, RegExpBuiltinsAssembler) {
// Fast-path implementation for flag checks on an unmodified JSRegExp instance.
Node* RegExpBuiltinsAssembler::FastFlagGetter(Node* const regexp,
JSRegExp::Flag flag) {
Node* const smi_zero = SmiConstant(0);
Node* const flags = LoadObjectField(regexp, JSRegExp::kFlagsOffset);
Node* const mask = SmiConstant(flag);
Node* const is_flag_set = WordNotEqual(SmiAnd(flags, mask), smi_zero);
return is_flag_set;
return SmiToWord32(SmiAnd(flags, mask));
}
// Load through the GetProperty stub.
@ -2287,13 +2284,14 @@ TF_BUILTIN(RegExpPrototypeSearch, RegExpBuiltinsAssembler) {
RegExpPrototypeSearchBodySlow(context, receiver, string);
}
// Generates the fast path for @@split. {regexp} is an unmodified JSRegExp,
// {string} is a String, and {limit} is a Smi.
// Generates the fast path for @@split. {regexp} is an unmodified, non-sticky
// JSRegExp, {string} is a String, and {limit} is a Smi.
void RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(Node* const context,
Node* const regexp,
Node* const string,
Node* const limit) {
CSA_ASSERT(this, IsFastRegExp(context, regexp));
CSA_ASSERT(this, Word32BinaryNot(FastFlagGetter(regexp, JSRegExp::kSticky)));
CSA_ASSERT(this, TaggedIsSmi(limit));
CSA_ASSERT(this, IsString(string));
@ -2551,49 +2549,61 @@ TF_BUILTIN(RegExpSplit, RegExpBuiltinsAssembler) {
// been changed.
// Convert {maybe_limit} to a uint32, capping at the maximal smi value.
VARIABLE(var_limit, MachineRepresentation::kTagged, maybe_limit);
Label if_limitissmimax(this), limit_done(this), runtime(this);
Label if_limitissmimax(this), runtime(this, Label::kDeferred);
GotoIf(IsUndefined(maybe_limit), &if_limitissmimax);
GotoIf(TaggedIsPositiveSmi(maybe_limit), &limit_done);
Node* const limit = ToUint32(context, maybe_limit);
{
// ToUint32(limit) could potentially change the shape of the RegExp
// object. Recheck that we are still on the fast path and bail to runtime
// otherwise.
Label next(this);
GotoIf(IsUndefined(maybe_limit), &if_limitissmimax);
GotoIf(TaggedIsPositiveSmi(maybe_limit), &next);
Node* const limit = ToUint32(context, maybe_limit);
{
Label next(this);
BranchIfFastRegExp(context, regexp, &next, &runtime);
BIND(&next);
// ToUint32(limit) could potentially change the shape of the RegExp
// object. Recheck that we are still on the fast path and bail to runtime
// otherwise.
{
Label next(this);
BranchIfFastRegExp(context, regexp, &next, &runtime);
BIND(&next);
}
GotoIfNot(TaggedIsPositiveSmi(limit), &if_limitissmimax);
var_limit.Bind(limit);
Goto(&next);
}
GotoIfNot(TaggedIsSmi(limit), &if_limitissmimax);
BIND(&if_limitissmimax);
{
// TODO(jgruber): In this case, we can probably avoid generation of limit
// checks in Generate_RegExpPrototypeSplitBody.
var_limit.Bind(SmiConstant(Smi::kMaxValue));
Goto(&next);
}
var_limit.Bind(limit);
Goto(&limit_done);
BIND(&next);
}
BIND(&if_limitissmimax);
{
// TODO(jgruber): In this case, we can probably avoid generation of limit
// checks in Generate_RegExpPrototypeSplitBody.
var_limit.Bind(SmiConstant(Smi::kMaxValue));
Goto(&limit_done);
}
// Due to specific shortcuts we take on the fast path (specifically, we don't
// allocate a new regexp instance as specced), we need to ensure that the
// given regexp is non-sticky to avoid invalid results. See crbug.com/v8/6706.
BIND(&limit_done);
{
Node* const limit = var_limit.value();
RegExpPrototypeSplitBody(context, regexp, string, limit);
}
GotoIf(FastFlagGetter(regexp, JSRegExp::kSticky), &runtime);
// We're good to go on the fast path, which is inlined here.
RegExpPrototypeSplitBody(context, regexp, string, var_limit.value());
BIND(&runtime);
{
// The runtime call passes in limit to ensure the second ToUint32(limit)
// call is not observable.
CSA_ASSERT(this, IsNumber(limit));
Return(CallRuntime(Runtime::kRegExpSplit, context, regexp, string, limit));
CSA_ASSERT(this, IsNumber(var_limit.value()));
Return(CallRuntime(Runtime::kRegExpSplit, context, regexp, string,
var_limit.value()));
}
}

View File

@ -0,0 +1,37 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
const str = "a-b-c";
function test(re) {
assertArrayEquals(["a", "b", "c"], re[Symbol.split](str));
}
!function() {
const re = /-/y;
re.lastIndex = 1;
test(re);
}();
!function() {
const re = /-/y;
re.lastIndex = 3;
test(re);
}();
!function() {
const re = /-/y;
re.lastIndex = -1;
test(re);
}();
!function() {
const re = /-/y;
test(re);
}();
!function() {
const re = /-/g;
test(re);
}();