From 27fd52abad7d6959f71c8510c4af9f7b4a6d6431 Mon Sep 17 00:00:00 2001 From: jgruber Date: Wed, 23 Aug 2017 10:00:32 +0200 Subject: [PATCH] [regexp] Send sticky @@splits to the slow path Due to shortcuts we take on the RegExp.p[@@split] fast path (we don't allocate a new instance), we need to send sticky regexps to the slow path. The problem is a slight impedance mismatch between the spec and our fast-path implementation. Spec: Creates a new regexp instance `splitter` that is guaranteed to be sticky, uses `splitter.lastIndex` to advance the search range, advances by itself using AdvanceStringIndex if `splitter` did not match at the current position. Our fast path: Uses the given regexp instance and does not modify stickyness, uses last_match_info to advance search range, returns (and assumes no more matches) once RegExpExecInternal fails to match. This is fine if the given regexp is non-sticky, since 1. the value of lastIndex is ignored, and 2. non-sticky regexps match if a match is found anywhere in the string, not just exactly at the current lastIndex. Sticky regexps though are a problem. If no match is found exactly at the current position, @@split assumes no more matches and exits. In a follow-up, we could explore other options, such as allocating a new instance or saving/restoring flags and lastIndex. Bug: v8:6706 Change-Id: I6da2266df72b2f80f00c1ce3cd7c8655de91f680 Reviewed-on: https://chromium-review.googlesource.com/626065 Reviewed-by: Yang Guo Commit-Queue: Jakob Gruber Cr-Commit-Position: refs/heads/master@{#47543} --- src/builtins/builtins-regexp-gen.cc | 78 ++++++++++++++----------- test/mjsunit/regress/regress-v8-6706.js | 37 ++++++++++++ 2 files changed, 81 insertions(+), 34 deletions(-) create mode 100644 test/mjsunit/regress/regress-v8-6706.js diff --git a/src/builtins/builtins-regexp-gen.cc b/src/builtins/builtins-regexp-gen.cc index 803c341592..bc5480c935 100644 --- a/src/builtins/builtins-regexp-gen.cc +++ b/src/builtins/builtins-regexp-gen.cc @@ -1486,12 +1486,9 @@ TF_BUILTIN(RegExpPrototypeSourceGetter, RegExpBuiltinsAssembler) { // Fast-path implementation for flag checks on an unmodified JSRegExp instance. Node* RegExpBuiltinsAssembler::FastFlagGetter(Node* const regexp, JSRegExp::Flag flag) { - Node* const smi_zero = SmiConstant(0); Node* const flags = LoadObjectField(regexp, JSRegExp::kFlagsOffset); Node* const mask = SmiConstant(flag); - Node* const is_flag_set = WordNotEqual(SmiAnd(flags, mask), smi_zero); - - return is_flag_set; + return SmiToWord32(SmiAnd(flags, mask)); } // Load through the GetProperty stub. @@ -2287,13 +2284,14 @@ TF_BUILTIN(RegExpPrototypeSearch, RegExpBuiltinsAssembler) { RegExpPrototypeSearchBodySlow(context, receiver, string); } -// Generates the fast path for @@split. {regexp} is an unmodified JSRegExp, -// {string} is a String, and {limit} is a Smi. +// Generates the fast path for @@split. {regexp} is an unmodified, non-sticky +// JSRegExp, {string} is a String, and {limit} is a Smi. void RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(Node* const context, Node* const regexp, Node* const string, Node* const limit) { CSA_ASSERT(this, IsFastRegExp(context, regexp)); + CSA_ASSERT(this, Word32BinaryNot(FastFlagGetter(regexp, JSRegExp::kSticky))); CSA_ASSERT(this, TaggedIsSmi(limit)); CSA_ASSERT(this, IsString(string)); @@ -2551,49 +2549,61 @@ TF_BUILTIN(RegExpSplit, RegExpBuiltinsAssembler) { // been changed. // Convert {maybe_limit} to a uint32, capping at the maximal smi value. + VARIABLE(var_limit, MachineRepresentation::kTagged, maybe_limit); - Label if_limitissmimax(this), limit_done(this), runtime(this); + Label if_limitissmimax(this), runtime(this, Label::kDeferred); - GotoIf(IsUndefined(maybe_limit), &if_limitissmimax); - GotoIf(TaggedIsPositiveSmi(maybe_limit), &limit_done); - - Node* const limit = ToUint32(context, maybe_limit); { - // ToUint32(limit) could potentially change the shape of the RegExp - // object. Recheck that we are still on the fast path and bail to runtime - // otherwise. + Label next(this); + + GotoIf(IsUndefined(maybe_limit), &if_limitissmimax); + GotoIf(TaggedIsPositiveSmi(maybe_limit), &next); + + Node* const limit = ToUint32(context, maybe_limit); { - Label next(this); - BranchIfFastRegExp(context, regexp, &next, &runtime); - BIND(&next); + // ToUint32(limit) could potentially change the shape of the RegExp + // object. Recheck that we are still on the fast path and bail to runtime + // otherwise. + { + Label next(this); + BranchIfFastRegExp(context, regexp, &next, &runtime); + BIND(&next); + } + + GotoIfNot(TaggedIsPositiveSmi(limit), &if_limitissmimax); + + var_limit.Bind(limit); + Goto(&next); } - GotoIfNot(TaggedIsSmi(limit), &if_limitissmimax); + BIND(&if_limitissmimax); + { + // TODO(jgruber): In this case, we can probably avoid generation of limit + // checks in Generate_RegExpPrototypeSplitBody. + var_limit.Bind(SmiConstant(Smi::kMaxValue)); + Goto(&next); + } - var_limit.Bind(limit); - Goto(&limit_done); + BIND(&next); } - BIND(&if_limitissmimax); - { - // TODO(jgruber): In this case, we can probably avoid generation of limit - // checks in Generate_RegExpPrototypeSplitBody. - var_limit.Bind(SmiConstant(Smi::kMaxValue)); - Goto(&limit_done); - } + // Due to specific shortcuts we take on the fast path (specifically, we don't + // allocate a new regexp instance as specced), we need to ensure that the + // given regexp is non-sticky to avoid invalid results. See crbug.com/v8/6706. - BIND(&limit_done); - { - Node* const limit = var_limit.value(); - RegExpPrototypeSplitBody(context, regexp, string, limit); - } + GotoIf(FastFlagGetter(regexp, JSRegExp::kSticky), &runtime); + + // We're good to go on the fast path, which is inlined here. + + RegExpPrototypeSplitBody(context, regexp, string, var_limit.value()); BIND(&runtime); { // The runtime call passes in limit to ensure the second ToUint32(limit) // call is not observable. - CSA_ASSERT(this, IsNumber(limit)); - Return(CallRuntime(Runtime::kRegExpSplit, context, regexp, string, limit)); + CSA_ASSERT(this, IsNumber(var_limit.value())); + Return(CallRuntime(Runtime::kRegExpSplit, context, regexp, string, + var_limit.value())); } } diff --git a/test/mjsunit/regress/regress-v8-6706.js b/test/mjsunit/regress/regress-v8-6706.js new file mode 100644 index 0000000000..36ce229fd8 --- /dev/null +++ b/test/mjsunit/regress/regress-v8-6706.js @@ -0,0 +1,37 @@ +// Copyright 2017 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +const str = "a-b-c"; + +function test(re) { + assertArrayEquals(["a", "b", "c"], re[Symbol.split](str)); +} + +!function() { + const re = /-/y; + re.lastIndex = 1; + test(re); +}(); + +!function() { + const re = /-/y; + re.lastIndex = 3; + test(re); +}(); + +!function() { + const re = /-/y; + re.lastIndex = -1; + test(re); +}(); + +!function() { + const re = /-/y; + test(re); +}(); + +!function() { + const re = /-/g; + test(re); +}();