[regexp] Migrate @@split to TurboFan

This shows around a 2.2x speedup compared to the old JS implementation (and
3.5x compared to CPP) for the fast path.

Adds ToUint32 to CodeStubAssembler.

BUG=v8:5339

Review-Url: https://codereview.chromium.org/2532403002
Cr-Commit-Position: refs/heads/master@{#41408}
This commit is contained in:
jgruber 2016-12-01 00:24:57 -08:00 committed by Commit bot
parent 6b9c49cac1
commit 2c3fe6d961
8 changed files with 614 additions and 312 deletions

View File

@ -1878,7 +1878,7 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
{ {
Handle<JSFunction> fun = SimpleCreateFunction( Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.split]"), isolate, factory->InternalizeUtf8String("[Symbol.split]"),
Builtins::kRegExpPrototypeSplit, 2, false); Builtins::kRegExpPrototypeSplit, 2, true);
InstallFunction(prototype, fun, factory->split_symbol(), DONT_ENUM); InstallFunction(prototype, fun, factory->split_symbol(), DONT_ENUM);
} }

View File

@ -16,6 +16,7 @@ namespace internal {
typedef compiler::Node Node; typedef compiler::Node Node;
typedef CodeStubAssembler::Label CLabel; typedef CodeStubAssembler::Label CLabel;
typedef CodeStubAssembler::Variable CVariable; typedef CodeStubAssembler::Variable CVariable;
typedef CodeStubAssembler::ParameterMode ParameterMode;
typedef compiler::CodeAssemblerState CodeAssemblerState; typedef compiler::CodeAssemblerState CodeAssemblerState;
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
@ -259,7 +260,7 @@ Node* ConstructNewResultFromMatchInfo(Isolate* isolate, CodeStubAssembler* a,
Node* string) { Node* string) {
CLabel out(a); CLabel out(a);
CodeStubAssembler::ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS; ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
Node* const num_indices = a->SmiUntag(a->LoadFixedArrayElement( Node* const num_indices = a->SmiUntag(a->LoadFixedArrayElement(
match_info, a->IntPtrConstant(RegExpMatchInfo::kNumberOfCapturesIndex), 0, match_info, a->IntPtrConstant(RegExpMatchInfo::kNumberOfCapturesIndex), 0,
mode)); mode));
@ -1180,8 +1181,7 @@ class GrowableFixedArray {
CodeStubAssembler* a = assembler_; CodeStubAssembler* a = assembler_;
const WriteBarrierMode barrier_mode = UPDATE_WRITE_BARRIER; const WriteBarrierMode barrier_mode = UPDATE_WRITE_BARRIER;
const CodeStubAssembler::ParameterMode mode = const ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
CodeStubAssembler::INTPTR_PARAMETERS;
Node* const length = var_length_.value(); Node* const length = var_length_.value();
Node* const capacity = var_capacity_.value(); Node* const capacity = var_capacity_.value();
@ -1233,8 +1233,7 @@ class GrowableFixedArray {
CodeStubAssembler* a = assembler_; CodeStubAssembler* a = assembler_;
const ElementsKind kind = FAST_ELEMENTS; const ElementsKind kind = FAST_ELEMENTS;
const CodeStubAssembler::ParameterMode mode = const ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
CodeStubAssembler::INTPTR_PARAMETERS;
static const int kInitialArraySize = 8; static const int kInitialArraySize = 8;
Node* const capacity = a->IntPtrConstant(kInitialArraySize); Node* const capacity = a->IntPtrConstant(kInitialArraySize);
@ -1262,7 +1261,7 @@ class GrowableFixedArray {
} }
Node* GrowFixedArray(Node* const current_capacity, Node* const new_capacity, Node* GrowFixedArray(Node* const current_capacity, Node* const new_capacity,
CodeStubAssembler::ParameterMode mode) { ParameterMode mode) {
DCHECK(mode == CodeStubAssembler::INTPTR_PARAMETERS); DCHECK(mode == CodeStubAssembler::INTPTR_PARAMETERS);
CodeStubAssembler* a = assembler_; CodeStubAssembler* a = assembler_;
@ -1360,8 +1359,7 @@ void Generate_RegExpPrototypeMatchBody(CodeStubAssembler* a,
// TODO(jgruber): We could optimize further here and in other // TODO(jgruber): We could optimize further here and in other
// methods (e.g. @@search) by bypassing RegExp result construction. // methods (e.g. @@search) by bypassing RegExp result construction.
Node* const result_fixed_array = a->LoadElements(result); Node* const result_fixed_array = a->LoadElements(result);
const CodeStubAssembler::ParameterMode mode = const ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
CodeStubAssembler::INTPTR_PARAMETERS;
match = match =
a->LoadFixedArrayElement(result_fixed_array, int_zero, 0, mode); a->LoadFixedArrayElement(result_fixed_array, int_zero, 0, mode);
@ -1380,8 +1378,7 @@ void Generate_RegExpPrototypeMatchBody(CodeStubAssembler* a,
// TODO(jgruber): We could optimize further here and in other // TODO(jgruber): We could optimize further here and in other
// methods (e.g. @@search) by bypassing RegExp result construction. // methods (e.g. @@search) by bypassing RegExp result construction.
Node* const result_fixed_array = a->LoadElements(result); Node* const result_fixed_array = a->LoadElements(result);
const CodeStubAssembler::ParameterMode mode = const ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
CodeStubAssembler::INTPTR_PARAMETERS;
Node* const match = Node* const match =
a->LoadFixedArrayElement(result_fixed_array, int_zero, 0, mode); a->LoadFixedArrayElement(result_fixed_array, int_zero, 0, mode);
@ -1598,346 +1595,322 @@ void Builtins::Generate_RegExpPrototypeSearch(CodeAssemblerState* state) {
namespace { namespace {
MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate, // Generates the fast path for @@split. {regexp} is an unmodified JSRegExp,
Handle<Object> object, // {string} is a String, and {limit} is a Smi.
uint32_t* out) { void Generate_RegExpPrototypeSplitBody(CodeStubAssembler* a, Node* const regexp,
if (object->IsUndefined(isolate)) { Node* const string, Node* const limit,
*out = kMaxUInt32; Node* const context) {
return object; Isolate* isolate = a->isolate();
Node* const null = a->NullConstant();
Node* const smi_zero = a->SmiConstant(0);
Node* const int_zero = a->IntPtrConstant(0);
Node* const int_limit = a->SmiUntag(limit);
const ElementsKind kind = FAST_ELEMENTS;
const ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
Node* const allocation_site = nullptr;
Node* const native_context = a->LoadNativeContext(context);
Node* const array_map = a->LoadJSArrayElementsMap(kind, native_context);
CLabel return_empty_array(a, CLabel::kDeferred);
// If limit is zero, return an empty array.
{
CLabel next(a), if_limitiszero(a, CLabel::kDeferred);
a->Branch(a->SmiEqual(limit, smi_zero), &return_empty_array, &next);
a->Bind(&next);
} }
Handle<Object> number; Node* const string_length = a->LoadStringLength(string);
ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(object), Object);
*out = NumberToUint32(*number);
return object;
}
bool AtSurrogatePair(Isolate* isolate, Handle<String> string, int index) { // If passed the empty {string}, return either an empty array or a singleton
if (index + 1 >= string->length()) return false; // array depending on whether the {regexp} matches.
const uint16_t first = string->Get(index); {
if (first < 0xD800 || first > 0xDBFF) return false; CLabel next(a), if_stringisempty(a, CLabel::kDeferred);
const uint16_t second = string->Get(index + 1); a->Branch(a->SmiEqual(string_length, smi_zero), &if_stringisempty, &next);
return (second >= 0xDC00 && second <= 0xDFFF);
}
Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
Handle<FixedArray> elems,
int num_elems) {
elems->Shrink(num_elems);
return isolate->factory()->NewJSArrayWithElements(elems);
}
MaybeHandle<JSArray> RegExpSplit(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> string,
Handle<Object> limit_obj) {
Factory* factory = isolate->factory();
uint32_t limit;
RETURN_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit), JSArray);
const int length = string->length();
if (limit == 0) return factory->NewJSArray(0);
Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
if (length == 0) {
Handle<Object> match_indices;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, match_indices,
RegExpImpl::Exec(regexp, string, 0, last_match_info), JSArray);
if (!match_indices->IsNull(isolate)) return factory->NewJSArray(0);
Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
elems->set(0, *string);
return factory->NewJSArrayWithElements(elems);
}
int current_index = 0;
int start_index = 0;
int start_match = 0;
static const int kInitialArraySize = 8;
Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
int num_elems = 0;
while (true) {
if (start_index == length) {
Handle<String> substr =
factory->NewSubString(string, current_index, length);
elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
break;
}
Handle<Object> match_indices_obj;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, match_indices_obj,
RegExpImpl::Exec(regexp, string, start_index,
isolate->regexp_last_match_info()),
JSArray);
if (match_indices_obj->IsNull(isolate)) {
Handle<String> substr =
factory->NewSubString(string, current_index, length);
elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
break;
}
auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
start_match = match_indices->Capture(0);
if (start_match == length) {
Handle<String> substr =
factory->NewSubString(string, current_index, length);
elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
break;
}
const int end_index = match_indices->Capture(1);
if (start_index == end_index && end_index == current_index) {
const bool unicode = (regexp->GetFlags() & JSRegExp::kUnicode) != 0;
if (unicode && AtSurrogatePair(isolate, string, start_index)) {
start_index += 2;
} else {
start_index += 1;
}
continue;
}
a->Bind(&if_stringisempty);
{ {
Handle<String> substr = Node* const last_match_info = a->LoadContextElement(
factory->NewSubString(string, current_index, start_match); native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
}
if (static_cast<uint32_t>(num_elems) == limit) break; Callable exec_callable = CodeFactory::RegExpExec(isolate);
Node* const match_indices = a->CallStub(
exec_callable, context, regexp, string, smi_zero, last_match_info);
for (int i = 2; i < match_indices->NumberOfCaptureRegisters(); i += 2) { CLabel return_singleton_array(a);
const int start = match_indices->Capture(i); a->Branch(a->WordEqual(match_indices, null), &return_singleton_array,
const int end = match_indices->Capture(i + 1); &return_empty_array);
if (end != -1) { a->Bind(&return_singleton_array);
Handle<String> substr = factory->NewSubString(string, start, end); {
elems = FixedArray::SetAndGrow(elems, num_elems++, substr); Node* const length = a->SmiConstant(1);
} else { Node* const capacity = a->IntPtrConstant(1);
elems = FixedArray::SetAndGrow(elems, num_elems++, Node* const result = a->AllocateJSArray(kind, array_map, capacity,
factory->undefined_value()); length, allocation_site, mode);
}
if (static_cast<uint32_t>(num_elems) == limit) { Node* const fixed_array = a->LoadElements(result);
return NewJSArrayWithElements(isolate, elems, num_elems); a->StoreFixedArrayElement(fixed_array, 0, string);
a->Return(result);
} }
} }
start_index = current_index = end_index; a->Bind(&next);
} }
return NewJSArrayWithElements(isolate, elems, num_elems); // Loop preparations.
}
// ES##sec-speciesconstructor GrowableFixedArray array(a);
// SpeciesConstructor ( O, defaultConstructor )
MUST_USE_RESULT MaybeHandle<Object> SpeciesConstructor(
Isolate* isolate, Handle<JSReceiver> recv,
Handle<JSFunction> default_ctor) {
Handle<Object> ctor_obj;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, ctor_obj,
JSObject::GetProperty(recv, isolate->factory()->constructor_string()),
Object);
if (ctor_obj->IsUndefined(isolate)) return default_ctor; CVariable var_last_matched_until(a, MachineRepresentation::kTagged);
CVariable var_next_search_from(a, MachineRepresentation::kTagged);
if (!ctor_obj->IsJSReceiver()) { var_last_matched_until.Bind(smi_zero);
THROW_NEW_ERROR(isolate, var_next_search_from.Bind(smi_zero);
NewTypeError(MessageTemplate::kConstructorNotReceiver),
Object); CVariable* vars[] = {array.var_array(), array.var_length(),
array.var_capacity(), &var_last_matched_until,
&var_next_search_from};
const int vars_count = sizeof(vars) / sizeof(vars[0]);
CLabel loop(a, vars_count, vars), push_suffix_and_out(a), out(a);
a->Goto(&loop);
a->Bind(&loop);
{
Node* const next_search_from = var_next_search_from.value();
Node* const last_matched_until = var_last_matched_until.value();
// We're done if we've reached the end of the string.
{
CLabel next(a);
a->Branch(a->SmiEqual(next_search_from, string_length),
&push_suffix_and_out, &next);
a->Bind(&next);
}
// Search for the given {regexp}.
Node* const last_match_info = a->LoadContextElement(
native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
Callable exec_callable = CodeFactory::RegExpExec(isolate);
Node* const match_indices =
a->CallStub(exec_callable, context, regexp, string, next_search_from,
last_match_info);
// We're done if no match was found.
{
CLabel next(a);
a->Branch(a->WordEqual(match_indices, null), &push_suffix_and_out, &next);
a->Bind(&next);
}
Node* const match_from = a->LoadFixedArrayElement(
match_indices, a->IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex),
0, mode);
// We're done if the match starts beyond the string.
{
CLabel next(a);
a->Branch(a->WordEqual(match_from, string_length), &push_suffix_and_out,
&next);
a->Bind(&next);
}
Node* const match_to = a->LoadFixedArrayElement(
match_indices,
a->IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex + 1), 0, mode);
// Advance index and continue if the match is empty.
{
CLabel next(a);
a->GotoUnless(a->SmiEqual(match_to, next_search_from), &next);
a->GotoUnless(a->SmiEqual(match_to, last_matched_until), &next);
Node* const is_unicode = FastFlagGetter(a, regexp, JSRegExp::kUnicode);
Node* const new_next_search_from =
AdvanceStringIndex(a, string, next_search_from, is_unicode);
var_next_search_from.Bind(new_next_search_from);
a->Goto(&loop);
a->Bind(&next);
}
// A valid match was found, add the new substring to the array.
{
Node* const from = last_matched_until;
Node* const to = match_from;
Node* const substr = a->SubString(context, string, from, to);
array.Push(substr);
a->GotoIf(a->WordEqual(array.length(), int_limit), &out);
}
// Add all captures to the array.
{
Node* const num_registers = a->LoadFixedArrayElement(
match_indices,
a->IntPtrConstant(RegExpMatchInfo::kNumberOfCapturesIndex), 0, mode);
Node* const int_num_registers = a->SmiUntag(num_registers);
CVariable var_reg(a, MachineType::PointerRepresentation());
var_reg.Bind(a->IntPtrConstant(2));
CVariable* vars[] = {array.var_array(), array.var_length(),
array.var_capacity(), &var_reg};
const int vars_count = sizeof(vars) / sizeof(vars[0]);
CLabel nested_loop(a, vars_count, vars), nested_loop_out(a);
a->Branch(a->IntPtrLessThan(var_reg.value(), int_num_registers),
&nested_loop, &nested_loop_out);
a->Bind(&nested_loop);
{
Node* const reg = var_reg.value();
Node* const from = a->LoadFixedArrayElement(
match_indices, reg,
RegExpMatchInfo::kFirstCaptureIndex * kPointerSize, mode);
Node* const to = a->LoadFixedArrayElement(
match_indices, reg,
(RegExpMatchInfo::kFirstCaptureIndex + 1) * kPointerSize, mode);
CLabel select_capture(a), select_undefined(a), store_value(a);
CVariable var_value(a, MachineRepresentation::kTagged);
a->Branch(a->SmiEqual(to, a->SmiConstant(-1)), &select_undefined,
&select_capture);
a->Bind(&select_capture);
{
Node* const substr = a->SubString(context, string, from, to);
var_value.Bind(substr);
a->Goto(&store_value);
}
a->Bind(&select_undefined);
{
Node* const undefined = a->UndefinedConstant();
var_value.Bind(undefined);
a->Goto(&store_value);
}
a->Bind(&store_value);
{
array.Push(var_value.value());
a->GotoIf(a->WordEqual(array.length(), int_limit), &out);
Node* const new_reg = a->IntPtrAdd(reg, a->IntPtrConstant(2));
var_reg.Bind(new_reg);
a->Branch(a->IntPtrLessThan(new_reg, int_num_registers), &nested_loop,
&nested_loop_out);
}
}
a->Bind(&nested_loop_out);
}
var_last_matched_until.Bind(match_to);
var_next_search_from.Bind(match_to);
a->Goto(&loop);
} }
Handle<JSReceiver> ctor = Handle<JSReceiver>::cast(ctor_obj); a->Bind(&push_suffix_and_out);
{
Node* const from = var_last_matched_until.value();
Node* const to = string_length;
Handle<Object> species; Node* const substr = a->SubString(context, string, from, to);
ASSIGN_RETURN_ON_EXCEPTION( array.Push(substr);
isolate, species,
JSObject::GetProperty(ctor, isolate->factory()->species_symbol()),
Object);
if (species->IsNull(isolate) || species->IsUndefined(isolate)) { a->Goto(&out);
return default_ctor;
} }
if (species->IsConstructor()) return species; a->Bind(&out);
{
Node* const result = array.ToJSArray(context);
a->Return(result);
}
THROW_NEW_ERROR( a->Bind(&return_empty_array);
isolate, NewTypeError(MessageTemplate::kSpeciesNotConstructor), Object); {
Node* const length = smi_zero;
Node* const capacity = int_zero;
Node* const result = a->AllocateJSArray(kind, array_map, capacity, length,
allocation_site, mode);
a->Return(result);
}
} }
} // namespace } // namespace
// ES#sec-regexp.prototype-@@split // ES#sec-regexp.prototype-@@split
// RegExp.prototype [ @@split ] ( string, limit ) // RegExp.prototype [ @@split ] ( string, limit )
BUILTIN(RegExpPrototypeSplit) { void Builtins::Generate_RegExpPrototypeSplit(CodeAssemblerState* state) {
HandleScope scope(isolate); CodeStubAssembler a(state);
CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@split");
Factory* factory = isolate->factory(); Isolate* const isolate = a.isolate();
Handle<Object> string_obj = args.atOrUndefined(isolate, 1); Node* const undefined = a.UndefinedConstant();
Handle<Object> limit_obj = args.atOrUndefined(isolate, 2);
Handle<String> string; Node* const maybe_receiver = a.Parameter(0);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string, Node* const maybe_string = a.Parameter(1);
Object::ToString(isolate, string_obj)); Node* const maybe_limit = a.Parameter(2);
Node* const context = a.Parameter(5);
if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) { // Ensure {maybe_receiver} is a JSReceiver.
RETURN_RESULT_OR_FAILURE( Node* const map = ThrowIfNotJSReceiver(
isolate, &a, isolate, context, maybe_receiver,
RegExpSplit(isolate, Handle<JSRegExp>::cast(recv), string, limit_obj)); MessageTemplate::kIncompatibleMethodReceiver, "RegExp.prototype.@@split");
} Node* const receiver = maybe_receiver;
Handle<JSFunction> regexp_fun = isolate->regexp_function(); // Convert {maybe_string} to a String.
Handle<Object> ctor; Node* const string = a.ToString(context, maybe_string);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun));
Handle<Object> flags_obj; CLabel fast_path(&a), slow_path(&a);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION( BranchIfFastPath(&a, context, map, &fast_path, &slow_path);
isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));
Handle<String> flags; a.Bind(&fast_path);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
Object::ToString(isolate, flags_obj));
Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
Handle<String> new_flags = flags;
if (!sticky) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
factory->NewConsString(flags, y_str));
}
Handle<JSReceiver> splitter;
{ {
const int argc = 2; // Convert {maybe_limit} to a uint32, capping at the maximal smi value.
CVariable var_limit(&a, MachineRepresentation::kTagged);
CLabel if_limitissmimax(&a), limit_done(&a);
ScopedVector<Handle<Object>> argv(argc); a.GotoIf(a.WordEqual(maybe_limit, undefined), &if_limitissmimax);
argv[0] = recv;
argv[1] = new_flags;
Handle<JSFunction> ctor_fun = Handle<JSFunction>::cast(ctor);
Handle<Object> splitter_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, splitter_obj, Execution::New(ctor_fun, argc, argv.start()));
splitter = Handle<JSReceiver>::cast(splitter_obj);
}
uint32_t limit;
RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
const int length = string->length();
if (limit == 0) return *factory->NewJSArray(0);
if (length == 0) {
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
factory->undefined_value()));
if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
elems->set(0, *string);
return *factory->NewJSArrayWithElements(elems);
}
// TODO(jgruber): Wrap this in a helper class.
static const int kInitialArraySize = 8;
Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
int num_elems = 0;
int string_index = 0;
int prev_string_index = 0;
while (string_index < length) {
RETURN_FAILURE_ON_EXCEPTION(
isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
factory->undefined_value()));
if (result->IsNull(isolate)) {
string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
string_index, unicode);
continue;
}
// TODO(jgruber): Extract toLength of some property into function.
Handle<Object> last_index_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
const int last_index = Handle<Smi>::cast(last_index_obj)->value();
const int end = std::min(last_index, length);
if (end == prev_string_index) {
string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
string_index, unicode);
continue;
}
{ {
Handle<String> substr = Node* const limit = a.ToUint32(context, maybe_limit);
factory->NewSubString(string, prev_string_index, string_index); a.GotoUnless(a.TaggedIsSmi(limit), &if_limitissmimax);
elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
if (static_cast<uint32_t>(num_elems) == limit) { var_limit.Bind(limit);
return *NewJSArrayWithElements(isolate, elems, num_elems); a.Goto(&limit_done);
}
} }
prev_string_index = end; a.Bind(&if_limitissmimax);
{
Handle<Object> num_captures_obj; // TODO(jgruber): In this case, we can probably avoid generation of limit checks
ASSIGN_RETURN_FAILURE_ON_EXCEPTION( // in Generate_RegExpPrototypeSplitBody.
isolate, num_captures_obj, Node* const smi_max = a.SmiConstant(Smi::kMaxValue);
Object::GetProperty(result, isolate->factory()->length_string())); var_limit.Bind(smi_max);
a.Goto(&limit_done);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
const int num_captures =
std::max(Handle<Smi>::cast(num_captures_obj)->value(), 0);
for (int i = 1; i < num_captures; i++) {
Handle<Object> capture;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, capture, Object::GetElement(isolate, result, i));
elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
if (static_cast<uint32_t>(num_elems) == limit) {
return *NewJSArrayWithElements(isolate, elems, num_elems);
}
} }
string_index = prev_string_index; a.Bind(&limit_done);
{
Node* const limit = var_limit.value();
Generate_RegExpPrototypeSplitBody(&a, receiver, string, limit, context);
}
} }
a.Bind(&slow_path);
{ {
Handle<String> substr = Node* const result = a.CallRuntime(Runtime::kRegExpSplit, context, receiver,
factory->NewSubString(string, prev_string_index, length); string, maybe_limit);
elems = FixedArray::SetAndGrow(elems, num_elems++, substr); a.Return(result);
} }
return *NewJSArrayWithElements(isolate, elems, num_elems);
} }
namespace { namespace {
@ -1972,8 +1945,7 @@ Node* ReplaceGlobalCallableFastPath(CodeStubAssembler* a, Node* context,
Node* const capacity = a->IntPtrConstant(16); Node* const capacity = a->IntPtrConstant(16);
Node* const length = smi_zero; Node* const length = smi_zero;
Node* const allocation_site = nullptr; Node* const allocation_site = nullptr;
CodeStubAssembler::ParameterMode capacity_mode = ParameterMode capacity_mode = CodeStubAssembler::INTPTR_PARAMETERS;
CodeStubAssembler::INTPTR_PARAMETERS;
result_array = a->AllocateJSArray(kind, array_map, capacity, length, result_array = a->AllocateJSArray(kind, array_map, capacity, length,
allocation_site, capacity_mode); allocation_site, capacity_mode);
@ -2001,7 +1973,7 @@ Node* ReplaceGlobalCallableFastPath(CodeStubAssembler* a, Node* context,
Node* const res_elems = a->LoadElements(res); Node* const res_elems = a->LoadElements(res);
CSA_ASSERT(a, a->HasInstanceType(res_elems, FIXED_ARRAY_TYPE)); CSA_ASSERT(a, a->HasInstanceType(res_elems, FIXED_ARRAY_TYPE));
CodeStubAssembler::ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS; ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
Node* const num_capture_registers = a->LoadFixedArrayElement( Node* const num_capture_registers = a->LoadFixedArrayElement(
last_match_info, last_match_info,
a->IntPtrConstant(RegExpMatchInfo::kNumberOfCapturesIndex), 0, mode); a->IntPtrConstant(RegExpMatchInfo::kNumberOfCapturesIndex), 0, mode);
@ -2034,8 +2006,7 @@ Node* ReplaceGlobalCallableFastPath(CodeStubAssembler* a, Node* context,
Node* const i = var_i.value(); Node* const i = var_i.value();
a->GotoUnless(a->IntPtrLessThan(i, end), &create_result); a->GotoUnless(a->IntPtrLessThan(i, end), &create_result);
CodeStubAssembler::ParameterMode mode = ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
CodeStubAssembler::INTPTR_PARAMETERS;
Node* const elem = a->LoadFixedArrayElement(res_elems, i, 0, mode); Node* const elem = a->LoadFixedArrayElement(res_elems, i, 0, mode);
CLabel if_issmi(a), if_isstring(a), loop_epilogue(a); CLabel if_issmi(a), if_isstring(a), loop_epilogue(a);
@ -2102,8 +2073,7 @@ Node* ReplaceGlobalCallableFastPath(CodeStubAssembler* a, Node* context,
a->Bind(&if_hasexplicitcaptures); a->Bind(&if_hasexplicitcaptures);
{ {
CodeStubAssembler::ParameterMode mode = ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
CodeStubAssembler::INTPTR_PARAMETERS;
Node* const from = int_zero; Node* const from = int_zero;
Node* const to = a->SmiUntag(res_length); Node* const to = a->SmiUntag(res_length);
@ -2220,8 +2190,7 @@ Node* ReplaceSimpleStringFastPath(CodeStubAssembler* a, Node* context,
a->Bind(&if_matched); a->Bind(&if_matched);
{ {
CodeStubAssembler::ParameterMode mode = ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
CodeStubAssembler::INTPTR_PARAMETERS;
Node* const subject_start = smi_zero; Node* const subject_start = smi_zero;
Node* const match_start = a->LoadFixedArrayElement( Node* const match_start = a->LoadFixedArrayElement(

View File

@ -610,7 +610,7 @@ namespace internal {
TFJ(RegExpPrototypeSearch, 1) \ TFJ(RegExpPrototypeSearch, 1) \
CPP(RegExpPrototypeSourceGetter) \ CPP(RegExpPrototypeSourceGetter) \
CPP(RegExpPrototypeSpeciesGetter) \ CPP(RegExpPrototypeSpeciesGetter) \
CPP(RegExpPrototypeSplit) \ TFJ(RegExpPrototypeSplit, 2) \
TFJ(RegExpPrototypeStickyGetter, 0) \ TFJ(RegExpPrototypeStickyGetter, 0) \
TFJ(RegExpPrototypeTest, 1) \ TFJ(RegExpPrototypeTest, 1) \
CPP(RegExpPrototypeToString) \ CPP(RegExpPrototypeToString) \

View File

@ -3892,6 +3892,108 @@ Node* CodeStubAssembler::ToNumber(Node* context, Node* input) {
return var_result.value(); return var_result.value();
} }
// ES#sec-touint32
// Converts {input} to a Number via ToNumber and maps it onto an integer in
// the range [0, 2^32 - 1]. +-0, NaN and +-Infinity map to 0. The result is
// returned as a tagged value.
Node* CodeStubAssembler::ToUint32(Node* context, Node* input) {
  Node* const float_zero = Float64Constant(0.0);
  Node* const float_two_32 = Float64Constant(static_cast<double>(1ULL << 32));

  Label out(this);

  Variable var_result(this, MachineRepresentation::kTagged);
  var_result.Bind(input);

  // Early exit for positive smis.
  {
    // TODO(jgruber): This branch and the recheck below can be removed once we
    // have a ToNumber with multiple exits.
    Label next(this, Label::kDeferred);
    Branch(WordIsPositiveSmi(input), &out, &next);
    Bind(&next);
  }

  Node* const number = ToNumber(context, input);
  var_result.Bind(number);

  // Perhaps we have a positive smi now.
  {
    Label next(this, Label::kDeferred);
    Branch(WordIsPositiveSmi(number), &out, &next);
    Bind(&next);
  }

  Label if_isnegativesmi(this), if_isheapnumber(this);
  Branch(TaggedIsSmi(number), &if_isnegativesmi, &if_isheapnumber);

  Bind(&if_isnegativesmi);
  {
    // {number} is a negative integer here (positive smis exited above), so
    // {number} mod 2^32 === {number} + 2^32.
    Node* const float_number = SmiToFloat64(number);
    Node* const float_result = Float64Add(float_number, float_two_32);
    Node* const result = ChangeFloat64ToTagged(float_result);
    var_result.Bind(result);
    Goto(&out);
  }

  Bind(&if_isheapnumber);
  {
    Label return_zero(this);
    Node* const value = LoadHeapNumberValue(number);

    {
      // +-0.
      Label next(this);
      Branch(Float64Equal(value, float_zero), &return_zero, &next);
      Bind(&next);
    }

    {
      // NaN (the only value that does not compare equal to itself).
      Label next(this);
      Branch(Float64Equal(value, value), &next, &return_zero);
      Bind(&next);
    }

    {
      // +Infinity.
      Label next(this);
      Node* const positive_infinity =
          Float64Constant(std::numeric_limits<double>::infinity());
      Branch(Float64Equal(value, positive_infinity), &return_zero, &next);
      Bind(&next);
    }

    {
      // -Infinity.
      Label next(this);
      Node* const negative_infinity =
          Float64Constant(-1.0 * std::numeric_limits<double>::infinity());
      Branch(Float64Equal(value, negative_infinity), &return_zero, &next);
      Bind(&next);
    }

    // Per spec, let int be the value with the same sign as {value} and
    // magnitude floor(abs({value})), i.e. {value} truncated towards zero
    // (NOT floored: -1.5 must become -1, not -2). Then return int mod 2^32.
    // The add-and-mod-again sequence turns a possibly negative remainder into
    // the non-negative result the spec requires.
    {
      Node* x = Float64Trunc(value);
      x = Float64Mod(x, float_two_32);
      x = Float64Add(x, float_two_32);
      x = Float64Mod(x, float_two_32);

      Node* const result = ChangeFloat64ToTagged(x);
      var_result.Bind(result);
      Goto(&out);
    }

    Bind(&return_zero);
    {
      var_result.Bind(SmiConstant(Smi::kZero));
      Goto(&out);
    }
  }

  Bind(&out);
  return var_result.value();
}
Node* CodeStubAssembler::ToString(Node* context, Node* input) { Node* CodeStubAssembler::ToString(Node* context, Node* input) {
Label is_number(this); Label is_number(this);
Label runtime(this, Label::kDeferred); Label runtime(this, Label::kDeferred);

View File

@ -614,6 +614,11 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler {
// Convert any object to a Number. // Convert any object to a Number.
Node* ToNumber(Node* context, Node* input); Node* ToNumber(Node* context, Node* input);
// Converts |input| to one of 2^32 integer values in the range 0 through
// 2^32-1, inclusive.
// ES#sec-touint32
compiler::Node* ToUint32(compiler::Node* context, compiler::Node* input);
// Convert any object to a String. // Convert any object to a String.
Node* ToString(Node* context, Node* input); Node* ToString(Node* context, Node* input);

View File

@ -1269,6 +1269,223 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
isolate, subject, regexp, replace)); isolate, subject, regexp, replace));
} }
namespace {
// ES#sec-speciesconstructor
// SpeciesConstructor ( O, defaultConstructor )
//
// Reads {recv}.constructor and then its @@species property. Returns
// {default_ctor} when either lookup yields undefined (or @@species is null),
// returns the species value when it is a constructor, and throws a TypeError
// otherwise.
MUST_USE_RESULT MaybeHandle<Object> SpeciesConstructor(
    Isolate* isolate, Handle<JSReceiver> recv,
    Handle<JSFunction> default_ctor) {
  Factory* const factory = isolate->factory();

  // Step 1: fetch the "constructor" property of the receiver.
  Handle<Object> constructor_value;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, constructor_value,
      JSObject::GetProperty(recv, factory->constructor_string()), Object);

  if (constructor_value->IsUndefined(isolate)) {
    return default_ctor;
  }

  if (!constructor_value->IsJSReceiver()) {
    THROW_NEW_ERROR(isolate,
                    NewTypeError(MessageTemplate::kConstructorNotReceiver),
                    Object);
  }

  // Step 2: fetch @@species from the constructor.
  Handle<JSReceiver> constructor_receiver =
      Handle<JSReceiver>::cast(constructor_value);
  Handle<Object> species_value;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, species_value,
      JSObject::GetProperty(constructor_receiver, factory->species_symbol()),
      Object);

  const bool species_is_absent =
      species_value->IsNull(isolate) || species_value->IsUndefined(isolate);
  if (species_is_absent) {
    return default_ctor;
  }

  if (!species_value->IsConstructor()) {
    THROW_NEW_ERROR(
        isolate, NewTypeError(MessageTemplate::kSpeciesNotConstructor), Object);
  }

  return species_value;
}
// Converts {object} to a uint32 and stores it in {out}. An undefined {object}
// is mapped to kMaxUInt32; any other value goes through Object::ToNumber
// followed by NumberToUint32. On success the original {object} handle is
// returned; if ToNumber fails, the failure is propagated by
// ASSIGN_RETURN_ON_EXCEPTION and {out} is left untouched.
MUST_USE_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
                                             Handle<Object> object,
                                             uint32_t* out) {
  if (!object->IsUndefined(isolate)) {
    Handle<Object> as_number;
    ASSIGN_RETURN_ON_EXCEPTION(isolate, as_number, Object::ToNumber(object),
                               Object);
    *out = NumberToUint32(*as_number);
  } else {
    *out = kMaxUInt32;
  }
  return object;
}
// Shrinks {elems} to {num_elems} entries and wraps it in a new JSArray
// created through the isolate's factory.
Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
                                       Handle<FixedArray> elems,
                                       int num_elems) {
  // Shrink first so the array is built from the already-shrunk backing store.
  elems->Shrink(num_elems);

  Factory* const factory = isolate->factory();
  Handle<JSArray> array = factory->NewJSArrayWithElements(elems);
  return array;
}
} // namespace
// Slow path for:
// ES#sec-regexp.prototype-@@split
// RegExp.prototype [ @@split ] ( string, limit )
//
// Arguments:
//   [0] recv      - the receiver (a JSReceiver) whose species constructor is
//                   used to build the splitter.
//   [1] string    - the subject string.
//   [2] limit_obj - the limit; undefined means "no limit" (kMaxUInt32).
//
// Returns a new JSArray holding the substrings between matches, interleaved
// with each match's captures, truncated to at most |limit| entries.
RUNTIME_FUNCTION(Runtime_RegExpSplit) {
  HandleScope scope(isolate);
  DCHECK(args.length() == 3);
  DCHECK(args[1]->IsString());

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);

  Factory* factory = isolate->factory();

  Handle<JSFunction> regexp_fun = isolate->regexp_function();
  Handle<Object> ctor;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun));

  Handle<Object> flags_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));

  Handle<String> flags;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
                                     Object::ToString(isolate, flags_obj));

  Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
  const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);

  Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
  const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);

  // The splitter must be sticky so that each exec only matches at lastIndex.
  Handle<String> new_flags = flags;
  if (!sticky) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
                                       factory->NewConsString(flags, y_str));
  }

  Handle<JSReceiver> splitter;
  {
    const int argc = 2;

    ScopedVector<Handle<Object>> argv(argc);
    argv[0] = recv;
    argv[1] = new_flags;

    // SpeciesConstructor may hand back any constructor (e.g. a bound function
    // or a proxy), not necessarily a JSFunction, so it must not be cast to
    // JSFunction. Construct through the generic overload, using the
    // constructor itself as new.target.
    Handle<Object> splitter_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, splitter_obj,
        Execution::New(isolate, ctor, ctor, argc, argv.start()));

    splitter = Handle<JSReceiver>::cast(splitter_obj);
  }

  uint32_t limit;
  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));

  const int length = string->length();

  if (limit == 0) return *factory->NewJSArray(0);

  // Empty subject: the result is [] if the splitter matches the empty string
  // and [string] otherwise.
  if (length == 0) {
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    if (!result->IsNull(isolate)) return *factory->NewJSArray(0);

    Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
    elems->set(0, *string);
    return *factory->NewJSArrayWithElements(elems);
  }

  static const int kInitialArraySize = 8;
  Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
  int num_elems = 0;

  // |string_index| is the position at which the next match is attempted;
  // |prev_string_index| is the end of the previous match, i.e. the start of
  // the next substring to emit.
  int string_index = 0;
  int prev_string_index = 0;
  while (string_index < length) {
    RETURN_FAILURE_ON_EXCEPTION(
        isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));

    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    // No match at this position: try the next one.
    if (result->IsNull(isolate)) {
      string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
                                                     string_index, unicode);
      continue;
    }

    Handle<Object> last_index_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));

    // The ToLength result is a non-negative Number that need not fit in a
    // Smi (lastIndex is user-controllable), so read it as a double and clamp
    // to the string length before narrowing to int.
    const int end = static_cast<int>(
        std::min(last_index_obj->Number(), static_cast<double>(length)));
    // An empty match right at the end of the previous one makes no progress.
    if (end == prev_string_index) {
      string_index = RegExpUtils::AdvanceStringIndex(isolate, string,
                                                     string_index, unicode);
      continue;
    }

    {
      Handle<String> substr =
          factory->NewSubString(string, prev_string_index, string_index);
      elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
      if (static_cast<uint32_t>(num_elems) == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    prev_string_index = end;

    Handle<Object> num_captures_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj,
        Object::GetProperty(result, isolate->factory()->length_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));

    // As above, the result's "length" may exceed the Smi range (the exec
    // result can be an arbitrary object); clamp instead of casting to Smi.
    const int num_captures = static_cast<int>(
        std::min(num_captures_obj->Number(), static_cast<double>(kMaxInt)));

    // Index 0 is the whole match; only the capture groups are appended.
    for (int i = 1; i < num_captures; i++) {
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, i));
      elems = FixedArray::SetAndGrow(elems, num_elems++, capture);
      if (static_cast<uint32_t>(num_elems) == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    string_index = prev_string_index;
  }

  // Emit the trailing piece after the last match.
  {
    Handle<String> substr =
        factory->NewSubString(string, prev_string_index, length);
    elems = FixedArray::SetAndGrow(elems, num_elems++, substr);
  }

  return *NewJSArrayWithElements(isolate, elems, num_elems);
}
// Slow path for: // Slow path for:
// ES#sec-regexp.prototype-@@replace // ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@replace ] ( string, replaceValue ) // RegExp.prototype [ @@replace ] ( string, replaceValue )

View File

@ -462,6 +462,7 @@ namespace internal {
F(RegExpExecReThrow, 4, 1) \ F(RegExpExecReThrow, 4, 1) \
F(RegExpInternalReplace, 3, 1) \ F(RegExpInternalReplace, 3, 1) \
F(RegExpReplace, 3, 1) \ F(RegExpReplace, 3, 1) \
F(RegExpSplit, 3, 1) \
F(StringReplaceGlobalRegExpWithString, 4, 1) \ F(StringReplaceGlobalRegExpWithString, 4, 1) \
F(StringReplaceNonGlobalRegExpWithFunction, 3, 1) \ F(StringReplaceNonGlobalRegExpWithFunction, 3, 1) \
F(StringSplit, 3, 1) F(StringSplit, 3, 1)

View File

@ -164,3 +164,11 @@ separator = { toString: function() { assertEquals(1, counter);
assertEquals(["a", "c"], String.prototype.split.call(subject, separator)); assertEquals(["a", "c"], String.prototype.split.call(subject, separator));
assertEquals(2, counter); assertEquals(2, counter);
// Check ToUint32 conversion of limit. ToUint32 truncates toward zero before
// reducing modulo 2^32, so -4294967294.5 maps to 2 (not 1).
assertArrayEquals(["a"], "a,b,c,d,e,f".split(/,/, -4294967295));
assertArrayEquals(["a", "b"], "a,b,c,d,e,f".split(/,/, -4294967294.5));
assertArrayEquals(["a", "b"], "a,b,c,d,e,f".split(/,/, -4294967294));
assertArrayEquals(["a", "b", "c"], "a,b,c,d,e,f".split(/,/, -4294967293));
assertArrayEquals(["a", "b", "c", "d"], "a,b,c,d,e,f".split(/,/, -4294967292));
assertArrayEquals(["a", "b", "c", "d", "e", "f"], "a,b,c,d,e,f".split(/,/, -1));