[regexp] Port RegExpConstructor to C++

This moves the RegExp constructor to C++. Local runs of octane/regexp are
performance-neutral:

C++: 4970.1 +- 404.981
JS: 4869.2 +- 586.743

That's probably only the case because exec and replace dominate
octane/regexp. There's potential for improvement here, for instance by
adding a fast-path if new.target is an unmodified JSRegExp function.

BUG=v8:5339

Review-Url: https://codereview.chromium.org/2384613004
Cr-Commit-Position: refs/heads/master@{#39981}
This commit is contained in:
jgruber 2016-10-05 02:13:04 -07:00 committed by Commit bot
parent aa93e6ca95
commit d515156441
5 changed files with 168 additions and 49 deletions

View File

@ -1649,11 +1649,24 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
factory->NewJSObject(isolate->object_function(), TENURED); factory->NewJSObject(isolate->object_function(), TENURED);
Handle<JSFunction> regexp_fun = Handle<JSFunction> regexp_fun =
InstallFunction(global, "RegExp", JS_REGEXP_TYPE, JSRegExp::kSize, InstallFunction(global, "RegExp", JS_REGEXP_TYPE, JSRegExp::kSize,
prototype, Builtins::kIllegal); prototype, Builtins::kRegExpConstructor);
InstallWithIntrinsicDefaultProto(isolate, regexp_fun, InstallWithIntrinsicDefaultProto(isolate, regexp_fun,
Context::REGEXP_FUNCTION_INDEX); Context::REGEXP_FUNCTION_INDEX);
regexp_fun->shared()->SetConstructStub(
*isolate->builtins()->JSBuiltinsConstructStub()); Handle<SharedFunctionInfo> shared(regexp_fun->shared(), isolate);
shared->SetConstructStub(*isolate->builtins()->RegExpConstructor());
shared->set_instance_class_name(isolate->heap()->RegExp_string());
shared->DontAdaptArguments();
shared->set_length(2);
// RegExp.prototype setup.
// Install the "constructor" property on the {prototype}.
JSObject::AddProperty(prototype, factory->constructor_string(), regexp_fun,
DONT_ENUM);
SimpleInstallFunction(prototype, "exec", Builtins::kRegExpPrototypeExec, 1,
true, DONT_ENUM);
DCHECK(regexp_fun->has_initial_map()); DCHECK(regexp_fun->has_initial_map());
Handle<Map> initial_map(regexp_fun->initial_map()); Handle<Map> initial_map(regexp_fun->initial_map());
@ -1675,15 +1688,6 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
initial_map->set_unused_property_fields(0); initial_map->set_unused_property_fields(0);
initial_map->set_instance_size(initial_map->instance_size() + initial_map->set_instance_size(initial_map->instance_size() +
num_fields * kPointerSize); num_fields * kPointerSize);
// RegExp.prototype setup.
// Install the "constructor" property on the {prototype}.
JSObject::AddProperty(prototype, factory->constructor_string(), regexp_fun,
DONT_ENUM);
SimpleInstallFunction(prototype, "exec", Builtins::kRegExpPrototypeExec, 1,
true, DONT_ENUM);
} }
{ // -- E r r o r { // -- E r r o r

View File

@ -11,6 +11,153 @@
namespace v8 { namespace v8 {
namespace internal { namespace internal {
// -----------------------------------------------------------------------------
// ES6 section 21.2 RegExp Objects
namespace {
// ES#sec-isregexp IsRegExp ( argument )
//
// Decides whether {object} should be treated as a regular expression.
//
// Returns Just(false) for non-receivers. Otherwise the @@match property is
// read: if it is not undefined, its boolean value is the answer; if it is
// undefined, the answer is whether {object} is a JSRegExp. Returns
// Nothing<bool>() if reading @@match failed; callers are expected to find the
// exception pending on {isolate}.
Maybe<bool> IsRegExp(Isolate* isolate, Handle<Object> object) {
  if (!object->IsJSReceiver()) return Just(false);

  Handle<JSReceiver> receiver = Handle<JSReceiver>::cast(object);

  // There is deliberately no shortcut keyed on the receiver's map. An instance
  // that still has the initial JSRegExp map inherits @@match from its
  // prototype, and that property can be overwritten (or turned into an
  // accessor) without the instance map changing. Skipping the lookup would
  // report such objects as regexps regardless of @@match and would hide the
  // observable property access.
  Handle<Object> match;
  ASSIGN_RETURN_ON_EXCEPTION_VALUE(
      isolate, match,
      JSObject::GetProperty(receiver, isolate->factory()->match_symbol()),
      Nothing<bool>());

  // A defined @@match wins over the internal object type.
  if (!match->IsUndefined(isolate)) return Just(match->BooleanValue());
  return Just(object->IsJSRegExp());
}
// Builds the flags string of {regexp} from its internal flag bits. Characters
// are emitted in the fixed order 'g', 'i', 'm', 'u', 'y'; a regexp without any
// flags yields the empty string.
Handle<String> PatternFlags(Isolate* isolate, Handle<JSRegExp> regexp) {
  // Room for every flag character plus the terminating '\0'.
  static const int kMaxFlagsLength = 5 + 1;
  char buffer[kMaxFlagsLength];
  int length = 0;

  const JSRegExp::Flags flags = regexp->GetFlags();

  // Appends {flag_char} to the buffer when the corresponding bit is set.
  const auto append_if_set = [&buffer, &length](bool is_set, char flag_char) {
    if (is_set) buffer[length++] = flag_char;
  };

  append_if_set((flags & JSRegExp::kGlobal) != 0, 'g');
  append_if_set((flags & JSRegExp::kIgnoreCase) != 0, 'i');
  append_if_set((flags & JSRegExp::kMultiline) != 0, 'm');
  append_if_set((flags & JSRegExp::kUnicode) != 0, 'u');
  append_if_set((flags & JSRegExp::kSticky) != 0, 'y');

  // Zero the unused tail so the buffer is a valid C string.
  DCHECK_LT(length, kMaxFlagsLength);
  memset(&buffer[length], '\0', kMaxFlagsLength - length);

  return isolate->factory()->NewStringFromAsciiChecked(buffer);
}
// ES#sec-regexpinitialize
// Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
//
// Converts {pattern} and {flags} to strings (undefined maps to the empty
// string, anything else goes through ToString) and hands them to
// JSRegExp::Initialize together with {regexp}. The pattern is converted before
// the flags. If either conversion fails, the function returns early through
// ASSIGN_RETURN_ON_EXCEPTION.
MaybeHandle<JSRegExp> RegExpInitialize(Isolate* isolate,
                                       Handle<JSRegExp> regexp,
                                       Handle<Object> pattern,
                                       Handle<Object> flags) {
  // Start from the "undefined" default and only convert when a value is given.
  Handle<String> source = isolate->factory()->empty_string();
  if (!pattern->IsUndefined(isolate)) {
    ASSIGN_RETURN_ON_EXCEPTION(isolate, source,
                               Object::ToString(isolate, pattern), JSRegExp);
  }

  Handle<String> flag_chars = isolate->factory()->empty_string();
  if (!flags->IsUndefined(isolate)) {
    ASSIGN_RETURN_ON_EXCEPTION(isolate, flag_chars,
                               Object::ToString(isolate, flags), JSRegExp);
  }

  // TODO(jgruber): We could avoid the flags back and forth conversions.
  RETURN_RESULT(isolate, JSRegExp::Initialize(regexp, source, flag_chars),
                JSRegExp);
}
} // namespace
// ES#sec-regexp-pattern-flags
// RegExp ( pattern, flags )
//
// Implements both `RegExp(...)` and `new RegExp(...)`. The order of the
// property reads below (IsRegExp, "constructor", "source", "flags") is
// observable from script and must not be rearranged.
BUILTIN(RegExpConstructor) {
  HandleScope scope(isolate);

  Handle<JSFunction> target = isolate->regexp_function();
  Handle<HeapObject> new_target = args.new_target();
  Handle<Object> pattern = args.atOrUndefined(isolate, 1);
  Handle<Object> flags = args.atOrUndefined(isolate, 2);

  // Classify the pattern first; a failing @@match lookup aborts the call.
  Maybe<bool> is_regexp_result = IsRegExp(isolate, pattern);
  if (is_regexp_result.IsNothing()) {
    DCHECK(isolate->has_pending_exception());
    return isolate->heap()->exception();
  }
  const bool pattern_is_regexp = is_regexp_result.FromJust();

  const bool called_without_new_target = new_target->IsUndefined(isolate);
  if (called_without_new_target) {
    // Without an explicit new.target the RegExp function itself is used.
    new_target = target;

    // ES6 section 21.2.3.1 step 3.b
    // A regexp-like pattern without flags is returned as-is when its
    // "constructor" property is this very function.
    const bool may_return_pattern =
        pattern_is_regexp && flags->IsUndefined(isolate);
    if (may_return_pattern) {
      Handle<Object> constructor;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, constructor,
          Object::GetProperty(pattern,
                              isolate->factory()->constructor_string()));
      if (constructor.is_identical_to(new_target)) return *pattern;
    }
  }

  if (pattern->IsJSRegExp()) {
    // Genuine JSRegExp: read source and flags straight from the object,
    // bypassing any user-visible properties.
    Handle<JSRegExp> source_regexp = Handle<JSRegExp>::cast(pattern);
    if (flags->IsUndefined(isolate)) {
      flags = PatternFlags(isolate, source_regexp);
    }
    pattern = handle(source_regexp->source(), isolate);
  } else if (pattern_is_regexp) {
    // Regexp-like object: go through its "source" and, if no flags were
    // passed, "flags" properties. "source" is read before "flags".
    Handle<Object> source_value;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, source_value,
        Object::GetProperty(pattern, isolate->factory()->source_string()));

    if (flags->IsUndefined(isolate)) {
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, flags,
          Object::GetProperty(pattern, isolate->factory()->flags_string()));
    }

    pattern = source_value;
  }

  // TODO(jgruber): Fast-path for target == new_target == unmodified JSRegExp.
  Handle<JSReceiver> new_target_receiver =
      Handle<JSReceiver>::cast(new_target);

  // Allocate the result object for {target} / {new_target} and initialize it.
  Handle<JSObject> allocated;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, allocated, JSObject::New(target, new_target_receiver));

  RETURN_RESULT_OR_FAILURE(
      isolate, RegExpInitialize(isolate, Handle<JSRegExp>::cast(allocated),
                                pattern, flags));
}
namespace { namespace {
compiler::Node* LoadLastIndex(CodeStubAssembler* a, compiler::Node* context, compiler::Node* LoadLastIndex(CodeStubAssembler* a, compiler::Node* context,

View File

@ -532,14 +532,15 @@ namespace internal {
CPP(ReflectSet) \ CPP(ReflectSet) \
CPP(ReflectSetPrototypeOf) \ CPP(ReflectSetPrototypeOf) \
\ \
/* RegExp */ \
CPP(RegExpConstructor) \
TFJ(RegExpPrototypeExec, 2) \
\
/* SharedArrayBuffer */ \ /* SharedArrayBuffer */ \
CPP(SharedArrayBufferPrototypeGetByteLength) \ CPP(SharedArrayBufferPrototypeGetByteLength) \
TFJ(AtomicsLoad, 3) \ TFJ(AtomicsLoad, 3) \
TFJ(AtomicsStore, 4) \ TFJ(AtomicsStore, 4) \
\ \
/* RegExp */ \
TFJ(RegExpPrototypeExec, 2) \
\
/* String */ \ /* String */ \
ASM(StringConstructor) \ ASM(StringConstructor) \
ASM(StringConstructor_ConstructStub) \ ASM(StringConstructor_ConstructStub) \

View File

@ -64,6 +64,7 @@
V(eval_string, "eval") \ V(eval_string, "eval") \
V(EvalError_string, "EvalError") \ V(EvalError_string, "EvalError") \
V(false_string, "false") \ V(false_string, "false") \
V(flags_string, "flags") \
V(float32x4_string, "float32x4") \ V(float32x4_string, "float32x4") \
V(Float32x4_string, "Float32x4") \ V(Float32x4_string, "Float32x4") \
V(for_api_string, "for_api") \ V(for_api_string, "for_api") \

View File

@ -81,37 +81,6 @@ function PatternFlags(pattern) {
} }
// ES#sec-regexp-pattern-flags
// RegExp ( pattern, flags )
//
// Script implementation of the RegExp constructor. Handles both plain calls
// (new.target is undefined) and construct calls. Returns either the original
// {pattern} (plain call on a regexp-like whose constructor is GlobalRegExp and
// no flags given) or the result of RegExpInitialize on a freshly allocated
// object.
function RegExpConstructor(pattern, flags) {
var newtarget = new.target;
// Classify the pattern up front; the result is reused by both branches below.
var pattern_is_regexp = IsRegExp(pattern);
if (IS_UNDEFINED(newtarget)) {
// Called as a function: fall back to GlobalRegExp as the new target.
newtarget = GlobalRegExp;
// ES6 section 21.2.3.1 step 3.b
if (pattern_is_regexp && IS_UNDEFINED(flags) &&
pattern.constructor === newtarget) {
return pattern;
}
}
if (IS_REGEXP(pattern)) {
// IS_REGEXP patterns: flags come from PatternFlags and the source from
// REGEXP_SOURCE rather than from the `flags` / `source` properties.
if (IS_UNDEFINED(flags)) flags = PatternFlags(pattern);
pattern = REGEXP_SOURCE(pattern);
} else if (pattern_is_regexp) {
// Regexp-like object: read `source` first, then `flags` if none were passed.
// The original object is kept in input_pattern because `pattern` is
// overwritten before `flags` is read.
var input_pattern = pattern;
pattern = pattern.source;
if (IS_UNDEFINED(flags)) flags = input_pattern.flags;
}
// Allocate the result via %_NewObject(GlobalRegExp, newtarget), then
// initialize it.
var object = %_NewObject(GlobalRegExp, newtarget);
return RegExpInitialize(object, pattern, flags);
}
// ES#sec-regexp.prototype.compile RegExp.prototype.compile (pattern, flags) // ES#sec-regexp.prototype.compile RegExp.prototype.compile (pattern, flags)
function RegExpCompileJS(pattern, flags) { function RegExpCompileJS(pattern, flags) {
if (!IS_REGEXP(this)) { if (!IS_REGEXP(this)) {
@ -973,9 +942,6 @@ function RegExpSpecies() {
// ------------------------------------------------------------------- // -------------------------------------------------------------------
%FunctionSetInstanceClassName(GlobalRegExp, 'RegExp');
%SetCode(GlobalRegExp, RegExpConstructor);
utils.InstallGetter(GlobalRegExp, speciesSymbol, RegExpSpecies); utils.InstallGetter(GlobalRegExp, speciesSymbol, RegExpSpecies);
utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [ utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [