[regexp] Port RegExpExec

BUG=v8:5339

Review-Url: https://codereview.chromium.org/2307853002
Cr-Commit-Position: refs/heads/master@{#39150}
This commit is contained in:
jgruber 2016-09-05 00:17:10 -07:00 committed by Commit bot
parent 99e91aaeac
commit c79e163ba8
6 changed files with 147 additions and 56 deletions

View File

@ -1642,6 +1642,9 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
SimpleInstallFunction(proto, factory->toString_string(),
Builtins::kRegExpPrototypeToString, 0, false,
DONT_ENUM);
SimpleInstallFunction(proto, factory->exec_string(),
Builtins::kRegExpPrototypeExec, 1, false,
DONT_ENUM);
}
{
@ -1721,8 +1724,6 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
#undef INSTALL_CAPTURE_GETTER
}
// TODO(jgruber): shared->set_force_inline on getters.
DCHECK(regexp_fun->has_initial_map());
Handle<Map> initial_map(regexp_fun->initial_map());
@ -1733,7 +1734,7 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
// ECMA-262, section 15.10.7.5.
PropertyAttributes writable =
static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);
DataDescriptor field(factory->last_index_string(),
DataDescriptor field(factory->lastIndex_string(),
JSRegExp::kLastIndexFieldIndex, writable,
Representation::Tagged());
initial_map->AppendDescriptor(&field);

View File

@ -5,6 +5,7 @@
#include "src/builtins/builtins.h"
#include "src/builtins/builtins-utils.h"
#include "src/regexp/jsregexp.h"
#include "src/string-builder.h"
namespace v8 {
@ -321,13 +322,16 @@ const int kLastSubjectIndex = 1;
const int kLastInputIndex = 2;
const int kFirstCaptureIndex = 3;
Handle<Object> GetLastMatchField(Isolate* isolate, int index) {
// Returns the value stored on the global RegExp function under the
// regexp_last_match_info symbol, viewed as a JSObject.
//
// The lookup goes through JSReceiver::GetDataProperty and the result is cast
// without a runtime type check here, so callers rely on that property always
// holding a JSObject.
Handle<JSObject> GetLastMatchInfo(Isolate* isolate) {
  Handle<JSFunction> global_regexp = isolate->regexp_function();
  Handle<Object> last_match_info_obj = JSReceiver::GetDataProperty(
      global_regexp, isolate->factory()->regexp_last_match_info_symbol());
  return Handle<JSObject>::cast(last_match_info_obj);
}
// Reads element |index| of the last-match-info object obtained from
// GetLastMatchInfo(). The element load is expected to succeed:
// ToHandleChecked() is used on the result rather than propagating a failure.
Handle<Object> GetLastMatchField(Isolate* isolate, int index) {
  MaybeHandle<Object> field =
      JSReceiver::GetElement(isolate, GetLastMatchInfo(isolate), index);
  return field.ToHandleChecked();
}
@ -459,5 +463,134 @@ BUILTIN(RegExpPrototypeRightContextGetter) {
return *isolate->factory()->NewSubString(last_subject, start_index, len);
}
namespace {
// Writes |value| to the "lastIndex" property of |regexp| via the generic
// Object::SetProperty path and returns that call's result, so callers can
// propagate a pending exception with RETURN_ON_EXCEPTION.
//
// The store is performed in strict mode: the RegExpBuiltinExec algorithm
// specifies Set(R, "lastIndex", value, true), i.e. a store that cannot be
// performed (for example because the property is not writable) must throw
// instead of being silently dropped.
MaybeHandle<Object> SetLastIndex(Isolate* isolate, Handle<JSRegExp> regexp,
                                 int value) {
  return Object::SetProperty(regexp, isolate->factory()->lastIndex_string(),
                             handle(Smi::FromInt(value), isolate), STRICT);
}
// Allocates the array object returned by a successful exec().
//
// The object is created from the native context's regexp_result_map, gets a
// fresh FixedArray of |size| elements as backing store, and has its length set
// to |size|. |index| and |input| are stored into the in-object slots
// JSRegExpResult::kIndexIndex and JSRegExpResult::kInputIndex respectively.
// The elements themselves are left for the caller to fill in.
Handle<JSArray> ConstructResult(Isolate* isolate, int size, int index,
                                Handle<String> input) {
  Handle<FixedArray> backing_store = isolate->factory()->NewFixedArray(size);

  Handle<Map> result_map(isolate->native_context()->regexp_result_map());
  Handle<JSArray> result = Handle<JSArray>::cast(
      isolate->factory()->NewJSObjectFromMap(result_map, NOT_TENURED));

  result->set_elements(*backing_store);
  result->set_length(Smi::FromInt(size));

  // The in-object properties follow the array length in the object layout.
  result->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
  result->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex,
                                Smi::FromInt(index));

  return result;
}
// Builds the exec() result array for |string| from the capture positions
// reported by GetLastMatchNumberOfCaptures() / GetLastMatchCapture().
//
// Element 0 is the substring of the overall match; element i (i >= 1) is the
// substring of capture group i. A group whose start position is -1 is skipped,
// leaving its element as NewFixedArray initialized it (presumably undefined --
// confirm against Factory::NewFixedArray).
//
// NOTE: |match_info| is not read here; the positions are fetched through the
// GetLastMatch* helpers instead.
Handle<Object> ReturnNewResultFromMatchInfo(Isolate* isolate,
                                            Handle<Object> match_info,
                                            Handle<String> string) {
  // Capture positions come in (start, end) pairs, hence the even count.
  const int capture_register_count = GetLastMatchNumberOfCaptures(isolate);
  DCHECK_EQ(0, capture_register_count % 2);
  const int result_count = capture_register_count / 2;

  const int match_start = GetLastMatchCapture(isolate, 0);
  const int match_end = GetLastMatchCapture(isolate, 1);

  // The whole-match substring is created before the result array exists so
  // that storing it does not need a write barrier.
  Handle<String> whole_match =
      isolate->factory()->NewSubString(string, match_start, match_end);

  Handle<JSArray> result =
      ConstructResult(isolate, result_count, match_start, string);
  Handle<FixedArray> slots(FixedArray::cast(result->elements()), isolate);
  slots->set(0, *whole_match);

  for (int group = 1; group < result_count; ++group) {
    const int group_start = GetLastMatchCapture(isolate, 2 * group);
    if (group_start == -1) continue;

    const int group_end = GetLastMatchCapture(isolate, 2 * group + 1);
    Handle<String> group_text =
        isolate->factory()->NewSubString(string, group_start, group_end);
    slots->set(group, *group_text);
  }

  return result;
}
// Core of RegExp.prototype.exec for a receiver already known to be a JSRegExp
// and a subject already converted to a String.
//
// Returns:
//  - the null value if the regexp is global or sticky and its lastIndex lies
//    beyond the end of |string|, or if the match attempt fails (lastIndex is
//    reset to 0 in both cases);
//  - otherwise the result array built by ReturnNewResultFromMatchInfo; for
//    global/sticky regexps lastIndex is first updated from capture register 1
//    of the last match info.
// Exceptions raised while reading, converting or writing lastIndex, or by
// RegExpImpl::Exec, are propagated through the *_ON_EXCEPTION macros.
MaybeHandle<Object> RegExpExecJS(Isolate* isolate, Handle<JSRegExp> regexp,
                                 Handle<String> string) {
  Handle<Object> last_index_obj;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, last_index_obj,
      Object::GetProperty(regexp, isolate->factory()->lastIndex_string()),
      Object);

  // Conversion is required by the ES2015 specification (RegExpBuiltinExec
  // algorithm, step 4) even if the value is discarded for non-global RegExps.
  ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
                             Object::ToLength(isolate, last_index_obj), Object);

  // ToLength produces an integer in [0, 2^53 - 1], which does not necessarily
  // fit in a Smi (e.g. lastIndex = Infinity). Read it as a double and only
  // narrow it to int once it is known to be within the subject string.
  const double last_index_num = last_index_obj->Number();

  const int flags = regexp->GetFlags();

  const bool global = (flags & JSRegExp::kGlobal) != 0;
  const bool sticky = (flags & JSRegExp::kSticky) != 0;
  const bool update_last_index = (global || sticky);

  // Non-global, non-sticky regexps always start matching at position 0.
  int last_index = 0;
  if (update_last_index) {
    if (last_index_num > string->length()) {
      RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, 0), Object);
      return isolate->factory()->null_value();
    }
    // 0 <= last_index_num <= string->length(), so this conversion is exact.
    last_index = static_cast<int>(last_index_num);
  }

  Handle<JSObject> last_match_info = GetLastMatchInfo(isolate);

  // matchIndices is either null or the RegExpLastMatchInfo array.
  // TODO(littledan): Whether a RegExp is sticky is compiled into the RegExp
  // itself, but ES2015 allows monkey-patching this property to differ from
  // the internal flags. If it differs, recompile a different RegExp?
  // TODO(jgruber): The result of Exec does not need to be a JSArray.
  Handle<Object> match_indices;
  ASSIGN_RETURN_ON_EXCEPTION(
      isolate, match_indices,
      RegExpImpl::Exec(regexp, string, last_index, last_match_info), Object);

  if (match_indices->IsNull(isolate)) {
    RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, 0), Object);
    return isolate->factory()->null_value();
  }

  // Successful match.
  if (update_last_index) {
    last_index = GetLastMatchCapture(isolate, 1);
    RETURN_ON_EXCEPTION(isolate, SetLastIndex(isolate, regexp, last_index),
                        Object);
  }

  return ReturnNewResultFromMatchInfo(isolate, match_indices, string);
}
} // namespace
// ES#sec-regexp.prototype.exec
// RegExp.prototype.exec ( string )
//
// Builtin entry point: validates the receiver via CHECK_RECEIVER (expected to
// be a JSRegExp), converts the first argument (or undefined when absent) to a
// String, and delegates the actual matching to RegExpExecJS.
BUILTIN(RegExpPrototypeExec) {
  HandleScope scope(isolate);
  CHECK_RECEIVER(JSRegExp, regexp, "RegExp.prototype.exec");

  Handle<String> subject;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, subject,
      Object::ToString(isolate, args.atOrUndefined(isolate, 1)));

  RETURN_RESULT_OR_FAILURE(isolate, RegExpExecJS(isolate, regexp, subject));
}
} // namespace internal
} // namespace v8

View File

@ -509,6 +509,7 @@ namespace internal {
/* RegExp */ \
CPP(RegExpConstructor) \
CPP(RegExpPrototypeCompile) \
CPP(RegExpPrototypeExec) \
CPP(RegExpPrototypeToString) \
CPP(RegExpPrototypeCapture1Getter) \
CPP(RegExpPrototypeCapture2Getter) \

View File

@ -60,6 +60,7 @@
V(Error_string, "Error") \
V(eval_string, "eval") \
V(EvalError_string, "EvalError") \
V(exec_string, "exec") \
V(false_string, "false") \
V(flags_string, "flags") \
V(float32x4_string, "float32x4") \
@ -91,7 +92,7 @@
V(isView_string, "isView") \
V(KeyedLoadMonomorphic_string, "KeyedLoadMonomorphic") \
V(KeyedStoreMonomorphic_string, "KeyedStoreMonomorphic") \
V(last_index_string, "lastIndex") \
V(lastIndex_string, "lastIndex") \
V(length_string, "length") \
V(line_string, "line") \
V(Map_string, "Map") \

View File

@ -24,6 +24,7 @@ var searchSymbol = utils.ImportNow("search_symbol");
var speciesSymbol = utils.ImportNow("species_symbol");
var splitSymbol = utils.ImportNow("split_symbol");
var SpeciesConstructor;
var RegExpSubclassExecJS;
utils.Import(function(from) {
ExpandReplacement = from.ExpandReplacement;
@ -115,53 +116,6 @@ function RegExpExecNoTests(regexp, string, start) {
}
// ES#sec-regexp.prototype.exec
// RegExp.prototype.exec ( string )
//
// Matches the receiver RegExp against `string` (converted with TO_STRING).
// Throws a kIncompatibleMethodReceiver type error when `this` is not a RegExp.
// Returns null when there is no match, or when the RegExp is global/sticky
// and its lastIndex is greater than the string length; in both cases
// lastIndex is reset to 0. On success the result is produced by the
// RETURN_NEW_RESULT_FROM_MATCH_INFO macro.
function RegExpSubclassExecJS(string) {
// Reject receivers that are not RegExp objects.
if (!IS_REGEXP(this)) {
throw %make_type_error(kIncompatibleMethodReceiver,
'RegExp.prototype.exec', this);
}
string = TO_STRING(string);
var lastIndex = this.lastIndex;
// Conversion is required by the ES2015 specification (RegExpBuiltinExec
// algorithm, step 4) even if the value is discarded for non-global RegExps.
var i = TO_LENGTH(lastIndex);
var global = TO_BOOLEAN(REGEXP_GLOBAL(this));
var sticky = TO_BOOLEAN(REGEXP_STICKY(this));
// lastIndex is only honored and updated for global or sticky RegExps.
var updateLastIndex = global || sticky;
if (updateLastIndex) {
// A start position past the end of the string cannot match.
if (i > string.length) {
this.lastIndex = 0;
return null;
}
} else {
// Otherwise the search always starts at the beginning of the string.
i = 0;
}
// matchIndices is either null or the RegExpLastMatchInfo array.
// TODO(littledan): Whether a RegExp is sticky is compiled into the RegExp
// itself, but ES2015 allows monkey-patching this property to differ from
// the internal flags. If it differs, recompile a different RegExp?
var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);
// No match: reset lastIndex (regardless of the global/sticky flags).
if (IS_NULL(matchIndices)) {
this.lastIndex = 0;
return null;
}
// Successful match.
if (updateLastIndex) {
// CAPTURE1 is presumably the end position of the overall match -- confirm
// against the macro definition.
this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
}
RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
}
%FunctionRemovePrototype(RegExpSubclassExecJS);
// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
// Also takes an optional exec method in case our caller
// has already fetched exec.
@ -774,7 +728,6 @@ function RegExpSubclassSearch(string) {
// -------------------------------------------------------------------
utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
"exec", RegExpSubclassExecJS,
"test", RegExpSubclassTest,
matchSymbol, RegExpSubclassMatch,
replaceSymbol, RegExpSubclassReplace,
@ -785,6 +738,8 @@ utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
// Temporary until all RegExpLastMatchInfo accesses are ported to C++.
SET_PRIVATE(GlobalRegExp, lastMatchInfoSymbol, RegExpLastMatchInfo);
var RegExpSubclassExecJS = GlobalRegExp.prototype.exec;
// -------------------------------------------------------------------
// Internal

View File

@ -16392,7 +16392,7 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
// Map has changed, so use generic, but slower, method.
RETURN_ON_EXCEPTION(
isolate,
JSReceiver::SetProperty(regexp, factory->last_index_string(),
JSReceiver::SetProperty(regexp, factory->lastIndex_string(),
Handle<Smi>(Smi::FromInt(0), isolate), STRICT),
JSRegExp);
}