[regexp] Named capture support for callable replacements

This implements support for named captures in
RegExp.prototype[@@replace] for when the replaceValue is callable.

In that case, the result.groups object is passed to the replacer
function as the last argument.

BUG=v8:5437

Review-Url: https://codereview.chromium.org/2764343004
Cr-Commit-Position: refs/heads/master@{#44142}
This commit is contained in:
jgruber 2017-03-27 04:18:31 -07:00 committed by Commit bot
parent 980448dfcd
commit 80879b8c26
4 changed files with 231 additions and 27 deletions

View File

@ -6954,6 +6954,13 @@ String* JSRegExp::Pattern() {
return pattern;
}
Object* JSRegExp::CaptureNameMap() {
DCHECK(this->data()->IsFixedArray());
DCHECK_EQ(TypeTag(), IRREGEXP);
Object* value = DataAt(kIrregexpCaptureNameMapIndex);
DCHECK_NE(value, Smi::FromInt(JSRegExp::kUninitializedValue));
return value;
}
Object* JSRegExp::DataAt(int index) {
DCHECK(TypeTag() != NOT_COMPILED);

View File

@ -8272,6 +8272,7 @@ class JSRegExp: public JSObject {
inline int CaptureCount();
inline Flags GetFlags();
inline String* Pattern();
inline Object* CaptureNameMap();
inline Object* DataAt(int index);
// Set implementation data after the object has been prepared.
inline void SetDataAt(int index, Object* value);

View File

@ -4,6 +4,8 @@
#include "src/runtime/runtime-utils.h"
#include <functional>
#include "src/arguments.h"
#include "src/conversions-inl.h"
#include "src/isolate-inl.h"
@ -885,7 +887,7 @@ class VectorBackedMatch : public String::Match {
public:
VectorBackedMatch(Isolate* isolate, Handle<String> subject,
Handle<String> match, int match_position,
ZoneVector<Handle<Object>>* captures)
std::vector<Handle<Object>>* captures)
: isolate_(isolate),
match_(match),
match_position_(match_position),
@ -924,9 +926,34 @@ class VectorBackedMatch : public String::Match {
Handle<String> subject_;
Handle<String> match_;
const int match_position_;
ZoneVector<Handle<Object>>* captures_;
std::vector<Handle<Object>>* captures_;
};
// Create the groups object (see also the RegExp result creation in
// RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
Handle<JSObject> ConstructNamedCaptureGroupsObject(
Isolate* isolate, Handle<FixedArray> capture_map,
std::function<Object*(int)> f_get_capture) {
Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
const int capture_count = capture_map->length() >> 1;
for (int i = 0; i < capture_count; i++) {
const int name_ix = i * 2;
const int index_ix = i * 2 + 1;
Handle<String> capture_name(String::cast(capture_map->get(name_ix)));
const int capture_ix = Smi::cast(capture_map->get(index_ix))->value();
DCHECK(1 <= capture_ix && capture_ix <= capture_count);
Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
DCHECK(capture_value->IsString());
JSObject::AddProperty(groups, capture_name, capture_value, NONE);
}
return groups;
}
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info. See comment on that function.
template <bool has_capture>
@ -934,8 +961,9 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
Handle<JSRegExp> regexp,
Handle<RegExpMatchInfo> last_match_array,
Handle<JSArray> result_array) {
DCHECK(subject->IsFlat());
DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
DCHECK(subject->IsFlat());
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
@ -1013,11 +1041,19 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
if (has_capture) {
// Arguments array to replace function is match, captures, index and
// subject, i.e., 3 + capture count in total.
Handle<FixedArray> elements =
isolate->factory()->NewFixedArray(3 + capture_count);
// subject, i.e., 3 + capture count in total. If the RegExp contains
// named captures, they are also passed as the last argument.
elements->set(0, *match);
Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
const bool has_named_captures = maybe_capture_map->IsFixedArray();
const int argc =
has_named_captures ? 4 + capture_count : 3 + capture_count;
Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
int cursor = 0;
elements->set(cursor++, *match);
for (int i = 1; i <= capture_count; i++) {
int start = current_match[i * 2];
if (start >= 0) {
@ -1025,14 +1061,25 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
DCHECK(start <= end);
Handle<String> substring =
isolate->factory()->NewSubString(subject, start, end);
elements->set(i, *substring);
elements->set(cursor++, *substring);
} else {
DCHECK(current_match[i * 2 + 1] < 0);
elements->set(i, isolate->heap()->undefined_value());
elements->set(cursor++, isolate->heap()->undefined_value());
}
}
elements->set(capture_count + 1, Smi::FromInt(match_start));
elements->set(capture_count + 2, *subject);
elements->set(cursor++, Smi::FromInt(match_start));
elements->set(cursor++, *subject);
if (has_named_captures) {
Handle<FixedArray> capture_map =
Handle<FixedArray>::cast(maybe_capture_map);
Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
isolate, capture_map, [=](int ix) { return elements->get(ix); });
elements->set(cursor++, *groups);
}
DCHECK_EQ(cursor, argc);
builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
} else {
builder.Add(*match);
@ -1084,15 +1131,16 @@ MUST_USE_RESULT MaybeHandle<String> RegExpReplace(Isolate* isolate,
Handle<JSRegExp> regexp,
Handle<String> string,
Handle<Object> replace_obj) {
// Functional fast-paths are dispatched directly by replace builtin.
DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
DCHECK(!replace_obj->IsCallable());
Factory* factory = isolate->factory();
const int flags = regexp->GetFlags();
const bool global = (flags & JSRegExp::kGlobal) != 0;
const bool sticky = (flags & JSRegExp::kSticky) != 0;
// Functional fast-paths are dispatched directly by replace builtin.
DCHECK(!replace_obj->IsCallable());
Handle<String> replace;
ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
Object::ToString(isolate, replace_obj), String);
@ -1254,26 +1302,49 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
builder.AppendString(factory->NewSubString(subject, 0, index));
// Compute the parameter list consisting of the match, captures, index,
// and subject for the replace function invocation.
// and subject for the replace function invocation. If the RegExp contains
// named captures, they are also passed as the last argument.
// The number of captures plus one for the match.
const int m = match_indices->NumberOfCaptureRegisters() / 2;
const int argc = m + 2;
bool has_named_captures = false;
Handle<FixedArray> capture_map;
if (m > 1) {
// The existence of capture groups implies IRREGEXP kind.
DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
Object* maybe_capture_map = regexp->CaptureNameMap();
if (maybe_capture_map->IsFixedArray()) {
has_named_captures = true;
capture_map = handle(FixedArray::cast(maybe_capture_map));
}
}
const int argc = has_named_captures ? m + 3 : m + 2;
ScopedVector<Handle<Object>> argv(argc);
int cursor = 0;
for (int j = 0; j < m; j++) {
bool ok;
Handle<String> capture =
RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
if (ok) {
argv[j] = capture;
argv[cursor++] = capture;
} else {
argv[j] = factory->undefined_value();
argv[cursor++] = factory->undefined_value();
}
}
argv[argc - 2] = handle(Smi::FromInt(index), isolate);
argv[argc - 1] = subject;
argv[cursor++] = handle(Smi::FromInt(index), isolate);
argv[cursor++] = subject;
if (has_named_captures) {
argv[cursor++] = ConstructNamedCaptureGroupsObject(
isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
}
DCHECK_EQ(cursor, argc);
Handle<Object> replacement_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
@ -1578,14 +1649,14 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
isolate, position_obj,
Object::GetProperty(result, factory->index_string()));
// TODO(jgruber): Extract and correct error handling. Since we can go up to
// 2^53 - 1 (at least for ToLength), we might actually need uint64_t here?
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, position_obj, Object::ToInteger(isolate, position_obj));
const uint32_t position =
std::min(PositiveNumberToUint32(*position_obj), length);
ZoneVector<Handle<Object>> captures(&zone);
std::vector<Handle<Object>> captures;
captures.reserve(captures_length);
for (int n = 0; n < captures_length; n++) {
Handle<Object> capture;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
@ -1598,17 +1669,29 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
captures.push_back(capture);
}
Handle<Object> groups_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, groups_obj,
Object::GetProperty(result, factory->groups_string()));
const bool has_named_captures = !groups_obj->IsUndefined(isolate);
Handle<String> replacement;
if (functional_replace) {
const int argc = captures_length + 2;
const int argc =
has_named_captures ? captures_length + 3 : captures_length + 2;
ScopedVector<Handle<Object>> argv(argc);
int cursor = 0;
for (int j = 0; j < captures_length; j++) {
argv[j] = captures[j];
argv[cursor++] = captures[j];
}
argv[captures_length] = handle(Smi::FromInt(position), isolate);
argv[captures_length + 1] = string;
argv[cursor++] = handle(Smi::FromInt(position), isolate);
argv[cursor++] = string;
if (has_named_captures) argv[cursor++] = groups_obj;
DCHECK_EQ(cursor, argc);
Handle<Object> replacement_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
@ -1619,6 +1702,7 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, replacement, Object::ToString(isolate, replacement_obj));
} else {
DCHECK(!functional_replace);
VectorBackedMatch m(isolate, string, match, position, &captures);
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, replacement, String::GetSubstitution(isolate, &m, replace));

View File

@ -96,3 +96,115 @@ assertThrows('/(?<𐒤>a)/u', SyntaxError); // ID_Continue but not ID_Start.
// The '__proto__' property on the groups object.
assertEquals(undefined, /(?<a>.)/u.exec("a").groups.__proto__);
assertEquals("a", /(?<__proto__>a)/u.exec("a").groups.__proto__);
// @@replace with a callable replacement argument (no named captures).
{
let result = "abcd".replace(/(.)(.)/u, (match, fst, snd, offset, str) => {
assertEquals("ab", match);
assertEquals("a", fst);
assertEquals("b", snd);
assertEquals(0, offset);
assertEquals("abcd", str);
return `${snd}${fst}`;
});
assertEquals("bacd", result);
}
// @@replace with a callable replacement argument (global, named captures).
{
let i = 0;
let result = "abcd".replace(/(?<fst>.)(?<snd>.)/gu,
(match, fst, snd, offset, str, groups) => {
if (i == 0) {
assertEquals("ab", match);
assertEquals("a", groups.fst);
assertEquals("b", groups.snd);
assertEquals("a", fst);
assertEquals("b", snd);
assertEquals(0, offset);
assertEquals("abcd", str);
} else if (i == 1) {
assertEquals("cd", match);
assertEquals("c", groups.fst);
assertEquals("d", groups.snd);
assertEquals("c", fst);
assertEquals("d", snd);
assertEquals(2, offset);
assertEquals("abcd", str);
} else {
assertUnreachable();
}
i++;
return `${groups.snd}${groups.fst}`;
});
assertEquals("badc", result);
}
// @@replace with a callable replacement argument (non-global, named captures).
{
let result = "abcd".replace(/(?<fst>.)(?<snd>.)/u,
(match, fst, snd, offset, str, groups) => {
assertEquals("ab", match);
assertEquals("a", groups.fst);
assertEquals("b", groups.snd);
assertEquals("a", fst);
assertEquals("b", snd);
assertEquals(0, offset);
assertEquals("abcd", str);
return `${groups.snd}${groups.fst}`;
});
assertEquals("bacd", result);
}
function toSlowMode(re) {
re.exec = (str) => RegExp.prototype.exec.call(re, str);
return re;
}
// @@replace with a callable replacement argument (slow, global,
// named captures).
{
let i = 0;
let re = toSlowMode(/(?<fst>.)(?<snd>.)/gu);
let result = "abcd".replace(re, (match, fst, snd, offset, str, groups) => {
if (i == 0) {
assertEquals("ab", match);
assertEquals("a", groups.fst);
assertEquals("b", groups.snd);
assertEquals("a", fst);
assertEquals("b", snd);
assertEquals(0, offset);
assertEquals("abcd", str);
} else if (i == 1) {
assertEquals("cd", match);
assertEquals("c", groups.fst);
assertEquals("d", groups.snd);
assertEquals("c", fst);
assertEquals("d", snd);
assertEquals(2, offset);
assertEquals("abcd", str);
} else {
assertUnreachable();
}
i++;
return `${groups.snd}${groups.fst}`;
});
assertEquals("badc", result);
}
// @@replace with a callable replacement argument (slow, non-global,
// named captures).
{
let re = toSlowMode(/(?<fst>.)(?<snd>.)/u);
let result = "abcd".replace(re, (match, fst, snd, offset, str, groups) => {
assertEquals("ab", match);
assertEquals("a", groups.fst);
assertEquals("b", groups.snd);
assertEquals("a", fst);
assertEquals("b", snd);
assertEquals(0, offset);
assertEquals("abcd", str);
return `${groups.snd}${groups.fst}`;
});
assertEquals("bacd", result);
}