[regexp] Store named captures on the regexp result
This implements storing named captures on the regexp result object.
For instance, /(?<a>.)/u.exec("b") will return a result such that:
result.group.a // "b"
The spec proposal is not yet final, so this may still change in the future.
BUG=v8:5437
Review-Url: https://codereview.chromium.org/2630233003
Cr-Original-Commit-Position: refs/heads/master@{#42532}
Committed: 70000946eb
Review-Url: https://codereview.chromium.org/2630233003
Cr-Commit-Position: refs/heads/master@{#42570}
This commit is contained in:
parent
24b9fc3a9a
commit
ee94fa11ed
@ -33,8 +33,9 @@ class RegExpBuiltinsAssembler : public CodeStubAssembler {
|
||||
void StoreLastIndex(Node* context, Node* regexp, Node* value,
|
||||
bool is_fastpath);
|
||||
|
||||
Node* ConstructNewResultFromMatchInfo(Node* context, Node* match_info,
|
||||
Node* string);
|
||||
Node* ConstructNewResultFromMatchInfo(Node* const context, Node* const regexp,
|
||||
Node* const match_info,
|
||||
Node* const string);
|
||||
|
||||
Node* RegExpPrototypeExecBodyWithoutResult(Node* const context,
|
||||
Node* const regexp,
|
||||
@ -141,10 +142,10 @@ void RegExpBuiltinsAssembler::StoreLastIndex(Node* context, Node* regexp,
|
||||
}
|
||||
}
|
||||
|
||||
Node* RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(Node* context,
|
||||
Node* match_info,
|
||||
Node* string) {
|
||||
Label out(this);
|
||||
Node* RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(
|
||||
Node* const context, Node* const regexp, Node* const match_info,
|
||||
Node* const string) {
|
||||
Label named_captures(this), out(this);
|
||||
|
||||
Node* const num_indices = SmiUntag(LoadFixedArrayElement(
|
||||
match_info, RegExpMatchInfo::kNumberOfCapturesIndex));
|
||||
@ -164,7 +165,8 @@ Node* RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(Node* context,
|
||||
|
||||
StoreFixedArrayElement(result_elements, 0, first, SKIP_WRITE_BARRIER);
|
||||
|
||||
GotoIf(SmiEqual(num_results, SmiConstant(Smi::FromInt(1))), &out);
|
||||
// If no captures exist we can skip named capture handling as well.
|
||||
GotoIf(SmiEqual(num_results, SmiConstant(1)), &out);
|
||||
|
||||
// Store all remaining captures.
|
||||
Node* const limit = IntPtrAdd(
|
||||
@ -187,7 +189,7 @@ Node* RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(Node* context,
|
||||
Node* const start = LoadFixedArrayElement(match_info, from_cursor);
|
||||
|
||||
Label next_iter(this);
|
||||
GotoIf(SmiEqual(start, SmiConstant(Smi::FromInt(-1))), &next_iter);
|
||||
GotoIf(SmiEqual(start, SmiConstant(-1)), &next_iter);
|
||||
|
||||
Node* const from_cursor_plus1 = IntPtrAdd(from_cursor, IntPtrConstant(1));
|
||||
Node* const end = LoadFixedArrayElement(match_info, from_cursor_plus1);
|
||||
@ -199,7 +201,85 @@ Node* RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(Node* context,
|
||||
Bind(&next_iter);
|
||||
var_from_cursor.Bind(IntPtrAdd(from_cursor, IntPtrConstant(2)));
|
||||
var_to_cursor.Bind(IntPtrAdd(to_cursor, IntPtrConstant(1)));
|
||||
Branch(UintPtrLessThan(var_from_cursor.value(), limit), &loop, &out);
|
||||
Branch(UintPtrLessThan(var_from_cursor.value(), limit), &loop,
|
||||
&named_captures);
|
||||
}
|
||||
|
||||
Bind(&named_captures);
|
||||
{
|
||||
// We reach this point only if captures exist, implying that this is an
|
||||
// IRREGEXP JSRegExp.
|
||||
|
||||
CSA_ASSERT(this, HasInstanceType(regexp, JS_REGEXP_TYPE));
|
||||
CSA_ASSERT(this, SmiGreaterThan(num_results, SmiConstant(1)));
|
||||
|
||||
// Preparations for named capture properties. Exit early if the result does
|
||||
// not have any named captures to minimize performance impact.
|
||||
|
||||
Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset);
|
||||
CSA_ASSERT(this, SmiEqual(LoadFixedArrayElement(data, JSRegExp::kTagIndex),
|
||||
SmiConstant(JSRegExp::IRREGEXP)));
|
||||
|
||||
// The names fixed array associates names at even indices with a capture
|
||||
// index at odd indices.
|
||||
Node* const names =
|
||||
LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureNameMapIndex);
|
||||
GotoIf(SmiEqual(names, SmiConstant(0)), &out);
|
||||
|
||||
// Allocate a new object to store the named capture properties.
|
||||
// TODO(jgruber): Could be optimized by adding the object map to the heap
|
||||
// root list.
|
||||
|
||||
Node* const native_context = LoadNativeContext(context);
|
||||
Node* const object_function =
|
||||
LoadContextElement(native_context, Context::OBJECT_FUNCTION_INDEX);
|
||||
Node* const object_function_map = LoadObjectField(
|
||||
object_function, JSFunction::kPrototypeOrInitialMapOffset);
|
||||
|
||||
Node* const group_object = AllocateJSObjectFromMap(object_function_map);
|
||||
|
||||
// Store it on the result as a 'group' property.
|
||||
|
||||
{
|
||||
Node* const name = HeapConstant(isolate()->factory()->group_string());
|
||||
Node* const language_mode = SmiConstant(Smi::FromInt(STRICT));
|
||||
CallRuntime(Runtime::kSetProperty, context, result, name, group_object,
|
||||
language_mode);
|
||||
}
|
||||
|
||||
// One or more named captures exist, add a property for each one.
|
||||
|
||||
CSA_ASSERT(this, HasInstanceType(names, FIXED_ARRAY_TYPE));
|
||||
Node* const names_length = LoadAndUntagFixedArrayBaseLength(names);
|
||||
CSA_ASSERT(this, IntPtrGreaterThan(names_length, IntPtrConstant(0)));
|
||||
|
||||
Variable var_i(this, MachineType::PointerRepresentation());
|
||||
var_i.Bind(IntPtrConstant(0));
|
||||
|
||||
Variable* vars[] = {&var_i};
|
||||
const int vars_count = sizeof(vars) / sizeof(vars[0]);
|
||||
Label loop(this, vars_count, vars);
|
||||
|
||||
Goto(&loop);
|
||||
Bind(&loop);
|
||||
{
|
||||
Node* const i = var_i.value();
|
||||
Node* const i_plus_1 = IntPtrAdd(i, IntPtrConstant(1));
|
||||
Node* const i_plus_2 = IntPtrAdd(i_plus_1, IntPtrConstant(1));
|
||||
|
||||
Node* const name = LoadFixedArrayElement(names, i);
|
||||
Node* const index = LoadFixedArrayElement(names, i_plus_1);
|
||||
Node* const capture =
|
||||
LoadFixedArrayElement(result_elements, SmiUntag(index));
|
||||
|
||||
Node* const language_mode = SmiConstant(Smi::FromInt(STRICT));
|
||||
CallRuntime(Runtime::kSetProperty, context, group_object, name, capture,
|
||||
language_mode);
|
||||
|
||||
var_i.Bind(i_plus_2);
|
||||
Branch(IntPtrGreaterThanOrEqual(var_i.value(), names_length), &out,
|
||||
&loop);
|
||||
}
|
||||
}
|
||||
|
||||
Bind(&out);
|
||||
@ -352,7 +432,7 @@ Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBody(Node* const context,
|
||||
{
|
||||
Node* const match_indices = indices_or_null;
|
||||
Node* const result =
|
||||
ConstructNewResultFromMatchInfo(context, match_indices, string);
|
||||
ConstructNewResultFromMatchInfo(context, regexp, match_indices, string);
|
||||
var_result.Bind(result);
|
||||
Goto(&out);
|
||||
}
|
||||
@ -2522,7 +2602,7 @@ TF_BUILTIN(RegExpInternalMatch, RegExpBuiltinsAssembler) {
|
||||
Bind(&if_matched);
|
||||
{
|
||||
Node* result =
|
||||
ConstructNewResultFromMatchInfo(context, match_indices, string);
|
||||
ConstructNewResultFromMatchInfo(context, regexp, match_indices, string);
|
||||
Return(result);
|
||||
}
|
||||
}
|
||||
|
@ -88,6 +88,7 @@
|
||||
V(get_string, "get") \
|
||||
V(get_space_string, "get ") \
|
||||
V(global_string, "global") \
|
||||
V(group_string, "group") \
|
||||
V(has_string, "has") \
|
||||
V(hour_string, "hour") \
|
||||
V(ignoreCase_string, "ignoreCase") \
|
||||
|
@ -770,6 +770,15 @@ bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,
|
||||
DCHECK(0 < index && index <= captures_started_);
|
||||
DCHECK_NOT_NULL(name);
|
||||
|
||||
// Disallow captures named '__proto__'.
|
||||
static const char16_t proto_string[] = u"__proto__";
|
||||
if (name->size() == arraysize(proto_string) - 1) {
|
||||
if (std::equal(name->begin(), name->end(), &proto_string[0])) {
|
||||
ReportError(CStrVector("Illegal capture group name"));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (named_captures_ == nullptr) {
|
||||
named_captures_ = new (zone()) ZoneList<RegExpCapture*>(1, zone());
|
||||
} else {
|
||||
|
@ -18,15 +18,15 @@ assertThrows("/(?<ab>a)\\k<a>/u"); // Invalid reference.
|
||||
assertThrows("/\\k<a>(?<ab>a)/u"); // Invalid reference.
|
||||
|
||||
// Fallback behavior in non-unicode mode.
|
||||
assertThrows("/(?<>a)/");
|
||||
assertThrows("/(?<aa)/");
|
||||
assertThrows("/(?<42a>a)/");
|
||||
assertThrows("/(?<:a>a)/");
|
||||
assertThrows("/(?<a:>a)/");
|
||||
assertThrows("/(?<a>a)(?<a>a)/");
|
||||
assertThrows("/(?<a>a)(?<b>b)(?<a>a)/");
|
||||
assertThrows("/(?<a>a)\\k<ab>/");
|
||||
assertThrows("/(?<ab>a)\\k<a>/");
|
||||
assertThrows("/(?<>a)/", SyntaxError);
|
||||
assertThrows("/(?<aa)/", SyntaxError);
|
||||
assertThrows("/(?<42a>a)/", SyntaxError);
|
||||
assertThrows("/(?<:a>a)/", SyntaxError);
|
||||
assertThrows("/(?<a:>a)/", SyntaxError);
|
||||
assertThrows("/(?<a>a)(?<a>a)/", SyntaxError);
|
||||
assertThrows("/(?<a>a)(?<b>b)(?<a>a)/", SyntaxError);
|
||||
assertThrows("/(?<a>a)\\k<ab>/", SyntaxError);
|
||||
assertThrows("/(?<ab>a)\\k<a>/", SyntaxError);
|
||||
|
||||
assertEquals(["k<a>"], "xxxk<a>xxx".match(/\k<a>/));
|
||||
assertEquals(["k<a"], "xxxk<a>xxx".match(/\k<a/));
|
||||
@ -74,3 +74,29 @@ assertEquals(["bab", "b"], "bab".match(/(?<a>\k<a>\w)../u));
|
||||
// Reference before group.
|
||||
assertEquals(["bab", "b"], "bab".match(/\k<a>(?<a>b)\w\k<a>/u));
|
||||
assertEquals(["bab", "b", "a"], "bab".match(/(?<b>b)\k<a>(?<a>a)\k<b>/u));
|
||||
|
||||
// Reference properties.
|
||||
assertEquals("a", /(?<a>a)(?<b>b)\k<a>/u.exec("aba").group.a);
|
||||
assertEquals("b", /(?<a>a)(?<b>b)\k<a>/u.exec("aba").group.b);
|
||||
assertEquals(undefined, /(?<a>a)(?<b>b)\k<a>/u.exec("aba").group.c);
|
||||
assertFalse(/(?<a>a)(?<b>b)\k<a>/u.exec("aba").group.hasOwnProperty("c"));
|
||||
assertEquals(undefined, /(?<a>a)(?<b>b)\k<a>|(?<c>c)/u.exec("aba").group.c);
|
||||
assertTrue(/(?<a>a)(?<b>b)\k<a>|(?<c>c)/u
|
||||
.exec("aba").group.hasOwnProperty("c"));
|
||||
|
||||
// Unicode names.
|
||||
assertEquals("a", /(?<π>a)/u.exec("bab").group.π);
|
||||
assertEquals("a", /(?<\u{03C0}>a)/u.exec("bab").group.\u03C0);
|
||||
assertEquals("a", /(?<$>a)/u.exec("bab").group.$);
|
||||
assertEquals("a", /(?<_>a)/u.exec("bab").group._);
|
||||
assertEquals("a", /(?<$𐒤>a)/u.exec("bab").group.$𐒤);
|
||||
assertEquals("a", /(?<_\u200C>a)/u.exec("bab").group._\u200C);
|
||||
assertEquals("a", /(?<_\u200D>a)/u.exec("bab").group._\u200D);
|
||||
assertEquals("a", /(?<ಠ_ಠ>a)/u.exec("bab").group.ಠ_ಠ);
|
||||
assertThrows('/(?<❤>a)/u', SyntaxError);
|
||||
assertThrows('/(?<𐒤>a)/u', SyntaxError); // ID_Continue but not ID_Start.
|
||||
|
||||
// Capture name conflicts.
|
||||
assertThrows(() => /(?<__proto__>a)/u, SyntaxError);
|
||||
assertEquals("a", /(?<__proto_>a)/u.exec("a").group.__proto_);
|
||||
assertEquals("a", /(?<__proto___>a)/u.exec("a").group.__proto___);
|
||||
|
@ -156,6 +156,9 @@
|
||||
# desugaring regexp property class relies on ICU.
|
||||
'harmony/regexp-property-*': [PASS, ['no_i18n == True', FAIL]],
|
||||
|
||||
# The noi18n build cannot parse characters in the supplementary planes.
|
||||
'harmony/regexp-named-captures': [PASS, ['no_i18n == True', FAIL]],
|
||||
|
||||
# Allocates a large array buffer, which TSAN sometimes cannot handle.
|
||||
'regress/regress-599717': [PASS, ['tsan', SKIP]],
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user