[regexp] Support string disjunctions in unicode set mode

Add support for string disjunctions within regular expression character
classes in unicode sets mode (/v).

Bug: v8:11935
Change-Id: Ida607123ced11c4dc3dfc687996f6abffeb6eeff
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4051243
Commit-Queue: Patrick Thier <pthier@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84480}
This commit is contained in:
pthier 2022-11-25 10:05:04 +01:00 committed by V8 LUCI CQ
parent 1211605a39
commit 5d7782f694
8 changed files with 587 additions and 274 deletions

View File

@ -69,6 +69,11 @@ class CanBeHandledVisitor final : private RegExpVisitor {
return nullptr;
}
// Stores in |result_| whether the operand is free of strings: an operand
// holding at least one string yields false, a ranges-only operand yields true.
void* VisitClassSetOperand(RegExpClassSetOperand* node, void*) override {
  const bool contains_strings = node->has_strings();
  result_ = !contains_strings;
  return nullptr;
}
void* VisitClassSetExpression(RegExpClassSetExpression* node,
void*) override {
result_ = false;
@ -391,11 +396,10 @@ class CompileVisitor : private RegExpVisitor {
return nullptr;
}
void* VisitClassRanges(RegExpClassRanges* node, void*) override {
void CompileCharacterRanges(ZoneList<CharacterRange>* ranges, bool negated) {
// A character class is compiled as Disjunction over its `CharacterRange`s.
ZoneList<CharacterRange>* ranges = node->ranges(zone_);
CharacterRange::Canonicalize(ranges);
if (node->is_negated()) {
if (negated) {
// The complement of a disjoint, non-adjacent (i.e. `Canonicalize`d)
// union of k intervals is a union of at most k + 1 intervals.
ZoneList<CharacterRange>* negated =
@ -422,6 +426,17 @@ class CompileVisitor : private RegExpVisitor {
assembler_.ConsumeRange(from_uc16, to_uc16);
});
}
// Compiles a plain character class by forwarding its ranges and its negation
// flag to CompileCharacterRanges().
void* VisitClassRanges(RegExpClassRanges* node, void*) override {
  ZoneList<CharacterRange>* const class_ranges = node->ranges(zone_);
  const bool is_negated = node->is_negated();
  CompileCharacterRanges(class_ranges, is_negated);
  return nullptr;
}
// Compiles the ranges of a class set operand. The operand itself carries no
// negation flag, so the ranges are always compiled as non-negated.
void* VisitClassSetOperand(RegExpClassSetOperand* node, void*) override {
  // TODO(v8:11935): Support strings.
  DCHECK(!node->has_strings());
  constexpr bool kNotNegated = false;
  CompileCharacterRanges(node->ranges(), kNotNegated);
  return nullptr;
}

View File

@ -193,6 +193,22 @@ void* RegExpUnparser::VisitClassRanges(RegExpClassRanges* that, void* data) {
return nullptr;
}
// Prints a class set operand as "![" followed by its space-separated
// character ranges, then each string wrapped in single quotes, then "]".
void* RegExpUnparser::VisitClassSetOperand(RegExpClassSetOperand* that,
                                           void* data) {
  os_ << "![";
  bool is_first_range = true;
  for (const CharacterRange& range : *that->ranges()) {
    // Ranges are separated by a single space; nothing precedes the first one.
    if (!is_first_range) os_ << " ";
    is_first_range = false;
    VisitCharacterRange(range);
  }
  for (const auto& entry : *that->strings()) {
    // The map key holds the code points of the string.
    const auto& code_points = entry.first;
    os_ << " '" << std::string(code_points.begin(), code_points.end()) << "'";
  }
  os_ << "]";
  return nullptr;
}
void* RegExpUnparser::VisitClassSetExpression(RegExpClassSetExpression* that,
void* data) {
switch (that->operation()) {
@ -362,6 +378,37 @@ RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
}
}
// Builds an operand from |ranges| and |strings| (both must be non-null) and
// derives the match-length bounds from them.
RegExpClassSetOperand::RegExpClassSetOperand(ZoneList<CharacterRange>* ranges,
                                             CharacterClassStrings* strings)
    : ranges_(ranges), strings_(strings) {
  DCHECK_NOT_NULL(ranges);
  DCHECK_NOT_NULL(strings);
  // A non-empty range list contributes a match length between 1 and 2;
  // without ranges both bounds start out at 0.
  const bool has_ranges = !ranges->is_empty();
  min_match_ = has_ranges ? 1 : 0;
  max_match_ = has_ranges ? 2 : 0;
  // Widen the bounds with the bounds of every string alternative.
  for (const auto& entry : *strings) {
    RegExpTree* const string_tree = entry.second;
    min_match_ = std::min(min_match_, string_tree->min_match());
    max_match_ = std::max(max_match_, string_tree->max_match());
  }
}
// Builds a set expression node over |operands| (must be non-null). A negated
// expression must not be flagged as possibly containing strings.
// |max_match_| becomes the largest max_match() of all operands (0 if there
// are none).
RegExpClassSetExpression::RegExpClassSetExpression(
    OperationType op, bool is_negated, bool may_contain_strings,
    ZoneList<RegExpTree*>* operands)
    : operation_(op),
      is_negated_(is_negated),
      may_contain_strings_(may_contain_strings),
      operands_(operands) {
  DCHECK_NOT_NULL(operands);
  DCHECK_IMPLIES(is_negated_, !may_contain_strings_);
  int longest_match = 0;
  for (int i = 0; i < operands->length(); ++i) {
    longest_match = std::max(longest_match, operands->at(i)->max_match());
  }
  max_match_ = longest_match;
}
} // namespace internal
} // namespace v8

View File

@ -22,6 +22,7 @@ namespace internal {
VISIT(Alternative) \
VISIT(Assertion) \
VISIT(ClassRanges) \
VISIT(ClassSetOperand) \
VISIT(ClassSetExpression) \
VISIT(Atom) \
VISIT(Quantifier) \
@ -365,45 +366,101 @@ class RegExpClassRanges final : public RegExpTree {
ClassRangesFlags class_ranges_flags_;
};
// Strict weak ordering for class strings: longer strings sort before shorter
// ones; strings of equal length are ordered by their first differing code
// point (smaller code point first).
struct CharacterClassStringLess {
  bool operator()(const base::Vector<const base::uc32>& lhs,
                  const base::Vector<const base::uc32>& rhs) const {
    const auto lhs_length = lhs.length();
    const auto rhs_length = rhs.length();
    // Longer strings come first so that matches are generated for the
    // largest possible string.
    if (lhs_length > rhs_length) return true;
    if (lhs_length < rhs_length) return false;
    // Same length: decide on the first position where the strings differ.
    for (int i = 0; i < lhs_length; ++i) {
      const base::uc32 left = lhs[i];
      const base::uc32 right = rhs[i];
      if (left == right) continue;
      return left < right;
    }
    // Identical strings are not "less than" each other.
    return false;
  }
};
// A type used for strings as part of character classes (only possible in
// unicode sets mode).
// We use a ZoneMap instead of an UnorderedZoneMap because we need to match
// the longest alternatives first. By using a ZoneMap with the custom comparator
// we can avoid sorting before assembling the code.
// Strings are likely short (the largest string in current unicode properties
// consists of 10 code points).
using CharacterClassStrings = ZoneMap<base::Vector<const base::uc32>,
RegExpTree*, CharacterClassStringLess>;
// TODO(pthier): If we are sure we don't want to use icu::UnicodeSets
// (performance evaluation pending), this class can be merged with
// RegExpClassRanges.
// A single operand of a class set expression: a list of character ranges
// together with a set of strings (see CharacterClassStrings).
class RegExpClassSetOperand final : public RegExpTree {
public:
// Stores the given |ranges| and |strings| pointers (neither may be null; the
// constructor DCHECKs this) and computes the match-length bounds from them.
RegExpClassSetOperand(ZoneList<CharacterRange>* ranges,
CharacterClassStrings* strings);
DECL_BOILERPLATE(ClassSetOperand);
bool IsTextElement() override { return true; }
// Match-length bounds as computed by the constructor.
// NOTE(review): Union/Intersect/Subtract modify ranges and strings in place
// but do not appear to refresh these bounds -- confirm this is intended.
int min_match() override { return min_match_; }
int max_match() override { return max_match_; }
// In-place set operations: the result replaces the ranges and strings of this
// operand; |other| is only read.
// Union appends the ranges of |other| without canonicalizing them.
void Union(RegExpClassSetOperand* other, Zone* zone);
// |temp_ranges| is scratch space for the range computation and is rewound to
// length 0 before returning.
void Intersect(RegExpClassSetOperand* other,
ZoneList<CharacterRange>* temp_ranges, Zone* zone);
// |temp_ranges| is scratch space for the range computation and is rewound to
// length 0 before returning.
void Subtract(RegExpClassSetOperand* other,
ZoneList<CharacterRange>* temp_ranges, Zone* zone);
// True if the operand holds at least one string.
bool has_strings() const { return !strings_->empty(); }
ZoneList<CharacterRange>* ranges() { return ranges_; }
CharacterClassStrings* strings() { return strings_; }
private:
// Character ranges of this operand.
ZoneList<CharacterRange>* ranges_;
// Strings of this operand, ordered by CharacterClassStringLess.
CharacterClassStrings* strings_;
// Bounds returned by min_match()/max_match().
int min_match_;
int max_match_;
};
class RegExpClassSetExpression final : public RegExpTree {
public:
enum class OperationType { kUnion, kIntersection, kSubtraction };
RegExpClassSetExpression(OperationType op, bool is_negated,
ZoneList<RegExpTree*>* operands)
: operation_(op), is_negated_(is_negated), operands_(operands) {}
bool may_contain_strings,
ZoneList<RegExpTree*>* operands);
DECL_BOILERPLATE(ClassSetExpression);
bool IsTextElement() override { return true; }
// At least 1 character is consumed.
int min_match() override { return 1; }
// Up to two code points might be consumed.
int max_match() override { return 2; }
int min_match() override { return 0; }
int max_match() override { return max_match_; }
OperationType operation() const { return operation_; }
bool is_negated() const { return is_negated_; }
bool may_contain_strings() const { return may_contain_strings_; }
const ZoneList<RegExpTree*>* operands() const { return operands_; }
ZoneList<RegExpTree*>* operands() { return operands_; }
private:
RegExpClassRanges* ToCharacterClass(Zone* zone);
// Recursively evaluates the tree rooted at |root|, computing the valid
// CharacterRanges after applying all set operations and storing the result in
// |result_ranges|. |temp_ranges| is a list used for intermediate results,
// passed as parameter to avoid allocating new lists all the time.
static void ComputeCharacterRanges(RegExpTree* root,
ZoneList<CharacterRange>* result_ranges,
ZoneList<CharacterRange>* temp_ranges,
Zone* zone);
// CharacterRanges and strings after applying all set operations.
// The original tree will be modified by this method, so don't store pointers
// to inner nodes of the tree somewhere else!
// Modifying the tree in-place saves memory and speeds up multiple calls of
// the method (e.g. when unrolling quantifiers).
// |temp_ranges| is used for intermediate results, passed as parameter to
// avoid allocating new lists all the time.
static RegExpClassSetOperand* ComputeExpression(
RegExpTree* root, ZoneList<CharacterRange>* temp_ranges, Zone* zone);
const OperationType operation_;
const bool is_negated_;
const bool may_contain_strings_;
ZoneList<RegExpTree*>* operands_ = nullptr;
#ifdef ENABLE_SLOW_DCHECKS
// Cache ranges for each node during computation for (slow) DCHECKs.
ZoneList<CharacterRange>* ranges_ = nullptr;
#endif
int max_match_;
};
class RegExpAtom final : public RegExpTree {

View File

@ -535,9 +535,145 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
return result;
}
// Lowers the operand to a node graph. The operand becomes a disjunction of
// its non-empty strings (in map order, i.e. longest first), followed by one
// character class for all ranges, followed by the empty string if present.
// A single alternative is emitted directly, without a disjunction.
RegExpNode* RegExpClassSetOperand::ToNode(RegExpCompiler* compiler,
                                          RegExpNode* on_success) {
  Zone* zone = compiler->zone();
  const bool contains_strings = has_strings();
  const bool contains_ranges = !ranges()->is_empty();
  const int string_count =
      contains_strings ? static_cast<int>(strings()->size()) : 0;
  const int size = string_count + (contains_ranges ? 1 : 0);

  if (size == 0) {
    // With neither ranges nor strings the operand is equivalent to an empty
    // character class, which matches nothing.
    ZoneList<CharacterRange>* no_ranges =
        zone->template New<ZoneList<CharacterRange>>(0, zone);
    RegExpClassRanges* matches_nothing =
        zone->template New<RegExpClassRanges>(zone, no_ranges);
    return matches_nothing->ToNode(compiler, on_success);
  }

  ZoneList<RegExpTree*>* alternatives =
      zone->template New<ZoneList<RegExpTree*>>(size, zone);

  // The string map is sorted by length (longer strings before shorter ones,
  // see the comment on CharacterClassStrings), so iteration order already is
  // the desired match order. An empty string is held back and appended after
  // the character ranges.
  RegExpTree* empty_string = nullptr;
  if (contains_strings) {
    for (const auto& entry : *strings()) {
      RegExpTree* const string_tree = entry.second;
      if (!string_tree->IsEmpty()) {
        alternatives->Add(string_tree, zone);
        continue;
      }
      empty_string = string_tree;
    }
  }
  if (contains_ranges) {
    RegExpTree* range_class =
        zone->template New<RegExpClassRanges>(zone, ranges());
    alternatives->Add(range_class, zone);
  }
  if (empty_string != nullptr) {
    alternatives->Add(empty_string, zone);
  }

  if (size != 1) {
    RegExpTree* disjunction =
        zone->template New<RegExpDisjunction>(alternatives);
    return disjunction->ToNode(compiler, on_success);
  }
  DCHECK_EQ(alternatives->length(), 1);
  RegExpTree* only_alternative = alternatives->first();
  return only_alternative->ToNode(compiler, on_success);
}
RegExpNode* RegExpClassSetExpression::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
return ToCharacterClass(compiler->zone())->ToNode(compiler, on_success);
Zone* zone = compiler->zone();
ZoneList<CharacterRange>* temp_ranges =
zone->template New<ZoneList<CharacterRange>>(4, zone);
RegExpClassSetOperand* root = ComputeExpression(this, temp_ranges, zone);
return root->ToNode(compiler, on_success);
}
// Adds all ranges and strings of |other| to this operand. The ranges are only
// appended here; they are not canonicalized by this method.
void RegExpClassSetOperand::Union(RegExpClassSetOperand* other, Zone* zone) {
  ZoneList<CharacterRange>* const other_ranges = other->ranges();
  ranges()->AddAll(*other_ranges, zone);

  CharacterClassStrings* const other_strings = other->strings();
  strings()->insert(other_strings->begin(), other_strings->end());
}
// Reduces this operand to what it has in common with |other|: the ranges
// become the range intersection and only strings also present in |other| are
// kept. |temp_ranges| receives the intermediate range result and is rewound
// to length 0 before returning.
void RegExpClassSetOperand::Intersect(RegExpClassSetOperand* other,
                                      ZoneList<CharacterRange>* temp_ranges,
                                      Zone* zone) {
  CharacterRange::Intersect(ranges(), other->ranges(), temp_ranges, zone);
  // Move the computed ranges into this operand and reset the scratch list.
  std::swap(*ranges(), *temp_ranges);
  temp_ranges->Rewind(0);

  auto it = strings()->begin();
  while (it != strings()->end()) {
    const bool present_in_other =
        other->strings()->find(it->first) != other->strings()->end();
    if (present_in_other) {
      it++;
    } else {
      // erase() hands back the iterator following the removed entry.
      it = strings()->erase(it);
    }
  }
}
// Removes everything contained in |other| from this operand: the ranges
// become the range difference and every string also present in |other| is
// dropped. |temp_ranges| receives the intermediate range result and is
// rewound to length 0 before returning.
void RegExpClassSetOperand::Subtract(RegExpClassSetOperand* other,
                                     ZoneList<CharacterRange>* temp_ranges,
                                     Zone* zone) {
  CharacterRange::Subtract(ranges(), other->ranges(), temp_ranges, zone);
  // Move the computed ranges into this operand and reset the scratch list.
  std::swap(*ranges(), *temp_ranges);
  temp_ranges->Rewind(0);

  auto it = strings()->begin();
  while (it != strings()->end()) {
    const bool present_in_other =
        other->strings()->find(it->first) != other->strings()->end();
    if (present_in_other) {
      // erase() hands back the iterator following the removed entry.
      it = strings()->erase(it);
    } else {
      it++;
    }
  }
}
// static
// Evaluates the set expression tree rooted at |root| and returns the operand
// holding the resulting ranges and strings. A leaf operand is returned as is.
// For an expression node the first operand is evaluated and then combined in
// place with each further operand; afterwards the node's operand list is
// reduced to that single result. |temp_ranges| must be empty on entry and is
// used as scratch space.
RegExpClassSetOperand* RegExpClassSetExpression::ComputeExpression(
    RegExpTree* root, ZoneList<CharacterRange>* temp_ranges, Zone* zone) {
  DCHECK(temp_ranges->is_empty());
  if (root->IsClassSetOperand()) {
    return root->AsClassSetOperand();
  }
  DCHECK(root->IsClassSetExpression());
  RegExpClassSetExpression* node = root->AsClassSetExpression();
  ZoneList<RegExpTree*>* operands = node->operands();

  // The first operand accumulates the result of the whole operation.
  RegExpClassSetOperand* result =
      ComputeExpression(operands->at(0), temp_ranges, zone);
  const OperationType operation = node->operation();
  for (int i = 1; i < operands->length(); i++) {
    RegExpClassSetOperand* rhs =
        ComputeExpression(operands->at(i), temp_ranges, zone);
    switch (operation) {
      case OperationType::kUnion:
        result->Union(rhs, zone);
        break;
      case OperationType::kIntersection:
        result->Intersect(rhs, temp_ranges, zone);
        break;
      case OperationType::kSubtraction:
        result->Subtract(rhs, temp_ranges, zone);
        break;
    }
  }
  if (operation == OperationType::kUnion) {
    // Union only appends ranges, so canonicalize once after all operands
    // have been merged.
    CharacterRange::Canonicalize(result->ranges());
  }

  if (node->is_negated()) {
    DCHECK(!result->has_strings());
    CharacterRange::Negate(result->ranges(), temp_ranges, zone);
    std::swap(*result->ranges(), *temp_ranges);
    temp_ranges->Rewind(0);
  }

  // Store the result as single operand of the current node.
  operands->Set(0, result);
  operands->Rewind(1);
  return result;
}
namespace {
@ -1498,128 +1634,6 @@ void CharacterSet::Canonicalize() {
CharacterRange::Canonicalize(ranges_);
}
// Evaluates this set expression via ComputeCharacterRanges() and wraps the
// resulting ranges in a newly allocated RegExpClassRanges.
RegExpClassRanges* RegExpClassSetExpression::ToCharacterClass(Zone* zone) {
  using RangeList = ZoneList<CharacterRange>;
  RangeList* computed_ranges = zone->template New<RangeList>(2, zone);
  // Scratch list handed to the computation for intermediate results.
  RangeList* scratch_ranges = zone->template New<RangeList>(2, zone);
  ComputeCharacterRanges(this, computed_ranges, scratch_ranges, zone);
  return zone->template New<RegExpClassRanges>(zone, computed_ranges);
}
// static
// Recursively evaluates the tree rooted at |root| and appends the resulting
// character ranges to |result_ranges|. |root| must be either a non-negated
// RegExpClassRanges leaf or a RegExpClassSetExpression. |temp_ranges| is
// scratch space; it must be empty on entry and is empty again on return.
void RegExpClassSetExpression::ComputeCharacterRanges(
RegExpTree* root, ZoneList<CharacterRange>* result_ranges,
ZoneList<CharacterRange>* temp_ranges, Zone* zone) {
DCHECK_EQ(temp_ranges->length(), 0);
DCHECK(root->IsClassRanges() || root->IsClassSetExpression());
// Leaf: canonicalize the ranges of the class and append them to the result.
if (root->IsClassRanges()) {
DCHECK(!root->AsClassRanges()->is_negated());
ZoneList<CharacterRange>* ranges = root->AsClassRanges()->ranges(zone);
CharacterRange::Canonicalize(ranges);
result_ranges->AddAll(*ranges, zone);
return;
}
RegExpClassSetExpression* node = root->AsClassSetExpression();
switch (node->operation()) {
case OperationType::kUnion: {
// Evaluate every operand into |op_ranges|, append it to the result, and
// canonicalize once at the end.
ZoneList<CharacterRange>* op_ranges =
zone->template New<ZoneList<CharacterRange>>(2, zone);
for (int i = 0; i < node->operands()->length(); i++) {
RegExpTree* op = node->operands()->at(i);
ComputeCharacterRanges(op, op_ranges, temp_ranges, zone);
result_ranges->AddAll(*op_ranges, zone);
op_ranges->Rewind(0);
}
CharacterRange::Canonicalize(result_ranges);
break;
}
case OperationType::kIntersection: {
// Start with the ranges of the first operand, then intersect with each
// remaining operand in turn.
ZoneList<CharacterRange>* op_ranges =
zone->template New<ZoneList<CharacterRange>>(2, zone);
ComputeCharacterRanges(node->operands()->at(0), op_ranges, temp_ranges,
zone);
result_ranges->AddAll(*op_ranges, zone);
op_ranges->Rewind(0);
for (int i = 1; i < node->operands()->length(); i++) {
ComputeCharacterRanges(node->operands()->at(i), op_ranges, temp_ranges,
zone);
CharacterRange::Intersect(result_ranges, op_ranges, temp_ranges, zone);
// The intersection was written to |temp_ranges|; move it into the result
// and reset both scratch lists for the next operand.
std::swap(*result_ranges, *temp_ranges);
temp_ranges->Rewind(0);
op_ranges->Rewind(0);
}
break;
}
case OperationType::kSubtraction: {
// Start with the ranges of the first operand, then subtract each
// remaining operand in turn.
ZoneList<CharacterRange>* op_ranges =
zone->template New<ZoneList<CharacterRange>>(2, zone);
ComputeCharacterRanges(node->operands()->at(0), op_ranges, temp_ranges,
zone);
result_ranges->AddAll(*op_ranges, zone);
op_ranges->Rewind(0);
for (int i = 1; i < node->operands()->length(); i++) {
ComputeCharacterRanges(node->operands()->at(i), op_ranges, temp_ranges,
zone);
CharacterRange::Subtract(result_ranges, op_ranges, temp_ranges, zone);
// The difference was written to |temp_ranges|; move it into the result
// and reset both scratch lists for the next operand.
std::swap(*result_ranges, *temp_ranges);
temp_ranges->Rewind(0);
op_ranges->Rewind(0);
}
#ifdef ENABLE_SLOW_DCHECKS
// Check that the result is equal to subtracting the union of all RHS
// operands from the LHS operand.
// TODO(pthier): It is unclear whether this variant is faster or slower
// than subtracting multiple ranges in practice.
// For nested expressions the ranges cached in |ranges_| during their own
// evaluation are used.
ZoneList<CharacterRange>* lhs_range =
node->operands()->at(0)->IsClassRanges()
? node->operands()->at(0)->AsClassRanges()->ranges(zone)
: node->operands()->at(0)->AsClassSetExpression()->ranges_;
ZoneList<CharacterRange>* rhs_union =
zone->template New<ZoneList<CharacterRange>>(2, zone);
for (int i = 1; i < node->operands()->length(); i++) {
ZoneList<CharacterRange>* op_range =
node->operands()->at(i)->IsClassRanges()
? node->operands()->at(i)->AsClassRanges()->ranges(zone)
: node->operands()->at(i)->AsClassSetExpression()->ranges_;
rhs_union->AddAll(*op_range, zone);
}
CharacterRange::Canonicalize(rhs_union);
ZoneList<CharacterRange>* ranges_check =
zone->template New<ZoneList<CharacterRange>>(2, zone);
CharacterRange::Subtract(lhs_range, rhs_union, ranges_check, zone);
DCHECK(CharacterRange::Equals(result_ranges, ranges_check));
// Check that the result is equal to intersecting the LHS operand with the
// complemented union of all RHS operands
ZoneList<CharacterRange>* rhs_union_negated =
zone->template New<ZoneList<CharacterRange>>(rhs_union->length(),
zone);
CharacterRange::Negate(rhs_union, rhs_union_negated, zone);
ranges_check->Rewind(0);
CharacterRange::Intersect(lhs_range, rhs_union_negated, ranges_check,
zone);
DCHECK(CharacterRange::Equals(result_ranges, ranges_check));
#endif
break;
}
}
// A negated expression yields the complement of the computed ranges.
if (node->is_negated()) {
CharacterRange::Negate(result_ranges, temp_ranges, zone);
std::swap(*result_ranges, *temp_ranges);
temp_ranges->Rewind(0);
}
DCHECK_EQ(temp_ranges->length(), 0);
#ifdef ENABLE_SLOW_DCHECKS
// Cache results for DCHECKs.
node->ranges_ =
zone->template New<ZoneList<CharacterRange>>(*result_ranges, zone);
#endif
}
// static
void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
if (character_ranges->length() <= 1) return;
@ -1740,6 +1754,9 @@ void CharacterRange::Subtract(const ZoneList<CharacterRange>* src,
DCHECK(CharacterRange::IsCanonical(src));
DCHECK(CharacterRange::IsCanonical(to_remove));
DCHECK_EQ(0, result->length());
if (src->is_empty()) return;
int src_index = 0;
int to_remove_index = 0;
base::uc32 from = src->at(src_index).from();

View File

@ -44,7 +44,9 @@ namespace internal {
T(UnterminatedCharacterClass, "Unterminated character class") \
T(OutOfOrderCharacterClass, "Range out of order in character class") \
T(InvalidClassSetOperation, "Invalid set operation in character class") \
T(InvalidCharacterInClass, "Invalid character in character class")
T(InvalidCharacterInClass, "Invalid character in character class") \
T(NegatedCharacterClassWithStrings, \
"Negated character class may contain strings")
enum class RegExpError : uint32_t {
#define TEMPLATE(NAME, STRING) k##NAME,

View File

@ -18,6 +18,8 @@
#ifdef V8_INTL_SUPPORT
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/usetiter.h"
#endif // V8_INTL_SUPPORT
namespace v8 {
@ -62,6 +64,7 @@ class RegExpTextBuilder {
void FlushPendingSurrogate();
void FlushText();
RegExpTree* PopLastAtom();
RegExpTree* ToRegExp();
private:
static const base::uc16 kNoPendingSurrogate = 0;
@ -280,6 +283,15 @@ RegExpTree* RegExpTextBuilder::PopLastAtom() {
return nullptr;
}
// Flushes pending text and converts the accumulated terms into one tree:
// a RegExpEmpty for no terms, the term itself for exactly one term, and a
// RegExpAlternative over all terms otherwise.
RegExpTree* RegExpTextBuilder::ToRegExp() {
  FlushText();
  switch (terms_->size()) {
    case 0:
      return zone()->New<RegExpEmpty>();
    case 1:
      return terms_->back();
    default:
      break;
  }
  ZoneList<RegExpTree*>* term_list = zone()->New<ZoneList<RegExpTree*>>(
      base::VectorOf(terms_->begin(), terms_->size()), zone());
  return zone()->New<RegExpAlternative>(term_list);
}
// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder {
public:
@ -455,12 +467,15 @@ class RegExpParserImpl final {
ZoneList<CharacterRange>* ranges,
Zone* zone,
bool add_unicode_case_equivalents);
RegExpTree* ParseClassStringDisjunction();
RegExpTree* ParseClassStringDisjunction(ZoneList<CharacterRange>* ranges,
CharacterClassStrings* strings);
RegExpTree* ParseClassSetOperand(const RegExpBuilder* builder,
ClassSetOperandType* type_out);
RegExpTree* ParseClassSetOperand(const RegExpBuilder* builder,
ClassSetOperandType* type_out,
ZoneList<CharacterRange>* ranges);
ZoneList<CharacterRange>* ranges,
CharacterClassStrings* strings);
base::uc32 ParseClassSetCharacter();
// Parses and returns a single escaped character.
base::uc32 ParseCharacterEscape(InClassEscapeState in_class_escape_state,
bool* is_escaped_unicode_character);
@ -468,12 +483,14 @@ class RegExpParserImpl final {
RegExpTree* ParseClassUnion(const RegExpBuilder* builder, bool is_negated,
RegExpTree* first_operand,
ClassSetOperandType first_operand_type,
ZoneList<CharacterRange>* ranges);
ZoneList<CharacterRange>* ranges,
CharacterClassStrings* strings);
RegExpTree* ParseClassIntersection(const RegExpBuilder* builder,
bool is_negated,
RegExpTree* first_operand);
bool is_negated, RegExpTree* first_operand,
ClassSetOperandType first_operand_type);
RegExpTree* ParseClassSubtraction(const RegExpBuilder* builder,
bool is_negated, RegExpTree* first_operand);
bool is_negated, RegExpTree* first_operand,
ClassSetOperandType first_operand_type);
RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
base::uc32 ParseOctalLiteral();
@ -498,15 +515,15 @@ class RegExpParserImpl final {
int captures_started() const { return captures_started_; }
int position() const { return next_pos_ - 1; }
bool failed() const { return failed_; }
RegExpFlags flags() const { return top_level_flags_; }
bool IsUnicodeMode() const {
// Either /v or /u enable UnicodeMode
// TODO(v8:11935): Change permalink once proposal is in stage 4.
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-parsepattern
return IsUnicode(top_level_flags_) || IsUnicodeSets(top_level_flags_) ||
force_unicode_;
return IsUnicode(flags()) || IsUnicodeSets(flags()) || force_unicode_;
}
bool unicode_sets() const { return IsUnicodeSets(top_level_flags_); }
bool ignore_case() const { return IsIgnoreCase(top_level_flags_); }
bool unicode_sets() const { return IsUnicodeSets(flags()); }
bool ignore_case() const { return IsIgnoreCase(flags()); }
static bool IsSyntaxCharacterOrSlash(base::uc32 c);
static bool IsClassSetSyntaxCharacter(base::uc32 c);
@ -869,7 +886,7 @@ template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
// Used to store current state while parsing subexpressions.
RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
0, nullptr, top_level_flags_, zone());
0, nullptr, flags(), zone());
RegExpParserState* state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
@ -2377,10 +2394,27 @@ bool RegExpParserImpl<CharT>::TryParseCharacterClassEscape(
}
}
namespace {
// Records one class string. A string of exactly one code point is added to
// |ranges| as a singleton range; any other string is added to |strings|,
// keyed by |normalized_string| and mapped to |regexp_string|.
void AddClassString(ZoneList<base::uc32>* normalized_string,
                    RegExpTree* regexp_string, ZoneList<CharacterRange>* ranges,
                    CharacterClassStrings* strings, Zone* zone) {
  if (normalized_string->length() != 1) {
    strings->emplace(normalized_string->ToVector(), regexp_string);
    return;
  }
  const base::uc32 only_code_point = normalized_string->at(0);
  ranges->Add(CharacterRange::Singleton(only_code_point), zone);
}
} // namespace
// TODO(v8:11935): Change permalink once proposal is in stage 4.
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassStringDisjunction
template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction() {
RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction(
ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) {
DCHECK(unicode_sets());
DCHECK_EQ(current(), '\\');
DCHECK_EQ(Next(), 'q');
@ -2391,73 +2425,98 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction() {
}
Advance();
// TODO(pthier, v8:11935): Implement.
return ReportError(RegExpError::kInvalidCharacterClass);
ZoneList<base::uc32>* string =
zone()->template New<ZoneList<base::uc32>>(4, zone());
RegExpTextBuilder::SmallRegExpTreeVector string_storage(
ZoneAllocator<RegExpTree*>{zone()});
RegExpTextBuilder string_builder(zone(), &string_storage, flags());
while (has_more() && current() != '}') {
if (current() == '|') {
AddClassString(string, string_builder.ToRegExp(), ranges, strings,
zone());
string = zone()->template New<ZoneList<base::uc32>>(4, zone());
string_storage.clear();
Advance();
} else {
base::uc32 c = ParseClassSetCharacter(CHECK_FAILED);
if (ignore_case()) {
#ifdef V8_INTL_SUPPORT
c = u_foldCase(c, U_FOLD_CASE_DEFAULT);
#else
c = AsciiAlphaToLower(c);
#endif
}
string->Add(c, zone());
string_builder.AddUnicodeCharacter(c);
}
}
AddClassString(string, string_builder.ToRegExp(), ranges, strings, zone());
// We don't need to handle missing closing '}' here.
// If the character class is correctly closed, ParseClassSetCharacter will
// report an error.
DCHECK_EQ(current(), '}');
Advance();
return nullptr;
}
// TODO(v8:11935): Change permalink once proposal is in stage 4.
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetOperand
// Tree returned based on type_out:
// * kClassStringDisjunction: RegExpAlternative | RegExpAtom
// * kNestedClass: RegExpClassSetExpression
// * For all other types: RegExpClassRanges
// * For all other types: RegExpClassSetOperand
template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
const RegExpBuilder* builder, ClassSetOperandType* type_out) {
ZoneList<CharacterRange>* ranges =
zone()->template New<ZoneList<CharacterRange>>(1, zone());
CharacterClassStrings* strings =
zone()->template New<CharacterClassStrings>(zone());
RegExpTree* tree =
ParseClassSetOperand(builder, type_out, ranges CHECK_FAILED);
ParseClassSetOperand(builder, type_out, ranges, strings CHECK_FAILED);
DCHECK_IMPLIES(*type_out != ClassSetOperandType::kNestedClass,
tree == nullptr);
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassSetCharacter,
ranges->length() == 1);
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassSetCharacter,
tree == nullptr);
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kCharacterClassEscape,
!ranges->is_empty());
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kCharacterClassEscape,
tree == nullptr);
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassStringDisjunction,
ranges->is_empty());
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassStringDisjunction,
tree->IsAtom() || tree->IsAlternative());
strings->empty());
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kNestedClass,
ranges->is_empty());
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kNestedClass,
strings->empty());
DCHECK_IMPLIES(*type_out == ClassSetOperandType::kNestedClass,
tree->IsClassSetExpression());
// ClassSetRange is only used within ClassSetUnion().
DCHECK_NE(*type_out, ClassSetOperandType::kClassSetRange);
// There are no restrictions for kCharacterClassEscape.
// CharacterClassEscape includes \p{}, which can contain ranges, strings or
// both and \P{}, which could contain nothing (i.e. \P{Any}).
if (tree == nullptr) {
tree = zone()->template New<RegExpClassRanges>(zone(), ranges);
tree = zone()->template New<RegExpClassSetOperand>(ranges, strings);
}
return tree;
}
// TODO(v8:11935): Change permalink once proposal is in stage 4.
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetOperand
// Based on |type_out| either a tree is returned or ranges modified (never both).
// Tree returned based on type_out:
// * kClassStringDisjunction: RegExpAlternative | RegExpAtom
// * kNestedClass: RegExpClassSetExpression
// For all other types, ranges is modified and nullptr is returned.
// Based on |type_out| either a tree is returned or ranges/strings modified.
// If a tree is returned, ranges/strings are not modified.
// If |type_out| is kNestedClass, a tree of type RegExpClassSetExpression is
// returned. For all other types, ranges is modified and nullptr is returned.
template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
const RegExpBuilder* builder, ClassSetOperandType* type_out,
ZoneList<CharacterRange>* ranges) {
ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) {
DCHECK(unicode_sets());
const base::uc32 c = current();
base::uc32 c = current();
if (c == '\\') {
base::uc32 next = Next();
switch (next) {
case 'b':
*type_out = ClassSetOperandType::kClassSetCharacter;
ranges->Add(CharacterRange::Singleton('\b'), zone());
Advance(2);
return nullptr;
case 'q':
*type_out = ClassSetOperandType::kClassStringDisjunction;
return ParseClassStringDisjunction();
case kEndMarker:
return ReportError(RegExpError::kEscapeAtEndOfPattern);
const base::uc32 next = Next();
if (next == 'q') {
*type_out = ClassSetOperandType::kClassStringDisjunction;
ParseClassStringDisjunction(ranges, strings CHECK_FAILED);
return nullptr;
}
static constexpr InClassEscapeState kInClassEscape =
InClassEscapeState::kInClass;
@ -2467,44 +2526,86 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
*type_out = ClassSetOperandType::kCharacterClassEscape;
return nullptr;
}
bool dummy = false; // Unused.
base::uc32 escaped_char = ParseCharacterEscape(kInClassEscape, &dummy);
*type_out = ClassSetOperandType::kClassSetCharacter;
ranges->Add(CharacterRange::Singleton(escaped_char), zone());
return nullptr;
}
if (c == '[') {
*type_out = ClassSetOperandType::kNestedClass;
return ParseCharacterClass(builder);
}
if (IsClassSetSyntaxCharacter(c)) {
return ReportError(RegExpError::kInvalidCharacterInClass);
}
if (IsClassSetReservedDoublePunctuator(c)) {
return ReportError(RegExpError::kInvalidClassSetOperation);
}
*type_out = ClassSetOperandType::kClassSetCharacter;
c = ParseClassSetCharacter(CHECK_FAILED);
ranges->Add(CharacterRange::Singleton(c), zone());
Advance();
return nullptr;
}
// Parses a single class set character at the current position and returns
// it. Escapes are resolved ("\b" yields the backspace character; all other
// escapes are delegated to ParseCharacterEscape). On a parse error the error
// is reported and 0 is returned.
template <class CharT>
base::uc32 RegExpParserImpl<CharT>::ParseClassSetCharacter() {
  DCHECK(unicode_sets());
  const base::uc32 c = current();

  if (c != '\\') {
    // Unescaped character: syntax characters and reserved double punctuators
    // are not allowed here.
    if (IsClassSetSyntaxCharacter(c)) {
      ReportError(RegExpError::kInvalidCharacterInClass);
      return 0;
    }
    if (IsClassSetReservedDoublePunctuator(c)) {
      ReportError(RegExpError::kInvalidClassSetOperation);
      return 0;
    }
    Advance();
    return c;
  }

  // Escaped character.
  const base::uc32 next = Next();
  if (next == 'b') {
    // Skip both the backslash and the 'b'.
    Advance(2);
    return '\b';
  }
  if (next == kEndMarker) {
    ReportError(RegExpError::kEscapeAtEndOfPattern);
    return 0;
  }
  bool dummy = false;  // Unused.
  return ParseCharacterEscape(InClassEscapeState::kInClass, &dummy);
}
namespace {
// Returns whether an operand of kind |type|, represented by |operand|, may
// contain strings.
bool MayContainStrings(ClassSetOperandType type, RegExpTree* operand) {
  switch (type) {
    case ClassSetOperandType::kClassSetRange:
    case ClassSetOperandType::kClassSetCharacter:
      // Single characters and character ranges never contain strings.
      return false;
    case ClassSetOperandType::kClassStringDisjunction:
    case ClassSetOperandType::kCharacterClassEscape: {
      RegExpClassSetOperand* const set_operand = operand->AsClassSetOperand();
      return set_operand->has_strings();
    }
    case ClassSetOperandType::kNestedClass:
      // A nested class given as plain class ranges has no strings; otherwise
      // the nested expression carries the answer itself.
      return !operand->IsClassRanges() &&
             operand->AsClassSetExpression()->may_contain_strings();
  }
}
} // namespace
// TODO(v8:11935): Change permalink once proposal is in stage 4.
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassUnion
template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
ClassSetOperandType first_operand_type, ZoneList<CharacterRange>* ranges) {
ClassSetOperandType first_operand_type, ZoneList<CharacterRange>* ranges,
CharacterClassStrings* strings) {
DCHECK(unicode_sets());
ZoneList<RegExpTree*>* operands =
zone()->template New<ZoneList<RegExpTree*>>(2, zone());
bool may_contain_strings = false;
// Add the lhs to operands if necessary.
// Either the lhs values were added to |ranges| (in which case |first_operand|
// is null), or the lhs was evaluated to a tree and passed as |first_operand|
// (in which case |ranges| are empty).
DCHECK_EQ(first_operand != nullptr, ranges->is_empty());
// Either the lhs values were added to |ranges|/|strings| (in which case
// |first_operand| is nullptr), or the lhs was evaluated to a tree and passed
// as |first_operand| (in which case |ranges| and |strings| are empty).
if (first_operand != nullptr) {
may_contain_strings = MayContainStrings(first_operand_type, first_operand);
operands->Add(first_operand, zone());
}
ClassSetOperandType last_type = first_operand_type;
@ -2531,7 +2632,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
if (last_type != ClassSetOperandType::kClassSetCharacter) {
return ReportError(RegExpError::kInvalidCharacterClass);
}
ParseClassSetOperand(builder, &last_type, ranges CHECK_FAILED);
ParseClassSetOperand(builder, &last_type, ranges, strings CHECK_FAILED);
if (last_type != ClassSetOperandType::kClassSetCharacter) {
return ReportError(RegExpError::kInvalidCharacterClass);
}
@ -2550,18 +2651,22 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
last_type = ClassSetOperandType::kClassSetRange;
} else {
DCHECK_NE(current(), '-');
RegExpTree* operand =
ParseClassSetOperand(builder, &last_type, ranges CHECK_FAILED);
RegExpTree* operand = ParseClassSetOperand(builder, &last_type, ranges,
strings CHECK_FAILED);
if (operand != nullptr) {
may_contain_strings |= MayContainStrings(last_type, operand);
// Add the range we started building as operand and reset the current
// range.
if (!ranges->is_empty()) {
if (!ranges->is_empty() || !strings->empty()) {
if (needs_case_folding) {
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
}
operands->Add(zone()->template New<RegExpClassRanges>(zone(), ranges),
zone());
may_contain_strings |= !strings->empty();
operands->Add(
zone()->template New<RegExpClassSetOperand>(ranges, strings),
zone());
ranges = zone()->template New<ZoneList<CharacterRange>>(2, zone());
strings = zone()->template New<CharacterClassStrings>(zone());
}
operands->Add(operand, zone());
}
@ -2573,26 +2678,37 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
}
// Add the range we started building as operand.
if (!ranges->is_empty()) {
if (!ranges->is_empty() || !strings->empty()) {
if (needs_case_folding) {
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
}
operands->Add(zone()->template New<RegExpClassRanges>(zone(), ranges),
may_contain_strings |= !strings->empty();
operands->Add(zone()->template New<RegExpClassSetOperand>(ranges, strings),
zone());
}
DCHECK_EQ(current(), ']');
Advance();
if (is_negated && may_contain_strings) {
return ReportError(RegExpError::kNegatedCharacterClassWithStrings);
}
return zone()->template New<RegExpClassSetExpression>(
RegExpClassSetExpression::OperationType::kUnion, is_negated, operands);
RegExpClassSetExpression::OperationType::kUnion, is_negated,
may_contain_strings, operands);
}
// TODO(v8:11935): Change permalink once proposal is in stage 4.
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassIntersection
template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection(
const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand) {
const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
ClassSetOperandType first_operand_type) {
DCHECK(unicode_sets());
DCHECK(current() == '&' && Next() == '&');
bool may_contain_strings =
MayContainStrings(first_operand_type, first_operand);
ZoneList<RegExpTree*>* operands =
zone()->template New<ZoneList<RegExpTree*>>(2, zone());
operands->Add(first_operand, zone());
@ -2606,27 +2722,38 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection(
return ReportError(RegExpError::kInvalidCharacterInClass);
}
ClassSetOperandType dummy; // unused
RegExpTree* operand = ParseClassSetOperand(builder, &dummy CHECK_FAILED);
ClassSetOperandType operand_type;
RegExpTree* operand =
ParseClassSetOperand(builder, &operand_type CHECK_FAILED);
may_contain_strings &= MayContainStrings(operand_type, operand);
operands->Add(operand, zone());
}
if (!has_more()) {
return ReportError(RegExpError::kUnterminatedCharacterClass);
}
if (is_negated && may_contain_strings) {
return ReportError(RegExpError::kNegatedCharacterClassWithStrings);
}
DCHECK_EQ(current(), ']');
Advance();
return zone()->template New<RegExpClassSetExpression>(
RegExpClassSetExpression::OperationType::kIntersection, is_negated,
operands);
may_contain_strings, operands);
}
// TODO(v8:11935): Change permalink once proposal is in stage 4.
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSubtraction
template <class CharT>
RegExpTree* RegExpParserImpl<CharT>::ParseClassSubtraction(
const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand) {
const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
ClassSetOperandType first_operand_type) {
DCHECK(unicode_sets());
DCHECK(current() == '-' && Next() == '-');
const bool may_contain_strings =
MayContainStrings(first_operand_type, first_operand);
if (is_negated && may_contain_strings) {
return ReportError(RegExpError::kNegatedCharacterClassWithStrings);
}
ZoneList<RegExpTree*>* operands =
zone()->template New<ZoneList<RegExpTree*>>(2, zone());
operands->Add(first_operand, zone());
@ -2646,7 +2773,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSubtraction(
Advance();
return zone()->template New<RegExpClassSetExpression>(
RegExpClassSetExpression::OperationType::kSubtraction, is_negated,
operands);
may_contain_strings, operands);
}
// https://tc39.es/ecma262/#prod-CharacterClass
@ -2684,27 +2811,34 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
character_class_flags);
} else {
ClassSetOperandType operand_type;
RegExpTree* operand =
ParseClassSetOperand(builder, &operand_type, ranges CHECK_FAILED);
CharacterClassStrings* strings =
zone()->template New<CharacterClassStrings>(zone());
RegExpTree* operand = ParseClassSetOperand(builder, &operand_type, ranges,
strings CHECK_FAILED);
switch (current()) {
case '-':
if (Next() == '-') {
if (operand == nullptr) {
operand = zone()->template New<RegExpClassRanges>(zone(), ranges);
operand =
zone()->template New<RegExpClassSetOperand>(ranges, strings);
}
return ParseClassSubtraction(builder, is_negated, operand);
return ParseClassSubtraction(builder, is_negated, operand,
operand_type);
}
// ClassSetRange is handled in ParseClassUnion().
break;
case '&':
if (Next() == '&') {
if (operand == nullptr) {
operand = zone()->template New<RegExpClassRanges>(zone(), ranges);
operand =
zone()->template New<RegExpClassSetOperand>(ranges, strings);
}
return ParseClassIntersection(builder, is_negated, operand);
return ParseClassIntersection(builder, is_negated, operand,
operand_type);
}
}
return ParseClassUnion(builder, is_negated, operand, operand_type, ranges);
return ParseClassUnion(builder, is_negated, operand, operand_type, ranges,
strings);
}
}

View File

@ -48,10 +48,29 @@ assertEarlyError('/[~~]/v');
assertEarlyError('/[a&&&]/v');
assertEarlyError('/[&&&a]/v');
// Unterminated string disjunction.
assertEarlyError('/[\q{foo]/v');
assertEarlyError('/[\q{foo|]/v');
// Negating classes containing strings is not allowed.
assertEarlyError('/[^\q{foo}]/v');
assertEarlyError('/[^\q{}]/v'); // Empty string counts as string.
assertEarlyError('/[^[\q{foo}]]/v');
assertEarlyError('/[^[\p{Basic_Emoji}]/v');
assertEarlyError('/[^\q{foo}&&\q{bar}]/v');
assertEarlyError('/[^\q{foo}--\q{bar}]/v');
// Exceptions when negating the class is allowed:
// The "string" contains only single characters.
/[^\q{a|b|c}]/v;
// Not all operands of an intersection contain strings.
/[^\q{foo}&&\q{bar}&&a]/v;
// The first operand of a subtraction doesn't contain strings.
/[^a--\q{foo}--\q{bar}]/v;
const allAscii = Array.from(
{length: 127}, (v, i) => { return String.fromCharCode(i); });
function check(re, expectMatch, expectNoMatch) {
function check(re, expectMatch, expectNoMatch = [], negationValid = true) {
if (expectNoMatch === undefined) {
const expectSet = new Set(expectMatch.map(val => {
return (typeof val == 'number') ? String(val) : val; }));
@ -63,14 +82,22 @@ function check(re, expectMatch, expectNoMatch) {
for (const noMatch of expectNoMatch) {
assertFalse(re.test(noMatch), `${re}.test(${noMatch})`);
}
// Nest the current RegExp in a negated class and check expectations are
// inversed.
const inverted = new RegExp(`[^${re.source}]`, re.flags);
for (const match of expectMatch) {
assertFalse(inverted.test(match), `${inverted}.test(${match})`);
}
for (const noMatch of expectNoMatch) {
assertTrue(inverted.test(noMatch), `${inverted}.test(${noMatch})`);
if (!negationValid) {
// Negation of classes containing strings is an error.
const negated = `[^${re.source}]`;
assertThrows(() => { new RegExp(negated, `${re.flags}`); }, SyntaxError,
`Invalid regular expression: /${negated}/: ` +
`Negated character class may contain strings`);
} else {
// Nest the current RegExp in a negated class and check expectations are
// inversed.
const inverted = new RegExp(`[^${re.source}]`, re.flags);
for (const match of expectMatch) {
assertFalse(inverted.test(match), `${inverted}.test(${match})`);
}
for (const noMatch of expectNoMatch) {
assertTrue(inverted.test(noMatch), `${inverted}.test(${noMatch})`);
}
}
}
@ -126,6 +153,41 @@ check(/[Ā-č]/v, Array.from('ĀāĂ㥹Ćć'), Array.from('abc'));
check(/[ĀĂĄĆ]/vi, Array.from('ĀāĂ㥹Ćć'), Array.from('abc'));
check(/[āăąć]/vi, Array.from('ĀāĂ㥹Ćć'), Array.from('abc'));
// String disjunctions
check(/[\q{foo|bar|0|5}]/v, ['foo', 'bar', 0, 5], ['fo', 'baz'], false)
check(/[\q{foo|bar}[05]]/v, ['foo', 'bar', 0, 5], ['fo', 'baz'], false)
check(/[\q{foo|bar|0|5}&&\q{bar}]/v, ['bar'], ['foo', 0, 5, 'fo', 'baz'], false)
// The second operand of the intersection doesn't contain strings, so the result
// will not contain strings and therefore negation is valid.
check(/[\q{foo|bar|0|5}&&\d]/v, [0, 5], ['foo', 'bar', 'fo', 'baz'], true)
check(/[\q{foo|bar|0|5}--\q{foo}]/v, ['bar', 0, 5], ['foo', 'fo', 'baz'], false)
check(/[\q{foo|bar|0|5}--\d]/v, ['foo', 'bar'], [0, 5, 'fo', 'baz'], false)
check(
/[\q{foo|bar|0|5}&&\q{bAr}]/vi, ['bar', 'bAr', 'BAR'],
['foo', 0, 5, 'fo', 'baz'], false)
check(
/[\q{foo|bar|0|5}--\q{FoO}]/vi, ['bar', 'bAr', 'BAR', 0, 5],
['foo', 'FOO', 'fo', 'baz'], false)
check(/[\q{ĀĂĄĆ|AaAc}&&\q{āăąć}]/vi, ['ĀĂĄĆ', 'āăąć'], ['AaAc'], false);
check(
/[\q{ĀĂĄĆ|AaAc}--\q{āăąć}]/vi, ['AaAc', 'aAaC'], ['ĀĂĄĆ', 'āăąć'],
false);
// An empty string disjunction matches the empty string, so exec() succeeds
// with an empty match.
let res = /[\q{}]/v.exec('foo');
assertNotNull(res);
assertEquals(1, res.length);
assertEquals('', res[0]);
// Ensure longest strings are matched first.
assertEquals(['xyz'], /[a-c\q{W|xy|xyz}]/v.exec('xyzabc'))
assertEquals(['xyz'], /[a-c\q{W|xyz|xy}]/v.exec('xyzabc'))
assertEquals(['xyz'], /[\q{W|xyz|xy}a-c]/v.exec('xyzabc'))
// Empty string is last.
assertEquals(['a'], /[\q{W|}a-c]/v.exec('abc'))
// Some more sophisticated tests taken from
// https://v8.dev/features/regexp-v-flag
assertFalse(/[\p{Script_Extensions=Greek}--π]/v.test('π'));

View File

@ -325,29 +325,17 @@
'built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-P': [SKIP],
'built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-u': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-difference-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-difference-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-union-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-class-union-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-difference-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-difference-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-intersection-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-intersection-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-union-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/character-union-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class': [SKIP],
'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character': [SKIP],
@ -366,24 +354,9 @@
'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-property-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-property-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-difference-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-difference-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-property-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-string-literal': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-union-character': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-union-character-property-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-union-property-of-strings-escape': [SKIP],
'built-ins/RegExp/unicodeSets/generated/string-literal-union-string-literal': [SKIP],
# https://bugs.chromium.org/p/v8/issues/detail?id=13173
'built-ins/RegExp/duplicate-named-capturing-groups-syntax': [FAIL],
@ -1049,18 +1022,24 @@
'built-ins/RegExp/unicodeSets/generated/character-difference-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-intersection-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-string-literal': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-string-literal': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-string-literal': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-property-escape': [PASS,FAIL],
'built-ins/RegExp/unicodeSets/generated/string-literal-union-character-property-escape': [PASS,FAIL],
# Unicode in identifiers.
'language/identifiers/part-unicode-*': [FAIL],