[regexp] Support string disjunctions in unicode set mode

Add support for string disjunctions within regular expression character
classes in unicode sets mode (/v).

Bug: v8:11935
Change-Id: Ida607123ced11c4dc3dfc687996f6abffeb6eeff
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4051243
Commit-Queue: Patrick Thier <pthier@chromium.org>
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84480}
2022-11-25 10:05:04 +01:00 · 2022-11-25 10:05:04 +01:00 · 5d7782f694
commit 5d7782f694
parent 1211605a39
8 changed files with 587 additions and 274 deletions
--- a/src/regexp/experimental/experimental-compiler.cc
+++ b/src/regexp/experimental/experimental-compiler.cc
@ -69,6 +69,11 @@ class CanBeHandledVisitor final : private RegExpVisitor {
    return nullptr;
  }

+  void* VisitClassSetOperand(RegExpClassSetOperand* node, void*) override {
+    result_ = !node->has_strings();
+    return nullptr;
+  }
+
  void* VisitClassSetExpression(RegExpClassSetExpression* node,
                                void*) override {
    result_ = false;
@ -391,11 +396,10 @@ class CompileVisitor : private RegExpVisitor {
    return nullptr;
  }

-  void* VisitClassRanges(RegExpClassRanges* node, void*) override {
+  void CompileCharacterRanges(ZoneList<CharacterRange>* ranges, bool negated) {
    // A character class is compiled as Disjunction over its `CharacterRange`s.
-    ZoneList<CharacterRange>* ranges = node->ranges(zone_);
    CharacterRange::Canonicalize(ranges);
-    if (node->is_negated()) {
+    if (negated) {
      // The complement of a disjoint, non-adjacent (i.e. `Canonicalize`d)
      // union of k intervals is a union of at most k + 1 intervals.
      ZoneList<CharacterRange>* negated =
@ -422,6 +426,17 @@ class CompileVisitor : private RegExpVisitor {

      assembler_.ConsumeRange(from_uc16, to_uc16);
    });
+  }
+
+  void* VisitClassRanges(RegExpClassRanges* node, void*) override {
+    CompileCharacterRanges(node->ranges(zone_), node->is_negated());
+    return nullptr;
+  }
+
+  void* VisitClassSetOperand(RegExpClassSetOperand* node, void*) override {
+    // TODO(v8:11935): Support strings.
+    DCHECK(!node->has_strings());
+    CompileCharacterRanges(node->ranges(), false);
    return nullptr;
  }

--- a/src/regexp/regexp-ast.cc
+++ b/src/regexp/regexp-ast.cc
@ -193,6 +193,22 @@ void* RegExpUnparser::VisitClassRanges(RegExpClassRanges* that, void* data) {
  return nullptr;
 }

+void* RegExpUnparser::VisitClassSetOperand(RegExpClassSetOperand* that,
+                                           void* data) {
+  os_ << "![";
+  for (int i = 0; i < that->ranges()->length(); i++) {
+    if (i > 0) os_ << " ";
+    VisitCharacterRange(that->ranges()->at(i));
+  }
+  for (auto iter : *that->strings()) {
+    os_ << " '";
+    os_ << std::string(iter.first.begin(), iter.first.end());
+    os_ << "'";
+  }
+  os_ << "]";
+  return nullptr;
+}
+
 void* RegExpUnparser::VisitClassSetExpression(RegExpClassSetExpression* that,
                                              void* data) {
  switch (that->operation()) {
@ -362,6 +378,37 @@ RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
  }
 }

+RegExpClassSetOperand::RegExpClassSetOperand(ZoneList<CharacterRange>* ranges,
+                                             CharacterClassStrings* strings)
+    : ranges_(ranges), strings_(strings) {
+  DCHECK_NOT_NULL(ranges);
+  DCHECK_NOT_NULL(strings);
+  min_match_ = 0;
+  max_match_ = 0;
+  if (!ranges->is_empty()) {
+    min_match_ = 1;
+    max_match_ = 2;
+  }
+  for (auto string : *strings) {
+    min_match_ = std::min(min_match_, string.second->min_match());
+    max_match_ = std::max(max_match_, string.second->max_match());
+  }
+}
+
+RegExpClassSetExpression::RegExpClassSetExpression(
+    OperationType op, bool is_negated, bool may_contain_strings,
+    ZoneList<RegExpTree*>* operands)
+    : operation_(op),
+      is_negated_(is_negated),
+      may_contain_strings_(may_contain_strings),
+      operands_(operands) {
+  DCHECK_NOT_NULL(operands);
+  DCHECK_IMPLIES(is_negated_, !may_contain_strings_);
+  max_match_ = 0;
+  for (auto op : *operands) {
+    max_match_ = std::max(max_match_, op->max_match());
+  }
+}

 }  // namespace internal
 }  // namespace v8
--- a/src/regexp/regexp-ast.h
+++ b/src/regexp/regexp-ast.h
@ -22,6 +22,7 @@ namespace internal {
  VISIT(Alternative)                      \
  VISIT(Assertion)                        \
  VISIT(ClassRanges)                      \
+  VISIT(ClassSetOperand)                  \
  VISIT(ClassSetExpression)               \
  VISIT(Atom)                             \
  VISIT(Quantifier)                       \
@ -365,45 +366,101 @@ class RegExpClassRanges final : public RegExpTree {
  ClassRangesFlags class_ranges_flags_;
 };

+struct CharacterClassStringLess {
+  bool operator()(const base::Vector<const base::uc32>& lhs,
+                  const base::Vector<const base::uc32>& rhs) const {
+    // Longer strings first so we generate matches for the largest string
+    // possible.
+    if (lhs.length() != rhs.length()) {
+      return lhs.length() > rhs.length();
+    }
+    for (int i = 0; i < lhs.length(); i++) {
+      if (lhs[i] != rhs[i]) {
+        return lhs[i] < rhs[i];
+      }
+    }
+    return false;
+  }
+};
+
+// A type used for strings as part of character classes (only possible in
+// unicode sets mode).
+// We use a ZoneMap instead of an UnorderedZoneMap because we need to match
+// the longest alternatives first. By using a ZoneMap with the custom comparator
+// we can avoid sorting before assembling the code.
+// Strings are likely short (the largest string in current unicode properties
+// consists of 10 code points).
+using CharacterClassStrings = ZoneMap<base::Vector<const base::uc32>,
+                                      RegExpTree*, CharacterClassStringLess>;
+
+// TODO(pthier): If we are sure we don't want to use icu::UnicodeSets
+// (performance evaluation pending), this class can be merged with
+// RegExpClassRanges.
+class RegExpClassSetOperand final : public RegExpTree {
+ public:
+  RegExpClassSetOperand(ZoneList<CharacterRange>* ranges,
+                        CharacterClassStrings* strings);
+
+  DECL_BOILERPLATE(ClassSetOperand);
+
+  bool IsTextElement() override { return true; }
+  int min_match() override { return min_match_; }
+  int max_match() override { return max_match_; }
+
+  void Union(RegExpClassSetOperand* other, Zone* zone);
+  void Intersect(RegExpClassSetOperand* other,
+                 ZoneList<CharacterRange>* temp_ranges, Zone* zone);
+  void Subtract(RegExpClassSetOperand* other,
+                ZoneList<CharacterRange>* temp_ranges, Zone* zone);
+
+  bool has_strings() const { return !strings_->empty(); }
+  ZoneList<CharacterRange>* ranges() { return ranges_; }
+  CharacterClassStrings* strings() { return strings_; }
+
+ private:
+  ZoneList<CharacterRange>* ranges_;
+  CharacterClassStrings* strings_;
+  int min_match_;
+  int max_match_;
+};
+
 class RegExpClassSetExpression final : public RegExpTree {
 public:
  enum class OperationType { kUnion, kIntersection, kSubtraction };

  RegExpClassSetExpression(OperationType op, bool is_negated,
-                           ZoneList<RegExpTree*>* operands)
-      : operation_(op), is_negated_(is_negated), operands_(operands) {}
+                           bool may_contain_strings,
+                           ZoneList<RegExpTree*>* operands);

  DECL_BOILERPLATE(ClassSetExpression);

  bool IsTextElement() override { return true; }
-  // At least 1 character is consumed.
-  int min_match() override { return 1; }
-  // Up to two code points might be consumed.
-  int max_match() override { return 2; }
+  int min_match() override { return 0; }
+  int max_match() override { return max_match_; }

  OperationType operation() const { return operation_; }
  bool is_negated() const { return is_negated_; }
+  bool may_contain_strings() const { return may_contain_strings_; }
  const ZoneList<RegExpTree*>* operands() const { return operands_; }
+  ZoneList<RegExpTree*>* operands() { return operands_; }

 private:
-  RegExpClassRanges* ToCharacterClass(Zone* zone);
-
  // Recursively evaluates the tree rooted at |root|, computing the valid
-  // CharacterRanges after applying all set operations and storing the result in
-  // |result_ranges|. |temp_ranges| is list used for intermediate results,
-  // passed as parameter to avoid allocating new lists all the time.
-  static void ComputeCharacterRanges(RegExpTree* root,
-                                     ZoneList<CharacterRange>* result_ranges,
-                                     ZoneList<CharacterRange>* temp_ranges,
-                                     Zone* zone);
+  // CharacterRanges and strings after applying all set operations.
+  // The original tree will be modified by this method, so don't store pointers
+  // to inner nodes of the tree somewhere else!
+  // Modifying the tree in-place saves memory and speeds up multiple calls of
+  // the method (e.g. when unrolling quantifiers).
+  // |temp_ranges| is used for intermediate results, passed as parameter to
+  // avoid allocating new lists all the time.
+  static RegExpClassSetOperand* ComputeExpression(
+      RegExpTree* root, ZoneList<CharacterRange>* temp_ranges, Zone* zone);

  const OperationType operation_;
  const bool is_negated_;
+  const bool may_contain_strings_;
  ZoneList<RegExpTree*>* operands_ = nullptr;
-#ifdef ENABLE_SLOW_DCHECKS
-  // Cache ranges for each node during computation for (slow) DCHECKs.
-  ZoneList<CharacterRange>* ranges_ = nullptr;
-#endif
+  int max_match_;
 };

 class RegExpAtom final : public RegExpTree {
--- a/src/regexp/regexp-compiler-tonode.cc
+++ b/src/regexp/regexp-compiler-tonode.cc
@ -535,9 +535,145 @@ RegExpNode* RegExpClassRanges::ToNode(RegExpCompiler* compiler,
  return result;
 }

+RegExpNode* RegExpClassSetOperand::ToNode(RegExpCompiler* compiler,
+                                          RegExpNode* on_success) {
+  Zone* zone = compiler->zone();
+  const int size = (has_strings() ? static_cast<int>(strings()->size()) : 0) +
+                   (ranges()->is_empty() ? 0 : 1);
+  if (size == 0) {
+    // If neither ranges nor strings are present, the operand is equal to an
+    // empty range (matching nothing).
+    ZoneList<CharacterRange>* empty =
+        zone->template New<ZoneList<CharacterRange>>(0, zone);
+    return zone->template New<RegExpClassRanges>(zone, empty)
+        ->ToNode(compiler, on_success);
+  }
+  ZoneList<RegExpTree*>* alternatives =
+      zone->template New<ZoneList<RegExpTree*>>(size, zone);
+  // Strings are sorted by length first (larger strings before shorter ones).
+  // See the comment on CharacterClassStrings.
+  // Empty strings (if present) are added after character ranges.
+  RegExpTree* empty_string = nullptr;
+  if (has_strings()) {
+    for (auto string : *strings()) {
+      if (string.second->IsEmpty()) {
+        empty_string = string.second;
+      } else {
+        alternatives->Add(string.second, zone);
+      }
+    }
+  }
+  if (!ranges()->is_empty()) {
+    alternatives->Add(zone->template New<RegExpClassRanges>(zone, ranges()),
+                      zone);
+  }
+  if (empty_string != nullptr) {
+    alternatives->Add(empty_string, zone);
+  }
+
+  RegExpTree* node = nullptr;
+  if (size == 1) {
+    DCHECK_EQ(alternatives->length(), 1);
+    node = alternatives->first();
+  } else {
+    node = zone->template New<RegExpDisjunction>(alternatives);
+  }
+  return node->ToNode(compiler, on_success);
+}
+
 RegExpNode* RegExpClassSetExpression::ToNode(RegExpCompiler* compiler,
                                             RegExpNode* on_success) {
-  return ToCharacterClass(compiler->zone())->ToNode(compiler, on_success);
+  Zone* zone = compiler->zone();
+  ZoneList<CharacterRange>* temp_ranges =
+      zone->template New<ZoneList<CharacterRange>>(4, zone);
+  RegExpClassSetOperand* root = ComputeExpression(this, temp_ranges, zone);
+  return root->ToNode(compiler, on_success);
+}
+
+void RegExpClassSetOperand::Union(RegExpClassSetOperand* other, Zone* zone) {
+  ranges()->AddAll(*other->ranges(), zone);
+  strings()->insert(other->strings()->begin(), other->strings()->end());
+}
+
+void RegExpClassSetOperand::Intersect(RegExpClassSetOperand* other,
+                                      ZoneList<CharacterRange>* temp_ranges,
+                                      Zone* zone) {
+  CharacterRange::Intersect(ranges(), other->ranges(), temp_ranges, zone);
+  std::swap(*ranges(), *temp_ranges);
+  temp_ranges->Rewind(0);
+  for (auto iter = strings()->begin(); iter != strings()->end();) {
+    if (other->strings()->find(iter->first) == other->strings()->end()) {
+      iter = strings()->erase(iter);
+    } else {
+      iter++;
+    }
+  }
+}
+
+void RegExpClassSetOperand::Subtract(RegExpClassSetOperand* other,
+                                     ZoneList<CharacterRange>* temp_ranges,
+                                     Zone* zone) {
+  CharacterRange::Subtract(ranges(), other->ranges(), temp_ranges, zone);
+  std::swap(*ranges(), *temp_ranges);
+  temp_ranges->Rewind(0);
+  for (auto iter = strings()->begin(); iter != strings()->end();) {
+    if (other->strings()->find(iter->first) != other->strings()->end()) {
+      iter = strings()->erase(iter);
+    } else {
+      iter++;
+    }
+  }
+}
+
+// static
+RegExpClassSetOperand* RegExpClassSetExpression::ComputeExpression(
+    RegExpTree* root, ZoneList<CharacterRange>* temp_ranges, Zone* zone) {
+  DCHECK(temp_ranges->is_empty());
+  if (root->IsClassSetOperand()) {
+    return root->AsClassSetOperand();
+  }
+  DCHECK(root->IsClassSetExpression());
+  RegExpClassSetExpression* node = root->AsClassSetExpression();
+  RegExpClassSetOperand* result =
+      ComputeExpression(node->operands()->at(0), temp_ranges, zone);
+  switch (node->operation()) {
+    case OperationType::kUnion: {
+      for (int i = 1; i < node->operands()->length(); i++) {
+        RegExpClassSetOperand* op =
+            ComputeExpression(node->operands()->at(i), temp_ranges, zone);
+        result->Union(op, zone);
+      }
+      CharacterRange::Canonicalize(result->ranges());
+      break;
+    }
+    case OperationType::kIntersection: {
+      for (int i = 1; i < node->operands()->length(); i++) {
+        RegExpClassSetOperand* op =
+            ComputeExpression(node->operands()->at(i), temp_ranges, zone);
+        result->Intersect(op, temp_ranges, zone);
+      }
+      break;
+    }
+    case OperationType::kSubtraction: {
+      for (int i = 1; i < node->operands()->length(); i++) {
+        RegExpClassSetOperand* op =
+            ComputeExpression(node->operands()->at(i), temp_ranges, zone);
+        result->Subtract(op, temp_ranges, zone);
+      }
+      break;
+    }
+  }
+  if (node->is_negated()) {
+    DCHECK(!result->has_strings());
+    CharacterRange::Negate(result->ranges(), temp_ranges, zone);
+    std::swap(*result->ranges(), *temp_ranges);
+    temp_ranges->Rewind(0);
+  }
+  // Store the result as the single remaining operand of the current node.
+  node->operands()->Set(0, result);
+  node->operands()->Rewind(1);
+
+  return result;
 }

 namespace {
@ -1498,128 +1634,6 @@ void CharacterSet::Canonicalize() {
  CharacterRange::Canonicalize(ranges_);
 }

-RegExpClassRanges* RegExpClassSetExpression::ToCharacterClass(Zone* zone) {
-  ZoneList<CharacterRange>* result_ranges =
-      zone->template New<ZoneList<CharacterRange>>(2, zone);
-  ZoneList<CharacterRange>* temp_ranges =
-      zone->template New<ZoneList<CharacterRange>>(2, zone);
-  ComputeCharacterRanges(this, result_ranges, temp_ranges, zone);
-  return zone->template New<RegExpClassRanges>(zone, result_ranges);
-}
-
-// static
-void RegExpClassSetExpression::ComputeCharacterRanges(
-    RegExpTree* root, ZoneList<CharacterRange>* result_ranges,
-    ZoneList<CharacterRange>* temp_ranges, Zone* zone) {
-  DCHECK_EQ(temp_ranges->length(), 0);
-  DCHECK(root->IsClassRanges() || root->IsClassSetExpression());
-  if (root->IsClassRanges()) {
-    DCHECK(!root->AsClassRanges()->is_negated());
-    ZoneList<CharacterRange>* ranges = root->AsClassRanges()->ranges(zone);
-    CharacterRange::Canonicalize(ranges);
-    result_ranges->AddAll(*ranges, zone);
-    return;
-  }
-  RegExpClassSetExpression* node = root->AsClassSetExpression();
-  switch (node->operation()) {
-    case OperationType::kUnion: {
-      ZoneList<CharacterRange>* op_ranges =
-          zone->template New<ZoneList<CharacterRange>>(2, zone);
-      for (int i = 0; i < node->operands()->length(); i++) {
-        RegExpTree* op = node->operands()->at(i);
-        ComputeCharacterRanges(op, op_ranges, temp_ranges, zone);
-        result_ranges->AddAll(*op_ranges, zone);
-        op_ranges->Rewind(0);
-      }
-      CharacterRange::Canonicalize(result_ranges);
-      break;
-    }
-    case OperationType::kIntersection: {
-      ZoneList<CharacterRange>* op_ranges =
-          zone->template New<ZoneList<CharacterRange>>(2, zone);
-      ComputeCharacterRanges(node->operands()->at(0), op_ranges, temp_ranges,
-                             zone);
-      result_ranges->AddAll(*op_ranges, zone);
-      op_ranges->Rewind(0);
-      for (int i = 1; i < node->operands()->length(); i++) {
-        ComputeCharacterRanges(node->operands()->at(i), op_ranges, temp_ranges,
-                               zone);
-        CharacterRange::Intersect(result_ranges, op_ranges, temp_ranges, zone);
-        std::swap(*result_ranges, *temp_ranges);
-        temp_ranges->Rewind(0);
-        op_ranges->Rewind(0);
-      }
-      break;
-    }
-    case OperationType::kSubtraction: {
-      ZoneList<CharacterRange>* op_ranges =
-          zone->template New<ZoneList<CharacterRange>>(2, zone);
-      ComputeCharacterRanges(node->operands()->at(0), op_ranges, temp_ranges,
-                             zone);
-      result_ranges->AddAll(*op_ranges, zone);
-      op_ranges->Rewind(0);
-      for (int i = 1; i < node->operands()->length(); i++) {
-        ComputeCharacterRanges(node->operands()->at(i), op_ranges, temp_ranges,
-                               zone);
-        CharacterRange::Subtract(result_ranges, op_ranges, temp_ranges, zone);
-        std::swap(*result_ranges, *temp_ranges);
-        temp_ranges->Rewind(0);
-        op_ranges->Rewind(0);
-      }
-#ifdef ENABLE_SLOW_DCHECKS
-      // Check that the result is equal to subtracting the union of all RHS
-      // operands from the LHS operand.
-      // TODO(pthier): It is unclear whether this variant is faster or slower
-      // than subtracting multiple ranges in practice.
-      ZoneList<CharacterRange>* lhs_range =
-          node->operands()->at(0)->IsClassRanges()
-              ? node->operands()->at(0)->AsClassRanges()->ranges(zone)
-              : node->operands()->at(0)->AsClassSetExpression()->ranges_;
-      ZoneList<CharacterRange>* rhs_union =
-          zone->template New<ZoneList<CharacterRange>>(2, zone);
-      for (int i = 1; i < node->operands()->length(); i++) {
-        ZoneList<CharacterRange>* op_range =
-            node->operands()->at(i)->IsClassRanges()
-                ? node->operands()->at(i)->AsClassRanges()->ranges(zone)
-                : node->operands()->at(i)->AsClassSetExpression()->ranges_;
-        rhs_union->AddAll(*op_range, zone);
-      }
-      CharacterRange::Canonicalize(rhs_union);
-      ZoneList<CharacterRange>* ranges_check =
-          zone->template New<ZoneList<CharacterRange>>(2, zone);
-      CharacterRange::Subtract(lhs_range, rhs_union, ranges_check, zone);
-      DCHECK(CharacterRange::Equals(result_ranges, ranges_check));
-
-      // Check that the result is equal to intersecting the LHS operand with the
-      // complemented union of all RHS operands
-      ZoneList<CharacterRange>* rhs_union_negated =
-          zone->template New<ZoneList<CharacterRange>>(rhs_union->length(),
-                                                       zone);
-      CharacterRange::Negate(rhs_union, rhs_union_negated, zone);
-      ranges_check->Rewind(0);
-      CharacterRange::Intersect(lhs_range, rhs_union_negated, ranges_check,
-                                zone);
-      DCHECK(CharacterRange::Equals(result_ranges, ranges_check));
-#endif
-      break;
-    }
-  }
-
-  if (node->is_negated()) {
-    CharacterRange::Negate(result_ranges, temp_ranges, zone);
-    std::swap(*result_ranges, *temp_ranges);
-    temp_ranges->Rewind(0);
-  }
-
-  DCHECK_EQ(temp_ranges->length(), 0);
-
-#ifdef ENABLE_SLOW_DCHECKS
-  // Cache results for DCHECKs.
-  node->ranges_ =
-      zone->template New<ZoneList<CharacterRange>>(*result_ranges, zone);
-#endif
-}
-
 // static
 void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
  if (character_ranges->length() <= 1) return;
@ -1740,6 +1754,9 @@ void CharacterRange::Subtract(const ZoneList<CharacterRange>* src,
  DCHECK(CharacterRange::IsCanonical(src));
  DCHECK(CharacterRange::IsCanonical(to_remove));
  DCHECK_EQ(0, result->length());
+
+  if (src->is_empty()) return;
+
  int src_index = 0;
  int to_remove_index = 0;
  base::uc32 from = src->at(src_index).from();
--- a/src/regexp/regexp-error.h
+++ b/src/regexp/regexp-error.h
@ -44,7 +44,9 @@ namespace internal {
  T(UnterminatedCharacterClass, "Unterminated character class")           \
  T(OutOfOrderCharacterClass, "Range out of order in character class")    \
  T(InvalidClassSetOperation, "Invalid set operation in character class") \
-  T(InvalidCharacterInClass, "Invalid character in character class")
+  T(InvalidCharacterInClass, "Invalid character in character class")      \
+  T(NegatedCharacterClassWithStrings,                                     \
+    "Negated character class may contain strings")

 enum class RegExpError : uint32_t {
 #define TEMPLATE(NAME, STRING) k##NAME,
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@ -18,6 +18,8 @@

 #ifdef V8_INTL_SUPPORT
 #include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/usetiter.h"
 #endif  // V8_INTL_SUPPORT

 namespace v8 {
@ -62,6 +64,7 @@ class RegExpTextBuilder {
  void FlushPendingSurrogate();
  void FlushText();
  RegExpTree* PopLastAtom();
+  RegExpTree* ToRegExp();

 private:
  static const base::uc16 kNoPendingSurrogate = 0;
@ -280,6 +283,15 @@ RegExpTree* RegExpTextBuilder::PopLastAtom() {
  return nullptr;
 }

+RegExpTree* RegExpTextBuilder::ToRegExp() {
+  FlushText();
+  size_t num_alternatives = terms_->size();
+  if (num_alternatives == 0) return zone()->New<RegExpEmpty>();
+  if (num_alternatives == 1) return terms_->back();
+  return zone()->New<RegExpAlternative>(zone()->New<ZoneList<RegExpTree*>>(
+      base::VectorOf(terms_->begin(), terms_->size()), zone()));
+}
+
 // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
 class RegExpBuilder {
 public:
@ -455,12 +467,15 @@ class RegExpParserImpl final {
                                    ZoneList<CharacterRange>* ranges,
                                    Zone* zone,
                                    bool add_unicode_case_equivalents);
-  RegExpTree* ParseClassStringDisjunction();
+  RegExpTree* ParseClassStringDisjunction(ZoneList<CharacterRange>* ranges,
+                                          CharacterClassStrings* strings);
  RegExpTree* ParseClassSetOperand(const RegExpBuilder* builder,
                                   ClassSetOperandType* type_out);
  RegExpTree* ParseClassSetOperand(const RegExpBuilder* builder,
                                   ClassSetOperandType* type_out,
-                                   ZoneList<CharacterRange>* ranges);
+                                   ZoneList<CharacterRange>* ranges,
+                                   CharacterClassStrings* strings);
+  base::uc32 ParseClassSetCharacter();
  // Parses and returns a single escaped character.
  base::uc32 ParseCharacterEscape(InClassEscapeState in_class_escape_state,
                                  bool* is_escaped_unicode_character);
@ -468,12 +483,14 @@ class RegExpParserImpl final {
  RegExpTree* ParseClassUnion(const RegExpBuilder* builder, bool is_negated,
                              RegExpTree* first_operand,
                              ClassSetOperandType first_operand_type,
-                              ZoneList<CharacterRange>* ranges);
+                              ZoneList<CharacterRange>* ranges,
+                              CharacterClassStrings* strings);
  RegExpTree* ParseClassIntersection(const RegExpBuilder* builder,
-                                     bool is_negated,
-                                     RegExpTree* first_operand);
+                                     bool is_negated, RegExpTree* first_operand,
+                                     ClassSetOperandType first_operand_type);
  RegExpTree* ParseClassSubtraction(const RegExpBuilder* builder,
-                                    bool is_negated, RegExpTree* first_operand);
+                                    bool is_negated, RegExpTree* first_operand,
+                                    ClassSetOperandType first_operand_type);
  RegExpTree* ParseCharacterClass(const RegExpBuilder* state);

  base::uc32 ParseOctalLiteral();
@ -498,15 +515,15 @@ class RegExpParserImpl final {
  int captures_started() const { return captures_started_; }
  int position() const { return next_pos_ - 1; }
  bool failed() const { return failed_; }
+  RegExpFlags flags() const { return top_level_flags_; }
  bool IsUnicodeMode() const {
    // Either /v or /u enable UnicodeMode
    // TODO(v8:11935): Change permalink once proposal is in stage 4.
    // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-parsepattern
-    return IsUnicode(top_level_flags_) || IsUnicodeSets(top_level_flags_) ||
-           force_unicode_;
+    return IsUnicode(flags()) || IsUnicodeSets(flags()) || force_unicode_;
  }
-  bool unicode_sets() const { return IsUnicodeSets(top_level_flags_); }
-  bool ignore_case() const { return IsIgnoreCase(top_level_flags_); }
+  bool unicode_sets() const { return IsUnicodeSets(flags()); }
+  bool ignore_case() const { return IsIgnoreCase(flags()); }

  static bool IsSyntaxCharacterOrSlash(base::uc32 c);
  static bool IsClassSetSyntaxCharacter(base::uc32 c);
@ -869,7 +886,7 @@ template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
  // Used to store current state while parsing subexpressions.
  RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
-                                  0, nullptr, top_level_flags_, zone());
+                                  0, nullptr, flags(), zone());
  RegExpParserState* state = &initial_state;
  // Cache the builder in a local variable for quick access.
  RegExpBuilder* builder = initial_state.builder();
@ -2377,10 +2394,27 @@ bool RegExpParserImpl<CharT>::TryParseCharacterClassEscape(
  }
 }

+namespace {
+
+// Adds |normalized_string| to |ranges| as a singleton if its length is 1;
+// otherwise adds it to |strings| (as key, mapped to |regexp_string|).
+void AddClassString(ZoneList<base::uc32>* normalized_string,
+                    RegExpTree* regexp_string, ZoneList<CharacterRange>* ranges,
+                    CharacterClassStrings* strings, Zone* zone) {
+  if (normalized_string->length() == 1) {
+    ranges->Add(CharacterRange::Singleton(normalized_string->at(0)), zone);
+  } else {
+    strings->emplace(normalized_string->ToVector(), regexp_string);
+  }
+}
+
+}  // namespace
+
 // TODO(v8:11935): Change permalink once proposal is in stage 4.
 // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassStringDisjunction
 template <class CharT>
-RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction() {
+RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction(
+    ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) {
  DCHECK(unicode_sets());
  DCHECK_EQ(current(), '\\');
  DCHECK_EQ(Next(), 'q');
@ -2391,73 +2425,98 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassStringDisjunction() {
  }
  Advance();

-  // TODO(pthier, v8:11935): Implement.
-  return ReportError(RegExpError::kInvalidCharacterClass);
+  ZoneList<base::uc32>* string =
+      zone()->template New<ZoneList<base::uc32>>(4, zone());
+  RegExpTextBuilder::SmallRegExpTreeVector string_storage(
+      ZoneAllocator<RegExpTree*>{zone()});
+  RegExpTextBuilder string_builder(zone(), &string_storage, flags());
+
+  while (has_more() && current() != '}') {
+    if (current() == '|') {
+      AddClassString(string, string_builder.ToRegExp(), ranges, strings,
+                     zone());
+      string = zone()->template New<ZoneList<base::uc32>>(4, zone());
+      string_storage.clear();
+      Advance();
+    } else {
+      base::uc32 c = ParseClassSetCharacter(CHECK_FAILED);
+      if (ignore_case()) {
+#ifdef V8_INTL_SUPPORT
+        c = u_foldCase(c, U_FOLD_CASE_DEFAULT);
+#else
+        c = AsciiAlphaToLower(c);
+#endif
+      }
+      string->Add(c, zone());
+      string_builder.AddUnicodeCharacter(c);
+    }
+  }
+
+  AddClassString(string, string_builder.ToRegExp(), ranges, strings, zone());
+
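+  // We don't need to handle a missing closing '}' here: without it the loop
+  // above keeps consuming characters, and ParseClassSetCharacter reports an
+  // error once it reaches the character that closes the character class.
+  // NOTE(review): if the pattern ends before a '}' is seen, the loop exits via
+  // has_more() instead -- confirm the DCHECK below holds in that case.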
+  DCHECK_EQ(current(), '}');
+  Advance();
+  return nullptr;
 }

 // TODO(v8:11935): Change permalink once proposal is in stage 4.
 // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetOperand
 // Tree returned based on type_out:
-//  * kClassStringDisjunction: RegExpAlternative | RegExpAtom
 //  * kNestedClass: RegExpClassSetExpression
-//  * For all other types: RegExpClassRanges
+//  * For all other types: RegExpClassSetOperand
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
    const RegExpBuilder* builder, ClassSetOperandType* type_out) {
  ZoneList<CharacterRange>* ranges =
      zone()->template New<ZoneList<CharacterRange>>(1, zone());
+  CharacterClassStrings* strings =
+      zone()->template New<CharacterClassStrings>(zone());
  RegExpTree* tree =
-      ParseClassSetOperand(builder, type_out, ranges CHECK_FAILED);
+      ParseClassSetOperand(builder, type_out, ranges, strings CHECK_FAILED);
+  DCHECK_IMPLIES(*type_out != ClassSetOperandType::kNestedClass,
+                 tree == nullptr);
  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassSetCharacter,
                 ranges->length() == 1);
  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassSetCharacter,
-                 tree == nullptr);
-  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kCharacterClassEscape,
-                 !ranges->is_empty());
-  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kCharacterClassEscape,
-                 tree == nullptr);
-  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassStringDisjunction,
-                 ranges->is_empty());
-  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kClassStringDisjunction,
-                 tree->IsAtom() || tree->IsAlternative());
+                 strings->empty());
  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kNestedClass,
                 ranges->is_empty());
+  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kNestedClass,
+                 strings->empty());
  DCHECK_IMPLIES(*type_out == ClassSetOperandType::kNestedClass,
                 tree->IsClassSetExpression());
  // ClassSetRange is only used within ClassSetUnion().
  DCHECK_NE(*type_out, ClassSetOperandType::kClassSetRange);
+  // There are no restrictions for kCharacterClassEscape.
+  // CharacterClassEscape includes \p{}, which can contain ranges, strings or
+  // both, and \P{}, which could contain nothing (e.g. \P{Any}).
  if (tree == nullptr) {
-    tree = zone()->template New<RegExpClassRanges>(zone(), ranges);
+    tree = zone()->template New<RegExpClassSetOperand>(ranges, strings);
  }
  return tree;
 }

 // TODO(v8:11935): Change permalink once proposal is in stage 4.
 // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSetOperand
-// Based on |type_out| either a tree is returned or ranges modifed (never both).
-// Tree returned based on type_out:
-//  * kClassStringDisjunction: RegExpAlternative | RegExpAtom
-//  * kNestedClass: RegExpClassSetExpression
-// For all other types, ranges is modified and nullptr is returned.
+// Based on |type_out| either a tree is returned or ranges/strings modified.
+// If a tree is returned, ranges/strings are not modified.
+// If |type_out| is kNestedClass, a tree of type RegExpClassSetExpression is
+// returned. Otherwise ranges/strings are modified and nullptr is returned.
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
    const RegExpBuilder* builder, ClassSetOperandType* type_out,
-    ZoneList<CharacterRange>* ranges) {
+    ZoneList<CharacterRange>* ranges, CharacterClassStrings* strings) {
  DCHECK(unicode_sets());
-  const base::uc32 c = current();
+  base::uc32 c = current();
  if (c == '\\') {
-    base::uc32 next = Next();
-    switch (next) {
-      case 'b':
-        *type_out = ClassSetOperandType::kClassSetCharacter;
-        ranges->Add(CharacterRange::Singleton('\b'), zone());
-        Advance(2);
-        return nullptr;
-      case 'q':
-        *type_out = ClassSetOperandType::kClassStringDisjunction;
-        return ParseClassStringDisjunction();
-      case kEndMarker:
-        return ReportError(RegExpError::kEscapeAtEndOfPattern);
+    const base::uc32 next = Next();
+    if (next == 'q') {
+      *type_out = ClassSetOperandType::kClassStringDisjunction;
+      ParseClassStringDisjunction(ranges, strings CHECK_FAILED);
+      return nullptr;
    }
    static constexpr InClassEscapeState kInClassEscape =
        InClassEscapeState::kInClass;
@ -2467,44 +2526,86 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSetOperand(
      *type_out = ClassSetOperandType::kCharacterClassEscape;
      return nullptr;
    }
-
-    bool dummy = false;  // Unused.
-    base::uc32 escaped_char = ParseCharacterEscape(kInClassEscape, &dummy);
-    *type_out = ClassSetOperandType::kClassSetCharacter;
-    ranges->Add(CharacterRange::Singleton(escaped_char), zone());
-    return nullptr;
  }
+
  if (c == '[') {
    *type_out = ClassSetOperandType::kNestedClass;
    return ParseCharacterClass(builder);
  }
-  if (IsClassSetSyntaxCharacter(c)) {
-    return ReportError(RegExpError::kInvalidCharacterInClass);
-  }
-  if (IsClassSetReservedDoublePunctuator(c)) {
-    return ReportError(RegExpError::kInvalidClassSetOperation);
-  }
+
  *type_out = ClassSetOperandType::kClassSetCharacter;
+  c = ParseClassSetCharacter(CHECK_FAILED);
  ranges->Add(CharacterRange::Singleton(c), zone());
-  Advance();
  return nullptr;
 }

+template <class CharT>
+base::uc32 RegExpParserImpl<CharT>::ParseClassSetCharacter() {
+  DCHECK(unicode_sets());  // Only valid in unicode sets (/v) mode.
+  const base::uc32 c = current();
+  if (c == '\\') {
+    const base::uc32 next = Next();
+    switch (next) {
+      case 'b':
+        Advance(2);  // Consume both the backslash and the 'b'.
+        return '\b';
+      case kEndMarker:
+        ReportError(RegExpError::kEscapeAtEndOfPattern);
+        return 0;  // Placeholder value; the error was reported above.
+    }
+    static constexpr InClassEscapeState kInClassEscape =
+        InClassEscapeState::kInClass;
+
+    bool dummy = false;  // Out-parameter of ParseCharacterEscape; never read.
+    return ParseCharacterEscape(kInClassEscape, &dummy);
+  }
+  if (IsClassSetSyntaxCharacter(c)) {
+    ReportError(RegExpError::kInvalidCharacterInClass);
+    return 0;  // Placeholder value; the error was reported above.
+  }
+  if (IsClassSetReservedDoublePunctuator(c)) {
+    ReportError(RegExpError::kInvalidClassSetOperation);
+    return 0;  // Placeholder value; the error was reported above.
+  }
+  Advance();  // Plain character: consume it and return it unchanged.
+  return c;
+}
+
+namespace {
+
+bool MayContainStrings(ClassSetOperandType type, RegExpTree* operand) {
+  switch (type) {
+    case ClassSetOperandType::kClassSetCharacter:
+    case ClassSetOperandType::kClassSetRange:
+      return false;  // Reported as string-free without inspecting |operand|.
+    case ClassSetOperandType::kCharacterClassEscape:
+    case ClassSetOperandType::kClassStringDisjunction:
+      return operand->AsClassSetOperand()->has_strings();  // Ask the operand.
+    case ClassSetOperandType::kNestedClass:
+      if (operand->IsClassRanges()) return false;  // Ranges only, no strings.
+      return operand->AsClassSetExpression()->may_contain_strings();
+  }
+}
+
+}  // namespace
+
 // TODO(v8:11935): Change permalink once proposal is in stage 4.
 // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassUnion
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
    const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
-    ClassSetOperandType first_operand_type, ZoneList<CharacterRange>* ranges) {
+    ClassSetOperandType first_operand_type, ZoneList<CharacterRange>* ranges,
+    CharacterClassStrings* strings) {
  DCHECK(unicode_sets());
  ZoneList<RegExpTree*>* operands =
      zone()->template New<ZoneList<RegExpTree*>>(2, zone());
+  bool may_contain_strings = false;
  // Add the lhs to operands if necessary.
-  // Either the lhs values were added to |ranges| (in which case |first_operand|
-  // is null), or the lhs was evaluated to a tree and passed as |first_operand|
-  // (in which case |ranges| are empty).
-  DCHECK_EQ(first_operand != nullptr, ranges->is_empty());
+  // Either the lhs values were added to |ranges|/|strings| (in which case
+  // |first_operand| is nullptr), or the lhs was evaluated to a tree and passed
+  // as |first_operand| (in which case |ranges| and |strings| are empty).
  if (first_operand != nullptr) {
+    may_contain_strings = MayContainStrings(first_operand_type, first_operand);
    operands->Add(first_operand, zone());
  }
  ClassSetOperandType last_type = first_operand_type;
@ -2531,7 +2632,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
      if (last_type != ClassSetOperandType::kClassSetCharacter) {
        return ReportError(RegExpError::kInvalidCharacterClass);
      }
-      ParseClassSetOperand(builder, &last_type, ranges CHECK_FAILED);
+      ParseClassSetOperand(builder, &last_type, ranges, strings CHECK_FAILED);
      if (last_type != ClassSetOperandType::kClassSetCharacter) {
        return ReportError(RegExpError::kInvalidCharacterClass);
      }
@ -2550,18 +2651,22 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
      last_type = ClassSetOperandType::kClassSetRange;
    } else {
      DCHECK_NE(current(), '-');
-      RegExpTree* operand =
-          ParseClassSetOperand(builder, &last_type, ranges CHECK_FAILED);
+      RegExpTree* operand = ParseClassSetOperand(builder, &last_type, ranges,
+                                                 strings CHECK_FAILED);
      if (operand != nullptr) {
+        may_contain_strings |= MayContainStrings(last_type, operand);
        // Add the range we started building as operand and reset the current
        // range.
-        if (!ranges->is_empty()) {
+        if (!ranges->is_empty() || !strings->empty()) {
          if (needs_case_folding) {
            CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
          }
-          operands->Add(zone()->template New<RegExpClassRanges>(zone(), ranges),
-                        zone());
+          may_contain_strings |= !strings->empty();
+          operands->Add(
+              zone()->template New<RegExpClassSetOperand>(ranges, strings),
+              zone());
          ranges = zone()->template New<ZoneList<CharacterRange>>(2, zone());
+          strings = zone()->template New<CharacterClassStrings>(zone());
        }
        operands->Add(operand, zone());
      }
@ -2573,26 +2678,37 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
  }

  // Add the range we started building as operand.
-  if (!ranges->is_empty()) {
+  if (!ranges->is_empty() || !strings->empty()) {
    if (needs_case_folding) {
      CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
    }
-    operands->Add(zone()->template New<RegExpClassRanges>(zone(), ranges),
+    may_contain_strings |= !strings->empty();
+    operands->Add(zone()->template New<RegExpClassSetOperand>(ranges, strings),
                  zone());
  }
+
  DCHECK_EQ(current(), ']');
  Advance();
+
+  if (is_negated && may_contain_strings) {
+    return ReportError(RegExpError::kNegatedCharacterClassWithStrings);
+  }
+
  return zone()->template New<RegExpClassSetExpression>(
-      RegExpClassSetExpression::OperationType::kUnion, is_negated, operands);
+      RegExpClassSetExpression::OperationType::kUnion, is_negated,
+      may_contain_strings, operands);
 }

 // TODO(v8:11935): Change permalink once proposal is in stage 4.
 // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassIntersection
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection(
-    const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand) {
+    const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
+    ClassSetOperandType first_operand_type) {
  DCHECK(unicode_sets());
  DCHECK(current() == '&' && Next() == '&');
+  bool may_contain_strings =
+      MayContainStrings(first_operand_type, first_operand);
  ZoneList<RegExpTree*>* operands =
      zone()->template New<ZoneList<RegExpTree*>>(2, zone());
  operands->Add(first_operand, zone());
@ -2606,27 +2722,38 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassIntersection(
      return ReportError(RegExpError::kInvalidCharacterInClass);
    }

-    ClassSetOperandType dummy;  // unused
-    RegExpTree* operand = ParseClassSetOperand(builder, &dummy CHECK_FAILED);
+    ClassSetOperandType operand_type;
+    RegExpTree* operand =
+        ParseClassSetOperand(builder, &operand_type CHECK_FAILED);
+    may_contain_strings &= MayContainStrings(operand_type, operand);
    operands->Add(operand, zone());
  }
  if (!has_more()) {
    return ReportError(RegExpError::kUnterminatedCharacterClass);
  }
+  if (is_negated && may_contain_strings) {
+    return ReportError(RegExpError::kNegatedCharacterClassWithStrings);
+  }
  DCHECK_EQ(current(), ']');
  Advance();
  return zone()->template New<RegExpClassSetExpression>(
      RegExpClassSetExpression::OperationType::kIntersection, is_negated,
-      operands);
+      may_contain_strings, operands);
 }

 // TODO(v8:11935): Change permalink once proposal is in stage 4.
 // https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#prod-ClassSubtraction
 template <class CharT>
 RegExpTree* RegExpParserImpl<CharT>::ParseClassSubtraction(
-    const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand) {
+    const RegExpBuilder* builder, bool is_negated, RegExpTree* first_operand,
+    ClassSetOperandType first_operand_type) {
  DCHECK(unicode_sets());
  DCHECK(current() == '-' && Next() == '-');
+  const bool may_contain_strings =
+      MayContainStrings(first_operand_type, first_operand);
+  if (is_negated && may_contain_strings) {
+    return ReportError(RegExpError::kNegatedCharacterClassWithStrings);
+  }
  ZoneList<RegExpTree*>* operands =
      zone()->template New<ZoneList<RegExpTree*>>(2, zone());
  operands->Add(first_operand, zone());
@ -2646,7 +2773,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassSubtraction(
  Advance();
  return zone()->template New<RegExpClassSetExpression>(
      RegExpClassSetExpression::OperationType::kSubtraction, is_negated,
-      operands);
+      may_contain_strings, operands);
 }

 // https://tc39.es/ecma262/#prod-CharacterClass
@ -2684,27 +2811,34 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
                                                   character_class_flags);
  } else {
    ClassSetOperandType operand_type;
-    RegExpTree* operand =
-        ParseClassSetOperand(builder, &operand_type, ranges CHECK_FAILED);
+    CharacterClassStrings* strings =
+        zone()->template New<CharacterClassStrings>(zone());
+    RegExpTree* operand = ParseClassSetOperand(builder, &operand_type, ranges,
+                                               strings CHECK_FAILED);
    switch (current()) {
      case '-':
        if (Next() == '-') {
          if (operand == nullptr) {
-            operand = zone()->template New<RegExpClassRanges>(zone(), ranges);
+            operand =
+                zone()->template New<RegExpClassSetOperand>(ranges, strings);
          }
-          return ParseClassSubtraction(builder, is_negated, operand);
+          return ParseClassSubtraction(builder, is_negated, operand,
+                                       operand_type);
        }
        // ClassSetRange is handled in ParseClassUnion().
        break;
      case '&':
        if (Next() == '&') {
          if (operand == nullptr) {
-            operand = zone()->template New<RegExpClassRanges>(zone(), ranges);
+            operand =
+                zone()->template New<RegExpClassSetOperand>(ranges, strings);
          }
-          return ParseClassIntersection(builder, is_negated, operand);
+          return ParseClassIntersection(builder, is_negated, operand,
+                                        operand_type);
        }
    }
-    return ParseClassUnion(builder, is_negated, operand, operand_type, ranges);
+    return ParseClassUnion(builder, is_negated, operand, operand_type, ranges,
+                           strings);
  }
 }

--- a/test/mjsunit/harmony/regexp-unicode-sets.js
+++ b/test/mjsunit/harmony/regexp-unicode-sets.js
@ -48,10 +48,29 @@ assertEarlyError('/[~~]/v');
 assertEarlyError('/[a&&&]/v');
 assertEarlyError('/[&&&a]/v');

+// Unterminated string disjunction ('\\' keeps the backslash in the pattern).
+assertEarlyError('/[\\q{foo]/v');
+assertEarlyError('/[\\q{foo|]/v');
+
+// Negating classes containing strings is not allowed.
+assertEarlyError('/[^\\q{foo}]/v');
+assertEarlyError('/[^\\q{}]/v');  // Empty string counts as string.
+assertEarlyError('/[^[\\q{foo}]]/v');
+assertEarlyError('/[^[\\p{Basic_Emoji}]]/v');
+assertEarlyError('/[^\\q{foo}&&\\q{bar}]/v');
+assertEarlyError('/[^\\q{foo}--\\q{bar}]/v');
+// Exceptions when negating the class is allowed:
+// The "string" contains only single characters.
+/[^\q{a|b|c}]/v;
+// Not all operands of an intersection contain strings.
+/[^\q{foo}&&\q{bar}&&a]/v;
+// The first operand of a subtraction doesn't contain strings.
+/[^a--\q{foo}--\q{bar}]/v;
+
 const allAscii = Array.from(
    {length: 127}, (v, i) => { return String.fromCharCode(i); });

-function check(re, expectMatch, expectNoMatch) {
+function check(re, expectMatch, expectNoMatch = [], negationValid = true) {
  if (expectNoMatch === undefined) {
    const expectSet = new Set(expectMatch.map(val => {
      return (typeof val == 'number') ? String(val) : val; }));
@ -63,14 +82,22 @@ function check(re, expectMatch, expectNoMatch) {
  for (const noMatch of expectNoMatch) {
    assertFalse(re.test(noMatch), `${re}.test(${noMatch})`);
  }
-  // Nest the current RegExp in a negated class and check expectations are
-  // inversed.
-  const inverted = new RegExp(`[^${re.source}]`, re.flags);
-  for (const match of expectMatch) {
-    assertFalse(inverted.test(match), `${inverted}.test(${match})`);
-  }
-  for (const noMatch of expectNoMatch) {
-    assertTrue(inverted.test(noMatch), `${inverted}.test(${noMatch})`);
+  if (!negationValid) {
+    // Negation of classes containing strings is an error.
+    const negated = `[^${re.source}]`;
+    assertThrows(() => { new RegExp(negated, `${re.flags}`); }, SyntaxError,
+        `Invalid regular expression: /${negated}/: ` +
+        `Negated character class may contain strings`);
+  } else {
+    // Nest the current RegExp in a negated class and check expectations are
+    // inversed.
+    const inverted = new RegExp(`[^${re.source}]`, re.flags);
+    for (const match of expectMatch) {
+      assertFalse(inverted.test(match), `${inverted}.test(${match})`);
+    }
+    for (const noMatch of expectNoMatch) {
+      assertTrue(inverted.test(noMatch), `${inverted}.test(${noMatch})`);
+    }
  }
 }

@ -126,6 +153,41 @@ check(/[Ā-č]/v, Array.from('ĀāĂăĄąĆć'), Array.from('abc'));
 check(/[ĀĂĄĆ]/vi, Array.from('ĀāĂăĄąĆć'), Array.from('abc'));
 check(/[āăąć]/vi, Array.from('ĀāĂăĄąĆć'), Array.from('abc'));

+// String disjunctions
+check(/[\q{foo|bar|0|5}]/v, ['foo', 'bar', 0, 5], ['fo', 'baz'], false)
+check(/[\q{foo|bar}[05]]/v, ['foo', 'bar', 0, 5], ['fo', 'baz'], false)
+check(/[\q{foo|bar|0|5}&&\q{bar}]/v, ['bar'], ['foo', 0, 5, 'fo', 'baz'], false)
+// The second operand of the intersection doesn't contain strings, so the result
+// will not contain strings and therefore negation is valid.
+check(/[\q{foo|bar|0|5}&&\d]/v, [0, 5], ['foo', 'bar', 'fo', 'baz'], true)
+check(/[\q{foo|bar|0|5}--\q{foo}]/v, ['bar', 0, 5], ['foo', 'fo', 'baz'], false)
+check(/[\q{foo|bar|0|5}--\d]/v, ['foo', 'bar'], [0, 5, 'fo', 'baz'], false)
+
+check(
+    /[\q{foo|bar|0|5}&&\q{bAr}]/vi, ['bar', 'bAr', 'BAR'],
+    ['foo', 0, 5, 'fo', 'baz'], false)
+check(
+    /[\q{foo|bar|0|5}--\q{FoO}]/vi, ['bar', 'bAr', 'BAR', 0, 5],
+    ['foo', 'FOO', 'fo', 'baz'], false)
+
+check(/[\q{ĀĂĄĆ|AaAc}&&\q{āăąć}]/vi, ['ĀĂĄĆ', 'āăąć'], ['AaAc'], false);
+check(
+    /[\q{ĀĂĄĆ|AaAc}--\q{āăąć}]/vi, ['AaAc', 'aAaC'], ['ĀĂĄĆ', 'āăąć'],
+    false);
+
+// An empty string disjunction matches the empty string, so exec() succeeds.
+let res = /[\q{}]/v.exec('foo');
+assertNotNull(res);
+assertEquals(1, res.length);
+assertEquals('', res[0]);
+
+// Ensure longest strings are matched first.
+assertEquals(['xyz'], /[a-c\q{W|xy|xyz}]/v.exec('xyzabc'))
+assertEquals(['xyz'], /[a-c\q{W|xyz|xy}]/v.exec('xyzabc'))
+assertEquals(['xyz'], /[\q{W|xyz|xy}a-c]/v.exec('xyzabc'))
+// Empty string is last.
+assertEquals(['a'], /[\q{W|}a-c]/v.exec('abc'))
+
 // Some more sophisticated tests taken from
 // https://v8.dev/features/regexp-v-flag
 assertFalse(/[\p{Script_Extensions=Greek}--π]/v.test('π'));
--- a/test/test262/test262.status
+++ b/test/test262/test262.status
@ -325,29 +325,17 @@
  'built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-P': [SKIP],
  'built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-u': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-class-difference-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-class-difference-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-class-intersection-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-class-intersection-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-class-union-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-class-union-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-difference-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-difference-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-intersection-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-intersection-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/character-union-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/character-union-string-literal': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class-escape': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character': [SKIP],
@ -366,24 +354,9 @@
  'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-character-property-escape': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-property-of-strings-escape': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-union-string-literal': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-class': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-property-escape': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/string-literal-difference-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-difference-string-literal': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-class': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-property-escape': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-string-literal': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-union-character-class': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-union-character': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-union-character-property-escape': [SKIP],
  'built-ins/RegExp/unicodeSets/generated/string-literal-union-property-of-strings-escape': [SKIP],
-  'built-ins/RegExp/unicodeSets/generated/string-literal-union-string-literal': [SKIP],

  # https://bugs.chromium.org/p/v8/issues/detail?id=13173
  'built-ins/RegExp/duplicate-named-capturing-groups-syntax': [FAIL],
@ -1049,18 +1022,24 @@
  'built-ins/RegExp/unicodeSets/generated/character-difference-character-property-escape': [PASS,FAIL],
  'built-ins/RegExp/unicodeSets/generated/character-intersection-character-property-escape': [PASS,FAIL],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class-escape': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape': [PASS,FAIL],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-string-literal': [PASS,FAIL],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class-escape': [PASS,FAIL],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class': [PASS,FAIL],
-  'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character': [PASS,FAIL],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-property-escape': [PASS,FAIL],
-  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character': [PASS,FAIL],
-  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-string-literal': [PASS,FAIL],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class-escape': [PASS,FAIL],
-  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape': [PASS,FAIL],
-  'built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape': [PASS,FAIL],
  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class': [PASS,FAIL],
-  'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-string-literal': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/string-literal-difference-character-property-escape': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/string-literal-intersection-character-property-escape': [PASS,FAIL],
+  'built-ins/RegExp/unicodeSets/generated/string-literal-union-character-property-escape': [PASS,FAIL],

  # Unicode in identifiers.
  'language/identifiers/part-unicode-*': [FAIL],