[regexp] Split experimental regexp code into multiple files

Bug: v8:10765 Change-Id: I49e425d861d900ab66b6f7801cddec8a7175ac03 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2385462 Commit-Queue: Martin Bidlingmaier <mbid@google.com> Reviewed-by: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#69637}
2020-08-31 21:49:18 +02:00 · 2020-08-31 21:49:18 +02:00 · e2aa1a89dd
commit e2aa1a89dd
parent 7c912ffac1
8 changed files with 1066 additions and 893 deletions
--- a/BUILD.gn
+++ b/BUILD.gn
@ -3067,6 +3067,12 @@ v8_source_set("v8_base_without_compiler") {
    "src/profiler/tick-sample.h",
    "src/profiler/tracing-cpu-profiler.cc",
    "src/profiler/tracing-cpu-profiler.h",
+    "src/regexp/experimental/experimental-bytecode.cc",
+    "src/regexp/experimental/experimental-bytecode.h",
+    "src/regexp/experimental/experimental-compiler.cc",
+    "src/regexp/experimental/experimental-compiler.h",
+    "src/regexp/experimental/experimental-interpreter.cc",
+    "src/regexp/experimental/experimental-interpreter.h",
    "src/regexp/experimental/experimental.cc",
    "src/regexp/experimental/experimental.h",
    "src/regexp/property-sequences.cc",
--- a/src/regexp/experimental/experimental-bytecode.cc
+++ b/src/regexp/experimental/experimental-bytecode.cc
@ -0,0 +1,78 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/regexp/experimental/experimental-bytecode.h"
+
+#include <iomanip>
+
+namespace v8 {
+namespace internal {
+
+namespace {
+
+std::ostream& PrintAsciiOrHex(std::ostream& os, uc16 c) {
+  if (c < 128 && std::isprint(c)) {
+    os << static_cast<char>(c);
+  } else {
+    os << "0x" << std::hex << static_cast<int>(c);
+  }
+  return os;
+}
+
+}  // namespace
+
+std::ostream& operator<<(std::ostream& os, const RegExpInstruction& inst) {
+  switch (inst.opcode) {
+    case RegExpInstruction::CONSUME_RANGE: {
+      os << "CONSUME_RANGE [";
+      PrintAsciiOrHex(os, inst.payload.consume_range.min);
+      os << ", ";
+      PrintAsciiOrHex(os, inst.payload.consume_range.max);
+      os << "]";
+      break;
+    }
+    case RegExpInstruction::FORK:
+      os << "FORK " << inst.payload.pc;
+      break;
+    case RegExpInstruction::JMP:
+      os << "JMP " << inst.payload.pc;
+      break;
+    case RegExpInstruction::ACCEPT:
+      os << "ACCEPT";
+      break;
+  }
+  return os;
+}
+
+namespace {
+
+// The maximum number of digits required to display a non-negative number < n
+// in base 10.
+int DigitsRequiredBelow(int n) {
+  DCHECK_GE(n, 0);
+
+  int result = 1;
+  for (int i = 10; i < n; i *= 10) {
+    result += 1;
+  }
+  return result;
+}
+
+}  // namespace
+
+std::ostream& operator<<(std::ostream& os,
+                         Vector<const RegExpInstruction> insts) {
+  int inst_num = insts.length();
+  int line_digit_num = DigitsRequiredBelow(inst_num);
+
+  for (int i = 0; i != inst_num; ++i) {
+    const RegExpInstruction& inst = insts[i];
+    os << std::setfill('0') << std::setw(line_digit_num) << i << ": " << inst
+       << std::endl;
+  }
+  return os;
+}
+
+}  // namespace internal
+}  // namespace v8
--- a/src/regexp/experimental/experimental-bytecode.h
+++ b/src/regexp/experimental/experimental-bytecode.h
@ -0,0 +1,167 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_BYTECODE_H_
+#define V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_BYTECODE_H_
+
+#include <ios>
+
+#include "src/utils/vector.h"
+
+// ----------------------------------------------------------------------------
+// Definition and semantics of the EXPERIMENTAL bytecode.
+// Background:
+// - Russ Cox's blog post series on regular expression matching, in particular
+//   https://swtch.com/~rsc/regexp/regexp2.html
+// - The re2 regular regexp library: https://github.com/google/re2
+//
+// This comment describes the bytecode used by the experimental regexp engine
+// and its abstract semantics in terms of a VM.  An implementation of the
+// semantics that avoids exponential runtime can be found in `NfaInterpreter`.
+//
+// The experimental bytecode describes a non-deterministic finite automaton. It
+// runs on a multithreaded virtual machine (VM), i.e. in several threads
+// concurrently.  (These "threads" don't need to be actual operating system
+// threads.)  Apart from a list of threads, the VM maintains an immutable
+// shared input string which threads can read from.  Each thread is given by a
+// program counter (PC, index of the current instruction), a fixed number of
+// registers of indices into the input string, and a monotonically increasing
+// index which represents the current position within the input string.
+//
+// For the precise encoding of the instruction set, see the definition `struct
+// RegExpInstruction` below.  Currently we support the following instructions:
+// - CONSUME_RANGE: Check whether the codepoint of the current character is
+//   contained in a non-empty closed interval [min, max] specified in the
+//   instruction payload.  Abort this thread if false, otherwise advance the
+//   input position by 1 and continue with the next instruction.
+// - ACCEPT: Stop this thread and signify the end of a match at the current
+//   input position.
+// - FORK: If executed by a thread t, spawn a new thread t0 whose register
+//   values and input position agree with those of t, but whose PC value is set
+//   to the value specified in the instruction payload.  The register values of
+//   t and t0 agree directly after the FORK, but they can diverge.  Thread t
+//   continues with the instruction directly after the current FORK
+//   instruction.
+// - JMP: Instead of incrementing the PC value after execution of this
+//   instruction by 1, set PC of this thread to the value specified in the
+//   instruction payload and continue there.
+//
+// Special care must be exercised with respect to thread priority.  It is
+// possible that more than one thread executes an ACCEPT statement.  The output
+// of the program is given by the contents of the matching thread's registers,
+// so this is ambiguous in case of multiple matches.  To resolve the ambiguity,
+// every implementation of the VM  must output the match that a backtracking
+// implementation would output (i.e. behave the same as Irregexp).
+//
+// A backtracking implementation of the VM maintains a stack of postponed
+// threads.  Upon encountering a FORK statement, this VM will create a copy of
+// the current thread, set the copy's PC value according to the instruction
+// payload, and push it to the stack of postponed threads.  The VM will then
+// continue execution of the current thread.
+//
+// If at some point a thread t executes a MATCH statement, the VM stops and
+// outputs the registers of t.  Postponed threads are discarded.  On the other
+// hand, if a thread t is aborted because some input character didn't pass a
+// check, then the VM pops the topmost postponed thread and continues execution
+// with this thread.  If there are no postponed threads, then the VM outputs
+// failure, i.e. no matches.
+//
+// Equivalently, we can describe the behavior of the backtracking VM in terms
+// of priority: Threads are linearly ordered by priority, and matches generated
+// by threads with high priority must be preferred over matches generated by
+// threads with low priority, regardless of the chronological order in which
+// matches were found.  If a thread t executes a FORK statement and spawns a
+// thread t0, then the priority of t0 is such that the following holds:
+// * t0 < t, i.e. t0 has lower priority than t.
+// * For all threads u such that u != t and u != t0, we have t0 < u iff t < u,
+//   i.e. the t0 compares to other threads the same as t.
+// For example, if there are currently 3 threads s, t, u such that s < t < u,
+// then after t executes a fork, the thread priorities will be s < t0 < t < u.
+
+namespace v8 {
+namespace internal {
+
+// Bytecode format.
+// Currently very simple fixed-size: The opcode is encoded in the first 4
+// bytes, the payload takes another 4 bytes.
+struct RegExpInstruction {
+  enum Opcode : int32_t {
+    CONSUME_RANGE,
+    FORK,
+    JMP,
+    ACCEPT,
+  };
+
+  struct Uc16Range {
+    uc16 min;  // Inclusive.
+    uc16 max;  // Inclusive.
+  };
+
+  static RegExpInstruction ConsumeRange(Uc16Range consume_range) {
+    RegExpInstruction result;
+    result.opcode = CONSUME_RANGE;
+    result.payload.consume_range = consume_range;
+    return result;
+  }
+
+  static RegExpInstruction Fork(int32_t alt_index) {
+    RegExpInstruction result;
+    result.opcode = FORK;
+    result.payload.pc = alt_index;
+    return result;
+  }
+
+  static RegExpInstruction Jmp(int32_t alt_index) {
+    RegExpInstruction result;
+    result.opcode = JMP;
+    result.payload.pc = alt_index;
+    return result;
+  }
+
+  static RegExpInstruction Accept() {
+    RegExpInstruction result;
+    result.opcode = ACCEPT;
+    return result;
+  }
+
+  Opcode opcode;
+  union {
+    // Payload of CONSUME_RANGE:
+    Uc16Range consume_range;
+    // Payload of FORK and JMP, the next/forked program counter (pc):
+    int32_t pc;
+  } payload;
+  STATIC_ASSERT(sizeof(payload) == 4);
+};
+STATIC_ASSERT(sizeof(RegExpInstruction) == 8);
+// TODO(mbid,v8:10765): This is rather wasteful.  We can fit the opcode in 2-3
+// bits, so the remaining 29/30 bits can be used as payload.  Problem: The
+// payload of CONSUME_RANGE consists of two 16-bit values `min` and `max`, so
+// this wouldn't fit.  We could encode the payload of a CONSUME_RANGE
+// instruction by the start of the interval and its length instead, and then
+// only allows lengths that fit into 14/13 bits.  A longer range can then be
+// encoded as a disjunction of smaller ranges.
+//
+// Another thought: CONSUME_RANGEs are only valid if the payloads are such that
+// min <= max. Thus there are
+//
+//     2^16 + 2^16 - 1 + ... + 1
+//   = 2^16 * (2^16 + 1) / 2
+//   = 2^31 + 2^15
+//
+// valid payloads for a CONSUME_RANGE instruction.  If we want to fit
+// instructions into 4 bytes, we would still have almost 2^31 instructions left
+// over if we encode everything as tight as possible.  For example, we could
+// use another 2^29 values for JMP, another 2^29 for FORK, 1 value for ACCEPT,
+// and then still have almost 2^30 instructions left over for something like
+// zero-width assertions and captures.
+
+std::ostream& operator<<(std::ostream& os, const RegExpInstruction& inst);
+std::ostream& operator<<(std::ostream& os,
+                         Vector<const RegExpInstruction> insts);
+
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_BYTECODE_H_
--- a/src/regexp/experimental/experimental-compiler.cc
+++ b/src/regexp/experimental/experimental-compiler.cc
@ -0,0 +1,365 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/regexp/experimental/experimental-compiler.h"
+
+#include "src/zone/zone-list-inl.h"
+
+namespace v8 {
+namespace internal {
+
+namespace {
+
+// TODO(mbid, v8:10765): Currently the experimental engine doesn't support
+// UTF-16, but this shouldn't be too hard to implement.
+constexpr uc32 kMaxSupportedCodepoint = 0xFFFFu;
+
+class CanBeHandledVisitor final : private RegExpVisitor {
+  // Visitor to implement `ExperimentalRegExp::CanBeHandled`.
+ public:
+  static bool Check(RegExpTree* node, JSRegExp::Flags flags, Zone* zone) {
+    if (!AreSuitableFlags(flags)) {
+      return false;
+    }
+    CanBeHandledVisitor visitor(zone);
+    node->Accept(&visitor, nullptr);
+    return visitor.result_;
+  }
+
+ private:
+  explicit CanBeHandledVisitor(Zone* zone) : zone_(zone) {}
+
+  static bool AreSuitableFlags(JSRegExp::Flags flags) {
+    // TODO(mbid, v8:10765): We should be able to support all flags in the
+    // future.
+    static constexpr JSRegExp::Flags allowed_flags = JSRegExp::kGlobal;
+    return (flags & ~allowed_flags) == 0;
+  }
+
+  void* VisitDisjunction(RegExpDisjunction* node, void*) override {
+    for (RegExpTree* alt : *node->alternatives()) {
+      alt->Accept(this, nullptr);
+      if (!result_) {
+        return nullptr;
+      }
+    }
+    return nullptr;
+  }
+
+  void* VisitAlternative(RegExpAlternative* node, void*) override {
+    for (RegExpTree* child : *node->nodes()) {
+      child->Accept(this, nullptr);
+      if (!result_) {
+        return nullptr;
+      }
+    }
+    return nullptr;
+  }
+
+  void* VisitCharacterClass(RegExpCharacterClass* node, void*) override {
+    result_ = result_ && AreSuitableFlags(node->flags());
+    for (CharacterRange r : *node->ranges(zone_)) {
+      // TODO(mbid, v8:10765): We don't support full unicode yet, so we only
+      // allow character ranges that can be specified with two-byte characters.
+      if (r.to() > kMaxSupportedCodepoint) {
+        result_ = false;
+        return nullptr;
+      }
+    }
+    return nullptr;
+  }
+
+  void* VisitAssertion(RegExpAssertion* node, void*) override {
+    // TODO(mbid, v8:10765): We should be able to support at least some
+    // assertions. re2 does, too.
+    result_ = false;
+    return nullptr;
+  }
+
+  void* VisitAtom(RegExpAtom* node, void*) override {
+    result_ = result_ && AreSuitableFlags(node->flags());
+    return nullptr;
+  }
+
+  void* VisitText(RegExpText* node, void*) override {
+    for (TextElement& el : *node->elements()) {
+      el.tree()->Accept(this, nullptr);
+      if (!result_) {
+        return nullptr;
+      }
+    }
+    return nullptr;
+  }
+
+  void* VisitQuantifier(RegExpQuantifier* node, void*) override {
+    // TODO(mbid, v8:10765): Theoretically we can support arbitrary min() and
+    // max(), but the size of the automaton grows linearly with finite max().
+    // We probably want a cut-off value here, or maybe we can "virtualize" the
+    // repetitions.
+    // Non-greedy quantifiers are easy to implement, but not supported atm.
+    // It's not clear to me how a possessive quantifier would be implemented,
+    // we should check whether re2 supports this.
+    result_ = result_ && node->min() == 0 &&
+              node->max() == RegExpTree::kInfinity && node->is_greedy();
+    if (!result_) {
+      return nullptr;
+    }
+    node->body()->Accept(this, nullptr);
+    return nullptr;
+  }
+
+  void* VisitCapture(RegExpCapture* node, void*) override {
+    // TODO(mbid, v8:10765): This can be implemented with the NFA interpreter,
+    // but not with the lazy DFA.  See also re2.
+    result_ = false;
+    return nullptr;
+  }
+
+  void* VisitGroup(RegExpGroup* node, void*) override {
+    node->body()->Accept(this, nullptr);
+    return nullptr;
+  }
+
+  void* VisitLookaround(RegExpLookaround* node, void*) override {
+    // TODO(mbid, v8:10765): This will be hard to support, but not impossible I
+    // think.  See product automata.
+    result_ = false;
+    return nullptr;
+  }
+
+  void* VisitBackReference(RegExpBackReference* node, void*) override {
+    // This can't be implemented without backtracking.
+    result_ = false;
+    return nullptr;
+  }
+
+  void* VisitEmpty(RegExpEmpty* node, void*) override { return nullptr; }
+
+ private:
+  bool result_ = true;
+  Zone* zone_;
+};
+
+}  // namespace
+
+bool ExperimentalRegExpCompiler::CanBeHandled(RegExpTree* tree,
+                                              JSRegExp::Flags flags,
+                                              Zone* zone) {
+  DCHECK(FLAG_enable_experimental_regexp_engine);
+  return CanBeHandledVisitor::Check(tree, flags, zone);
+}
+
+namespace {
+
+class CompileVisitor : private RegExpVisitor {
+ public:
+  static ZoneList<RegExpInstruction> Compile(RegExpTree* tree,
+                                             JSRegExp::Flags flags,
+                                             Zone* zone) {
+    CompileVisitor compiler(zone);
+
+    tree->Accept(&compiler, nullptr);
+    compiler.code_.Add(RegExpInstruction::Accept(), zone);
+
+    return std::move(compiler.code_);
+  }
+
+ private:
+  // TODO(mbid,v8:10765): Use some upper bound for code_ capacity computed from
+  // the `tree` size we're going to compile?
+  explicit CompileVisitor(Zone* zone) : zone_(zone), code_(0, zone) {}
+
+  // Generate a disjunction of code fragments compiled by a function `alt_gen`.
+  // `alt_gen` is called repeatedly with argument `int i = 0, 1, ..., alt_num -
+  // 1` and should push code corresponding to the ith alternative onto `code_`.
+  template <class F>
+  void CompileDisjunction(int alt_num, F gen_alt) {
+    // An alternative a0 | a1 | a2 is compiled into
+    //   FORK <a2>
+    //   FORK <a1>
+    //   <a0>
+    //   JMP $end
+    //   <a1>
+    //   JMP $end
+    //   <a2>
+    // where $end is the index of the next instruction after <a2>.
+    //
+    // By the semantics of the FORK instruction (see above at definition and
+    // semantics), the forked thread has lower priority than the current
+    // thread.  This means that with the code we're generating here, the thread
+    // matching the alternative a0 is indeed the thread with the highest
+    // priority, followed by the thread for a1 and so on.
+
+    if (alt_num == 0) {
+      return;
+    }
+
+    // Record the index of the first of the alt_num - 1 fork instructions in the
+    // beginning.
+    int forks_begin = code_.length();
+    // Add FORKs to alts[alt_num - 1], alts[alt_num - 2], ..., alts[1].
+    for (int i = alt_num - 1; i != 0; --i) {
+      // The FORK's address is patched once we know the address of the ith
+      // alternative.
+      code_.Add(RegExpInstruction::Fork(-1), zone_);
+    }
+
+    // List containing the index of the final JMP instruction after each
+    // alternative but the last one.
+    ZoneList<int> jmp_indices(alt_num - 1, zone_);
+
+    for (int i = 0; i != alt_num; ++i) {
+      if (i != 0) {
+        // If this is not the first alternative, we have to patch the
+        // corresponding FORK statement in the beginning.
+        code_[forks_begin + alt_num - 1 - i].payload.pc = code_.length();
+      }
+      gen_alt(i);
+      if (i != alt_num - 1) {
+        // If this is not the last alternative, we have to emit a JMP past the
+        // remaining alternatives.  We don't know this address yet, so we have
+        // to patch patch it once all alternatives are emitted.
+        jmp_indices.Add(code_.length(), zone_);
+        code_.Add(RegExpInstruction::Jmp(-1), zone_);
+      }
+    }
+
+    // All alternatives are emitted.  Now we can patch the JMP instruction
+    // after each but the last alternative.
+    int end_index = code_.length();
+    for (int jmp_index : jmp_indices) {
+      code_[jmp_index].payload.pc = end_index;
+    }
+  }
+
+  void* VisitDisjunction(RegExpDisjunction* node, void*) override {
+    ZoneList<RegExpTree*>& alts = *node->alternatives();
+    CompileDisjunction(alts.length(),
+                       [&](int i) { alts[i]->Accept(this, nullptr); });
+    return nullptr;
+  }
+
+  void* VisitAlternative(RegExpAlternative* node, void*) override {
+    for (RegExpTree* child : *node->nodes()) {
+      child->Accept(this, nullptr);
+    }
+    return nullptr;
+  }
+
+  void* VisitAssertion(RegExpAssertion* node, void*) override {
+    // TODO(mbid,v8:10765): Support this case.
+    UNREACHABLE();
+  }
+
+  void* VisitCharacterClass(RegExpCharacterClass* node, void*) override {
+    // A character class is compiled as Disjunction over its `CharacterRange`s.
+    ZoneList<CharacterRange>* ranges = node->ranges(zone_);
+    CharacterRange::Canonicalize(ranges);
+    if (node->is_negated()) {
+      // Capacity 2 for the common case where we compute the complement of a
+      // single interval range that doesn't contain 0 and kMaxCodePoint.
+      ZoneList<CharacterRange>* negated =
+          zone_->New<ZoneList<CharacterRange>>(2, zone_);
+      CharacterRange::Negate(ranges, negated, zone_);
+      ranges = negated;
+    }
+
+    CompileDisjunction(ranges->length(), [&](int i) {
+      // We don't support utf16 for now, so only ranges that can be specified
+      // by (complements of) ranges with uc16 bounds.
+      STATIC_ASSERT(kMaxSupportedCodepoint <= std::numeric_limits<uc16>::max());
+
+      uc32 from = (*ranges)[i].from();
+      DCHECK_LE(from, kMaxSupportedCodepoint);
+      uc16 from_uc16 = static_cast<uc16>(from);
+
+      uc32 to = (*ranges)[i].to();
+      DCHECK_IMPLIES(to > kMaxSupportedCodepoint, to == String::kMaxCodePoint);
+      uc16 to_uc16 = static_cast<uc16>(std::min(to, kMaxSupportedCodepoint));
+
+      RegExpInstruction::Uc16Range range{from_uc16, to_uc16};
+      code_.Add(RegExpInstruction::ConsumeRange(range), zone_);
+    });
+    return nullptr;
+  }
+
+  void* VisitAtom(RegExpAtom* node, void*) override {
+    for (uc16 c : node->data()) {
+      code_.Add(
+          RegExpInstruction::ConsumeRange(RegExpInstruction::Uc16Range{c, c}),
+          zone_);
+    }
+    return nullptr;
+  }
+
+  void* VisitQuantifier(RegExpQuantifier* node, void*) override {
+    // TODO(mbid,v8:10765): For now we support a quantifier of the form /x*/,
+    // i.e. greedy match of any number of /x/.  See also the comment in
+    // `CanBeHandledVisitor::VisitQuantifier`.
+    DCHECK_EQ(node->min(), 0);
+    DCHECK_EQ(node->max(), RegExpTree::kInfinity);
+    DCHECK(node->is_greedy());
+
+    // The repetition of /x/ is compiled into
+    //
+    //   a: FORK d
+    //   b: <x>
+    //   c: JMP a
+    //   d: ...
+    //
+    // Note that a FORKed thread has lower priority than the main thread, so
+    // this will indeed match greedily.
+
+    int initial_fork_index = code_.length();
+    // The FORK's address is patched once we're done.
+    code_.Add(RegExpInstruction::Fork(-1), zone_);
+    node->body()->Accept(this, nullptr);
+    code_.Add(RegExpInstruction::Jmp(initial_fork_index), zone_);
+    int end_index = code_.length();
+    code_[initial_fork_index].payload.pc = end_index;
+    return nullptr;
+  }
+
+  void* VisitCapture(RegExpCapture* node, void*) override {
+    // TODO(mbid,v8:10765): Support this case.
+    UNREACHABLE();
+  }
+
+  void* VisitGroup(RegExpGroup* node, void*) override {
+    node->body()->Accept(this, nullptr);
+    return nullptr;
+  }
+
+  void* VisitLookaround(RegExpLookaround* node, void*) override {
+    // TODO(mbid,v8:10765): Support this case.
+    UNREACHABLE();
+  }
+
+  void* VisitBackReference(RegExpBackReference* node, void*) override {
+    UNREACHABLE();
+  }
+
+  void* VisitEmpty(RegExpEmpty* node, void*) override { return nullptr; }
+
+  void* VisitText(RegExpText* node, void*) override {
+    for (TextElement& text_el : *node->elements()) {
+      text_el.tree()->Accept(this, nullptr);
+    }
+    return nullptr;
+  }
+
+ private:
+  Zone* zone_;
+  ZoneList<RegExpInstruction> code_;
+};
+
+}  // namespace
+
+ZoneList<RegExpInstruction> ExperimentalRegExpCompiler::Compile(
+    RegExpTree* tree, JSRegExp::Flags flags, Zone* zone) {
+  return CompileVisitor::Compile(tree, flags, zone);
+}
+
+}  // namespace internal
+}  // namespace v8
--- a/src/regexp/experimental/experimental-compiler.h
+++ b/src/regexp/experimental/experimental-compiler.h
@ -0,0 +1,33 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_COMPILER_H_
+#define V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_COMPILER_H_
+
+#include "src/regexp/experimental/experimental-bytecode.h"
+#include "src/regexp/regexp-ast.h"
+#include "src/zone/zone-list.h"
+
+namespace v8 {
+namespace internal {
+
+class ExperimentalRegExpCompiler final : public AllStatic {
+ public:
+  // Checks whether a given RegExpTree can be compiled into an experimental
+  // bytecode program.  This mostly amounts to the absence of back references,
+  // but see the definition.
+  // TODO(mbid,v8:10765): Currently more things are not handled, e.g. some
+  // quantifiers and unicode.
+  static bool CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags, Zone* zone);
+  // Compile regexp into a bytecode program.  The regexp must be handlable by
+  // the experimental engine; see`CanBeHandled`.  The program is returned as a
+  // ZoneList backed by the same Zone that is used in the RegExpTree argument.
+  static ZoneList<RegExpInstruction> Compile(RegExpTree* tree,
+                                             JSRegExp::Flags flags, Zone* zone);
+};
+
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_COMPILER_H_
--- a/src/regexp/experimental/experimental-interpreter.cc
+++ b/src/regexp/experimental/experimental-interpreter.cc
@ -0,0 +1,349 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/regexp/experimental/experimental-interpreter.h"
+
+#include "src/base/optional.h"
+#include "src/base/small-vector.h"
+
+namespace v8 {
+namespace internal {
+
+using MatchRange = ExperimentalRegExpInterpreter::MatchRange;
+
+namespace {
+
+template <class Character>
+class NfaInterpreter {
+  // Executes a bytecode program in breadth-first mode, without backtracking.
+  // `Character` can be instantiated with `uint8_t` or `uc16` for one byte or
+  // two byte input strings.
+  //
+  // In contrast to the backtracking implementation, this has linear time
+  // complexity in the length of the input string. Breadth-first mode means
+  // that threads are executed in lockstep with respect to their input
+  // position, i.e. the threads share a common input index.  This is similar
+  // to breadth-first simulation of a non-deterministic finite automaton (nfa),
+  // hence the name of the class.
+  //
+  // To follow the semantics of a backtracking VM implementation, we have to be
+  // careful about whether we stop execution when a thread executes ACCEPT.
+  // For example, consider execution of the bytecode generated by the regexp
+  //
+  //   r = /abc|..|[a-c]{10,}/
+  //
+  // on input "abcccccccccccccc".  Clearly the three alternatives
+  // - /abc/
+  // - /../
+  // - /[a-c]{10,}/
+  // all match this input.  A backtracking implementation will report "abc" as
+  // match, because it explores the first alternative before the others.
+  //
+  // However, if we execute breadth first, then we execute the 3 threads
+  // - t1, which tries to match /abc/
+  // - t2, which tries to match /../
+  // - t3, which tries to match /[a-c]{10,}/
+  // in lockstep i.e. by iterating over the input and feeding all threads one
+  // character at a time.  t2 will execute an ACCEPT after two characters,
+  // while t1 will only execute ACCEPT after three characters. Thus we find a
+  // match for the second alternative before a match of the first alternative.
+  //
+  // This shows that we cannot always stop searching as soon as some thread t
+  // executes ACCEPT:  If there is a thread u with higher priority than t, then
+  // it must be finished first.  If u produces a match, then we can discard the
+  // match of t because matches produced by threads with higher priority are
+  // preferred over matches of threads with lower priority.  On the other hand,
+  // we are allowed to abort all threads with lower priority than t if t
+  // produces a match: Such threads can only produce worse matches.  In the
+  // example above, we can abort t3 after two characters because of t2's match.
+  //
+  // Thus the interpreter keeps track of a priority-ordered list of threads.
+  // If a thread ACCEPTs, all threads with lower priority are discarded, and
+  // the search continues with the threads with higher priority.  If no threads
+  // with high priority are left, we return the match that was produced by the
+  // ACCEPTing thread with highest priority.
+ public:
+  NfaInterpreter(Vector<const RegExpInstruction> bytecode,
+                 Vector<const Character> input, int32_t input_index)
+      : bytecode_(bytecode),
+        input_(input),
+        input_index_(input_index),
+        pc_last_input_index_(bytecode.size()),
+        active_threads_(),
+        blocked_threads_(),
+        best_match_(base::nullopt) {
+    DCHECK(!bytecode_.empty());
+    DCHECK_GE(input_index_, 0);
+    DCHECK_LE(input_index_, input_.length());
+
+    std::fill(pc_last_input_index_.begin(), pc_last_input_index_.end(), -1);
+  }
+
+  // Finds up to `max_match_num` matches and writes their boundaries to
+  // `matches_out`.  The search begins at the current input index.  Returns the
+  // number of matches found.
+  int FindMatches(MatchRange* matches_out, int max_match_num) {
+    int match_num;
+    for (match_num = 0; match_num != max_match_num; ++match_num) {
+      base::Optional<MatchRange> match = FindNextMatch();
+      if (!match.has_value()) {
+        break;
+      }
+
+      matches_out[match_num] = *match;
+      SetInputIndex(match->end);
+    }
+    return match_num;
+  }
+
+ private:
+  // The state of a "thread" executing experimental regexp bytecode.  (Not to
+  // be confused with an OS thread.)
+  struct InterpreterThread {
+    // This thread's program counter, i.e. the index within `bytecode_` of the
+    // next instruction to be executed.
+    int32_t pc;
+    // The index in the input string where this thread started executing.
+    int32_t match_begin;
+  };
+
+  // Change the current input index for future calls to `FindNextMatch`.
+  void SetInputIndex(int new_input_index) {
+    DCHECK_GE(input_index_, 0);
+    DCHECK_LE(input_index_, input_.length());
+
+    input_index_ = new_input_index;
+  }
+
+  // Find the next match, begin search at input_index_;
+  base::Optional<MatchRange> FindNextMatch() {
+    DCHECK(active_threads_.empty());
+    // TODO(mbid,v8:10765): Can we get around resetting `pc_last_input_index_`
+    // here? As long as
+    //
+    //   pc_last_input_index_[pc] < input_index_
+    //
+    // for all possible program counters pc that are reachable without input
+    // from pc = 0 and
+    //
+    //   pc_last_input_index_[k] <= input_index_
+    //
+    // for all k > 0 hold I think everything should be fine.  Maybe we can do
+    // something about this in `SetInputIndex`.
+    std::fill(pc_last_input_index_.begin(), pc_last_input_index_.end(), -1);
+
+    DCHECK(blocked_threads_.empty());
+    DCHECK(active_threads_.empty());
+    DCHECK_EQ(best_match_, base::nullopt);
+
+    // All threads start at bytecode 0.
+    PushActiveThreadUnchecked(InterpreterThread{0, input_index_});
+    // Run the initial thread, potentially forking new threads, until every
+    // thread is blocked without further input.
+    RunActiveThreads();
+
+    // We stop if one of the following conditions hold:
+    // - We have exhausted the entire input.
+    // - We have found a match at some point, and there are no remaining
+    //   threads with higher priority than the thread that produced the match.
+    //   Threads with low priority have been aborted earlier, and the remaining
+    //   threads are blocked here, so the latter simply means that
+    //   `blocked_threads_` is empty.
+    while (input_index_ != input_.length() &&
+           !(best_match_.has_value() && blocked_threads_.empty())) {
+      DCHECK(active_threads_.empty());
+      uc16 input_char = input_[input_index_];
+      ++input_index_;
+
+      // If we haven't found a match yet, we add a thread with least priority
+      // that attempts a match starting after `input_char`.
+      if (!best_match_.has_value()) {
+        active_threads_.emplace_back(InterpreterThread{0, input_index_});
+      }
+
+      // We unblock all blocked_threads_ by feeding them the input char.
+      FlushBlockedThreads(input_char);
+
+      // Run all threads until they block or accept.
+      RunActiveThreads();
+    }
+
+    // Clean up the data structures we used.
+    base::Optional<MatchRange> result = best_match_;
+    best_match_ = base::nullopt;
+    blocked_threads_.clear();
+    active_threads_.clear();
+
+    return result;
+  }
+
+  // Run an active thread `t` until it executes a CONSUME_RANGE or ACCEPT
+  // instruction, or its PC value was already processed.
+  // - If processing of `t` can't continue because of CONSUME_RANGE, it is
+  //   pushed on `blocked_threads_`.
+  // - If `t` executes ACCEPT, set `best_match` according to `t.match_begin` and
+  //   the current input index. All remaining `active_threads_` are discarded.
+  void RunActiveThread(InterpreterThread t) {
+    while (true) {
+      RegExpInstruction inst = bytecode_[t.pc];
+      switch (inst.opcode) {
+        case RegExpInstruction::CONSUME_RANGE: {
+          blocked_threads_.emplace_back(t);
+          return;
+        }
+        case RegExpInstruction::FORK: {
+          InterpreterThread fork = t;
+          fork.pc = inst.payload.pc;
+          ++t.pc;
+
+          // t has higher priority than fork.  If t.pc hasn't been processed,we
+          // push fork on the active_thread_ stack and continue directly with
+          // t.  Otherwise we continue directly with fork if possible.
+          if (!IsPcProcessed(t.pc)) {
+            MarkPcProcessed(t.pc);
+            PushActiveThread(fork);
+            break;
+          } else if (!IsPcProcessed(fork.pc)) {
+            t = fork;
+            MarkPcProcessed(t.pc);
+            break;
+          }
+          return;
+        }
+        case RegExpInstruction::JMP:
+          t.pc = inst.payload.pc;
+          if (IsPcProcessed(t.pc)) return;
+          MarkPcProcessed(t.pc);
+          break;
+        case RegExpInstruction::ACCEPT:
+          best_match_ = MatchRange{t.match_begin, input_index_};
+          active_threads_.clear();
+          return;
+      }
+    }
+  }
+
+  // Run each active thread until it can't continue without further input.
+  // `active_threads_` is empty afterwards.  `blocked_threads_` are sorted from
+  // low to high priority.
+  void RunActiveThreads() {
+    while (!active_threads_.empty()) {
+      InterpreterThread t = active_threads_.back();
+      active_threads_.pop_back();
+      RunActiveThread(t);
+    }
+  }
+
+  // Unblock all blocked_threads_ by feeding them an `input_char`.  Should only
+  // be called with `input_index_` pointing to the character *after*
+  // `input_char` so that `pc_last_input_index_` is updated correctly.
+  void FlushBlockedThreads(uc16 input_char) {
+    // The threads in blocked_threads_ are sorted from high to low priority,
+    // but active_threads_ needs to be sorted from low to high priority, so we
+    // need to activate blocked threads in reverse order.
+    //
+    // TODO(mbid,v8:10765): base::SmallVector doesn't support `rbegin()` and
+    // `rend()`, should we implement that instead of this awkward iteration?
+    // Maybe we could at least use an int i and check for i >= 0, but
+    // SmallVectors don't have length() methods.
+    for (size_t i = blocked_threads_.size(); i > 0; --i) {
+      InterpreterThread t = blocked_threads_[i - 1];
+      RegExpInstruction inst = bytecode_[t.pc];
+      DCHECK_EQ(inst.opcode, RegExpInstruction::CONSUME_RANGE);
+      RegExpInstruction::Uc16Range range = inst.payload.consume_range;
+      if (input_char >= range.min && input_char <= range.max) {
+        ++t.pc;
+        PushActiveThreadUnchecked(t);
+      }
+    }
+    blocked_threads_.clear();
+  }
+
+  // It is redundant to have two threads t, t0 execute at the same PC value,
+  // because one of t, t0 matches iff the other does.  We can thus discard
+  // the one with lower priority.  We check whether a thread executed at some
+  // PC value by recording for every possible value of PC what the value of
+  // input_index_ was the last time a thread executed at PC. If a thread
+  // tries to continue execution at a PC value that we have seen before at
+  // the current input index, we abort it. (We execute threads with higher
+  // priority first, so the second thread is guaranteed to have lower
+  // priority.)
+  //
+  // Check whether we've seen an active thread with a given pc value since the
+  // last increment of `input_index_`.
+  bool IsPcProcessed(int pc) {
+    DCHECK_LE(pc_last_input_index_[pc], input_index_);
+    return pc_last_input_index_[pc] == input_index_;
+  }
+
+  // Mark a pc as having been processed since the last increment of
+  // `input_index_`.
+  void MarkPcProcessed(int pc) {
+    DCHECK_LE(pc_last_input_index_[pc], input_index_);
+    pc_last_input_index_[pc] = input_index_;
+  }
+
+  // Functions to push a thread `t` onto the list of active threads, but only
+  // if `t.pc` was not already the pc of some other thread at the current
+  // subject index.
+  void PushActiveThreadUnchecked(InterpreterThread t) {
+    DCHECK(!IsPcProcessed(t.pc));
+
+    MarkPcProcessed(t.pc);
+    active_threads_.emplace_back(t);
+  }
+  void PushActiveThread(InterpreterThread t) {
+    if (IsPcProcessed(t.pc)) {
+      return;
+    }
+    PushActiveThreadUnchecked(t);
+  }
+
+  Vector<const RegExpInstruction> bytecode_;
+  Vector<const Character> input_;
+  int input_index_;
+
+  // TODO(mbid,v8:10765): The following `SmallVector`s have somehwat
+  // arbitrarily chosen small capacity sizes; should benchmark to find a good
+  // value.
+
+  // pc_last_input_index_[k] records the value of input_index_ the last
+  // time a thread t such that t.pc == k was activated, i.e. put on
+  // active_threads_.  Thus pc_last_input_index.size() == bytecode.size().  See
+  // also `RunActiveThread`.
+  base::SmallVector<int, 64> pc_last_input_index_;
+
+  // Active threads can potentially (but not necessarily) continue without
+  // input.  Sorted from low to high priority.
+  base::SmallVector<InterpreterThread, 64> active_threads_;
+
+  // The pc of a blocked thread points to an instruction that consumes a
+  // character. Sorted from high to low priority (so the opposite of
+  // `active_threads_`).
+  base::SmallVector<InterpreterThread, 64> blocked_threads_;
+
+  // The best match found so far during the current search.  If several threads
+  // ACCEPTed, then this will be the match of the accepting thread with highest
+  // priority.
+  base::Optional<MatchRange> best_match_;
+};
+
+}  // namespace
+
+int ExperimentalRegExpInterpreter::FindMatchesNfaOneByte(
+    Vector<const RegExpInstruction> bytecode, Vector<const uint8_t> input,
+    int start_index, MatchRange* matches_out, int max_match_num) {
+  NfaInterpreter<uint8_t> interpreter(bytecode, input, start_index);
+  return interpreter.FindMatches(matches_out, max_match_num);
+}
+
+int ExperimentalRegExpInterpreter::FindMatchesNfaTwoByte(
+    Vector<const RegExpInstruction> bytecode, Vector<const uc16> input,
+    int start_index, MatchRange* matches_out, int max_match_num) {
+  NfaInterpreter<uc16> interpreter(bytecode, input, start_index);
+  return interpreter.FindMatches(matches_out, max_match_num);
+}
+
+}  // namespace internal
+}  // namespace v8
--- a/src/regexp/experimental/experimental-interpreter.h
+++ b/src/regexp/experimental/experimental-interpreter.h
@ -0,0 +1,40 @@
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_INTERPRETER_H_
+#define V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_INTERPRETER_H_
+
+#include "src/regexp/experimental/experimental-bytecode.h"
+#include "src/utils/vector.h"
+
+namespace v8 {
+namespace internal {
+
+class ExperimentalRegExpInterpreter final : public AllStatic {
+ public:
+  // A half-open range in an a string denoting a (sub)match.  Used to access
+  // output registers of regexp execution grouped by [begin, end) pairs.
+  struct MatchRange {
+    int32_t begin;  // inclusive
+    int32_t end;    // exclusive
+  };
+
+  // Executes a bytecode program in breadth-first NFA mode, without
+  // backtracking, to find matching substrings.  Trys to find up to
+  // `max_match_num` matches in `input`, starting at `start_index`.  Returns
+  // the actual number of matches found.  The boundaires of matching subranges
+  // are written to `matches_out`.  Provided in variants for one-byte and
+  // two-byte strings.
+  static int FindMatchesNfaOneByte(Vector<const RegExpInstruction> bytecode,
+                                   Vector<const uint8_t> input, int start_index,
+                                   MatchRange* matches_out, int max_match_num);
+  static int FindMatchesNfaTwoByte(Vector<const RegExpInstruction> bytecode,
+                                   Vector<const uc16> input, int start_index,
+                                   MatchRange* matches_out, int max_match_num);
+};
+
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_INTERPRETER_H_
--- a/src/regexp/experimental/experimental.cc
+++ b/src/regexp/experimental/experimental.cc
@ -4,157 +4,18 @@

 #include "src/regexp/experimental/experimental.h"

-#include <iomanip>
-#include <ios>
-
-#include "src/base/optional.h"
-#include "src/base/small-vector.h"
 #include "src/objects/js-regexp-inl.h"
-#include "src/regexp/regexp-ast.h"
+#include "src/regexp/experimental/experimental-compiler.h"
+#include "src/regexp/experimental/experimental-interpreter.h"
 #include "src/regexp/regexp-parser.h"
 #include "src/utils/ostreams.h"

 namespace v8 {
 namespace internal {

-namespace {
-
-// TODO(mbid, v8:10765): Currently the experimental engine doesn't support
-// UTF-16, but this shouldn't be too hard to implement.
-constexpr uc32 kMaxSupportedCodepoint = 0xFFFFu;
-
-class CanBeHandledVisitor final : private RegExpVisitor {
-  // Visitor to implement `ExperimentalRegExp::CanBeHandled`.
- public:
-  static bool Check(RegExpTree* node, JSRegExp::Flags flags, Zone* zone) {
-    if (!AreSuitableFlags(flags)) {
-      return false;
-    }
-    CanBeHandledVisitor visitor(zone);
-    node->Accept(&visitor, nullptr);
-    return visitor.result_;
-  }
-
- private:
-  explicit CanBeHandledVisitor(Zone* zone) : zone_(zone) {}
-
-  static bool AreSuitableFlags(JSRegExp::Flags flags) {
-    // TODO(mbid, v8:10765): We should be able to support all flags in the
-    // future.
-    static constexpr JSRegExp::Flags allowed_flags = JSRegExp::kGlobal;
-    return (flags & ~allowed_flags) == 0;
-  }
-
-  void* VisitDisjunction(RegExpDisjunction* node, void*) override {
-    for (RegExpTree* alt : *node->alternatives()) {
-      alt->Accept(this, nullptr);
-      if (!result_) {
-        return nullptr;
-      }
-    }
-    return nullptr;
-  }
-
-  void* VisitAlternative(RegExpAlternative* node, void*) override {
-    for (RegExpTree* child : *node->nodes()) {
-      child->Accept(this, nullptr);
-      if (!result_) {
-        return nullptr;
-      }
-    }
-    return nullptr;
-  }
-
-  void* VisitCharacterClass(RegExpCharacterClass* node, void*) override {
-    result_ = result_ && AreSuitableFlags(node->flags());
-    for (CharacterRange r : *node->ranges(zone_)) {
-      // TODO(mbid, v8:10765): We don't support full unicode yet, so we only
-      // allow character ranges that can be specified with two-byte characters.
-      if (r.to() > kMaxSupportedCodepoint) {
-        result_ = false;
-        return nullptr;
-      }
-    }
-    return nullptr;
-  }
-
-  void* VisitAssertion(RegExpAssertion* node, void*) override {
-    // TODO(mbid, v8:10765): We should be able to support at least some
-    // assertions. re2 does, too.
-    result_ = false;
-    return nullptr;
-  }
-
-  void* VisitAtom(RegExpAtom* node, void*) override {
-    result_ = result_ && AreSuitableFlags(node->flags());
-    return nullptr;
-  }
-
-  void* VisitText(RegExpText* node, void*) override {
-    for (TextElement& el : *node->elements()) {
-      el.tree()->Accept(this, nullptr);
-      if (!result_) {
-        return nullptr;
-      }
-    }
-    return nullptr;
-  }
-
-  void* VisitQuantifier(RegExpQuantifier* node, void*) override {
-    // TODO(mbid, v8:10765): Theoretically we can support arbitrary min() and
-    // max(), but the size of the automaton grows linearly with finite max().
-    // We probably want a cut-off value here, or maybe we can "virtualize" the
-    // repetitions.
-    // Non-greedy quantifiers are easy to implement, but not supported atm.
-    // It's not clear to me how a possessive quantifier would be implemented,
-    // we should check whether re2 supports this.
-    result_ = result_ && node->min() == 0 &&
-              node->max() == RegExpTree::kInfinity && node->is_greedy();
-    if (!result_) {
-      return nullptr;
-    }
-    node->body()->Accept(this, nullptr);
-    return nullptr;
-  }
-
-  void* VisitCapture(RegExpCapture* node, void*) override {
-    // TODO(mbid, v8:10765): This can be implemented with the NFA interpreter,
-    // but not with the lazy DFA.  See also re2.
-    result_ = false;
-    return nullptr;
-  }
-
-  void* VisitGroup(RegExpGroup* node, void*) override {
-    node->body()->Accept(this, nullptr);
-    return nullptr;
-  }
-
-  void* VisitLookaround(RegExpLookaround* node, void*) override {
-    // TODO(mbid, v8:10765): This will be hard to support, but not impossible I
-    // think.  See product automata.
-    result_ = false;
-    return nullptr;
-  }
-
-  void* VisitBackReference(RegExpBackReference* node, void*) override {
-    // This can't be implemented without backtracking.
-    result_ = false;
-    return nullptr;
-  }
-
-  void* VisitEmpty(RegExpEmpty* node, void*) override { return nullptr; }
-
- private:
-  bool result_ = true;
-  Zone* zone_;
-};
-
-}  // namespace
-
 bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags,
                                      Zone* zone) {
-  DCHECK(FLAG_enable_experimental_regexp_engine);
-  return CanBeHandledVisitor::Check(tree, flags, zone);
+  return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, zone);
 }

 void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,
@ -182,423 +43,6 @@ bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
         Smi::FromInt(JSRegExp::kUninitializedValue);
 }

-// ----------------------------------------------------------------------------
-// Definition and semantics of the EXPERIMENTAL bytecode.
-// Background:
-// - Russ Cox's blog post series on regular expression matching, in particular
-//   https://swtch.com/~rsc/regexp/regexp2.html
-// - The re2 regular regexp library: https://github.com/google/re2
-//
-// This comment describes the bytecode used by the experimental regexp engine
-// and its abstract semantics in terms of a VM.  An implementation of the
-// semantics that avoids exponential runtime can be found in `NfaInterpreter`.
-//
-// The experimental bytecode describes a non-deterministic finite automaton. It
-// runs on a multithreaded virtual machine (VM), i.e. in several threads
-// concurrently.  (These "threads" don't need to be actual operating system
-// threads.)  Apart from a list of threads, the VM maintains an immutable
-// shared input string which threads can read from.  Each thread is given by a
-// program counter (PC, index of the current instruction), a fixed number of
-// registers of indices into the input string, and a monotonically increasing
-// index which represents the current position within the input string.
-//
-// For the precise encoding of the instruction set, see the definition `struct
-// RegExpInstruction` below.  Currently we support the following instructions:
-// - CONSUME_RANGE: Check whether the codepoint of the current character is
-//   contained in a non-empty closed interval [min, max] specified in the
-//   instruction payload.  Abort this thread if false, otherwise advance the
-//   input position by 1 and continue with the next instruction.
-// - ACCEPT: Stop this thread and signify the end of a match at the current
-//   input position.
-// - FORK: If executed by a thread t, spawn a new thread t0 whose register
-//   values and input position agree with those of t, but whose PC value is set
-//   to the value specified in the instruction payload.  The register values of
-//   t and t0 agree directly after the FORK, but they can diverge.  Thread t
-//   continues with the instruction directly after the current FORK
-//   instruction.
-// - JMP: Instead of incrementing the PC value after execution of this
-//   instruction by 1, set PC of this thread to the value specified in the
-//   instruction payload and continue there.
-//
-// Special care must be exercised with respect to thread priority.  It is
-// possible that more than one thread executes an ACCEPT statement.  The output
-// of the program is given by the contents of the matching thread's registers,
-// so this is ambiguous in case of multiple matches.  To resolve the ambiguity,
-// every implementation of the VM  must output the match that a backtracking
-// implementation would output (i.e. behave the same as Irregexp).
-//
-// A backtracking implementation of the VM maintains a stack of postponed
-// threads.  Upon encountering a FORK statement, this VM will create a copy of
-// the current thread, set the copy's PC value according to the instruction
-// payload, and push it to the stack of postponed threads.  The VM will then
-// continue execution of the current thread.
-//
-// If at some point a thread t executes a MATCH statement, the VM stops and
-// outputs the registers of t.  Postponed threads are discarded.  On the other
-// hand, if a thread t is aborted because some input character didn't pass a
-// check, then the VM pops the topmost postponed thread and continues execution
-// with this thread.  If there are no postponed threads, then the VM outputs
-// failure, i.e. no matches.
-//
-// Equivalently, we can describe the behavior of the backtracking VM in terms
-// of priority: Threads are linearly ordered by priority, and matches generated
-// by threads with high priority must be preferred over matches generated by
-// threads with low priority, regardless of the chronological order in which
-// matches were found.  If a thread t executes a FORK statement and spawns a
-// thread t0, then the priority of t0 is such that the following holds:
-// * t0 < t, i.e. t0 has lower priority than t.
-// * For all threads u such that u != t and u != t0, we have t0 < u iff t < u,
-//   i.e. the t0 compares to other threads the same as t.
-// For example, if there are currently 3 threads s, t, u such that s < t < u,
-// then after t executes a fork, the thread priorities will be s < t0 < t < u.
-
-namespace {
-
-struct Uc16Range {
-  uc16 min;  // Inclusive.
-  uc16 max;  // Inclusive.
-};
-
-// Bytecode format.
-// Currently very simple fixed-size: The opcode is encoded in the first 4
-// bytes, the payload takes another 4 bytes.
-struct RegExpInstruction {
-  enum Opcode : int32_t {
-    CONSUME_RANGE,
-    FORK,
-    JMP,
-    ACCEPT,
-  };
-
-  static RegExpInstruction ConsumeRange(Uc16Range consume_range) {
-    RegExpInstruction result;
-    result.opcode = CONSUME_RANGE;
-    result.payload.consume_range = consume_range;
-    return result;
-  }
-
-  static RegExpInstruction Fork(int32_t alt_index) {
-    RegExpInstruction result;
-    result.opcode = FORK;
-    result.payload.pc = alt_index;
-    return result;
-  }
-
-  static RegExpInstruction Jmp(int32_t alt_index) {
-    RegExpInstruction result;
-    result.opcode = JMP;
-    result.payload.pc = alt_index;
-    return result;
-  }
-
-  static RegExpInstruction Accept() {
-    RegExpInstruction result;
-    result.opcode = ACCEPT;
-    return result;
-  }
-
-  Opcode opcode;
-  union {
-    // Payload of CONSUME_RANGE:
-    Uc16Range consume_range;
-    // Payload of FORK and JMP, the next/forked program counter (pc):
-    int32_t pc;
-  } payload;
-  STATIC_ASSERT(sizeof(payload) == 4);
-};
-STATIC_ASSERT(sizeof(RegExpInstruction) == 8);
-// TODO(mbid,v8:10765): This is rather wasteful.  We can fit the opcode in 2-3
-// bits, so the remaining 29/30 bits can be used as payload.  Problem: The
-// payload of CONSUME_RANGE consists of two 16-bit values `min` and `max`, so
-// this wouldn't fit.  We could encode the payload of a CONSUME_RANGE
-// instruction by the start of the interval and its length instead, and then
-// only allows lengths that fit into 14/13 bits.  A longer range can then be
-// encoded as a disjunction of smaller ranges.
-//
-// Another thought: CONSUME_RANGEs are only valid if the payloads are such that
-// min <= max. Thus there are
-//
-//     2^16 + 2^16 - 1 + ... + 1
-//   = 2^16 * (2^16 + 1) / 2
-//   = 2^31 + 2^15
-//
-// valid payloads for a CONSUME_RANGE instruction.  If we want to fit
-// instructions into 4 bytes, we would still have almost 2^31 instructions left
-// over if we encode everything as tight as possible.  For example, we could
-// use another 2^29 values for JMP, another 2^29 for FORK, 1 value for ACCEPT,
-// and then still have almost 2^30 instructions left over for something like
-// zero-width assertions and captures.
-
-std::ostream& PrintAsciiOrHex(std::ostream& os, uc16 c) {
-  if (c < 128 && std::isprint(c)) {
-    os << static_cast<char>(c);
-  } else {
-    os << "0x" << std::hex << static_cast<int>(c);
-  }
-  return os;
-}
-
-std::ostream& operator<<(std::ostream& os, const RegExpInstruction& inst) {
-  switch (inst.opcode) {
-    case RegExpInstruction::CONSUME_RANGE: {
-      os << "CONSUME_RANGE [";
-      PrintAsciiOrHex(os, inst.payload.consume_range.min);
-      os << ", ";
-      PrintAsciiOrHex(os, inst.payload.consume_range.max);
-      os << "]";
-      break;
-    }
-    case RegExpInstruction::FORK:
-      os << "FORK " << inst.payload.pc;
-      break;
-    case RegExpInstruction::JMP:
-      os << "JMP " << inst.payload.pc;
-      break;
-    case RegExpInstruction::ACCEPT:
-      os << "ACCEPT";
-      break;
-  }
-  return os;
-}
-
-// The maximum number of digits required to display a non-negative number < n
-// in base 10.
-int DigitsRequiredBelow(int n) {
-  DCHECK_GE(n, 0);
-
-  int result = 1;
-  for (int i = 10; i < n; i *= 10) {
-    result += 1;
-  }
-  return result;
-}
-
-std::ostream& operator<<(std::ostream& os,
-                         Vector<const RegExpInstruction> insts) {
-  int inst_num = insts.length();
-  int line_digit_num = DigitsRequiredBelow(inst_num);
-
-  for (int i = 0; i != inst_num; ++i) {
-    const RegExpInstruction& inst = insts[i];
-    os << std::setfill('0') << std::setw(line_digit_num) << i << ": " << inst
-       << std::endl;
-  }
-  return os;
-}
-
-Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
-  RegExpInstruction* inst_begin =
-      reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress());
-  int inst_num = raw_bytes.length() / sizeof(RegExpInstruction);
-  DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length());
-  return Vector<RegExpInstruction>(inst_begin, inst_num);
-}
-
-class Compiler : private RegExpVisitor {
- public:
-  static Handle<ByteArray> Compile(RegExpTree* tree, Isolate* isolate,
-                                   Zone* zone) {
-    Compiler compiler(zone);
-
-    tree->Accept(&compiler, nullptr);
-    compiler.code_.Add(RegExpInstruction::Accept(), zone);
-
-    int byte_length = sizeof(RegExpInstruction) * compiler.code_.length();
-    Handle<ByteArray> array = isolate->factory()->NewByteArray(byte_length);
-    MemCopy(array->GetDataStartAddress(), compiler.code_.begin(), byte_length);
-
-    return array;
-  }
-
- private:
-  // TODO(mbid,v8:10765): Use some upper bound for code_ capacity computed from
-  // the `tree` size we're going to compile?
-  explicit Compiler(Zone* zone) : zone_(zone), code_(0, zone) {}
-
-  // Generate a disjunction of code fragments compiled by a function `alt_gen`.
-  // `alt_gen` is called repeatedly with argument `int i = 0, 1, ..., alt_num -
-  // 1` and should push code corresponding to the ith alternative onto `code_`.
-  template <class F>
-  void CompileDisjunction(int alt_num, F gen_alt) {
-    // An alternative a0 | a1 | a2 is compiled into
-    //   FORK <a2>
-    //   FORK <a1>
-    //   <a0>
-    //   JMP $end
-    //   <a1>
-    //   JMP $end
-    //   <a2>
-    // where $end is the index of the next instruction after <a2>.
-    //
-    // By the semantics of the FORK instruction (see above at definition and
-    // semantics), the forked thread has lower priority than the current
-    // thread.  This means that with the code we're generating here, the thread
-    // matching the alternative a0 is indeed the thread with the highest
-    // priority, followed by the thread for a1 and so on.
-
-    if (alt_num == 0) {
-      return;
-    }
-
-    // Record the index of the first of the alt_num - 1 fork instructions in the
-    // beginning.
-    int forks_begin = code_.length();
-    // Add FORKs to alts[alt_num - 1], alts[alt_num - 2], ..., alts[1].
-    for (int i = alt_num - 1; i != 0; --i) {
-      // The FORK's address is patched once we know the address of the ith
-      // alternative.
-      code_.Add(RegExpInstruction::Fork(-1), zone_);
-    }
-
-    // List containing the index of the final JMP instruction after each
-    // alternative but the last one.
-    ZoneList<int> jmp_indices(alt_num - 1, zone_);
-
-    for (int i = 0; i != alt_num; ++i) {
-      if (i != 0) {
-        // If this is not the first alternative, we have to patch the
-        // corresponding FORK statement in the beginning.
-        code_[forks_begin + alt_num - 1 - i].payload.pc = code_.length();
-      }
-      gen_alt(i);
-      if (i != alt_num - 1) {
-        // If this is not the last alternative, we have to emit a JMP past the
-        // remaining alternatives.  We don't know this address yet, so we have
-        // to patch patch it once all alternatives are emitted.
-        jmp_indices.Add(code_.length(), zone_);
-        code_.Add(RegExpInstruction::Jmp(-1), zone_);
-      }
-    }
-
-    // All alternatives are emitted.  Now we can patch the JMP instruction
-    // after each but the last alternative.
-    int end_index = code_.length();
-    for (int jmp_index : jmp_indices) {
-      code_[jmp_index].payload.pc = end_index;
-    }
-  }
-
-  void* VisitDisjunction(RegExpDisjunction* node, void*) override {
-    ZoneList<RegExpTree*>& alts = *node->alternatives();
-    CompileDisjunction(alts.length(),
-                       [&](int i) { alts[i]->Accept(this, nullptr); });
-    return nullptr;
-  }
-
-  void* VisitAlternative(RegExpAlternative* node, void*) override {
-    for (RegExpTree* child : *node->nodes()) {
-      child->Accept(this, nullptr);
-    }
-    return nullptr;
-  }
-
-  void* VisitAssertion(RegExpAssertion* node, void*) override {
-    // TODO(mbid,v8:10765): Support this case.
-    UNREACHABLE();
-  }
-
-  void* VisitCharacterClass(RegExpCharacterClass* node, void*) override {
-    // A character class is compiled as Disjunction over its `CharacterRange`s.
-    ZoneList<CharacterRange>* ranges = node->ranges(zone_);
-    CharacterRange::Canonicalize(ranges);
-    if (node->is_negated()) {
-      // Capacity 2 for the common case where we compute the complement of a
-      // single interval range that doesn't contain 0 and kMaxCodePoint.
-      ZoneList<CharacterRange>* negated =
-          zone_->New<ZoneList<CharacterRange>>(2, zone_);
-      CharacterRange::Negate(ranges, negated, zone_);
-      ranges = negated;
-    }
-
-    CompileDisjunction(ranges->length(), [&](int i) {
-      // We don't support utf16 for now, so only ranges that can be specified
-      // by (complements of) ranges with uc16 bounds.
-      STATIC_ASSERT(kMaxSupportedCodepoint <= std::numeric_limits<uc16>::max());
-
-      uc32 from = (*ranges)[i].from();
-      DCHECK_LE(from, kMaxSupportedCodepoint);
-      uc16 from_uc16 = static_cast<uc16>(from);
-
-      uc32 to = (*ranges)[i].to();
-      DCHECK_IMPLIES(to > kMaxSupportedCodepoint, to == String::kMaxCodePoint);
-      uc16 to_uc16 = static_cast<uc16>(std::min(to, kMaxSupportedCodepoint));
-
-      Uc16Range range{from_uc16, to_uc16};
-      code_.Add(RegExpInstruction::ConsumeRange(range), zone_);
-    });
-    return nullptr;
-  }
-
-  void* VisitAtom(RegExpAtom* node, void*) override {
-    for (uc16 c : node->data()) {
-      code_.Add(RegExpInstruction::ConsumeRange(Uc16Range{c, c}), zone_);
-    }
-    return nullptr;
-  }
-
-  void* VisitQuantifier(RegExpQuantifier* node, void*) override {
-    // TODO(mbid,v8:10765): For now we support a quantifier of the form /x*/,
-    // i.e. greedy match of any number of /x/.  See also the comment in
-    // `CanBeHandledVisitor::VisitQuantifier`.
-    DCHECK_EQ(node->min(), 0);
-    DCHECK_EQ(node->max(), RegExpTree::kInfinity);
-    DCHECK(node->is_greedy());
-
-    // The repetition of /x/ is compiled into
-    //
-    //   a: FORK d
-    //   b: <x>
-    //   c: JMP a
-    //   d: ...
-    //
-    // Note that a FORKed thread has lower priority than the main thread, so
-    // this will indeed match greedily.
-
-    int initial_fork_index = code_.length();
-    // The FORK's address is patched once we're done.
-    code_.Add(RegExpInstruction::Fork(-1), zone_);
-    node->body()->Accept(this, nullptr);
-    code_.Add(RegExpInstruction::Jmp(initial_fork_index), zone_);
-    int end_index = code_.length();
-    code_[initial_fork_index].payload.pc = end_index;
-    return nullptr;
-  }
-
-  void* VisitCapture(RegExpCapture* node, void*) override {
-    // TODO(mbid,v8:10765): Support this case.
-    UNREACHABLE();
-  }
-
-  void* VisitGroup(RegExpGroup* node, void*) override {
-    node->body()->Accept(this, nullptr);
-    return nullptr;
-  }
-
-  void* VisitLookaround(RegExpLookaround* node, void*) override {
-    // TODO(mbid,v8:10765): Support this case.
-    UNREACHABLE();
-  }
-
-  void* VisitBackReference(RegExpBackReference* node, void*) override {
-    UNREACHABLE();
-  }
-
-  void* VisitEmpty(RegExpEmpty* node, void*) override { return nullptr; }
-
-  void* VisitText(RegExpText* node, void*) override {
-    for (TextElement& text_el : *node->elements()) {
-      text_el.tree()->Accept(this, nullptr);
-    }
-    return nullptr;
-  }
-
- private:
-  Zone* zone_;
-  ZoneList<RegExpInstruction> code_;
-};
-
-}  // namespace
-
 void ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
  DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL);
 #ifdef VERIFY_HEAP
@ -624,341 +68,32 @@ void ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
      RegExpParser::ParseRegExp(isolate, &zone, &reader, flags, &parse_result);
  CHECK(parse_success);

-  Handle<ByteArray> bytecode =
-      Compiler::Compile(parse_result.tree, isolate, &zone);
-  re->SetDataAt(JSRegExp::kIrregexpLatin1BytecodeIndex, *bytecode);
-  re->SetDataAt(JSRegExp::kIrregexpUC16BytecodeIndex, *bytecode);
+  ZoneList<RegExpInstruction> bytecode =
+      ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone);
+
+  int byte_length = sizeof(RegExpInstruction) * bytecode.length();
+  Handle<ByteArray> bytecode_byte_array =
+      isolate->factory()->NewByteArray(byte_length);
+  MemCopy(bytecode_byte_array->GetDataStartAddress(), bytecode.begin(),
+          byte_length);
+
+  re->SetDataAt(JSRegExp::kIrregexpLatin1BytecodeIndex, *bytecode_byte_array);
+  re->SetDataAt(JSRegExp::kIrregexpUC16BytecodeIndex, *bytecode_byte_array);

  Handle<Code> trampoline = BUILTIN_CODE(isolate, RegExpExperimentalTrampoline);
  re->SetDataAt(JSRegExp::kIrregexpLatin1CodeIndex, *trampoline);
  re->SetDataAt(JSRegExp::kIrregexpUC16CodeIndex, *trampoline);
 }

-namespace {
+Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
+  RegExpInstruction* inst_begin =
+      reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress());
+  int inst_num = raw_bytes.length() / sizeof(RegExpInstruction);
+  DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length());
+  return Vector<RegExpInstruction>(inst_begin, inst_num);
+}

-// A half-open range in the input string denoting a (sub)match.  Used to access
-// output registers of a regexp match grouped by [begin, end) pairs.
-struct MatchRange {
-  int32_t begin;  // inclusive
-  int32_t end;    // exclusive
-};
-
-template <class Character>
-class NfaInterpreter {
-  // Executes a bytecode program in breadth-first mode, without backtracking.
-  // `Character` can be instantiated with `uint8_t` or `uc16` for one byte or
-  // two byte input strings.
-  //
-  // In contrast to the backtracking implementation, this has linear time
-  // complexity in the length of the input string. Breadth-first mode means
-  // that threads are executed in lockstep with respect to their input
-  // position, i.e. the threads share a common input index.  This is similar
-  // to breadth-first simulation of a non-deterministic finite automaton (nfa),
-  // hence the name of the class.
-  //
-  // To follow the semantics of a backtracking VM implementation, we have to be
-  // careful about whether we stop execution when a thread executes ACCEPT.
-  // For example, consider execution of the bytecode generated by the regexp
-  //
-  //   r = /abc|..|[a-c]{10,}/
-  //
-  // on input "abcccccccccccccc".  Clearly the three alternatives
-  // - /abc/
-  // - /../
-  // - /[a-c]{10,}/
-  // all match this input.  A backtracking implementation will report "abc" as
-  // match, because it explores the first alternative before the others.
-  //
-  // However, if we execute breadth first, then we execute the 3 threads
-  // - t1, which tries to match /abc/
-  // - t2, which tries to match /../
-  // - t3, which tries to match /[a-c]{10,}/
-  // in lockstep i.e. by iterating over the input and feeding all threads one
-  // character at a time.  t2 will execute an ACCEPT after two characters,
-  // while t1 will only execute ACCEPT after three characters. Thus we find a
-  // match for the second alternative before a match of the first alternative.
-  //
-  // This shows that we cannot always stop searching as soon as some thread t
-  // executes ACCEPT:  If there is a thread u with higher priority than t, then
-  // it must be finished first.  If u produces a match, then we can discard the
-  // match of t because matches produced by threads with higher priority are
-  // preferred over matches of threads with lower priority.  On the other hand,
-  // we are allowed to abort all threads with lower priority than t if t
-  // produces a match: Such threads can only produce worse matches.  In the
-  // example above, we can abort t3 after two characters because of t2's match.
-  //
-  // Thus the interpreter keeps track of a priority-ordered list of threads.
-  // If a thread ACCEPTs, all threads with lower priority are discarded, and
-  // the search continues with the threads with higher priority.  If no threads
-  // with high priority are left, we return the match that was produced by the
-  // ACCEPTing thread with highest priority.
- public:
-  NfaInterpreter(Vector<const RegExpInstruction> bytecode,
-                 Vector<const Character> input, int32_t input_index)
-      : bytecode_(bytecode),
-        input_(input),
-        input_index_(input_index),
-        pc_last_input_index_(bytecode.size()),
-        active_threads_(),
-        blocked_threads_(),
-        best_match_(base::nullopt) {
-    DCHECK(!bytecode_.empty());
-    DCHECK_GE(input_index_, 0);
-    DCHECK_LE(input_index_, input_.length());
-
-    std::fill(pc_last_input_index_.begin(), pc_last_input_index_.end(), -1);
-  }
-
-  // Finds up to `max_match_num` matches and writes their boundaries to
-  // `matches_out`.  The search begins at the current input index.  Returns the
-  // number of matches found.
-  int FindMatches(MatchRange* matches_out, int max_match_num) {
-    int match_num;
-    for (match_num = 0; match_num != max_match_num; ++match_num) {
-      base::Optional<MatchRange> match = FindNextMatch();
-      if (!match.has_value()) {
-        break;
-      }
-
-      matches_out[match_num] = *match;
-      SetInputIndex(match->end);
-    }
-    return match_num;
-  }
-
- private:
-  // The state of a "thread" executing experimental regexp bytecode.  (Not to
-  // be confused with an OS thread.)
-  struct InterpreterThread {
-    // This thread's program counter, i.e. the index within `bytecode_` of the
-    // next instruction to be executed.
-    int32_t pc;
-    // The index in the input string where this thread started executing.
-    int32_t match_begin;
-  };
-
-  // Change the current input index for future calls to `FindNextMatch`.
-  void SetInputIndex(int new_input_index) {
-    DCHECK_GE(input_index_, 0);
-    DCHECK_LE(input_index_, input_.length());
-
-    input_index_ = new_input_index;
-  }
-
-  // Find the next match, begin search at input_index_;
-  base::Optional<MatchRange> FindNextMatch() {
-    DCHECK(active_threads_.empty());
-    // TODO(mbid,v8:10765): Can we get around resetting `pc_last_input_index_`
-    // here? As long as
-    //
-    //   pc_last_input_index_[pc] < input_index_
-    //
-    // for all possible program counters pc that are reachable without input
-    // from pc = 0 and
-    //
-    //   pc_last_input_index_[k] <= input_index_
-    //
-    // for all k > 0 hold I think everything should be fine.  Maybe we can do
-    // something about this in `SetInputIndex`.
-    std::fill(pc_last_input_index_.begin(), pc_last_input_index_.end(), -1);
-
-    DCHECK(blocked_threads_.empty());
-    DCHECK(active_threads_.empty());
-    DCHECK_EQ(best_match_, base::nullopt);
-
-    // All threads start at bytecode 0.
-    PushActiveThreadUnchecked(InterpreterThread{0, input_index_});
-    // Run the initial thread, potentially forking new threads, until every
-    // thread is blocked without further input.
-    RunActiveThreads();
-
-    // We stop if one of the following conditions hold:
-    // - We have exhausted the entire input.
-    // - We have found a match at some point, and there are no remaining
-    //   threads with higher priority than the thread that produced the match.
-    //   Threads with low priority have been aborted earlier, and the remaining
-    //   threads are blocked here, so the latter simply means that
-    //   `blocked_threads_` is empty.
-    while (input_index_ != input_.length() &&
-           !(best_match_.has_value() && blocked_threads_.empty())) {
-      DCHECK(active_threads_.empty());
-      uc16 input_char = input_[input_index_];
-      ++input_index_;
-
-      // If we haven't found a match yet, we add a thread with least priority
-      // that attempts a match starting after `input_char`.
-      if (!best_match_.has_value()) {
-        active_threads_.emplace_back(InterpreterThread{0, input_index_});
-      }
-
-      // We unblock all blocked_threads_ by feeding them the input char.
-      FlushBlockedThreads(input_char);
-
-      // Run all threads until they block or accept.
-      RunActiveThreads();
-    }
-
-    // Clean up the data structures we used.
-    base::Optional<MatchRange> result = best_match_;
-    best_match_ = base::nullopt;
-    blocked_threads_.clear();
-    active_threads_.clear();
-
-    return result;
-  }
-
-  // Run an active thread `t` until it executes a CONSUME_RANGE or ACCEPT
-  // instruction, or its PC value was already processed.
-  // - If processing of `t` can't continue because of CONSUME_RANGE, it is
-  //   pushed on `blocked_threads_`.
-  // - If `t` executes ACCEPT, set `best_match` according to `t.match_begin` and
-  //   the current input index. All remaining `active_threads_` are discarded.
-  void RunActiveThread(InterpreterThread t) {
-    while (true) {
-      RegExpInstruction inst = bytecode_[t.pc];
-      switch (inst.opcode) {
-        case RegExpInstruction::CONSUME_RANGE: {
-          blocked_threads_.emplace_back(t);
-          return;
-        }
-        case RegExpInstruction::FORK: {
-          InterpreterThread fork = t;
-          fork.pc = inst.payload.pc;
-          ++t.pc;
-
-          // t has higher priority than fork.  If t.pc hasn't been processed,we
-          // push fork on the active_thread_ stack and continue directly with
-          // t.  Otherwise we continue directly with fork if possible.
-          if (!IsPcProcessed(t.pc)) {
-            MarkPcProcessed(t.pc);
-            PushActiveThread(fork);
-            break;
-          } else if (!IsPcProcessed(fork.pc)) {
-            t = fork;
-            MarkPcProcessed(t.pc);
-            break;
-          }
-          return;
-        }
-        case RegExpInstruction::JMP:
-          t.pc = inst.payload.pc;
-          if (IsPcProcessed(t.pc)) return;
-          MarkPcProcessed(t.pc);
-          break;
-        case RegExpInstruction::ACCEPT:
-          best_match_ = MatchRange{t.match_begin, input_index_};
-          active_threads_.clear();
-          return;
-      }
-    }
-  }
-
-  // Run each active thread until it can't continue without further input.
-  // `active_threads_` is empty afterwards.  `blocked_threads_` are sorted from
-  // low to high priority.
-  void RunActiveThreads() {
-    while (!active_threads_.empty()) {
-      InterpreterThread t = active_threads_.back();
-      active_threads_.pop_back();
-      RunActiveThread(t);
-    }
-  }
-
-  // Unblock all blocked_threads_ by feeding them an `input_char`.  Should only
-  // be called with `input_index_` pointing to the character *after*
-  // `input_char` so that `pc_last_input_index_` is updated correctly.
-  void FlushBlockedThreads(uc16 input_char) {
-    // The threads in blocked_threads_ are sorted from high to low priority,
-    // but active_threads_ needs to be sorted from low to high priority, so we
-    // need to activate blocked threads in reverse order.
-    //
-    // TODO(mbid,v8:10765): base::SmallVector doesn't support `rbegin()` and
-    // `rend()`, should we implement that instead of this awkward iteration?
-    // Maybe we could at least use an int i and check for i >= 0, but
-    // SmallVectors don't have length() methods.
-    for (size_t i = blocked_threads_.size(); i > 0; --i) {
-      InterpreterThread t = blocked_threads_[i - 1];
-      RegExpInstruction inst = bytecode_[t.pc];
-      DCHECK_EQ(inst.opcode, RegExpInstruction::CONSUME_RANGE);
-      Uc16Range range = inst.payload.consume_range;
-      if (input_char >= range.min && input_char <= range.max) {
-        ++t.pc;
-        PushActiveThreadUnchecked(t);
-      }
-    }
-    blocked_threads_.clear();
-  }
-
-  // It is redundant to have two threads t, t0 execute at the same PC value,
-  // because one of t, t0 matches iff the other does.  We can thus discard
-  // the one with lower priority.  We check whether a thread executed at some
-  // PC value by recording for every possible value of PC what the value of
-  // input_index_ was the last time a thread executed at PC. If a thread
-  // tries to continue execution at a PC value that we have seen before at
-  // the current input index, we abort it. (We execute threads with higher
-  // priority first, so the second thread is guaranteed to have lower
-  // priority.)
-  //
-  // Check whether we've seen an active thread with a given pc value since the
-  // last increment of `input_index_`.
-  bool IsPcProcessed(int pc) {
-    DCHECK_LE(pc_last_input_index_[pc], input_index_);
-    return pc_last_input_index_[pc] == input_index_;
-  }
-
-  // Mark a pc as having been processed since the last increment of
-  // `input_index_`.
-  void MarkPcProcessed(int pc) {
-    DCHECK_LE(pc_last_input_index_[pc], input_index_);
-    pc_last_input_index_[pc] = input_index_;
-  }
-
-  // Functions to push a thread `t` onto the list of active threads, but only
-  // if `t.pc` was not already the pc of some other thread at the current
-  // subject index.
-  void PushActiveThreadUnchecked(InterpreterThread t) {
-    DCHECK(!IsPcProcessed(t.pc));
-
-    MarkPcProcessed(t.pc);
-    active_threads_.emplace_back(t);
-  }
-  void PushActiveThread(InterpreterThread t) {
-    if (IsPcProcessed(t.pc)) {
-      return;
-    }
-    PushActiveThreadUnchecked(t);
-  }
-
-  Vector<const RegExpInstruction> bytecode_;
-  Vector<const Character> input_;
-  int input_index_;
-
-  // TODO(mbid,v8:10765): The following `SmallVector`s have somehwat
-  // arbitrarily chosen small capacity sizes; should benchmark to find a good
-  // value.
-
-  // pc_last_input_index_[k] records the value of input_index_ the last
-  // time a thread t such that t.pc == k was activated, i.e. put on
-  // active_threads_.  Thus pc_last_input_index.size() == bytecode.size().  See
-  // also `RunActiveThread`.
-  base::SmallVector<int, 64> pc_last_input_index_;
-
-  // Active threads can potentially (but not necessarily) continue without
-  // input.  Sorted from low to high priority.
-  base::SmallVector<InterpreterThread, 64> active_threads_;
-
-  // The pc of a blocked thread points to an instruction that consumes a
-  // character. Sorted from high to low priority (so the opposite of
-  // `active_threads_`).
-  base::SmallVector<InterpreterThread, 64> blocked_threads_;
-
-  // The best match found so far during the current search.  If several threads
-  // ACCEPTed, then this will be the match of the accepting thread with highest
-  // priority.
-  base::Optional<MatchRange> best_match_;
-};
-
-}  // namespace
+using MatchRange = ExperimentalRegExpInterpreter::MatchRange;

 // Returns the number of matches.
 int32_t ExperimentalRegExp::ExecRaw(JSRegExp regexp, String subject,
@ -990,13 +125,13 @@ int32_t ExperimentalRegExp::ExecRaw(JSRegExp regexp, String subject,
  const int32_t max_match_num = output_register_count / 2;

  if (subject_content.IsOneByte()) {
-    NfaInterpreter<uint8_t> interpreter(
-        bytecode, subject_content.ToOneByteVector(), subject_index);
-    return interpreter.FindMatches(matches, max_match_num);
+    return ExperimentalRegExpInterpreter::FindMatchesNfaOneByte(
+        bytecode, subject_content.ToOneByteVector(), subject_index, matches,
+        max_match_num);
  } else {
-    NfaInterpreter<uc16> interpreter(bytecode, subject_content.ToUC16Vector(),
-                                     subject_index);
-    return interpreter.FindMatches(matches, max_match_num);
+    return ExperimentalRegExpInterpreter::FindMatchesNfaTwoByte(
+        bytecode, subject_content.ToUC16Vector(), subject_index, matches,
+        max_match_num);
  }
 }