[regexp] Check capture_count before using experimental engine

Sometimes the parser throws away redundant parts of the AST while
parsing.  For example, the regexp /(?:(?=(f)o))?f/ is (almost)
equivalent to just /f/ because the optional block (...)? is zero-length.
The parser notices this and returns the same tree as for /f/.  However,
there is a capture inside the (...)? block (which is never recorded
because the quantifier containing it can only match zero-width, which is
considered failure), so in this case it doesn't suffice to check that
the regexp AST doesn't contain captures.

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: I6145849d95b3522a397eadd2bae63d1d8e880f28
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2397896
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Cr-Commit-Position: refs/heads/master@{#69733}
This commit is contained in:
Martin Bidlingmaier 2020-09-08 13:09:06 +02:00 committed by Commit Bot
parent b5fe40aab1
commit bc4174cc3b
5 changed files with 14 additions and 9 deletions

View File

@ -18,8 +18,9 @@ constexpr uc32 kMaxSupportedCodepoint = 0xFFFFu;
class CanBeHandledVisitor final : private RegExpVisitor {
// Visitor to implement `ExperimentalRegExp::CanBeHandled`.
public:
static bool Check(RegExpTree* node, JSRegExp::Flags flags, Zone* zone) {
if (!AreSuitableFlags(flags)) {
static bool Check(RegExpTree* node, JSRegExp::Flags flags, int capture_count,
Zone* zone) {
if (!AreSuitableFlags(flags) || capture_count > 0) {
return false;
}
CanBeHandledVisitor visitor(zone);
@ -145,9 +146,9 @@ class CanBeHandledVisitor final : private RegExpVisitor {
bool ExperimentalRegExpCompiler::CanBeHandled(RegExpTree* tree,
JSRegExp::Flags flags,
Zone* zone) {
int capture_count, Zone* zone) {
DCHECK(FLAG_enable_experimental_regexp_engine);
return CanBeHandledVisitor::Check(tree, flags, zone);
return CanBeHandledVisitor::Check(tree, flags, capture_count, zone);
}
namespace {

View File

@ -19,7 +19,8 @@ class ExperimentalRegExpCompiler final : public AllStatic {
// but see the definition.
// TODO(mbid,v8:10765): Currently more things are not handled, e.g. some
// quantifiers and unicode.
static bool CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags, Zone* zone);
static bool CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags,
int capture_count, Zone* zone);
// Compile regexp into a bytecode program. The regexp must be handlable by
// the experimental engine; see`CanBeHandled`. The program is returned as a
// ZoneList backed by the same Zone that is used in the RegExpTree argument.

View File

@ -14,8 +14,9 @@ namespace v8 {
namespace internal {
bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags,
Zone* zone) {
return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, zone);
int capture_count, Zone* zone) {
return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count,
zone);
}
void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,

View File

@ -19,7 +19,8 @@ class ExperimentalRegExp final : public AllStatic {
// TODO(mbid, v8:10765): This walks the RegExpTree, but it could also be
// checked on the fly in the parser. Not done currently because walking the
// AST again is more flexible and less error prone (but less performant).
static bool CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags, Zone* zone);
static bool CanBeHandled(RegExpTree* tree, JSRegExp::Flags flags,
int capture_count, Zone* zone);
static void Initialize(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
int capture_count);

View File

@ -175,7 +175,8 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
bool has_been_compiled = false;
if (FLAG_enable_experimental_regexp_engine &&
ExperimentalRegExp::CanBeHandled(parse_result.tree, flags, &zone)) {
ExperimentalRegExp::CanBeHandled(parse_result.tree, flags,
parse_result.capture_count, &zone)) {
ExperimentalRegExp::Initialize(isolate, re, pattern, flags,
parse_result.capture_count);
has_been_compiled = true;