v8/test/mjsunit/regexp-experimental.js
Martin Bidlingmaier e6e9cbac37 [regexp] Support the msy flags in experimental engine
The m (multiline) and s (dotall) flags just needed to be marked as
allowed; the required logic was already in the regexp parser.

A regexp /<x>/ without the y (sticky) flag is equivalent to the sticky
regexp /.*?<x>/y.  The interpreter now assumes that every regexp is
sticky, and the compiler appends a preamble corresponding to /.*?/
before non-sticky regexps.  To reuse existing code for compiling this
preamble, the logic for each kind of quantifier is now in a separate
function and called from VisitQuantifier and for the preamble.

The commit also includes some improvements/fixes for character ranges:
- Empty character ranges/disjunctions should never match, but before
  this commit they would *always* match.
- The check of the range bounds in CanBeHandledVisitor was unnecessary:
  without the unicode flag every range can be specified in 2-byte
  codepoints, and once we support unicode we simply support all
  codepoints.
- The capacity of the list containing the complementary intervals of a
  character range is now calculated more accurately.

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: I71a0e07279b4e1140c0ed1651b3714200c801de9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2404766
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70082}
2020-09-23 09:43:39 +00:00

99 lines
3.4 KiB
JavaScript

// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --enable-experimental-regexp-engine
function Test(regexp, subject, expectedResult, expectedLastIndex) {
assertEquals(%RegexpTypeTag(regexp), "EXPERIMENTAL");
var result = regexp.exec(subject);
if (result instanceof Array && expectedResult instanceof Array) {
assertArrayEquals(expectedResult, result);
} else {
assertEquals(expectedResult, result);
}
assertEquals(expectedLastIndex, regexp.lastIndex);
}
// Basic patterns, disjunctions and character ranges.
// Each row is [regexp, subject, expected exec() result]. No regexp here has
// the g or y flag, and every case expects lastIndex to still be 0.
for (const [pattern, input, expected] of [
  // The empty regexp.
  [new RegExp(""), "asdf", [""]],
  // Plain patterns without special operators.
  [/asdf1/, "123asdf1xyz", ["asdf1"]],
  // Escaped operators, otherwise plain string:
  [/\*\.\(\[\]\?/, "123*.([]?123", ["*.([]?"]],
  // Some two byte values:
  [/쁰d섊/, "123쁰d섊abc", ["쁰d섊"]],
  // A pattern with surrogates but without unicode flag:
  [/💩f/, "123💩f", ["💩f"]],
  // Disjunctions.
  [/asdf|123/, "xyz123asdf", ["123"]],
  [/asdf|123|fj|f|a/, "da123", ["a"]],
  [/|123/, "123", [""]],
  // Character ranges.
  [/[abc]/, "123asdf", ["a"]],
  [/[0-9]/, "asdf123xyz", ["1"]],
  [/[^0-9]/, "123!xyz", ["!"]],
  [/\w\d/, "?a??a3!!!", ["a3"]],
  // Without the unicode flag, [💩] is a character range that matches either
  // of the two surrogate code units making up 💩. The leading surrogate,
  // 0xD83D, comes first in the subject and is therefore the expected match.
  [/[💩]/, "f💩", [String.fromCodePoint(0xD83D)]],
]) {
  Test(pattern, input, expected, 0);
}
// Greedy and non-greedy quantifiers.
// Each row is [regexp, subject, expected exec() result]. No regexp here has
// the g or y flag, and every case expects lastIndex to still be 0.
for (const [pattern, input, expected] of [
  // Star, greedy and lazy.
  [/x*/, "asdfxk", [""]],
  [/xx*a/, "xxa", ["xxa"]],
  [/x*[xa]/, "xxaa", ["xxa"]],
  [/x*?[xa]/, "xxaa", ["x"]],
  [/x*?a/, "xxaa", ["xxa"]],
  // Plus, greedy and lazy.
  [/x+a/, "axxa", ["xxa"]],
  [/x+?[ax]/, "axxa", ["xx"]],
  // Question mark, greedy and lazy.
  [/xx?[xa]/, "xxaa", ["xxa"]],
  [/xx??[xa]/, "xxaa", ["xx"]],
  [/xx??a/, "xxaa", ["xxa"]],
  // Counted repetition, greedy and lazy.
  [/x{4}/, "xxxxxxxxx", ["xxxx"]],
  [/x{4,}/, "xxxxxxxxx", ["xxxxxxxxx"]],
  [/x{4,}?/, "xxxxxxxxx", ["xxxx"]],
  [/x{2,4}/, "xxxxxxxxx", ["xxxx"]],
  [/x{2,4}?/, "xxxxxxxxx", ["xx"]],
]) {
  Test(pattern, input, expected, 0);
}
// Groups. Each row is [regexp, subject, expected exec() result]; the expected
// result lists the whole match followed by one entry per capturing group,
// with undefined for a group that did not participate. No regexp here has
// the g or y flag, and every case expects lastIndex to still be 0.
for (const [pattern, input, expected] of [
  // Non-capturing groups and nested operators.
  [/(?:)/, "asdf", [""]],
  [/(?:asdf)/, "123asdfxyz", ["asdf"]],
  [/(?:asdf)|123/, "xyz123asdf", ["123"]],
  [/asdf(?:[0-9]|(?:xy|x)*)*/, "kkkasdf5xyx8xyyky", ["asdf5xyx8xy"]],
  // Capturing groups.
  [/()/, "asdf", ["", ""]],
  [/(123)/, "asdf123xyz", ["123", "123"]],
  [/asdf(123)xyz/, "asdf123xyz", ["asdf123xyz", "123"]],
  [/(123|xyz)/, "123", ["123", "123"]],
  [/(123|xyz)/, "xyz", ["xyz", "xyz"]],
  [/(123)|(xyz)/, "123", ["123", "123", undefined]],
  [/(123)|(xyz)/, "xyz", ["xyz", undefined, "xyz"]],
  [/(?:(123)|(xyz))*/, "xyz123", ["xyz123", "123", undefined]],
  [/((123)|(xyz)*)*/, "xyz123xyz", ["xyz123xyz", "xyz", undefined, "xyz"]],
]) {
  Test(pattern, input, expected, 0);
}
// Assertions: word boundary (\b), non-boundary (\B), and the ^ and $ anchors.
// Each row is [regexp, subject, expected exec() result]; null means that no
// match is expected. Every case expects lastIndex to still be 0.
for (const [pattern, input, expected] of [
  [/asdf\b/, "asdf---", ["asdf"]],
  [/asdf\b/, "asdfg", null],
  [/asd[fg]\B/, "asdf asdgg", ["asdg"]],
  [/^asd[fg]/, "asdf asdgg", ["asdf"]],
  [/asd[fg]$/, "asdf asdg", ["asdg"]],
]) {
  Test(pattern, input, expected, 0);
}
// The global flag. The first "asdf" in the subject spans indices 2..5, so
// after the match lastIndex is expected to be 6 rather than 0.
Test(/asdf/g, "fjasdfkkasdf", ["asdf"], 6);
// The sticky flag. lastIndex is preset to 2, the offset of the first "asdf"
// in the subject, before Test() calls exec(); the match is expected to
// succeed there and to advance lastIndex to 6.
var r = /asdf/y;
r.lastIndex = 2;
Test(r, "fjasdfkkasdf", ["asdf"], 6);
// The multiline (m) and dotall (s) flags.
// Each row is [regexp, subject, expected exec() result]; every case expects
// lastIndex to still be 0.
for (const [pattern, input, expected] of [
  // The multiline flag: ^ and $ are expected to match next to the "\n".
  [/^a/m, "x\na", ["a"]],
  [/x$/m, "x\na", ["x"]],
  // The dotall flag: "." is expected to match the "\n" as well.
  [/asdf.xyz/s, "asdf\nxyz", ["asdf\nxyz"]],
]) {
  Test(pattern, input, expected, 0);
}