v8/test/mjsunit/regexp-experimental.js
Martin Bidlingmaier f2a832cadd [regexp] Support more quantifiers in experimental engine
Previously to this commit only quantifiers of the form /<x>*/, i.e.
arbitrarily often greedy repetition, were implemented.  Now a much
larger class is supported, e.g. + and ? and their non-greedy variants.
Because it came up repeatedly during the implementation, the commit also
adds the Label and DeferredLabel classes to patch JMP and FORK target
addresses more easily.

Still not supported are the following quantifiers:
- Possessive quantifiers, where I'm not entirely sure whether they could
  be implemented in principle. Re2 doesn't support them.
- Quantifiers with large but finite numbers for min and max numbers of
  repetitions, as in e.g. /<x>{9000, 90000}/. These are currently
  limited to some small value. This is because the body of such
  repetitions is unrolled explicitly, so the size of the bytecode is
  linear in the number of repetitions.

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: Id04d893252588abb0f80c3cb33cfc707f6601ea0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2387575
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69759}
2020-09-09 06:17:31 +00:00

65 lines
2.3 KiB
JavaScript

// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --enable-experimental-regexp-engine
function Test(regexp, subject, expectedResult, expectedLastIndex) {
assertEquals(%RegexpTypeTag(regexp), "EXPERIMENTAL");
var result = regexp.exec(subject);
assertArrayEquals(expectedResult, result);
assertEquals(expectedLastIndex, regexp.lastIndex);
}
// The empty regexp.
Test(new RegExp(""), "asdf", [""], 0);
// Plain patterns without special operators.
Test(/asdf1/, "123asdf1xyz", ["asdf1"], 0);
// Escaped operators, otherwise plain string:
Test(/\*\.\(\[\]\?/, "123*.([]?123", ["*.([]?"], 0);
// Some two byte values:
Test(/쁰d섊/, "123쁰d섊abc", ["쁰d섊"], 0);
// A pattern with surrogates but without unicode flag:
Test(/💩f/, "123💩f", ["💩f"], 0);
// Disjunctions.
Test(/asdf|123/, "xyz123asdf", ["123"], 0);
Test(/asdf|123|fj|f|a/, "da123", ["a"], 0);
Test(/|123/, "123", [""], 0);
// Character ranges.
Test(/[abc]/, "123asdf", ["a"], 0);
Test(/[0-9]/, "asdf123xyz", ["1"], 0);
Test(/[^0-9]/, "123!xyz", ["!"], 0);
Test(/\w\d/, "?a??a3!!!", ["a3"], 0);
// [💩] without unicode flag is a character range matching one of the two
// surrogate characters that make up 💩. The leading surrogate is 0xD83D.
Test(/[💩]/, "f💩", [String.fromCodePoint(0xD83D)], 0);
// Greedy and non-greedy quantifiers.
Test(/x*/, "asdfxk", [""], 0);
Test(/xx*a/, "xxa", ["xxa"], 0);
Test(/x*[xa]/, "xxaa", ["xxa"], 0);
Test(/x*?[xa]/, "xxaa", ["x"], 0);
Test(/x*?a/, "xxaa", ["xxa"], 0);
Test(/x+a/, "axxa", ["xxa"], 0);
Test(/x+?[ax]/, "axxa", ["xx"], 0);
Test(/xx?[xa]/, "xxaa", ["xxa"], 0);
Test(/xx??[xa]/, "xxaa", ["xx"], 0);
Test(/xx??a/, "xxaa", ["xxa"], 0);
Test(/x{4}/, "xxxxxxxxx", ["xxxx"], 0);
Test(/x{4,}/, "xxxxxxxxx", ["xxxxxxxxx"], 0);
Test(/x{4,}?/, "xxxxxxxxx", ["xxxx"], 0);
Test(/x{2,4}/, "xxxxxxxxx", ["xxxx"], 0);
Test(/x{2,4}?/, "xxxxxxxxx", ["xx"], 0);
// Non-capturing groups and nested operators.
Test(/(?:)/, "asdf", [""], 0);
Test(/(?:asdf)/, "123asdfxyz", ["asdf"], 0);
Test(/(?:asdf)|123/, "xyz123asdf", ["123"], 0);
Test(/asdf(?:[0-9]|(?:xy|x)*)*/, "kkkasdf5xyx8xyyky", ["asdf5xyx8xy"], 0);
// The global flag.
Test(/asdf/g, "fjasdfkkasdf", ["asdf"], 6);