f2a832cadd
Prior to this commit, only quantifiers of the form /<x>*/, i.e. arbitrarily often greedy repetition, were implemented. Now a much larger class is supported, e.g. + and ? and their non-greedy variants. Because it came up repeatedly during the implementation, the commit also adds the Label and DeferredLabel classes to patch JMP and FORK target addresses more easily. Still not supported are the following quantifiers: - Possessive quantifiers, where I'm not entirely sure whether they could be implemented in principle. Re2 doesn't support them. - Quantifiers with large but finite numbers for min and max numbers of repetitions, as in e.g. /<x>{9000, 90000}/. These are currently limited to some small value. This is because the body of such repetitions is unrolled explicitly, so the size of the bytecode is linear in the number of repetitions. Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng Bug: v8:10765 Change-Id: Id04d893252588abb0f80c3cb33cfc707f6601ea0 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2387575 Commit-Queue: Jakob Gruber <jgruber@chromium.org> Reviewed-by: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#69759}
65 lines
2.3 KiB
JavaScript
65 lines
2.3 KiB
JavaScript
// Copyright 2020 the V8 project authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
// Flags: --allow-natives-syntax --enable-experimental-regexp-engine
|
|
|
|
function Test(regexp, subject, expectedResult, expectedLastIndex) {
|
|
assertEquals(%RegexpTypeTag(regexp), "EXPERIMENTAL");
|
|
var result = regexp.exec(subject);
|
|
assertArrayEquals(expectedResult, result);
|
|
assertEquals(expectedLastIndex, regexp.lastIndex);
|
|
}
|
|
|
|
// The empty regexp: expected to produce an empty match and leave lastIndex
// at 0.
{
  const emptyRegexp = new RegExp("");
  Test(emptyRegexp, "asdf", [""], 0);
}
|
|
|
|
// Plain patterns without special operators. Each row is
// [regexp, subject, expected match]; every case expects lastIndex to be 0.
for (const [pattern, subject, expectedMatch] of [
  // ASCII-only literal.
  [/asdf1/, "123asdf1xyz", "asdf1"],
  // Escaped operators, otherwise plain string.
  [/\*\.\(\[\]\?/, "123*.([]?123", "*.([]?"],
  // Some two byte values.
  [/쁰d섊/, "123쁰d섊abc", "쁰d섊"],
  // A pattern with surrogates but without unicode flag.
  [/💩f/, "123💩f", "💩f"],
]) {
  Test(pattern, subject, [expectedMatch], 0);
}
|
|
|
|
// Disjunctions. Each row is [regexp, subject, expected match]; every case
// expects lastIndex to be 0.
for (const [pattern, subject, expectedMatch] of [
  // "123" occurs earlier in the subject than "asdf", so it is the match even
  // though it is the second alternative.
  [/asdf|123/, "xyz123asdf", "123"],
  [/asdf|123|fj|f|a/, "da123", "a"],
  // The empty first alternative is expected to win, giving an empty match.
  [/|123/, "123", ""],
]) {
  Test(pattern, subject, [expectedMatch], 0);
}
|
|
|
|
// Character ranges. Each row is [regexp, subject, expected match]; every case
// expects lastIndex to be 0.
for (const [pattern, subject, expectedMatch] of [
  [/[abc]/, "123asdf", "a"],
  [/[0-9]/, "asdf123xyz", "1"],
  [/[^0-9]/, "123!xyz", "!"],
  [/\w\d/, "?a??a3!!!", "a3"],
  // [💩] without unicode flag is a character range matching one of the two
  // surrogate characters that make up 💩. The leading surrogate is 0xD83D.
  [/[💩]/, "f💩", String.fromCodePoint(0xD83D)],
]) {
  Test(pattern, subject, [expectedMatch], 0);
}
|
|
|
|
// Greedy and non-greedy quantifiers. Each row is
// [regexp, subject, expected match]; every case expects lastIndex to be 0.
{
  const quantifierCases = [
    // Star, greedy and non-greedy.
    [/x*/, "asdfxk", ""],
    [/xx*a/, "xxa", "xxa"],
    [/x*[xa]/, "xxaa", "xxa"],
    [/x*?[xa]/, "xxaa", "x"],
    [/x*?a/, "xxaa", "xxa"],
    // Plus, greedy and non-greedy.
    [/x+a/, "axxa", "xxa"],
    [/x+?[ax]/, "axxa", "xx"],
    // Question mark, greedy and non-greedy.
    [/xx?[xa]/, "xxaa", "xxa"],
    [/xx??[xa]/, "xxaa", "xx"],
    [/xx??a/, "xxaa", "xxa"],
    // Counted repetitions: exact, open-ended, and bounded.
    [/x{4}/, "xxxxxxxxx", "xxxx"],
    [/x{4,}/, "xxxxxxxxx", "xxxxxxxxx"],
    [/x{4,}?/, "xxxxxxxxx", "xxxx"],
    [/x{2,4}/, "xxxxxxxxx", "xxxx"],
    [/x{2,4}?/, "xxxxxxxxx", "xx"],
  ];
  for (const [pattern, subject, expectedMatch] of quantifierCases) {
    Test(pattern, subject, [expectedMatch], 0);
  }
}
|
|
|
|
// Non-capturing groups and nested operators. Each row is
// [regexp, subject, expected match]; every case expects lastIndex to be 0.
for (const [pattern, subject, expectedMatch] of [
  // An empty group is expected to produce an empty match.
  [/(?:)/, "asdf", ""],
  [/(?:asdf)/, "123asdfxyz", "asdf"],
  [/(?:asdf)|123/, "xyz123asdf", "123"],
  // Quantified group containing a disjunction and a nested quantified group.
  [/asdf(?:[0-9]|(?:xy|x)*)*/, "kkkasdf5xyx8xyyky", "asdf5xyx8xy"],
]) {
  Test(pattern, subject, [expectedMatch], 0);
}
|
|
|
|
// The global flag. The first "asdf" in the subject spans indices 2..5, so
// lastIndex is expected to be 6 after exec().
{
  const globalRegexp = /asdf/g;
  Test(globalRegexp, "fjasdfkkasdf", ["asdf"], 6);
}
|