v8/test/mjsunit/es9/regexp-lookbehind.js
Seth Brenith bea0ffd0dd Reland "[regexp] Better quick checks on loop entry nodes"
This is a reland of 4b15b984ad

Updates since original: fix an arithmetic overflow bug, remove an invalid
DCHECK, add a unit test that would trigger that DCHECK.

Original change's description:
> [regexp] Better quick checks on loop entry nodes
>
> Like the predecessor change https://crrev.com/c/v8/v8/+/1702125 , this
> change is inspired by attempting to exit earlier from generated RegExp
> code, when no further matches are possible because any match would be
> too long. The motivating example this time is the following expression,
> which tests whether a string of Unicode playing cards has five of the
> same suit in a row:
>
> /([🂡-🂮]{5})|([🂱-🂾]{5})|([🃁-🃎]{5})|([🃑-🃞]{5})/u
>
> A human reading this expression can readily see that any match requires
> at least 10 characters (5 surrogate pairs), but the LoopChoiceNode for
> each repeated option reports its minimum distance to the end of a match
> as zero. This is correct, because the LoopChoiceNode's behavior depends
> on additional state (the loop counter). However, the preceding node, a
> SET_REGISTER action that initializes the loop counter, could confidently
> state that it consumes at least 10 characters. Furthermore, when we try
> to emit a quick check for that action, we could follow only paths from
> the LoopChoiceNode that are possible based on the minimum iteration
> count. This change implements both of those "could"s.
>
> I expect this improvement to apply pretty broadly to expressions that
> use minimum repetition counts and that don't meet the criteria for
> unrolling. In this particular case, I get about 12% improvement on the
> overall UniPoker test, due to reducing the execution time of this
> expression by 85% and the execution time of another similar expression
> that checks for n-of-a-kind by 20%.
>
> Bug: v8:9305
>
> Change-Id: I319e381743967bdf83324be75bae943fbb5dd496
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1704941
> Commit-Queue: Seth Brenith <seth.brenith@microsoft.com>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#62963}

Bug: v8:9305
Change-Id: I992070d383009013881bf778242254c27134b650
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1726674
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Seth Brenith <seth.brenith@microsoft.com>
Cr-Commit-Position: refs/heads/master@{#63009}
2019-07-31 14:34:20 +00:00

173 lines
8.2 KiB
JavaScript

// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Simple fixed-length matches.
assertEquals(["a"], "a".match(/^.(?<=a)/));
assertNull("b".match(/^.(?<=a)/));
assertEquals(["foo"], "foo1".match(/^f..(?<=.oo)/));
assertEquals(["foo"], "foo2".match(/^f\w\w(?<=\woo)/));
assertNull("boo".match(/^f\w\w(?<=\woo)/));
assertNull("fao".match(/^f\w\w(?<=\woo)/));
assertNull("foa".match(/^f\w\w(?<=\woo)/));
assertEquals(["def"], "abcdef".match(/(?<=abc)\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a.c)\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a\wc)\w\w\w/));
assertEquals(["cde"], "abcdef".match(/(?<=a[a-z])\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a[a-z][a-z])\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a[a-z]{2})\w\w\w/));
assertEquals(["bcd"], "abcdef".match(/(?<=a{1})\w\w\w/));
assertEquals(["cde"], "abcdef".match(/(?<=a{1}b{1})\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a{1}[a-z]{2})\w\w\w/));
// Variable-length matches.
assertEquals(["def"], "abcdef".match(/(?<=[a|b|c]*)[^a|b|c]{3}/));
assertEquals(["def"], "abcdef".match(/(?<=\w*)[^a|b|c]{3}/));
// Start of line matches.
assertEquals(["def"], "abcdef".match(/(?<=^abc)def/));
assertEquals(["def"], "abcdef".match(/(?<=^[a-c]{3})def/));
assertEquals(["def"], "abcabcdef".match(/(?<=^[a-c]{6})def/));
assertEquals(["def"], "xyz\nabcdef".match(/(?<=^[a-c]{3})def/m));
assertEquals(["ab", "cd", "efg"], "ab\ncd\nefg".match(/(?<=^)\w+/gm));
assertEquals(["ab", "cd", "efg"], "ab\ncd\nefg".match(/\w+(?<=$)/gm));
assertEquals(["ab", "cd", "efg"], "ab\ncd\nefg".match(/(?<=^)\w+(?<=$)/gm));
assertNull("abcdef".match(/(?<=^[^a-c]{3})def/));
assertNull("foooo".match(/"^foooo(?<=^o+)$/));
assertNull("foooo".match(/"^foooo(?<=^o*)$/));
assertEquals(["foo"], "foo".match(/^foo(?<=^fo+)$/));
assertEquals(["foooo"], "foooo".match(/^foooo(?<=^fo*)/));
assertEquals(["foo", "f"], "foo".match(/^(f)oo(?<=^\1o+)$/));
assertEquals(["foo", "f"], "foo".match(/^(f)oo(?<=^\1o+)$/i));
assertEquals(["foo\u1234", "f"], "foo\u1234".match(/^(f)oo(?<=^\1o+).$/i));
assertEquals(["def"], "abcdefdef".match(/(?<=^\w+)def/));
assertEquals(["def", "def"], "abcdefdef".match(/(?<=^\w+)def/g));
// Word boundary matches.
assertEquals(["def"], "abc def".match(/(?<=\b)[d-f]{3}/));
assertEquals(["def"], "ab cdef".match(/(?<=\B)\w{3}/));
assertEquals(["def"], "ab cdef".match(/(?<=\B)(?<=c(?<=\w))\w{3}/));
assertNull("abcdef".match(/(?<=\b)[d-f]{3}/));
// Negative lookbehind.
assertEquals(["abc"], "abcdef".match(/(?<!abc)\w\w\w/));
assertEquals(["abc"], "abcdef".match(/(?<!a.c)\w\w\w/));
assertEquals(["abc"], "abcdef".match(/(?<!a\wc)\w\w\w/));
assertEquals(["abc"], "abcdef".match(/(?<!a[a-z])\w\w\w/));
assertEquals(["abc"], "abcdef".match(/(?<!a[a-z]{2})\w\w\w/));
assertNull("abcdef".match(/(?<!abc)def/));
assertNull("abcdef".match(/(?<!a.c)def/));
assertNull("abcdef".match(/(?<!a\wc)def/));
assertNull("abcdef".match(/(?<!a[a-z][a-z])def/));
assertNull("abcdef".match(/(?<!a[a-z]{2})def/));
assertNull("abcdef".match(/(?<!a{1}b{1})cde/));
assertNull("abcdef".match(/(?<!a{1}[a-z]{2})def/));
// Capturing matches.
assertEquals(["def", "c"], "abcdef".match(/(?<=(c))def/));
assertEquals(["def", "bc"], "abcdef".match(/(?<=(\w{2}))def/));
assertEquals(["def", "bc", "c"], "abcdef".match(/(?<=(\w(\w)))def/));
assertEquals(["def", "a"], "abcdef".match(/(?<=(\w){3})def/));
assertEquals(["d", "bc", undefined], "abcdef".match(/(?<=(bc)|(cd))./));
assertEquals(["c", "a", undefined],
"abcdef".match(/(?<=([ab]{1,2})\D|(abc))\w/));
assertEquals(["ab", "a", "b"], "abcdef".match(/\D(?<=([ab]+))(\w)/));
assertEquals(["c", "d"], "abcdef".match(/(?<=b|c)\w/g));
assertEquals(["cd", "ef"], "abcdef".match(/(?<=[b-e])\w{2}/g));
// Captures inside negative lookbehind. (They never capture.)
assertEquals(["de", undefined], "abcdef".match(/(?<!(^|[ab]))\w{2}/));
// Nested lookaround.
assertEquals(["ef"], "abcdef".match(/(?<=ab(?=c)\wd)\w\w/));
assertEquals(["ef", "bc"], "abcdef".match(/(?<=a(?=([^a]{2})d)\w{3})\w\w/));
assertEquals(["ef", "bc"],
"abcdef".match(/(?<=a(?=([bc]{2}(?<!a{2}))d)\w{3})\w\w/));
assertNull("abcdef".match(/(?<=a(?=([bc]{2}(?<!a*))d)\w{3})\w\w/));
assertEquals(["faaa"], "faaao".match(/^faaao?(?<=^f[oa]+(?=o))/));
// Back references.
assertEquals(["b", "b", "bb"], "abb".match(/(.)(?<=(\1\1))/));
assertEquals(["B", "B", "bB"], "abB".match(/(.)(?<=(\1\1))/i));
assertEquals(["aB", "aB", "a"], "aabAaBa".match(/((\w)\w)(?<=\1\2\1)/i));
assertEquals(["Ba", "Ba", "a"], "aabAaBa".match(/(\w(\w))(?<=\1\2\1)/i));
assertEquals(["b", "b", "B"], "abaBbAa".match(/(?=(\w))(?<=(\1))./i));
assertEquals(["foo", "'", "foo"], " 'foo' ".match(/(?<=(.))(\w+)(?=\1)/));
assertEquals(["foo", "\"", "foo"], " \"foo\" ".match(/(?<=(.))(\w+)(?=\1)/));
assertNull(" .foo\" ".match(/(?<=(.))(\w+)(?=\1)/));
assertNull("ab".match(/(.)(?<=\1\1\1)/));
assertNull("abb".match(/(.)(?<=\1\1\1)/));
assertEquals(["b", "b"], "abbb".match(/(.)(?<=\1\1\1)/));
assertNull("ab".match(/(..)(?<=\1\1\1)/));
assertNull("abb".match(/(..)(?<=\1\1\1)/));
assertNull("aabb".match(/(..)(?<=\1\1\1)/));
assertNull("abab".match(/(..)(?<=\1\1\1)/));
assertNull("fabxbab".match(/(..)(?<=\1\1\1)/));
assertNull("faxabab".match(/(..)(?<=\1\1\1)/));
assertEquals(["ab", "ab"], "fababab".match(/(..)(?<=\1\1\1)/));
// Back references to captures inside the lookbehind.
assertEquals(["d", "C"], "abcCd".match(/(?<=\1(\w))d/i));
assertEquals(["d", "x"], "abxxd".match(/(?<=\1([abx]))d/));
assertEquals(["c", "ab"], "ababc".match(/(?<=\1(\w+))c/));
assertEquals(["c", "b"], "ababbc".match(/(?<=\1(\w+))c/));
assertNull("ababdc".match(/(?<=\1(\w+))c/));
assertEquals(["c", "abab"], "ababc".match(/(?<=(\w+)\1)c/));
// Alternations are tried left to right,
// and we do not backtrack into a lookbehind.
assertEquals(["xabcd", "cd", ""], "xabcd".match(/.*(?<=(..|...|....))(.*)/));
assertEquals(["xabcd", "bcd", ""], "xabcd".match(/.*(?<=(xx|...|....))(.*)/));
assertEquals(["xxabcd", "bcd", ""], "xxabcd".match(/.*(?<=(xx|...))(.*)/));
assertEquals(["xxabcd", "xx", "abcd"], "xxabcd".match(/.*(?<=(xx|xxx))(.*)/));
// We do not backtrack into a lookbehind.
// The lookbehind captures "abc" so that \1 does not match. We do not backtrack
// to capture only "bc" in the lookbehind.
assertNull("abcdbc".match(/(?<=([abc]+)).\1/));
// Greedy loop.
assertEquals(["c", "bbbbbb"], "abbbbbbc".match(/(?<=(b+))c/));
assertEquals(["c", "b1234"], "ab1234c".match(/(?<=(b\d+))c/));
assertEquals(["c", "b12b23b34"], "ab12b23b34c".match(/(?<=((?:b\d{2})+))c/));
// Sticky
var re1 = /(?<=^(\w+))def/g;
assertEquals(["def", "abc"], re1.exec("abcdefdef"));
assertEquals(["def", "abcdef"], re1.exec("abcdefdef"));
var re2 = /\Bdef/g;
assertEquals(["def"], re2.exec("abcdefdef"));
assertEquals(["def"], re2.exec("abcdefdef"));
// Misc
assertNull("abcdef".match(/(?<=$abc)def/));
assertEquals(["foo"], "foo".match(/^foo(?<=foo)$/));
assertEquals(["foo"], "foo".match(/^f.o(?<=foo)$/));
assertNull("fno".match(/^f.o(?<=foo)$/));
assertNull("foo".match(/^foo(?<!foo)$/));
assertNull("foo".match(/^f.o(?<!foo)$/));
assertEquals(["fno"], "fno".match(/^f.o(?<!foo)$/));
assertEquals(["foooo"], "foooo".match(/^foooo(?<=fo+)$/));
assertEquals(["foooo"], "foooo".match(/^foooo(?<=fo*)$/));
assertEquals(["abc", "abc"], /(abc\1)/.exec("abc"));
assertEquals(["abc", "abc"], /(abc\1)/.exec("abc\u1234"));
assertEquals(["abc", "abc"], /(abc\1)/i.exec("abc"));
assertEquals(["abc", "abc"], /(abc\1)/i.exec("abc\u1234"));
var oob_subject = "abcdefghijklmnabcdefghijklmn".substr(14);
assertNull(oob_subject.match(/(?=(abcdefghijklmn))(?<=\1)a/i));
assertNull(oob_subject.match(/(?=(abcdefghijklmn))(?<=\1)a/));
assertNull("abcdefgabcdefg".substr(1).match(/(?=(abcdefg))(?<=\1)/));
// Mutual recursive capture/back references
assertEquals(["cacb", "a", ""], /(?<=a(.\2)b(\1)).{4}/.exec("aabcacbc"));
assertEquals(["b", "ac", "ac"], /(?<=a(\2)b(..\1))b/.exec("aacbacb"));
assertEquals(["x", "aa"], /(?<=(?:\1b)(aa))./.exec("aabaax"));
assertEquals(["x", "aa"], /(?<=(?:\1|b)(aa))./.exec("aaaax"));
// Restricted syntax in Annex B 1.4.
assertThrows("/(?<=.)*/u", SyntaxError);
assertThrows("/(?<=.){1,2}/u", SyntaxError);
assertThrows("/(?<=.)*/", SyntaxError);
assertThrows("/(?<=.)?/", SyntaxError);
assertThrows("/(?<=.)+/", SyntaxError);