2020-08-31 10:22:55 +00:00
|
|
|
// Copyright 2020 the V8 project authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file.
|
|
|
|
|
|
|
|
// Flags: --allow-natives-syntax --enable-experimental-regexp-engine
|
|
|
|
|
|
|
|
function Test(regexp, subject, expectedResult, expectedLastIndex) {
|
[regexp] Fix usage of {Is,Mark}PcProcessed in NfaInterpreter
Previously we checked whether a thread's pc IsPcProcessed before pushing
to the stack of (postponed) active_threads_. This commit moves the
IsPcProcessed check and corresponding MarkPcProcessed call to when the
thread is actually processed, i.e. when it is popped from the
active_threads_ stack again.
This fixes two issues:
- Consider what used to happen in the following scenario:
1. An active thread t is postponed (e.g. because it is a fork) and
pushed on active_threads_. IsPcProcessed(t.pc) is false, so t is
not discarded and does actually end up on active_threads_.
2. Some other thread s is executed, and at some point s.pc == t.pc,
i.e. t.pc is marked as processed.
3. t is popped from active_threads_ for processing.
In 3 we don't want to continue execution of t: After all, its pc is
already marked as processed. But because previously we only checked
for IsPcProcessed in step 1 before pushing to active_threads_, we used
to continue execution in 3. I don't think this is a correctness
issue, but possibly a performance problem. In any case, this commit
moves the IsPcProcessed check from 1 to 3 and so fixes this.
- After flushing blocked_threads_, we push them to active_threads_
again. While doing so, we used to mark these thread's pcs as processed.
This meant that sometimes a (fork of a) high priority thread was
cancelled by the IsPcProcessed check even though its pc was only
marked as processed by a thread with lower priority during flushing.
We need it to be the other way round: The low priority thread should
be cancelled after its pc is processed by a thread with higher
priority.
With this commit we don't MarkPcProcessed during flushing, it's
postponed to when we're actually processing. This was a correctness
issue, and there's a new corresponding test case.
Bug: v8:10765
Change-Id: Ie12682cf3f8a04222d907edd8a3ad25baa69465a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2388112
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69668}
2020-09-02 08:28:34 +00:00
|
|
|
assertEquals(%RegexpTypeTag(regexp), "EXPERIMENTAL");
|
2020-08-31 10:22:55 +00:00
|
|
|
var result = regexp.exec(subject);
|
2020-09-17 16:24:35 +00:00
|
|
|
if (result instanceof Array && expectedResult instanceof Array) {
|
|
|
|
assertArrayEquals(expectedResult, result);
|
|
|
|
} else {
|
|
|
|
assertEquals(expectedResult, result);
|
|
|
|
}
|
[regexp] Fix usage of {Is,Mark}PcProcessed in NfaInterpreter
Previously we checked whether a thread's pc IsPcProcessed before pushing
to the stack of (postponed) active_threads_. This commit moves the
IsPcProcessed check and corresponding MarkPcProcessed call to when the
thread is actually processed, i.e. when it is popped from the
active_threads_ stack again.
This fixes two issues:
- Consider what used to happen in the following scenario:
1. An active thread t is postponed (e.g. because it is a fork) and
pushed on active_threads_. IsPcProcessed(t.pc) is false, so t is
not discarded and does actually end up on active_threads_.
2. Some other thread s is executed, and at some point s.pc == t.pc,
i.e. t.pc is marked as processed.
3. t is popped from active_threads_ for processing.
In 3 we don't want to continue execution of t: After all, its pc is
already marked as processed. But because previously we only checked
for IsPcProcessed in step 1 before pushing to active_threads_, we used
to continue execution in 3. I don't think this is a correctness
issue, but possibly a performance problem. In any case, this commit
moves the IsPcProcessed check from 1 to 3 and so fixes this.
- After flushing blocked_threads_, we push them to active_threads_
again. While doing so, we used to mark these thread's pcs as processed.
This meant that sometimes a (fork of a) high priority thread was
cancelled by the IsPcProcessed check even though its pc was only
marked as processed by a thread with lower priority during flushing.
We need it to be the other way round: The low priority thread should
be cancelled after its pc is processed by a thread with higher
priority.
With this commit we don't MarkPcProcessed during flushing, it's
postponed to when we're actually processing. This was a correctness
issue, and there's a new corresponding test case.
Bug: v8:10765
Change-Id: Ie12682cf3f8a04222d907edd8a3ad25baa69465a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2388112
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69668}
2020-09-02 08:28:34 +00:00
|
|
|
assertEquals(expectedLastIndex, regexp.lastIndex);
|
2020-08-31 10:22:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// The empty regexp.
|
|
|
|
Test(new RegExp(""), "asdf", [""], 0);
|
|
|
|
|
|
|
|
// Plain patterns without special operators.
|
|
|
|
Test(/asdf1/, "123asdf1xyz", ["asdf1"], 0);
|
|
|
|
// Escaped operators, otherwise plain string:
|
|
|
|
Test(/\*\.\(\[\]\?/, "123*.([]?123", ["*.([]?"], 0);
|
|
|
|
// Some two byte values:
|
|
|
|
Test(/쁰d섊/, "123쁰d섊abc", ["쁰d섊"], 0);
|
|
|
|
// A pattern with surrogates but without unicode flag:
|
|
|
|
Test(/💩f/, "123💩f", ["💩f"], 0);
|
|
|
|
|
|
|
|
// Disjunctions.
|
|
|
|
Test(/asdf|123/, "xyz123asdf", ["123"], 0);
|
|
|
|
// Nothing matches at index 0 ("d"). At index 1 only the last alternative,
// `a`, matches, and that leftmost match wins over the later "123".
Test(/asdf|123|fj|f|a/, "da123", ["a"], 0);
|
|
|
|
// Alternatives are tried left to right: the empty first alternative already
// matches at index 0, so the expected match is "" rather than "123".
Test(/|123/, "123", [""], 0);
|
|
|
|
|
|
|
|
// Character ranges.
|
|
|
|
Test(/[abc]/, "123asdf", ["a"], 0);
|
|
|
|
Test(/[0-9]/, "asdf123xyz", ["1"], 0);
|
|
|
|
Test(/[^0-9]/, "123!xyz", ["!"], 0);
|
|
|
|
Test(/\w\d/, "?a??a3!!!", ["a3"], 0);
|
|
|
|
// [💩] without unicode flag is a character range matching one of the two
|
|
|
|
// surrogate characters that make up 💩. The leading surrogate is 0xD83D.
|
|
|
|
Test(/[💩]/, "f💩", [String.fromCodePoint(0xD83D)], 0);
|
|
|
|
|
2020-09-08 16:00:37 +00:00
|
|
|
// Greedy and non-greedy quantifiers.
|
2020-08-31 10:22:55 +00:00
|
|
|
// x* may match zero times, so it matches the empty string at index 0 and
// the "x" further along in the subject is never reached.
Test(/x*/, "asdfxk", [""], 0);
|
[regexp] Fix usage of {Is,Mark}PcProcessed in NfaInterpreter
Previously we checked whether a thread's pc IsPcProcessed before pushing
to the stack of (postponed) active_threads_. This commit moves the
IsPcProcessed check and corresponding MarkPcProcessed call to when the
thread is actually processed, i.e. when it is popped from the
active_threads_ stack again.
This fixes two issues:
- Consider what used to happen in the following scenario:
1. An active thread t is postponed (e.g. because it is a fork) and
pushed on active_threads_. IsPcProcessed(t.pc) is false, so t is
not discarded and does actually end up on active_threads_.
2. Some other thread s is executed, and at some point s.pc == t.pc,
i.e. t.pc is marked as processed.
3. t is popped from active_threads_ for processing.
In 3 we don't want to continue execution of t: After all, its pc is
already marked as processed. But because previously we only checked
for IsPcProcessed in step 1 before pushing to active_threads_, we used
to continue execution in 3. I don't think this is a correctness
issue, but possibly a performance problem. In any case, this commit
moves the IsPcProcessed check from 1 to 3 and so fixes this.
- After flushing blocked_threads_, we push them to active_threads_
again. While doing so, we used to mark these threads' pcs as processed.
This meant that sometimes a (fork of a) high priority thread was
cancelled by the IsPcProcessed check even though its pc was only
marked as processed by a thread with lower priority during flushing.
We need it to be the other way round: The low priority thread should
be cancelled after its pc is processed by a thread with higher
priority.
With this commit we don't MarkPcProcessed during flushing; it's
postponed to when we're actually processing. This was a correctness
issue, and there's a new corresponding test case.
Bug: v8:10765
Change-Id: Ie12682cf3f8a04222d907edd8a3ad25baa69465a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2388112
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69668}
2020-09-02 08:28:34 +00:00
|
|
|
Test(/xx*a/, "xxa", ["xxa"], 0);
|
2020-09-08 16:00:37 +00:00
|
|
|
Test(/x*[xa]/, "xxaa", ["xxa"], 0);
|
|
|
|
// The lazy x*? first tries zero repetitions; [xa] then matches the first
// "x", giving the shortest possible match.
Test(/x*?[xa]/, "xxaa", ["x"], 0);
|
|
|
|
// Although x*? is lazy, `a` cannot match until both "x"s are consumed, so
// the match still covers "xxa".
Test(/x*?a/, "xxaa", ["xxa"], 0);
|
|
|
|
Test(/x+a/, "axxa", ["xxa"], 0);
|
|
|
|
// No match at index 0 because x+? needs at least one "x". At index 1 the
// lazy x+? takes a single "x" and [ax] takes the second one.
Test(/x+?[ax]/, "axxa", ["xx"], 0);
|
|
|
|
Test(/xx?[xa]/, "xxaa", ["xxa"], 0);
|
|
|
|
Test(/xx??[xa]/, "xxaa", ["xx"], 0);
|
|
|
|
// The lazy x?? first tries to match nothing, but then `a` fails on the
// second "x"; the optional "x" has to be taken for the match to succeed.
Test(/xx??a/, "xxaa", ["xxa"], 0);
|
|
|
|
Test(/x{4}/, "xxxxxxxxx", ["xxxx"], 0);
|
|
|
|
Test(/x{4,}/, "xxxxxxxxx", ["xxxxxxxxx"], 0);
|
|
|
|
Test(/x{4,}?/, "xxxxxxxxx", ["xxxx"], 0);
|
|
|
|
Test(/x{2,4}/, "xxxxxxxxx", ["xxxx"], 0);
|
|
|
|
Test(/x{2,4}?/, "xxxxxxxxx", ["xx"], 0);
|
2020-08-31 10:22:55 +00:00
|
|
|
|
|
|
|
// Non-capturing groups and nested operators.
|
|
|
|
Test(/(?:)/, "asdf", [""], 0);
|
|
|
|
Test(/(?:asdf)/, "123asdfxyz", ["asdf"], 0);
|
|
|
|
Test(/(?:asdf)|123/, "xyz123asdf", ["123"], 0);
|
|
|
|
// The outer loop consumes "5", "xyx" (as "xy" followed by "x"), "8" and
// "xy" in turn. It stops at the following "y", where neither [0-9] nor a
// non-empty (?:xy|x)* can match.
Test(/asdf(?:[0-9]|(?:xy|x)*)*/, "kkkasdf5xyx8xyyky", ["asdf5xyx8xy"], 0);
|
|
|
|
|
2020-09-15 19:03:51 +00:00
|
|
|
// Capturing groups.
|
|
|
|
Test(/()/, "asdf", ["", ""], 0);
|
|
|
|
Test(/(123)/, "asdf123xyz", ["123", "123"], 0);
|
|
|
|
Test(/asdf(123)xyz/, "asdf123xyz", ["asdf123xyz", "123"], 0);
|
|
|
|
Test(/(123|xyz)/, "123", ["123", "123"], 0);
|
|
|
|
Test(/(123|xyz)/, "xyz", ["xyz", "xyz"], 0);
|
|
|
|
Test(/(123)|(xyz)/, "123", ["123", "123", undefined], 0);
|
|
|
|
Test(/(123)|(xyz)/, "xyz", ["xyz", undefined, "xyz"], 0);
|
|
|
|
// Captures report only the last iteration of the loop. That iteration
// matched `123`, so group 2 (which matched "xyz" in the first iteration) is
// expected to be undefined.
Test(/(?:(123)|(xyz))*/, "xyz123", ["xyz123", "123", undefined], 0);
|
|
|
|
// The outer loop runs three times ("xyz", "123", "xyz"). The expected
// captures are those of the final iteration: groups 1 and 3 hold "xyz" and
// group 2, set in the middle iteration, is undefined again.
Test(/((123)|(xyz)*)*/, "xyz123xyz", ["xyz123xyz", "xyz", undefined, "xyz"], 0);
|
|
|
|
|
2020-09-17 16:24:35 +00:00
|
|
|
// Assertions.
|
|
|
|
Test(/asdf\b/, "asdf---", ["asdf"], 0);
|
|
|
|
Test(/asdf\b/, "asdfg", null, 0);
|
|
|
|
// "asdf" is followed by a space, which is a word boundary, so \B rejects it.
// "asdg" is followed by another "g" (no boundary) and is the match.
Test(/asd[fg]\B/, "asdf asdgg", ["asdg"], 0);
|
2020-09-22 15:19:04 +00:00
|
|
|
Test(/^asd[fg]/, "asdf asdgg", ["asdf"], 0);
|
2020-09-17 16:24:35 +00:00
|
|
|
Test(/asd[fg]$/, "asdf asdg", ["asdg"], 0);
|
|
|
|
|
2020-08-31 10:22:55 +00:00
|
|
|
// The global flag.
|
|
|
|
// With the global flag, exec advances lastIndex to the end of the match:
// the first "asdf" starts at index 2 and ends at index 6.
Test(/asdf/g, "fjasdfkkasdf", ["asdf"], 6);
|
2020-09-22 15:19:04 +00:00
|
|
|
|
|
|
|
// The sticky flag.
|
|
|
|
var r = /asdf/y;
|
|
|
|
// Index 2 is where the first "asdf" begins in the subject used by the
// following Test call.
r.lastIndex = 2;
|
|
|
|
// r is sticky with lastIndex 2, so matching is attempted at index 2 only.
// "asdf" matches there and lastIndex is expected to move to 6.
Test(r, "fjasdfkkasdf", ["asdf"], 6);
|
|
|
|
|
|
|
|
// The multiline flag.
|
|
|
|
// With the multiline flag, ^ also matches right after the "\n", so the "a"
// on the second line is found.
Test(/^a/m, "x\na", ["a"], 0);
|
|
|
|
Test(/x$/m, "x\na", ["x"], 0);
|
|
|
|
|
|
|
|
// The dotall flag.
|
|
|
|
// With the dotall flag, `.` also matches the line terminator "\n".
Test(/asdf.xyz/s, "asdf\nxyz", ["asdf\nxyz"], 0);
|