[regexp] Canonicalize character range before adding case equivalents.

Adding case equivalents requires a canonicalized character range.
With unicode sets, we failed to canonicalize ranges before adding case
equivalents in two locations.

Bug: chromium:1410963
Change-Id: I5907062f8c29b6e9d4a4c8166d3af05079298c50
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4205912
Auto-Submit: Patrick Thier <pthier@chromium.org>
Commit-Queue: Patrick Thier <pthier@chromium.org>
Reviewed-by: Jakob Linke <jgruber@chromium.org>
Commit-Queue: Jakob Linke <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#85556}
This commit is contained in:
pthier 2023-01-31 10:51:14 +01:00 committed by V8 LUCI CQ
parent 1cf624ba03
commit 39b0ade26b
2 changed files with 10 additions and 0 deletions

View File

@ -2758,6 +2758,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
// range.
if (!ranges->is_empty() || !strings->empty()) {
if (needs_case_folding) {
CharacterRange::Canonicalize(ranges);
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
}
may_contain_strings |= !strings->empty();
@ -2779,6 +2780,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseClassUnion(
// Add the range we started building as operand.
if (!ranges->is_empty() || !strings->empty()) {
if (needs_case_folding) {
CharacterRange::Canonicalize(ranges);
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone());
}
may_contain_strings |= !strings->empty();

View File

@ -0,0 +1,8 @@
// Copyright 2023 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-unicode-sets
assertTrue(/[a-cB]/vi.test('b'));
assertTrue(/[a-cB]/vi.test('B'));