dadd5f94f5
When creating a character class in unicode, case-insensitive mode, we use icu::UnicodeSet::closeOver() to add all characters that match the characters in the class case-insensitively. According to the spec, only simple case folding shall be performed for case-insensitive unicode matching, but closeOver() adds all characters that are equal w.r.t. full case folding. The current approach of just removing strings from the closeOver set is not enough, as single code point characters still remain in the set if they were equal only by performing full case folding. E.g. the characters \u0390 and \u1FD3 both fold to the same string "\u03B9\u0308\u0301" via full case folding, but they don't have a simple case folding in common. To prevent these wrong matches, we calculate, at build time, the set of all characters whose close-overs are wrong according to the spec, and remove them from the set before adding case-insensitive equivalent characters. Bug: v8:13377 Change-Id: I0252c79143f266911691331dd0e1e27044ea8cba Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3952095 Commit-Queue: Patrick Thier <pthier@chromium.org> Reviewed-by: Camillo Bruni <cbruni@chromium.org> Cr-Commit-Position: refs/heads/main@{#83791}
72 lines
2.5 KiB
JavaScript
72 lines
2.5 KiB
JavaScript
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Non-unicode use toUpperCase mappings.
// Without the `u` flag, /i canonicalizes each character through its
// upper-case mapping. U+00E5 upper-cases to U+00C5 while U+212B (ANGSTROM
// SIGN) stays U+212B, so the two must never compare equal.
assertFalse(/[\u00e5]/i.test("\u212b"));
// NOTE(review): the trailing \u1234 presumably forces a two-byte subject
// string -- confirm.
assertFalse(/[\u212b]/i.test("\u00e5\u1234"));
assertFalse(/[\u212b]/i.test("\u00e5"));

// String case conversion of the same characters: lower-casing maps both
// U+212B and U+00C5 onto U+00E5, but upper-casing U+00E5 yields U+00C5, not
// U+212B.
assertTrue("\u212b".toLowerCase() == "\u00e5");
assertTrue("\u00c5".toLowerCase() == "\u00e5");
assertTrue("\u00e5".toUpperCase() == "\u00c5");

// Unicode uses case folding mappings.
// With the `u` flag, /i compares characters by simple case folding. U+00E5,
// U+00C5 and U+212B belong to one folding class, so each of the three
// patterns must match each of the three subjects (3 x 3 combinations).

// Pattern U+00E5.
assertTrue(/\u00e5/ui.test("\u212b"));
assertTrue(/\u00e5/ui.test("\u00c5"));
assertTrue(/\u00e5/ui.test("\u00e5"));
// Pattern U+00C5.
assertTrue(/\u00c5/ui.test("\u00e5"));
assertTrue(/\u00c5/ui.test("\u212b"));
assertTrue(/\u00c5/ui.test("\u00c5"));
// Pattern U+212B.
assertTrue(/\u212b/ui.test("\u00c5"));
assertTrue(/\u212b/ui.test("\u00e5"));
assertTrue(/\u212b/ui.test("\u212b"));

// Non-BMP.
// Without the `u` flag, \u{10400} is not parsed as a code point escape, so
// this pattern cannot match U+10428.
assertFalse(/\u{10400}/i.test("\u{10428}"));
// With the `u` flag, U+10400 and U+10428 match each other case-insensitively,
// whether the pattern spells the code point as \u{...} or as a surrogate pair
// escape, and whether it is a bare atom or a character class.
assertTrue(/\u{10400}/ui.test("\u{10428}"));
assertTrue(/\ud801\udc00/ui.test("\u{10428}"));
assertTrue(/[\u{10428}]/ui.test("\u{10400}"));
assertTrue(/[\ud801\udc28]/ui.test("\u{10400}"));
// Class ranges spanning the BMP / non-BMP boundary: U+FF21 and U+10400 are
// not inside the range themselves and match only through their case
// partners U+FF41 and U+10428.
assertEquals(["\uff21\u{10400}"],
             /[\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc"));
// The negated range must exclude those case partners too (including the
// trailing U+FF23), leaving only "abc".
assertEquals(["abc"],
             /[^\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc\uff23"));
// Only the characters that have a case partner inside the range match:
// U+FF54 and U+24BA fall just outside it.
assertEquals(["\uff53\u24bb"],
             /[\u24d5-\uff33]+/ui.exec("\uff54\uff53\u24bb\u24ba"));

// Full mappings are ignored.
// Full case folding expands a single character into several (U+00DF to "ss",
// U+1F8D to U+1F05 U+03B9). Unicode /i matching must not apply those
// multi-character expansions.
assertFalse(/\u00df/ui.test("SS"));
assertFalse(/\u1f8d/ui.test("\u1f05\u03b9"));

// Simple mappings work.
// U+1F8D has a single-character (simple) case folding to U+1F85, which must
// be honoured even though its full folding is ignored.
assertTrue(/\u1f8d/ui.test("\u1f85"));

// Common mappings work.
// U+1F6B folds to U+1F63 via a mapping shared by simple and full case
// folding.
assertTrue(/\u1f6b/ui.test("\u1f63"));

// Back references.
// A case-insensitive unicode back reference must accept any character in the
// same folding class as the captured one; the capture itself keeps the
// character exactly as it appeared in the subject.
assertEquals(["\u00e5\u212b\u00c5", "\u00e5"],
             /(.)\1\1/ui.exec("\u00e5\u212b\u00c5"));
// Same check with a non-BMP case pair (U+118AA / U+118CA).
assertEquals(["\u{118aa}\u{118ca}", "\u{118aa}"],
             /(.)\1/ui.exec("\u{118aa}\u{118ca}"));

// Misc.
// Multi-character patterns: each position is folded independently, both for
// a run of equivalent BMP characters and for ASCII followed by a non-BMP
// case pair.
assertTrue(/\u00e5\u00e5\u00e5/ui.test("\u212b\u00e5\u00c5"));
assertTrue(/AB\u{10400}/ui.test("ab\u{10428}"));

// Non-Latin1 maps to Latin1.
// U+017F (LATIN SMALL LETTER LONG S) lies outside Latin1 but case-folds to
// the ASCII letter "s". Checked both as a bare atom and inside a character
// class.
// NOTE(review): the subjects with a trailing \u1234 presumably exercise the
// two-byte string path alongside the one-byte one -- confirm.
assertEquals(["s"], /^\u017F/ui.exec("s"));
assertEquals(["s"], /^\u017F/ui.exec("s\u1234"));
assertEquals(["as"], /^a[\u017F]/ui.exec("as"));
assertEquals(["as"], /^a[\u017F]/ui.exec("as\u1234"));

// Non-simple mappings created by UnicodeSet::closeOver() requiring special
// treatment.
// The two characters of each pair below are equal only under full case
// folding (both expand to the same multi-character string) and share no
// simple case folding, so they must not match each other in either
// direction.
assertFalse(/[\u0390]/ui.test("\u1fd3"));
assertFalse(/[\u1fd3]/ui.test("\u0390"));
assertFalse(/[\u03b0]/ui.test("\u1fe3"));
assertFalse(/[\u1fe3]/ui.test("\u03b0"));
assertFalse(/[\ufb05]/ui.test("\ufb06"));
assertFalse(/[\ufb06]/ui.test("\ufb05"));