[regexp] Handle marker value 0x10ffff in MakeRangeArray
Unfortunately, CharacterRanges may use 0x10ffff as a marker value signifying 'highest possible code unit' irrespective of whether the regexp instance has the unicode flag or not. This value makes it through RegExpCharacterClass::ToNode unmodified (since no surrogate desugaring takes place without /u). Correctly mask the marker value down to 0xffff for purposes of building our uint16_t range array. Note: It'd be better to never introduce 0x10ffff in the first place, but given the irregexp pipeline's lack of hackability I hesitate to change this - we are sure to rely on it implicitly in other spots. Drive-by: Refactors. Fixed: chromium:1264508 Bug: v8:11069 Change-Id: Ib3c5780e91f682f1a6d15f26eb4cf03636d93c25 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3256549 Commit-Queue: Jakob Gruber <jgruber@chromium.org> Reviewed-by: Mathias Bynens <mathias@chromium.org> Cr-Commit-Position: refs/heads/main@{#77673}
This commit is contained in:
parent
d0df1ebce8
commit
bfa681ffb9
@ -112,9 +112,6 @@ class CharacterRange {
|
||||
return list;
|
||||
}
|
||||
|
||||
V8_EXPORT_PRIVATE static void AddClassEscape(
|
||||
StandardCharacterSet standard_character_set,
|
||||
ZoneList<CharacterRange>* ranges, Zone* zone);
|
||||
// Add class escapes. Add case equivalent closure for \w and \W if necessary.
|
||||
V8_EXPORT_PRIVATE static void AddClassEscape(
|
||||
StandardCharacterSet standard_character_set,
|
||||
|
@ -50,17 +50,17 @@ namespace {
|
||||
bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
|
||||
const int* special_class, int length) {
|
||||
length--; // Remove final marker.
|
||||
|
||||
DCHECK_EQ(kRangeEndMarker, special_class[length]);
|
||||
DCHECK_NE(0, ranges->length());
|
||||
DCHECK_NE(0, length);
|
||||
DCHECK_NE(0, special_class[0]);
|
||||
if (ranges->length() != (length >> 1) + 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ranges->length() != (length >> 1) + 1) return false;
|
||||
|
||||
CharacterRange range = ranges->at(0);
|
||||
if (range.from() != 0) {
|
||||
return false;
|
||||
}
|
||||
if (range.from() != 0) return false;
|
||||
|
||||
for (int i = 0; i < length; i += 2) {
|
||||
if (static_cast<base::uc32>(special_class[i]) != (range.to() + 1)) {
|
||||
return false;
|
||||
@ -70,19 +70,17 @@ bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (range.to() != kMaxCodePoint) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
return range.to() == kMaxCodePoint;
|
||||
}
|
||||
|
||||
bool CompareRanges(ZoneList<CharacterRange>* ranges, const int* special_class,
|
||||
int length) {
|
||||
length--; // Remove final marker.
|
||||
|
||||
DCHECK_EQ(kRangeEndMarker, special_class[length]);
|
||||
if (ranges->length() * 2 != length) {
|
||||
return false;
|
||||
}
|
||||
if (ranges->length() * 2 != length) return false;
|
||||
|
||||
for (int i = 0; i < length; i += 2) {
|
||||
CharacterRange range = ranges->at(i >> 1);
|
||||
if (range.from() != static_cast<base::uc32>(special_class[i]) ||
|
||||
@ -1157,12 +1155,7 @@ void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set,
|
||||
ranges->AddAll(*new_ranges, zone);
|
||||
return;
|
||||
}
|
||||
AddClassEscape(standard_character_set, ranges, zone);
|
||||
}
|
||||
|
||||
void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set,
|
||||
ZoneList<CharacterRange>* ranges,
|
||||
Zone* zone) {
|
||||
switch (standard_character_set) {
|
||||
case StandardCharacterSet::kWhitespace:
|
||||
AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
|
||||
|
@ -115,9 +115,16 @@ uint32_t Hash(const ZoneList<CharacterRange>* ranges) {
|
||||
return static_cast<uint32_t>(seed);
|
||||
}
|
||||
|
||||
// Returns `c` reduced to its low 16 bits, so that an end-of-range marker of
// 0x10ffff is translated to 0xffff while values that already fit in 16 bits
// are returned unchanged.
constexpr base::uc32 MaskEndOfRangeMarker(base::uc32 c) {
|
||||
// CharacterRanges may use 0x10ffff as the end-of-range marker irrespective
|
||||
// of whether the regexp IsUnicode or not; translate the marker value here.
|
||||
// Any input above kMaxUInt16 is asserted to be exactly String::kMaxCodePoint;
// no other out-of-range value is expected to reach this helper.
DCHECK_IMPLIES(c > kMaxUInt16, c == String::kMaxCodePoint);
|
||||
return c & 0xffff;
|
||||
}
|
||||
|
||||
int RangeArrayLengthFor(const ZoneList<CharacterRange>* ranges) {
|
||||
const int ranges_length = ranges->length();
|
||||
return ranges->at(ranges_length - 1).to() == kMaxUInt16
|
||||
return MaskEndOfRangeMarker(ranges->at(ranges_length - 1).to()) == kMaxUInt16
|
||||
? ranges_length * 2 - 1
|
||||
: ranges_length * 2;
|
||||
}
|
||||
@ -146,11 +153,13 @@ Handle<ByteArray> MakeRangeArray(Isolate* isolate,
|
||||
const CharacterRange& r = ranges->at(i);
|
||||
DCHECK_LE(r.from(), kMaxUInt16);
|
||||
range_array->set_uint16(i * 2 + 0, r.from());
|
||||
if (i == ranges_length - 1 && r.to() == kMaxUInt16) {
|
||||
const base::uc32 to = MaskEndOfRangeMarker(r.to());
|
||||
if (i == ranges_length - 1 && to == kMaxUInt16) {
|
||||
DCHECK_EQ(byte_array_length, ranges_length * 2 - 1);
|
||||
break; // Avoid overflow by leaving the last range open-ended.
|
||||
}
|
||||
DCHECK_LT(r.to(), kMaxUInt16);
|
||||
range_array->set_uint16(i * 2 + 1, r.to() + 1); // Exclusive.
|
||||
DCHECK_LT(to, kMaxUInt16);
|
||||
range_array->set_uint16(i * 2 + 1, to + 1); // Exclusive.
|
||||
}
|
||||
return range_array;
|
||||
}
|
||||
|
@ -510,7 +510,7 @@ static void TestCharacterClassEscapes(StandardCharacterSet c,
|
||||
Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
|
||||
ZoneList<CharacterRange>* ranges =
|
||||
zone.New<ZoneList<CharacterRange>>(2, &zone);
|
||||
CharacterRange::AddClassEscape(c, ranges, &zone);
|
||||
CharacterRange::AddClassEscape(c, ranges, false, &zone);
|
||||
for (base::uc32 i = 0; i < (1 << 16); i++) {
|
||||
bool in_class = false;
|
||||
for (int j = 0; !in_class && j < ranges->length(); j++) {
|
||||
|
7
test/mjsunit/regress/regress-1264508.js
Normal file
7
test/mjsunit/regress/regress-1264508.js
Normal file
@ -0,0 +1,7 @@
|
||||
// Copyright 2021 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
//
|
||||
// Flags: --no-regexp-tier-up
|
||||
|
||||
assertNotNull(/[nyreekp\W]/isy.exec("\u2603"));
|
Loading…
Reference in New Issue
Block a user