[regexp] Handle empty nested classes correctly
With the recent introduction of Unicode sets (the `v` flag), nested character classes are allowed in regular expressions. We always expect a nested class to be of type `RegExpClassSetExpression`, but the empty nested class (`[]`) was not handled correctly: the parser still returned a `RegExpClassRanges` node for it, even when the `v` flag was set.

Bug: v8:11935, chromium:1412942
Change-Id: I3b644c8627d8fc6b320a419216372810e8003983
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4224311
Reviewed-by: Jakob Linke <jgruber@chromium.org>
Commit-Queue: Patrick Thier <pthier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#85680}
This commit is contained in:
parent
8c4779241a
commit
ee93bc8035
@ -200,10 +200,12 @@ void* RegExpUnparser::VisitClassSetOperand(RegExpClassSetOperand* that,
|
|||||||
if (i > 0) os_ << " ";
|
if (i > 0) os_ << " ";
|
||||||
VisitCharacterRange(that->ranges()->at(i));
|
VisitCharacterRange(that->ranges()->at(i));
|
||||||
}
|
}
|
||||||
for (auto iter : *that->strings()) {
|
if (that->has_strings()) {
|
||||||
os_ << " '";
|
for (auto iter : *that->strings()) {
|
||||||
os_ << std::string(iter.first.begin(), iter.first.end());
|
os_ << " '";
|
||||||
os_ << "'";
|
os_ << std::string(iter.first.begin(), iter.first.end());
|
||||||
|
os_ << "'";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
os_ << "]";
|
os_ << "]";
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -382,16 +384,17 @@ RegExpClassSetOperand::RegExpClassSetOperand(ZoneList<CharacterRange>* ranges,
|
|||||||
CharacterClassStrings* strings)
|
CharacterClassStrings* strings)
|
||||||
: ranges_(ranges), strings_(strings) {
|
: ranges_(ranges), strings_(strings) {
|
||||||
DCHECK_NOT_NULL(ranges);
|
DCHECK_NOT_NULL(ranges);
|
||||||
DCHECK_NOT_NULL(strings);
|
|
||||||
min_match_ = 0;
|
min_match_ = 0;
|
||||||
max_match_ = 0;
|
max_match_ = 0;
|
||||||
if (!ranges->is_empty()) {
|
if (!ranges->is_empty()) {
|
||||||
min_match_ = 1;
|
min_match_ = 1;
|
||||||
max_match_ = 2;
|
max_match_ = 2;
|
||||||
}
|
}
|
||||||
for (auto string : *strings) {
|
if (has_strings()) {
|
||||||
min_match_ = std::min(min_match_, string.second->min_match());
|
for (auto string : *strings) {
|
||||||
max_match_ = std::max(max_match_, string.second->max_match());
|
min_match_ = std::min(min_match_, string.second->min_match());
|
||||||
|
max_match_ = std::max(max_match_, string.second->max_match());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -410,5 +413,20 @@ RegExpClassSetExpression::RegExpClassSetExpression(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// static
|
||||||
|
RegExpClassSetExpression* RegExpClassSetExpression::Empty(Zone* zone,
|
||||||
|
bool is_negated) {
|
||||||
|
ZoneList<CharacterRange>* ranges =
|
||||||
|
zone->template New<ZoneList<CharacterRange>>(0, zone);
|
||||||
|
RegExpClassSetOperand* op =
|
||||||
|
zone->template New<RegExpClassSetOperand>(ranges, nullptr);
|
||||||
|
ZoneList<RegExpTree*>* operands =
|
||||||
|
zone->template New<ZoneList<RegExpTree*>>(1, zone);
|
||||||
|
operands->Add(op, zone);
|
||||||
|
return zone->template New<RegExpClassSetExpression>(
|
||||||
|
RegExpClassSetExpression::OperationType::kUnion, is_negated, false,
|
||||||
|
operands);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
} // namespace v8
|
} // namespace v8
|
||||||
|
@ -413,9 +413,12 @@ class RegExpClassSetOperand final : public RegExpTree {
|
|||||||
void Subtract(RegExpClassSetOperand* other,
|
void Subtract(RegExpClassSetOperand* other,
|
||||||
ZoneList<CharacterRange>* temp_ranges, Zone* zone);
|
ZoneList<CharacterRange>* temp_ranges, Zone* zone);
|
||||||
|
|
||||||
bool has_strings() const { return !strings_->empty(); }
|
bool has_strings() const { return strings_ != nullptr && !strings_->empty(); }
|
||||||
ZoneList<CharacterRange>* ranges() { return ranges_; }
|
ZoneList<CharacterRange>* ranges() { return ranges_; }
|
||||||
CharacterClassStrings* strings() { return strings_; }
|
CharacterClassStrings* strings() {
|
||||||
|
DCHECK_NOT_NULL(strings_);
|
||||||
|
return strings_;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ZoneList<CharacterRange>* ranges_;
|
ZoneList<CharacterRange>* ranges_;
|
||||||
@ -434,6 +437,10 @@ class RegExpClassSetExpression final : public RegExpTree {
|
|||||||
|
|
||||||
DECL_BOILERPLATE(ClassSetExpression);
|
DECL_BOILERPLATE(ClassSetExpression);
|
||||||
|
|
||||||
|
// Create an empty class set expression (matches everything if |is_negated|,
|
||||||
|
// nothing otherwise).
|
||||||
|
static RegExpClassSetExpression* Empty(Zone* zone, bool is_negated);
|
||||||
|
|
||||||
bool IsTextElement() override { return true; }
|
bool IsTextElement() override { return true; }
|
||||||
int min_match() override { return 0; }
|
int min_match() override { return 0; }
|
||||||
int max_match() override { return max_match_; }
|
int max_match() override { return max_match_; }
|
||||||
|
@ -593,7 +593,12 @@ RegExpNode* RegExpClassSetExpression::ToNode(RegExpCompiler* compiler,
|
|||||||
|
|
||||||
void RegExpClassSetOperand::Union(RegExpClassSetOperand* other, Zone* zone) {
|
void RegExpClassSetOperand::Union(RegExpClassSetOperand* other, Zone* zone) {
|
||||||
ranges()->AddAll(*other->ranges(), zone);
|
ranges()->AddAll(*other->ranges(), zone);
|
||||||
strings()->insert(other->strings()->begin(), other->strings()->end());
|
if (other->has_strings()) {
|
||||||
|
if (strings_ == nullptr) {
|
||||||
|
strings_ = zone->template New<CharacterClassStrings>(zone);
|
||||||
|
}
|
||||||
|
strings()->insert(other->strings()->begin(), other->strings()->end());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegExpClassSetOperand::Intersect(RegExpClassSetOperand* other,
|
void RegExpClassSetOperand::Intersect(RegExpClassSetOperand* other,
|
||||||
@ -602,11 +607,17 @@ void RegExpClassSetOperand::Intersect(RegExpClassSetOperand* other,
|
|||||||
CharacterRange::Intersect(ranges(), other->ranges(), temp_ranges, zone);
|
CharacterRange::Intersect(ranges(), other->ranges(), temp_ranges, zone);
|
||||||
std::swap(*ranges(), *temp_ranges);
|
std::swap(*ranges(), *temp_ranges);
|
||||||
temp_ranges->Rewind(0);
|
temp_ranges->Rewind(0);
|
||||||
for (auto iter = strings()->begin(); iter != strings()->end();) {
|
if (has_strings()) {
|
||||||
if (other->strings()->find(iter->first) == other->strings()->end()) {
|
if (!other->has_strings()) {
|
||||||
iter = strings()->erase(iter);
|
strings()->clear();
|
||||||
} else {
|
} else {
|
||||||
iter++;
|
for (auto iter = strings()->begin(); iter != strings()->end();) {
|
||||||
|
if (other->strings()->find(iter->first) == other->strings()->end()) {
|
||||||
|
iter = strings()->erase(iter);
|
||||||
|
} else {
|
||||||
|
iter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -617,11 +628,13 @@ void RegExpClassSetOperand::Subtract(RegExpClassSetOperand* other,
|
|||||||
CharacterRange::Subtract(ranges(), other->ranges(), temp_ranges, zone);
|
CharacterRange::Subtract(ranges(), other->ranges(), temp_ranges, zone);
|
||||||
std::swap(*ranges(), *temp_ranges);
|
std::swap(*ranges(), *temp_ranges);
|
||||||
temp_ranges->Rewind(0);
|
temp_ranges->Rewind(0);
|
||||||
for (auto iter = strings()->begin(); iter != strings()->end();) {
|
if (has_strings() && other->has_strings()) {
|
||||||
if (other->strings()->find(iter->first) != other->strings()->end()) {
|
for (auto iter = strings()->begin(); iter != strings()->end();) {
|
||||||
iter = strings()->erase(iter);
|
if (other->strings()->find(iter->first) != other->strings()->end()) {
|
||||||
} else {
|
iter = strings()->erase(iter);
|
||||||
iter++;
|
} else {
|
||||||
|
iter++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2913,10 +2913,14 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
|
|||||||
zone()->template New<ZoneList<CharacterRange>>(2, zone());
|
zone()->template New<ZoneList<CharacterRange>>(2, zone());
|
||||||
if (current() == ']') {
|
if (current() == ']') {
|
||||||
Advance();
|
Advance();
|
||||||
RegExpClassRanges::ClassRangesFlags class_ranges_flags;
|
if (unicode_sets()) {
|
||||||
if (is_negated) class_ranges_flags = RegExpClassRanges::NEGATED;
|
return RegExpClassSetExpression::Empty(zone(), is_negated);
|
||||||
return zone()->template New<RegExpClassRanges>(zone(), ranges,
|
} else {
|
||||||
class_ranges_flags);
|
RegExpClassRanges::ClassRangesFlags class_ranges_flags;
|
||||||
|
if (is_negated) class_ranges_flags = RegExpClassRanges::NEGATED;
|
||||||
|
return zone()->template New<RegExpClassRanges>(zone(), ranges,
|
||||||
|
class_ranges_flags);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!unicode_sets()) {
|
if (!unicode_sets()) {
|
||||||
|
@ -184,6 +184,13 @@ check(
|
|||||||
/[\q{ĀĂĄĆ|AaAc}--\q{āăąć}]/vi, ['AaAc', 'aAaC'], ['ĀĂĄĆ', 'āăąć'],
|
/[\q{ĀĂĄĆ|AaAc}--\q{āăąć}]/vi, ['AaAc', 'aAaC'], ['ĀĂĄĆ', 'āăąć'],
|
||||||
false);
|
false);
|
||||||
|
|
||||||
|
// Empty nested classes.
|
||||||
|
check(/[a-c\q{foo|bar}[]]/v, ['a','b','c','foo','bar'], [], false);
|
||||||
|
check(/[[a-c\q{foo|bar}]&&[]]/v, [], ['a','b','c','foo','bar'], true);
|
||||||
|
check(/[[a-c\q{foo|bar}]--[]]/v, ['a','b','c','foo','bar'], [], false);
|
||||||
|
check(/[[]&&[a-c\q{foo|bar}]]/v, [], ['a','b','c','foo','bar'], true);
|
||||||
|
check(/[[]--[a-c\q{foo|bar}]]/v, [], ['a','b','c','foo','bar'], true);
|
||||||
|
|
||||||
// Empty string disjunctions matches nothing, but succeeds.
|
// Empty string disjunctions matches nothing, but succeeds.
|
||||||
let res = /[\q{}]/v.exec('foo');
|
let res = /[\q{}]/v.exec('foo');
|
||||||
assertNotNull(res);
|
assertNotNull(res);
|
||||||
|
Loading…
Reference in New Issue
Block a user