[regexp] Handle empty nested classes correctly
With the recent introduction of unicode sets (v-flag), nested character classes are allowed in regular expressions. We always expect a nested class to be of type `RegExpClassSetExpression`, but the empty nested class was not handled correctly. Bug: v8:11935, chromium:1412942 Change-Id: I3b644c8627d8fc6b320a419216372810e8003983 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4224311 Reviewed-by: Jakob Linke <jgruber@chromium.org> Commit-Queue: Patrick Thier <pthier@chromium.org> Cr-Commit-Position: refs/heads/main@{#85680}
This commit is contained in:
parent
8c4779241a
commit
ee93bc8035
@ -200,10 +200,12 @@ void* RegExpUnparser::VisitClassSetOperand(RegExpClassSetOperand* that,
|
||||
if (i > 0) os_ << " ";
|
||||
VisitCharacterRange(that->ranges()->at(i));
|
||||
}
|
||||
for (auto iter : *that->strings()) {
|
||||
os_ << " '";
|
||||
os_ << std::string(iter.first.begin(), iter.first.end());
|
||||
os_ << "'";
|
||||
if (that->has_strings()) {
|
||||
for (auto iter : *that->strings()) {
|
||||
os_ << " '";
|
||||
os_ << std::string(iter.first.begin(), iter.first.end());
|
||||
os_ << "'";
|
||||
}
|
||||
}
|
||||
os_ << "]";
|
||||
return nullptr;
|
||||
@ -382,16 +384,17 @@ RegExpClassSetOperand::RegExpClassSetOperand(ZoneList<CharacterRange>* ranges,
|
||||
CharacterClassStrings* strings)
|
||||
: ranges_(ranges), strings_(strings) {
|
||||
DCHECK_NOT_NULL(ranges);
|
||||
DCHECK_NOT_NULL(strings);
|
||||
min_match_ = 0;
|
||||
max_match_ = 0;
|
||||
if (!ranges->is_empty()) {
|
||||
min_match_ = 1;
|
||||
max_match_ = 2;
|
||||
}
|
||||
for (auto string : *strings) {
|
||||
min_match_ = std::min(min_match_, string.second->min_match());
|
||||
max_match_ = std::max(max_match_, string.second->max_match());
|
||||
if (has_strings()) {
|
||||
for (auto string : *strings) {
|
||||
min_match_ = std::min(min_match_, string.second->min_match());
|
||||
max_match_ = std::max(max_match_, string.second->max_match());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -410,5 +413,20 @@ RegExpClassSetExpression::RegExpClassSetExpression(
|
||||
}
|
||||
}
|
||||
|
||||
// static
|
||||
RegExpClassSetExpression* RegExpClassSetExpression::Empty(Zone* zone,
|
||||
bool is_negated) {
|
||||
ZoneList<CharacterRange>* ranges =
|
||||
zone->template New<ZoneList<CharacterRange>>(0, zone);
|
||||
RegExpClassSetOperand* op =
|
||||
zone->template New<RegExpClassSetOperand>(ranges, nullptr);
|
||||
ZoneList<RegExpTree*>* operands =
|
||||
zone->template New<ZoneList<RegExpTree*>>(1, zone);
|
||||
operands->Add(op, zone);
|
||||
return zone->template New<RegExpClassSetExpression>(
|
||||
RegExpClassSetExpression::OperationType::kUnion, is_negated, false,
|
||||
operands);
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
@ -413,9 +413,12 @@ class RegExpClassSetOperand final : public RegExpTree {
|
||||
void Subtract(RegExpClassSetOperand* other,
|
||||
ZoneList<CharacterRange>* temp_ranges, Zone* zone);
|
||||
|
||||
bool has_strings() const { return !strings_->empty(); }
|
||||
bool has_strings() const { return strings_ != nullptr && !strings_->empty(); }
|
||||
ZoneList<CharacterRange>* ranges() { return ranges_; }
|
||||
CharacterClassStrings* strings() { return strings_; }
|
||||
CharacterClassStrings* strings() {
|
||||
DCHECK_NOT_NULL(strings_);
|
||||
return strings_;
|
||||
}
|
||||
|
||||
private:
|
||||
ZoneList<CharacterRange>* ranges_;
|
||||
@ -434,6 +437,10 @@ class RegExpClassSetExpression final : public RegExpTree {
|
||||
|
||||
DECL_BOILERPLATE(ClassSetExpression);
|
||||
|
||||
// Create an empty class set expression (matches everything if |is_negated|,
|
||||
// nothing otherwise).
|
||||
static RegExpClassSetExpression* Empty(Zone* zone, bool is_negated);
|
||||
|
||||
bool IsTextElement() override { return true; }
|
||||
int min_match() override { return 0; }
|
||||
int max_match() override { return max_match_; }
|
||||
|
@ -593,7 +593,12 @@ RegExpNode* RegExpClassSetExpression::ToNode(RegExpCompiler* compiler,
|
||||
|
||||
void RegExpClassSetOperand::Union(RegExpClassSetOperand* other, Zone* zone) {
|
||||
ranges()->AddAll(*other->ranges(), zone);
|
||||
strings()->insert(other->strings()->begin(), other->strings()->end());
|
||||
if (other->has_strings()) {
|
||||
if (strings_ == nullptr) {
|
||||
strings_ = zone->template New<CharacterClassStrings>(zone);
|
||||
}
|
||||
strings()->insert(other->strings()->begin(), other->strings()->end());
|
||||
}
|
||||
}
|
||||
|
||||
void RegExpClassSetOperand::Intersect(RegExpClassSetOperand* other,
|
||||
@ -602,11 +607,17 @@ void RegExpClassSetOperand::Intersect(RegExpClassSetOperand* other,
|
||||
CharacterRange::Intersect(ranges(), other->ranges(), temp_ranges, zone);
|
||||
std::swap(*ranges(), *temp_ranges);
|
||||
temp_ranges->Rewind(0);
|
||||
for (auto iter = strings()->begin(); iter != strings()->end();) {
|
||||
if (other->strings()->find(iter->first) == other->strings()->end()) {
|
||||
iter = strings()->erase(iter);
|
||||
if (has_strings()) {
|
||||
if (!other->has_strings()) {
|
||||
strings()->clear();
|
||||
} else {
|
||||
iter++;
|
||||
for (auto iter = strings()->begin(); iter != strings()->end();) {
|
||||
if (other->strings()->find(iter->first) == other->strings()->end()) {
|
||||
iter = strings()->erase(iter);
|
||||
} else {
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -617,11 +628,13 @@ void RegExpClassSetOperand::Subtract(RegExpClassSetOperand* other,
|
||||
CharacterRange::Subtract(ranges(), other->ranges(), temp_ranges, zone);
|
||||
std::swap(*ranges(), *temp_ranges);
|
||||
temp_ranges->Rewind(0);
|
||||
for (auto iter = strings()->begin(); iter != strings()->end();) {
|
||||
if (other->strings()->find(iter->first) != other->strings()->end()) {
|
||||
iter = strings()->erase(iter);
|
||||
} else {
|
||||
iter++;
|
||||
if (has_strings() && other->has_strings()) {
|
||||
for (auto iter = strings()->begin(); iter != strings()->end();) {
|
||||
if (other->strings()->find(iter->first) != other->strings()->end()) {
|
||||
iter = strings()->erase(iter);
|
||||
} else {
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2913,10 +2913,14 @@ RegExpTree* RegExpParserImpl<CharT>::ParseCharacterClass(
|
||||
zone()->template New<ZoneList<CharacterRange>>(2, zone());
|
||||
if (current() == ']') {
|
||||
Advance();
|
||||
RegExpClassRanges::ClassRangesFlags class_ranges_flags;
|
||||
if (is_negated) class_ranges_flags = RegExpClassRanges::NEGATED;
|
||||
return zone()->template New<RegExpClassRanges>(zone(), ranges,
|
||||
class_ranges_flags);
|
||||
if (unicode_sets()) {
|
||||
return RegExpClassSetExpression::Empty(zone(), is_negated);
|
||||
} else {
|
||||
RegExpClassRanges::ClassRangesFlags class_ranges_flags;
|
||||
if (is_negated) class_ranges_flags = RegExpClassRanges::NEGATED;
|
||||
return zone()->template New<RegExpClassRanges>(zone(), ranges,
|
||||
class_ranges_flags);
|
||||
}
|
||||
}
|
||||
|
||||
if (!unicode_sets()) {
|
||||
|
@ -184,6 +184,13 @@ check(
|
||||
/[\q{ĀĂĄĆ|AaAc}--\q{āăąć}]/vi, ['AaAc', 'aAaC'], ['ĀĂĄĆ', 'āăąć'],
|
||||
false);
|
||||
|
||||
// Empty nested classes.
|
||||
check(/[a-c\q{foo|bar}[]]/v, ['a','b','c','foo','bar'], [], false);
|
||||
check(/[[a-c\q{foo|bar}]&&[]]/v, [], ['a','b','c','foo','bar'], true);
|
||||
check(/[[a-c\q{foo|bar}]--[]]/v, ['a','b','c','foo','bar'], [], false);
|
||||
check(/[[]&&[a-c\q{foo|bar}]]/v, [], ['a','b','c','foo','bar'], true);
|
||||
check(/[[]--[a-c\q{foo|bar}]]/v, [], ['a','b','c','foo','bar'], true);
|
||||
|
||||
// Empty string disjunctions matches nothing, but succeeds.
|
||||
let res = /[\q{}]/v.exec('foo');
|
||||
assertNotNull(res);
|
||||
|
Loading…
Reference in New Issue
Block a user