[regexp] Unicode Sets: Implement parser changes and set operations
- Adapt parser for /v. - Implement set operations (union, intersect, subtract). - String disjunctions not implemented yet. Bug: v8:11935 Change-Id: I1c3a6785500664dacc5d6562f49d7deed73ac15f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3921517 Commit-Queue: Patrick Thier <pthier@chromium.org> Reviewed-by: Jakob Linke <jgruber@chromium.org> Reviewed-by: Mathias Bynens <mathias@chromium.org> Reviewed-by: Kim-Anh Tran <kimanh@chromium.org> Cr-Commit-Position: refs/heads/main@{#83647}
This commit is contained in:
parent
316dd0326c
commit
a169bab6f7
@ -56,6 +56,9 @@ String16 _descriptionForRegExpFlags(v8::Local<v8::RegExp> value) {
|
||||
if (flags & v8::RegExp::Flags::kMultiline) result_string_builder.append('m');
|
||||
if (flags & v8::RegExp::Flags::kDotAll) result_string_builder.append('s');
|
||||
if (flags & v8::RegExp::Flags::kUnicode) result_string_builder.append('u');
|
||||
if (flags & v8::RegExp::Flags::kUnicodeSets) {
|
||||
result_string_builder.append('v');
|
||||
}
|
||||
if (flags & v8::RegExp::Flags::kSticky) result_string_builder.append('y');
|
||||
return result_string_builder.toString();
|
||||
}
|
||||
|
@ -243,6 +243,7 @@ String16 descriptionForRegExp(v8::Isolate* isolate,
|
||||
if (flags & v8::RegExp::Flags::kMultiline) description.append('m');
|
||||
if (flags & v8::RegExp::Flags::kDotAll) description.append('s');
|
||||
if (flags & v8::RegExp::Flags::kUnicode) description.append('u');
|
||||
if (flags & v8::RegExp::Flags::kUnicodeSets) description.append('v');
|
||||
if (flags & v8::RegExp::Flags::kSticky) description.append('y');
|
||||
return description.toString();
|
||||
}
|
||||
|
@ -69,6 +69,11 @@ class CanBeHandledVisitor final : private RegExpVisitor {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Class sets are reported as not handleable: visiting one clears |result_|.
// The node itself is not inspected.
void* VisitClassSet(RegExpClassSet* node, void*) override {
  result_ = false;
  return nullptr;
}
|
||||
|
||||
// Assertions leave |result_| untouched, i.e. they never make a pattern
// unhandleable on their own.
void* VisitAssertion(RegExpAssertion* node, void*) override {
  return nullptr;
}
|
||||
@ -419,6 +424,11 @@ class CompileVisitor : private RegExpVisitor {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Class sets are not compiled by this visitor yet; reaching this method is
// treated as a programming error.
void* VisitClassSet(RegExpClassSet* node, void*) override {
  // TODO(v8:11935): Add support.
  UNREACHABLE();
}
|
||||
|
||||
void* VisitAtom(RegExpAtom* node, void*) override {
|
||||
for (base::uc16 c : node->data()) {
|
||||
assembler_.ConsumeRange(c, c);
|
||||
|
@ -195,6 +195,27 @@ void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Prints a class set as <op>[^]"[" operand operand ... "]", where <op> is
// "++" for union, "&&" for intersection and "--" for subtraction, and "^" is
// emitted only for negated sets. Operands are unparsed recursively and
// separated by a single space.
void* RegExpUnparser::VisitClassSet(RegExpClassSet* that, void* data) {
  const char* op_token = "";
  switch (that->operation()) {
    case RegExpClassSet::OperationType::kUnion:
      op_token = "++";
      break;
    case RegExpClassSet::OperationType::kIntersection:
      op_token = "&&";
      break;
    case RegExpClassSet::OperationType::kSubtraction:
      op_token = "--";
      break;
  }
  os_ << op_token;
  if (that->is_negated()) {
    os_ << "^";
  }
  os_ << "[";
  const ZoneList<RegExpTree*>* children = that->operands();
  const int child_count = children->length();
  for (int idx = 0; idx < child_count; idx++) {
    if (idx != 0) os_ << " ";
    children->at(idx)->Accept(this, data);
  }
  os_ << "]";
  return nullptr;
}
|
||||
|
||||
void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
|
||||
switch (that->assertion_type()) {
|
||||
|
@ -19,6 +19,7 @@ namespace internal {
|
||||
VISIT(Alternative) \
|
||||
VISIT(Assertion) \
|
||||
VISIT(CharacterClass) \
|
||||
VISIT(ClassSet) \
|
||||
VISIT(Atom) \
|
||||
VISIT(Quantifier) \
|
||||
VISIT(Capture) \
|
||||
@ -117,29 +118,49 @@ class CharacterRange {
|
||||
StandardCharacterSet standard_character_set,
|
||||
ZoneList<CharacterRange>* ranges, bool add_unicode_case_equivalents,
|
||||
Zone* zone);
|
||||
// Add case equivalents to ranges. Only used for /i, not for /ui or /vi, as
|
||||
// the semantics for unicode mode are slightly different.
|
||||
// See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch Note 4.
|
||||
V8_EXPORT_PRIVATE static void AddCaseEquivalents(
|
||||
Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges,
|
||||
bool is_one_byte);
|
||||
// Add case equivalent code points to ranges. Only used for /ui and /vi, not
|
||||
// for /i, as the semantics for non-unicode mode are slightly different.
|
||||
// See https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch Note 4.
|
||||
static void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
|
||||
Zone* zone);
|
||||
|
||||
bool Contains(base::uc32 i) const { return from_ <= i && i <= to_; }
|
||||
base::uc32 from() const { return from_; }
|
||||
base::uc32 to() const { return to_; }
|
||||
bool IsEverything(base::uc32 max) const { return from_ == 0 && to_ >= max; }
|
||||
bool IsSingleton() const { return from_ == to_; }
|
||||
|
||||
// Whether a range list is in canonical form: Ranges ordered by from value,
|
||||
// and ranges non-overlapping and non-adjacent.
|
||||
V8_EXPORT_PRIVATE static bool IsCanonical(ZoneList<CharacterRange>* ranges);
|
||||
V8_EXPORT_PRIVATE static bool IsCanonical(
|
||||
const ZoneList<CharacterRange>* ranges);
|
||||
// Convert range list to canonical form. The characters covered by the ranges
|
||||
// will still be the same, but no character is in more than one range, and
|
||||
// adjacent ranges are merged. The resulting list may be shorter than the
|
||||
// original, but cannot be longer.
|
||||
static void Canonicalize(ZoneList<CharacterRange>* ranges);
|
||||
// Negate the contents of a character range in canonical form.
|
||||
static void Negate(ZoneList<CharacterRange>* src,
|
||||
static void Negate(const ZoneList<CharacterRange>* src,
|
||||
ZoneList<CharacterRange>* dst, Zone* zone);
|
||||
|
||||
// Intersect the contents of two character ranges in canonical form.
|
||||
static void Intersect(const ZoneList<CharacterRange>* lhs,
|
||||
const ZoneList<CharacterRange>* rhs,
|
||||
ZoneList<CharacterRange>* dst, Zone* zone);
|
||||
// Subtract the contents of |to_remove| from the contents of |src|.
|
||||
static void Subtract(const ZoneList<CharacterRange>* src,
|
||||
const ZoneList<CharacterRange>* to_remove,
|
||||
ZoneList<CharacterRange>* dst, Zone* zone);
|
||||
// Remove all ranges outside the one-byte range.
|
||||
static void ClampToOneByte(ZoneList<CharacterRange>* ranges);
|
||||
// Checks if two ranges (both need to be canonical) are equal.
|
||||
static bool Equals(const ZoneList<CharacterRange>* lhs,
|
||||
const ZoneList<CharacterRange>* rhs);
|
||||
|
||||
private:
|
||||
CharacterRange(base::uc32 from, base::uc32 to) : from_(from), to_(to) {}
|
||||
@ -150,6 +171,13 @@ class CharacterRange {
|
||||
base::uc32 to_ = 0;
|
||||
};
|
||||
|
||||
// Two ranges are equal iff both of their end points agree.
inline bool operator==(const CharacterRange& lhs, const CharacterRange& rhs) {
  if (lhs.from() != rhs.from()) return false;
  return lhs.to() == rhs.to();
}
|
||||
// Logical negation of operator== for CharacterRange.
inline bool operator!=(const CharacterRange& lhs, const CharacterRange& rhs) {
  return !(lhs == rhs);
}
|
||||
|
||||
#define DECL_BOILERPLATE(Name) \
|
||||
void* Accept(RegExpVisitor* visitor, void* data) override; \
|
||||
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) \
|
||||
@ -329,6 +357,47 @@ class RegExpCharacterClass final : public RegExpTree {
|
||||
CharacterClassFlags character_class_flags_;
|
||||
};
|
||||
|
||||
class RegExpClassSet final : public RegExpTree {
|
||||
public:
|
||||
enum class OperationType { kUnion, kIntersection, kSubtraction };
|
||||
|
||||
RegExpClassSet(OperationType op, bool is_negated,
|
||||
ZoneList<RegExpTree*>* operands)
|
||||
: operation_(op), is_negated_(is_negated), operands_(operands) {}
|
||||
|
||||
DECL_BOILERPLATE(ClassSet);
|
||||
|
||||
bool IsTextElement() override { return true; }
|
||||
// At least 1 character is consumed.
|
||||
int min_match() override { return 1; }
|
||||
// Up to two code points might be consumed.
|
||||
int max_match() override { return 2; }
|
||||
|
||||
OperationType operation() const { return operation_; }
|
||||
bool is_negated() const { return is_negated_; }
|
||||
const ZoneList<RegExpTree*>* operands() const { return operands_; }
|
||||
|
||||
private:
|
||||
RegExpCharacterClass* ToCharacterClass(Zone* zone);
|
||||
|
||||
// Recursively evaluates the tree rooted at |root|, computing the valid
|
||||
// CharacterRanges after applying all set operations and storing the result in
|
||||
// |result_ranges|. |temp_ranges| is list used for intermediate results,
|
||||
// passed as parameter to avoid allocating new lists all the time.
|
||||
static void ComputeCharacterRanges(RegExpTree* root,
|
||||
ZoneList<CharacterRange>* result_ranges,
|
||||
ZoneList<CharacterRange>* temp_ranges,
|
||||
Zone* zone);
|
||||
|
||||
const OperationType operation_;
|
||||
const bool is_negated_;
|
||||
ZoneList<RegExpTree*>* operands_ = nullptr;
|
||||
#ifdef ENABLE_SLOW_DCHECKS
|
||||
// Cache ranges for each node during computation for (slow) DCHECKs.
|
||||
ZoneList<CharacterRange>* ranges_ = nullptr;
|
||||
#endif
|
||||
};
|
||||
|
||||
class RegExpAtom final : public RegExpTree {
|
||||
public:
|
||||
explicit RegExpAtom(base::Vector<const base::uc16> data) : data_(data) {}
|
||||
|
@ -419,9 +419,23 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
|
||||
return TextNode::CreateForCharacterRanges(zone, range, false, on_success);
|
||||
}
|
||||
|
||||
void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
|
||||
} // namespace
|
||||
|
||||
// TODO(pthier, v8:11935): We use this method to implement
|
||||
// MaybeSimpleCaseFolding
|
||||
// TODO(v8:11935): Change to permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-maybesimplecasefolding
|
||||
// which is slightly different. The main difference is that we retain original
|
||||
// characters and add case equivalents, whereas according to the spec original
|
||||
// characters should be replaced with their case equivalent.
|
||||
// This shouldn't make a difference for correctness, but we could potentially
|
||||
// create smaller character classes for unicode sets.
|
||||
|
||||
// static
|
||||
void CharacterRange::AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges,
|
||||
Zone* zone) {
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
DCHECK(CharacterRange::IsCanonical(ranges));
|
||||
DCHECK(IsCanonical(ranges));
|
||||
|
||||
// Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
|
||||
// See also https://crbug.com/v8/6727.
|
||||
@ -444,16 +458,13 @@ void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
|
||||
// we end up with only simple and common case mappings.
|
||||
set.removeAllStrings();
|
||||
for (int i = 0; i < set.getRangeCount(); i++) {
|
||||
ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
|
||||
zone);
|
||||
ranges->Add(Range(set.getRangeStart(i), set.getRangeEnd(i)), zone);
|
||||
}
|
||||
// No errors and everything we collected have been ranges.
|
||||
CharacterRange::Canonicalize(ranges);
|
||||
Canonicalize(ranges);
|
||||
#endif // V8_INTL_SUPPORT
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success) {
|
||||
set_.Canonicalize();
|
||||
@ -461,7 +472,7 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
||||
ZoneList<CharacterRange>* ranges = this->ranges(zone);
|
||||
|
||||
if (NeedsUnicodeCaseEquivalents(compiler->flags())) {
|
||||
AddUnicodeCaseEquivalents(ranges, zone);
|
||||
CharacterRange::AddUnicodeCaseEquivalents(ranges, zone);
|
||||
}
|
||||
|
||||
if (!IsEitherUnicode(compiler->flags()) || compiler->one_byte() ||
|
||||
@ -470,6 +481,17 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
||||
}
|
||||
|
||||
if (is_negated()) {
|
||||
// With /v, character classes are never negated.
|
||||
// TODO(v8:11935): Change permalink once proposal is in stage 4.
|
||||
// https://arai-a.github.io/ecma262-compare/snapshot.html?pr=2418#sec-compileatom
|
||||
// Atom :: CharacterClass
|
||||
// 4. Assert: cc.[[Invert]] is false.
|
||||
// Instead the complement is created when evaluating the class set.
|
||||
// The only exception is the "nothing range" (negated everything), which is
|
||||
// internally created for an empty set.
|
||||
DCHECK_IMPLIES(
|
||||
IsUnicodeSets(compiler->flags()),
|
||||
ranges->length() == 1 && ranges->first().IsEverything(kMaxCodePoint));
|
||||
ZoneList<CharacterRange>* negated =
|
||||
zone->New<ZoneList<CharacterRange>>(2, zone);
|
||||
CharacterRange::Negate(ranges, negated, zone);
|
||||
@ -505,6 +527,11 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
||||
return result;
|
||||
}
|
||||
|
||||
// Evaluates the class set into a single character class and delegates node
// construction to that class.
RegExpNode* RegExpClassSet::ToNode(RegExpCompiler* compiler,
                                   RegExpNode* on_success) {
  RegExpCharacterClass* flattened = ToCharacterClass(compiler->zone());
  return flattened->ToNode(compiler, on_success);
}
|
||||
|
||||
namespace {
|
||||
|
||||
int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
|
||||
@ -1359,7 +1386,7 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
|
||||
#endif // V8_INTL_SUPPORT
|
||||
}
|
||||
|
||||
bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
|
||||
bool CharacterRange::IsCanonical(const ZoneList<CharacterRange>* ranges) {
|
||||
DCHECK_NOT_NULL(ranges);
|
||||
int n = ranges->length();
|
||||
if (n <= 1) return true;
|
||||
@ -1463,6 +1490,129 @@ void CharacterSet::Canonicalize() {
|
||||
CharacterRange::Canonicalize(ranges_);
|
||||
}
|
||||
|
||||
// Evaluates all set operations of the tree rooted at this node and wraps the
// resulting ranges in a new RegExpCharacterClass allocated in |zone|.
RegExpCharacterClass* RegExpClassSet::ToCharacterClass(Zone* zone) {
  using RangeList = ZoneList<CharacterRange>;
  RangeList* result_ranges = zone->New<RangeList>(2, zone);
  // Scratch list shared by all recursive evaluation steps.
  RangeList* temp_ranges = zone->New<RangeList>(2, zone);
  ComputeCharacterRanges(this, result_ranges, temp_ranges, zone);
  return zone->New<RegExpCharacterClass>(zone, result_ranges);
}
|
||||
|
||||
// static
|
||||
// Recursively evaluates |root| and appends the resulting ranges to
// |result_ranges|.
//  - A character class operand contributes its canonicalized ranges.
//  - A union concatenates the ranges of all operands and canonicalizes.
//  - Intersection and subtraction start from operand 0 and fold every further
//    operand into the running result from left to right.
//  - A negated class set is complemented after its operation was applied.
// |temp_ranges| is scratch space; it must be empty on entry and is empty
// again on exit.
void RegExpClassSet::ComputeCharacterRanges(
    RegExpTree* root, ZoneList<CharacterRange>* result_ranges,
    ZoneList<CharacterRange>* temp_ranges, Zone* zone) {
  DCHECK_EQ(temp_ranges->length(), 0);
  DCHECK(root->IsCharacterClass() || root->IsClassSet());

  // Leaf operand: nothing to evaluate, just hand out the ranges.
  if (root->IsCharacterClass()) {
    RegExpCharacterClass* leaf = root->AsCharacterClass();
    DCHECK(!leaf->is_negated());
    ZoneList<CharacterRange>* leaf_ranges = leaf->ranges(zone);
    CharacterRange::Canonicalize(leaf_ranges);
    result_ranges->AddAll(*leaf_ranges, zone);
    return;
  }

  RegExpClassSet* node = root->AsClassSet();
  const ZoneList<RegExpTree*>* operands = node->operands();
  const int operand_count = operands->length();
  // Receives the evaluated ranges of one operand at a time.
  ZoneList<CharacterRange>* op_ranges =
      zone->New<ZoneList<CharacterRange>>(2, zone);

  using RangeCombiner = void (*)(const ZoneList<CharacterRange>*,
                                 const ZoneList<CharacterRange>*,
                                 ZoneList<CharacterRange>*, Zone*);
  // Seeds the result with operand 0, then combines it with each following
  // operand. |combine| writes into |temp_ranges|, which is swapped into
  // |result_ranges| after every step.
  auto fold_operands = [&](RangeCombiner combine) {
    ComputeCharacterRanges(operands->at(0), op_ranges, temp_ranges, zone);
    result_ranges->AddAll(*op_ranges, zone);
    op_ranges->Rewind(0);
    for (int i = 1; i < operand_count; i++) {
      ComputeCharacterRanges(operands->at(i), op_ranges, temp_ranges, zone);
      combine(result_ranges, op_ranges, temp_ranges, zone);
      std::swap(*result_ranges, *temp_ranges);
      temp_ranges->Rewind(0);
      op_ranges->Rewind(0);
    }
  };

  switch (node->operation()) {
    case OperationType::kUnion: {
      for (int i = 0; i < operand_count; i++) {
        ComputeCharacterRanges(operands->at(i), op_ranges, temp_ranges, zone);
        result_ranges->AddAll(*op_ranges, zone);
        op_ranges->Rewind(0);
      }
      CharacterRange::Canonicalize(result_ranges);
      break;
    }
    case OperationType::kIntersection: {
      fold_operands(&CharacterRange::Intersect);
      break;
    }
    case OperationType::kSubtraction: {
      fold_operands(&CharacterRange::Subtract);
#ifdef ENABLE_SLOW_DCHECKS
      // Ranges of an already evaluated operand: either the ranges of the
      // character class, or the ranges cached on the nested class set.
      auto evaluated_ranges =
          [zone](RegExpTree* operand) -> ZoneList<CharacterRange>* {
        return operand->IsCharacterClass()
                   ? operand->AsCharacterClass()->ranges(zone)
                   : operand->AsClassSet()->ranges_;
      };

      // Check that the result is equal to subtracting the union of all RHS
      // operands from the LHS operand.
      // TODO(pthier): It is unclear whether this variant is faster or slower
      // than subtracting multiple ranges in practice.
      ZoneList<CharacterRange>* lhs_range = evaluated_ranges(operands->at(0));
      ZoneList<CharacterRange>* rhs_union =
          zone->New<ZoneList<CharacterRange>>(2, zone);
      for (int i = 1; i < operand_count; i++) {
        ZoneList<CharacterRange>* op_range = evaluated_ranges(operands->at(i));
        rhs_union->AddAll(*op_range, zone);
      }
      CharacterRange::Canonicalize(rhs_union);
      ZoneList<CharacterRange>* ranges_check =
          zone->New<ZoneList<CharacterRange>>(2, zone);
      CharacterRange::Subtract(lhs_range, rhs_union, ranges_check, zone);
      DCHECK(CharacterRange::Equals(result_ranges, ranges_check));

      // Check that the result is equal to intersecting the LHS operand with
      // the complemented union of all RHS operands.
      ZoneList<CharacterRange>* rhs_union_negated =
          zone->New<ZoneList<CharacterRange>>(rhs_union->length(), zone);
      CharacterRange::Negate(rhs_union, rhs_union_negated, zone);
      ranges_check->Rewind(0);
      CharacterRange::Intersect(lhs_range, rhs_union_negated, ranges_check,
                                zone);
      DCHECK(CharacterRange::Equals(result_ranges, ranges_check));
#endif
      break;
    }
  }

  // The complement is taken only after the whole operation was evaluated.
  if (node->is_negated()) {
    CharacterRange::Negate(result_ranges, temp_ranges, zone);
    std::swap(*result_ranges, *temp_ranges);
    temp_ranges->Rewind(0);
  }

  DCHECK_EQ(temp_ranges->length(), 0);

#ifdef ENABLE_SLOW_DCHECKS
  // Cache results for DCHECKs.
  node->ranges_ = zone->New<ZoneList<CharacterRange>>(*result_ranges, zone);
#endif
}
|
||||
|
||||
// static
|
||||
void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
|
||||
if (character_ranges->length() <= 1) return;
|
||||
@ -1500,7 +1650,7 @@ void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
|
||||
}
|
||||
|
||||
// static
|
||||
void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
|
||||
void CharacterRange::Negate(const ZoneList<CharacterRange>* ranges,
|
||||
ZoneList<CharacterRange>* negated_ranges,
|
||||
Zone* zone) {
|
||||
DCHECK(CharacterRange::IsCanonical(ranges));
|
||||
@ -1523,6 +1673,128 @@ void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
|
||||
}
|
||||
}
|
||||
|
||||
// static
|
||||
void CharacterRange::Intersect(const ZoneList<CharacterRange>* lhs,
|
||||
const ZoneList<CharacterRange>* rhs,
|
||||
ZoneList<CharacterRange>* intersection,
|
||||
Zone* zone) {
|
||||
DCHECK(CharacterRange::IsCanonical(lhs));
|
||||
DCHECK(CharacterRange::IsCanonical(rhs));
|
||||
DCHECK_EQ(0, intersection->length());
|
||||
int lhs_index = 0;
|
||||
int rhs_index = 0;
|
||||
while (lhs_index < lhs->length() && rhs_index < rhs->length()) {
|
||||
// Skip non-overlapping ranges.
|
||||
if (lhs->at(lhs_index).to() < rhs->at(rhs_index).from()) {
|
||||
lhs_index++;
|
||||
continue;
|
||||
}
|
||||
if (rhs->at(rhs_index).to() < lhs->at(lhs_index).from()) {
|
||||
rhs_index++;
|
||||
continue;
|
||||
}
|
||||
|
||||
base::uc32 from =
|
||||
std::max(lhs->at(lhs_index).from(), rhs->at(rhs_index).from());
|
||||
base::uc32 to = std::min(lhs->at(lhs_index).to(), rhs->at(rhs_index).to());
|
||||
intersection->Add(CharacterRange::Range(from, to), zone);
|
||||
if (to == lhs->at(lhs_index).to()) {
|
||||
lhs_index++;
|
||||
} else {
|
||||
rhs_index++;
|
||||
}
|
||||
}
|
||||
|
||||
DCHECK(IsCanonical(intersection));
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Advance |index| and set |from| and |to| to the new range, if not out of
|
||||
// bounds of |range|, otherwise |from| is set to a code point beyond the legal
|
||||
// unicode character range.
|
||||
void SafeAdvanceRange(const ZoneList<CharacterRange>* range, int* index,
|
||||
base::uc32* from, base::uc32* to) {
|
||||
++(*index);
|
||||
if (*index < range->length()) {
|
||||
*from = range->at(*index).from();
|
||||
*to = range->at(*index).to();
|
||||
} else {
|
||||
*from = kMaxCodePoint + 1;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// static
|
||||
void CharacterRange::Subtract(const ZoneList<CharacterRange>* src,
|
||||
const ZoneList<CharacterRange>* to_remove,
|
||||
ZoneList<CharacterRange>* result, Zone* zone) {
|
||||
DCHECK(CharacterRange::IsCanonical(src));
|
||||
DCHECK(CharacterRange::IsCanonical(to_remove));
|
||||
DCHECK_EQ(0, result->length());
|
||||
int src_index = 0;
|
||||
int to_remove_index = 0;
|
||||
base::uc32 from = src->at(src_index).from();
|
||||
base::uc32 to = src->at(src_index).to();
|
||||
while (src_index < src->length() && to_remove_index < to_remove->length()) {
|
||||
CharacterRange remove_range = to_remove->at(to_remove_index);
|
||||
if (remove_range.to() < from) {
|
||||
// (a) Non-overlapping case, ignore current to_remove range.
|
||||
// |-------|
|
||||
// |-------|
|
||||
to_remove_index++;
|
||||
} else if (to < remove_range.from()) {
|
||||
// (b) Non-overlapping case, add full current range to result.
|
||||
// |-------|
|
||||
// |-------|
|
||||
result->Add(CharacterRange::Range(from, to), zone);
|
||||
SafeAdvanceRange(src, &src_index, &from, &to);
|
||||
} else if (from >= remove_range.from() && to <= remove_range.to()) {
|
||||
// (c) Current to_remove range fully covers current range.
|
||||
// |---|
|
||||
// |-------|
|
||||
SafeAdvanceRange(src, &src_index, &from, &to);
|
||||
} else if (from < remove_range.from() && to > remove_range.to()) {
|
||||
// (d) Split current range.
|
||||
// |-------|
|
||||
// |---|
|
||||
result->Add(CharacterRange::Range(from, remove_range.from() - 1), zone);
|
||||
from = remove_range.to() + 1;
|
||||
to_remove_index++;
|
||||
} else if (from < remove_range.from()) {
|
||||
// (e) End current range.
|
||||
// |-------|
|
||||
// |-------|
|
||||
to = remove_range.from() - 1;
|
||||
result->Add(CharacterRange::Range(from, to), zone);
|
||||
SafeAdvanceRange(src, &src_index, &from, &to);
|
||||
} else if (to > remove_range.to()) {
|
||||
// (f) Modify start of current range.
|
||||
// |-------|
|
||||
// |-------|
|
||||
from = remove_range.to() + 1;
|
||||
to_remove_index++;
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
// The last range needs special treatment after |to_remove| is exhausted, as
|
||||
// |from| might have been modified by the last |to_remove| range and |to| was
|
||||
// not yet known (i.e. cases d and f).
|
||||
if (from <= to) {
|
||||
result->Add(CharacterRange::Range(from, to), zone);
|
||||
}
|
||||
src_index++;
|
||||
|
||||
// Add remaining ranges after |to_remove| is exhausted.
|
||||
for (; src_index < src->length(); src_index++) {
|
||||
result->Add(src->at(src_index), zone);
|
||||
}
|
||||
|
||||
DCHECK(IsCanonical(result));
|
||||
}
|
||||
|
||||
// static
|
||||
void CharacterRange::ClampToOneByte(ZoneList<CharacterRange>* ranges) {
|
||||
DCHECK(IsCanonical(ranges));
|
||||
@ -1544,6 +1816,20 @@ void CharacterRange::ClampToOneByte(ZoneList<CharacterRange>* ranges) {
|
||||
ranges->Rewind(n);
|
||||
}
|
||||
|
||||
// static
|
||||
// Returns true iff both canonical range lists have the same length and hold
// pairwise equal ranges.
bool CharacterRange::Equals(const ZoneList<CharacterRange>* lhs,
                            const ZoneList<CharacterRange>* rhs) {
  DCHECK(IsCanonical(lhs));
  DCHECK(IsCanonical(rhs));

  const int range_count = lhs->length();
  if (range_count != rhs->length()) return false;

  int pos = 0;
  while (pos < range_count) {
    if (lhs->at(pos) != rhs->at(pos)) return false;
    pos++;
  }
  return true;
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Scoped object to keep track of how much we unroll quantifier loops in the
|
||||
|
@ -42,7 +42,9 @@ namespace internal {
|
||||
T(InvalidClassPropertyName, "Invalid property name in character class") \
|
||||
T(InvalidCharacterClass, "Invalid character class") \
|
||||
T(UnterminatedCharacterClass, "Unterminated character class") \
|
||||
T(OutOfOrderCharacterClass, "Range out of order in character class")
|
||||
T(OutOfOrderCharacterClass, "Range out of order in character class") \
|
||||
T(InvalidClassSetOperation, "Invalid set operation in character class") \
|
||||
T(InvalidCharacterInClass, "Invalid character in character class")
|
||||
|
||||
enum class RegExpError : uint32_t {
|
||||
#define TEMPLATE(NAME, STRING) k##NAME,
|
||||
|
File diff suppressed because it is too large
Load Diff
143
test/mjsunit/harmony/regexp-unicode-sets.js
Normal file
143
test/mjsunit/harmony/regexp-unicode-sets.js
Normal file
@ -0,0 +1,143 @@
|
||||
// Copyright 2022 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --harmony-regexp-unicode-sets
|
||||
|
||||
// u and v are not allowed together.
|
||||
assertEarlyError('/./uv');
|
||||
assertThrowsAtRuntime("new RegExp('.','uv')", SyntaxError);
|
||||
|
||||
assertEquals('v', /./v.flags);
|
||||
assertTrue(/./v.unicodeSets);
|
||||
|
||||
// Characters that require escaping within a character class in /v mode
|
||||
assertEarlyError('/[(]/v');
|
||||
assertEarlyError('/[)]/v');
|
||||
assertEarlyError('/[[]/v');
|
||||
assertEarlyError('/[]]/v');
|
||||
assertEarlyError('/[{]/v');
|
||||
assertEarlyError('/[}]/v');
|
||||
assertEarlyError('/[/]/v');
|
||||
assertEarlyError('/[-]/v');
|
||||
// Need to escape the backslash, as assertEarlyError uses eval().
|
||||
assertEarlyError('/[\\]/v');
|
||||
assertEarlyError('/[|]/v');
|
||||
|
||||
assertEarlyError('/[&&]/v');
|
||||
assertEarlyError('/[!!]/v');
|
||||
assertEarlyError('/[##]/v');
|
||||
assertEarlyError('/[$$]/v');
|
||||
assertEarlyError('/[%%]/v');
|
||||
assertEarlyError('/[**]/v');
|
||||
assertEarlyError('/[++]/v');
|
||||
assertEarlyError('/[,,]/v');
|
||||
assertEarlyError('/[..]/v');
|
||||
assertEarlyError('/[::]/v');
|
||||
assertEarlyError('/[;;]/v');
|
||||
assertEarlyError('/[<<]/v');
|
||||
assertEarlyError('/[==]/v');
|
||||
assertEarlyError('/[>>]/v');
|
||||
assertEarlyError('/[??]/v');
|
||||
assertEarlyError('/[@@]/v');
|
||||
// The first ^ negates the class. The following two are not valid.
|
||||
assertEarlyError('/[^^^]/v');
|
||||
assertEarlyError('/[``]/v');
|
||||
assertEarlyError('/[~~]/v');
|
||||
|
||||
assertEarlyError('/[a&&&]/v');
|
||||
assertEarlyError('/[&&&a]/v');
|
||||
|
||||
// All ASCII characters, U+0000 through U+007F inclusive (128 entries), each as
// a one-character string. Used by check() to derive the characters that must
// not match when no explicit list is given.
const allAscii = Array.from(
    {length: 128}, (v, i) => { return String.fromCharCode(i); });
|
||||
|
||||
// Asserts that |re| matches every entry of |expectMatch| and none of
// |expectNoMatch|. If |expectNoMatch| is omitted, it defaults to all entries
// of |allAscii| that are not listed in |expectMatch| (numbers are compared by
// their string representation). Afterwards the same expectations are checked
// with swapped roles against a regexp that nests |re| in a negated class.
function check(re, expectMatch, expectNoMatch) {
  if (expectNoMatch === undefined) {
    const expected = new Set();
    for (const val of expectMatch) {
      expected.add((typeof val == 'number') ? String(val) : val);
    }
    expectNoMatch = allAscii.filter(candidate => !expected.has(candidate));
  }

  // Runs |regexp| against every input and asserts the expected outcome.
  const verifyAll = (regexp, inputs, shouldMatch) => {
    const assertOutcome = shouldMatch ? assertTrue : assertFalse;
    for (const input of inputs) {
      assertOutcome(regexp.test(input), `${regexp}.test(${input})`);
    }
  };

  verifyAll(re, expectMatch, true);
  verifyAll(re, expectNoMatch, false);

  // Nest the current RegExp in a negated class; all expectations invert.
  const inverted = new RegExp(`[^${re.source}]`, re.flags);
  verifyAll(inverted, expectMatch, false);
  verifyAll(inverted, expectNoMatch, true);
}
|
||||
|
||||
// Union with nested class
|
||||
check(
|
||||
/[\da-f[xy][^[^z]]]/v, Array.from('0123456789abcdefxyz'),
|
||||
Array.from('ghijklmnopqrstuv!?'));
|
||||
|
||||
// Intersections
|
||||
check(/[\d&&[0-9]]/v, Array.from('0123456789'), []);
|
||||
check(/[\d&&0]/v, [0], Array.from('123456789'));
|
||||
check(/[\d&&9]/v, [9], Array.from('012345678'));
|
||||
check(/[\d&&[02468]]/v, Array.from('02468'), Array.from('13579'));
|
||||
check(/[\d&&[13579]]/v, Array.from('13579'), Array.from('02468'));
|
||||
check(
|
||||
/[\w&&[^a-zA-Z_]]/v, Array.from('0123456789'),
|
||||
Array.from('abcdxyzABCDXYZ_!?'));
|
||||
check(
|
||||
/[^\w&&[a-zA-Z_]]/v, Array.from('0123456789!?'),
|
||||
Array.from('abcdxyzABCDXYZ_'));
|
||||
|
||||
// Subtractions
|
||||
check(/[\d--[!-%]]/v, Array.from('0123456789'));
|
||||
check(/[\d--[A-Z]]/v, Array.from('0123456789'));
|
||||
check(/[\d--[0-9]]/v, []);
|
||||
check(/[\d--[\w]]/v, []);
|
||||
check(/[\d--0]/v, Array.from('123456789'));
|
||||
check(/[\d--9]/v, Array.from('012345678'));
|
||||
check(/[[\d[a-c]]--9]/v, Array.from('012345678abc'));
|
||||
check(/[\d--[02468]]/v, Array.from('13579'));
|
||||
check(/[\d--[13579]]/v, Array.from('02468'));
|
||||
check(/[[3-7]--[0-9]]/v, []);
|
||||
check(/[[3-7]--[0-7]]/v, []);
|
||||
check(/[[3-7]--[3-9]]/v, []);
|
||||
check(/[[3-79]--[0-7]]/v, [9]);
|
||||
check(/[[3-79]--[3-9]]/v, []);
|
||||
check(/[[3-7]--[0-3]]/v, Array.from('4567'));
|
||||
check(/[[3-7]--[0-5]]/v, Array.from('67'));
|
||||
check(/[[3-7]--[7-9]]/v, Array.from('3456'));
|
||||
check(/[[3-7]--[5-9]]/v, Array.from('34'));
|
||||
check(/[[3-7a-c]--[0-3]]/v, Array.from('4567abc'));
|
||||
check(/[[3-7a-c]--[0-5]]/v, Array.from('67abc'));
|
||||
check(/[[3-7a-c]--[7-9]]/v, Array.from('3456abc'));
|
||||
check(/[[3-7a-c]--[5-9]]/v, Array.from('34abc'));
|
||||
check(/[[2-8]--[0-3]--5--[7-9]]/v, Array.from('46'));
|
||||
check(/[[2-57-8]--[0-3]--[5-7]]/v, Array.from('48'));
|
||||
check(/[[0-57-8]--[1-34]--[5-7]]/v, Array.from('08'));
|
||||
check(/[\d--[^02468]]/v, Array.from('02468'));
|
||||
check(/[\d--[^13579]]/v, Array.from('13579'));
|
||||
|
||||
// Ignore-Case
|
||||
check(/[Ā-č]/v, Array.from('ĀāĂ㥹Ćć'), Array.from('abc'));
|
||||
check(/[ĀĂĄĆ]/vi, Array.from('ĀāĂ㥹Ćć'), Array.from('abc'));
|
||||
check(/[āăąć]/vi, Array.from('ĀāĂ㥹Ćć'), Array.from('abc'));
|
||||
|
||||
// Some more sophisticated tests taken from
|
||||
// https://v8.dev/features/regexp-v-flag
|
||||
assertFalse(/[\p{Script_Extensions=Greek}--π]/v.test('π'));
|
||||
assertFalse(/[\p{Script_Extensions=Greek}--[αβγ]]/v.test('α'));
|
||||
assertFalse(/[\p{Script_Extensions=Greek}--[α-γ]]/v.test('β'));
|
||||
assertTrue(/[\p{Decimal_Number}--[0-9]]/v.test('𑜹'));
|
||||
assertFalse(/[\p{Decimal_Number}--[0-9]]/v.test('4'));
|
||||
assertTrue(/[\p{Script_Extensions=Greek}&&\p{Letter}]/v.test('π'));
|
||||
assertFalse(/[\p{Script_Extensions=Greek}&&\p{Letter}]/v.test('𐆊'));
|
||||
assertTrue(/[\p{White_Space}&&\p{ASCII}]/v.test('\n'));
|
||||
assertFalse(/[\p{White_Space}&&\p{ASCII}]/v.test('\u2028'));
|
||||
assertTrue(/[\p{Script_Extensions=Mongolian}&&\p{Number}]/v.test('᠗'));
|
||||
assertFalse(/[\p{Script_Extensions=Mongolian}&&\p{Number}]/v.test('ᠴ'));
|
||||
assertEquals('XXXXXX4#', 'aAbBcC4#'.replaceAll(/\p{Lowercase_Letter}/giv, 'X'));
|
||||
assertEquals('XXXXXX4#', 'aAbBcC4#'.replaceAll(/[^\P{Lowercase_Letter}]/giv, 'X'));
|
@ -434,6 +434,10 @@
|
||||
'regress/regress-1262423': [PASS,FAIL],
|
||||
'regress/regress-793588': [PASS,FAIL],
|
||||
|
||||
# RegExp unicode tests rely on ICU for property classes and
|
||||
# case-insensitive unicode patterns.
|
||||
'harmony/regexp-unicode-sets': [PASS,FAIL],
|
||||
|
||||
# The noi18n build cannot parse characters in the supplementary planes.
|
||||
'harmony/regexp-named-captures': [FAIL],
|
||||
'regress/regress-v8-10384': [FAIL],
|
||||
|
@ -317,76 +317,28 @@
|
||||
'built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-CharacterClass': [SKIP],
|
||||
'built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-P': [SKIP],
|
||||
'built-ins/RegExp/property-escapes/generated/strings/RGI_Emoji_ZWJ_Sequence-negative-u': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-difference-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-difference-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-difference-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-difference-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-difference-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-difference-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-union-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-union-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-union-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-union-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-union-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-union-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-difference-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-difference-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-difference-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-difference-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-difference-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-difference-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-intersection-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-intersection-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-intersection-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-intersection-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-intersection-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-intersection-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-union-character-class-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-union-character-class': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-union-character': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-union-property-of-strings-escape': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-union-string-literal': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/property-of-strings-escape-difference-character-class-escape': [SKIP],
|
||||
@ -805,6 +757,27 @@
|
||||
'built-ins/RegExp/named-groups/unicode-property-names-valid': [SKIP],
|
||||
'built-ins/RegExp/named-groups/non-unicode-property-names-valid': [FAIL],
|
||||
'built-ins/RegExp/match-indices/indices-array-unicode-property-names': [SKIP],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-difference-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-difference-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-union-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-escape-intersection-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-intersection-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-class-union-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-difference-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-intersection-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-class': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-difference-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-class': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-union-character-property-escape': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-union-character-class': [PASS,FAIL],
|
||||
'built-ins/RegExp/unicodeSets/generated/character-property-escape-intersection-character-property-escape': [PASS,FAIL],
|
||||
|
||||
# Unicode in identifiers.
|
||||
'language/identifiers/part-unicode-*': [FAIL],
|
||||
|
Loading…
Reference in New Issue
Block a user