[regexp] Remove experimental mode modifiers feature

The implementation came in with
https://chromium-review.googlesource.com/758999.

This feature was never enabled by default, is not used anywhere, and
is not on any standardization path.

Bug: v8:10953
Change-Id: Ia2b0a556c1fb504a4cd05bdfa9f0a9c5be608d26
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3053589
Reviewed-by: Mathias Bynens <mathias@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#75934}
This commit is contained in:
Jakob Gruber 2021-07-26 14:31:12 +02:00 committed by V8 LUCI CQ
parent 5a352b395b
commit 7e97b2cffb
15 changed files with 164 additions and 739 deletions

View File

@ -1665,7 +1665,6 @@ DEFINE_BOOL(serialization_statistics, false,
"Collect statistics on serialized objects.")
// Regexp
DEFINE_BOOL(regexp_optimization, true, "generate optimized regexp code")
DEFINE_BOOL(regexp_mode_modifiers, false, "enable inline flags in regexp.")
DEFINE_BOOL(regexp_interpret_all, false, "interpret all regexp code")
#ifdef V8_TARGET_BIG_ENDIAN
#define REGEXP_PEEPHOLE_OPTIMIZATION_BOOL false

View File

@ -64,17 +64,14 @@ class CanBeHandledVisitor final : private RegExpVisitor {
}
void* VisitCharacterClass(RegExpCharacterClass* node, void*) override {
result_ = result_ && AreSuitableFlags(node->flags());
return nullptr;
}
void* VisitAssertion(RegExpAssertion* node, void*) override {
result_ = result_ && AreSuitableFlags(node->flags());
return nullptr;
}
void* VisitAtom(RegExpAtom* node, void*) override {
result_ = result_ && AreSuitableFlags(node->flags());
return nullptr;
}

View File

@ -280,8 +280,7 @@ class RegExpAssertion final : public RegExpTree {
NON_BOUNDARY = 5,
LAST_TYPE = NON_BOUNDARY,
};
RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
: assertion_type_(type), flags_(flags) {}
explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpAssertion* AsAssertion() override;
@ -291,11 +290,9 @@ class RegExpAssertion final : public RegExpTree {
int min_match() override { return 0; }
int max_match() override { return 0; }
AssertionType assertion_type() const { return assertion_type_; }
JSRegExp::Flags flags() const { return flags_; }
private:
const AssertionType assertion_type_;
const JSRegExp::Flags flags_;
};
@ -312,21 +309,17 @@ class RegExpCharacterClass final : public RegExpTree {
using CharacterClassFlags = base::Flags<Flag>;
RegExpCharacterClass(
Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
Zone* zone, ZoneList<CharacterRange>* ranges,
CharacterClassFlags character_class_flags = CharacterClassFlags())
: set_(ranges),
flags_(flags),
character_class_flags_(character_class_flags) {
: set_(ranges), character_class_flags_(character_class_flags) {
// Convert the empty set of ranges to the negated Everything() range.
if (ranges->is_empty()) {
ranges->Add(CharacterRange::Everything(), zone);
character_class_flags_ ^= NEGATED;
}
}
RegExpCharacterClass(base::uc16 type, JSRegExp::Flags flags)
: set_(type),
flags_(flags),
character_class_flags_(CharacterClassFlags()) {}
explicit RegExpCharacterClass(base::uc16 type)
: set_(type), character_class_flags_(CharacterClassFlags()) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpCharacterClass* AsCharacterClass() override;
@ -356,23 +349,19 @@ class RegExpCharacterClass final : public RegExpTree {
base::uc16 standard_type() const { return set_.standard_set_type(); }
ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
JSRegExp::Flags flags() const { return flags_; }
bool contains_split_surrogate() const {
return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
}
private:
CharacterSet set_;
const JSRegExp::Flags flags_;
CharacterClassFlags character_class_flags_;
};
class RegExpAtom final : public RegExpTree {
public:
explicit RegExpAtom(base::Vector<const base::uc16> data,
JSRegExp::Flags flags)
: data_(data), flags_(flags) {}
explicit RegExpAtom(base::Vector<const base::uc16> data) : data_(data) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpAtom* AsAtom() override;
@ -383,12 +372,9 @@ class RegExpAtom final : public RegExpTree {
void AppendToText(RegExpText* text, Zone* zone) override;
base::Vector<const base::uc16> data() { return data_; }
int length() { return data_.length(); }
JSRegExp::Flags flags() const { return flags_; }
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
private:
base::Vector<const base::uc16> data_;
const JSRegExp::Flags flags_;
};

View File

@ -200,19 +200,17 @@ ZoneList<CharacterRange>* ToCanonicalZoneList(
}
void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success, UnicodeRangeSplitter* splitter,
JSRegExp::Flags flags) {
RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* bmp =
ToCanonicalZoneList(splitter->bmp(), compiler->zone());
if (bmp == nullptr) return;
result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
compiler->zone(), bmp, compiler->read_backward(), on_success, flags)));
compiler->zone(), bmp, compiler->read_backward(), on_success)));
}
void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter,
JSRegExp::Flags flags) {
UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* non_bmp =
ToCanonicalZoneList(splitter->non_bmp(), compiler->zone());
if (non_bmp == nullptr) return;
@ -237,7 +235,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, to_t), compiler->read_backward(),
on_success, flags)));
on_success)));
} else {
if (from_t != kTrailSurrogateStart) {
// Add [from_l][from_t-\udfff]
@ -245,7 +243,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, kTrailSurrogateEnd),
compiler->read_backward(), on_success, flags)));
compiler->read_backward(), on_success)));
from_l++;
}
if (to_t != kTrailSurrogateEnd) {
@ -254,7 +252,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(to_l),
CharacterRange::Range(kTrailSurrogateStart, to_t),
compiler->read_backward(), on_success, flags)));
compiler->read_backward(), on_success)));
to_l--;
}
if (from_l <= to_l) {
@ -263,7 +261,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Range(from_l, to_l),
CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
compiler->read_backward(), on_success, flags)));
compiler->read_backward(), on_success)));
}
}
}
@ -271,39 +269,38 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
JSRegExp::Flags flags) {
ZoneList<CharacterRange>* match, RegExpNode* on_success,
bool read_backward) {
Zone* zone = compiler->zone();
RegExpNode* match_node = TextNode::CreateForCharacterRanges(
zone, match, read_backward, on_success, flags);
zone, match, read_backward, on_success);
int stack_register = compiler->UnicodeLookaroundStackRegister();
int position_register = compiler->UnicodeLookaroundPositionRegister();
RegExpLookaround::Builder lookaround(false, match_node, stack_register,
position_register);
RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);
zone, lookbehind, !read_backward, lookaround.on_match_success());
return lookaround.ForMatch(negative_match);
}
RegExpNode* MatchAndNegativeLookaroundInReadDirection(
RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
bool read_backward, JSRegExp::Flags flags) {
bool read_backward) {
Zone* zone = compiler->zone();
int stack_register = compiler->UnicodeLookaroundStackRegister();
int position_register = compiler->UnicodeLookaroundPositionRegister();
RegExpLookaround::Builder lookaround(false, on_success, stack_register,
position_register);
RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
zone, lookahead, read_backward, lookaround.on_match_success(), flags);
zone, lookahead, read_backward, lookaround.on_match_success());
return TextNode::CreateForCharacterRanges(
zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
zone, match, read_backward, lookaround.ForMatch(negative_match));
}
void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter,
JSRegExp::Flags flags) {
UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* lead_surrogates =
ToCanonicalZoneList(splitter->lead_surrogates(), compiler->zone());
if (lead_surrogates == nullptr) return;
@ -317,20 +314,19 @@ void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
// Reading backward. Assert that reading forward, there is no trail
// surrogate, and then backward match the lead surrogate.
match = NegativeLookaroundAgainstReadDirectionAndMatch(
compiler, trail_surrogates, lead_surrogates, on_success, true, flags);
compiler, trail_surrogates, lead_surrogates, on_success, true);
} else {
// Reading forward. Forward match the lead surrogate and assert that
// no trail surrogate follows.
match = MatchAndNegativeLookaroundInReadDirection(
compiler, lead_surrogates, trail_surrogates, on_success, false, flags);
compiler, lead_surrogates, trail_surrogates, on_success, false);
}
result->AddAlternative(GuardedAlternative(match));
}
void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter,
JSRegExp::Flags flags) {
UnicodeRangeSplitter* splitter) {
ZoneList<CharacterRange>* trail_surrogates =
ToCanonicalZoneList(splitter->trail_surrogates(), compiler->zone());
if (trail_surrogates == nullptr) return;
@ -344,12 +340,12 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
// Reading backward. Backward match the trail surrogate and assert that no
// lead surrogate precedes it.
match = MatchAndNegativeLookaroundInReadDirection(
compiler, trail_surrogates, lead_surrogates, on_success, true, flags);
compiler, trail_surrogates, lead_surrogates, on_success, true);
} else {
// Reading forward. Assert that reading backward, there is no lead
// surrogate, and then forward match the trail surrogate.
match = NegativeLookaroundAgainstReadDirectionAndMatch(
compiler, lead_surrogates, trail_surrogates, on_success, false, flags);
compiler, lead_surrogates, trail_surrogates, on_success, false);
}
result->AddAlternative(GuardedAlternative(match));
}
@ -365,9 +361,7 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
// the associated trail surrogate.
ZoneList<CharacterRange>* range = CharacterRange::List(
zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
JSRegExp::Flags default_flags = JSRegExp::Flags();
return TextNode::CreateForCharacterRanges(zone, range, false, on_success,
default_flags);
return TextNode::CreateForCharacterRanges(zone, range, false, on_success);
}
void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
@ -410,10 +404,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
set_.Canonicalize();
Zone* zone = compiler->zone();
ZoneList<CharacterRange>* ranges = this->ranges(zone);
if (NeedsUnicodeCaseEquivalents(flags_)) {
if (NeedsUnicodeCaseEquivalents(compiler->flags())) {
AddUnicodeCaseEquivalents(ranges, zone);
}
if (IsUnicode(flags_) && !compiler->one_byte() &&
if (IsUnicode(compiler->flags()) && !compiler->one_byte() &&
!contains_split_surrogate()) {
if (is_negated()) {
ZoneList<CharacterRange>* negated =
@ -422,9 +416,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
ranges = negated;
}
if (ranges->length() == 0) {
JSRegExp::Flags default_flags;
RegExpCharacterClass* fail =
zone->New<RegExpCharacterClass>(zone, ranges, default_flags);
zone->New<RegExpCharacterClass>(zone, ranges);
return zone->New<TextNode>(fail, compiler->read_backward(), on_success);
}
if (standard_type() == '*') {
@ -432,10 +425,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
} else {
ChoiceNode* result = zone->New<ChoiceNode>(2, zone);
UnicodeRangeSplitter splitter(ranges);
AddBmpCharacters(compiler, result, on_success, &splitter, flags_);
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter, flags_);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter, flags_);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter, flags_);
AddBmpCharacters(compiler, result, on_success, &splitter);
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
static constexpr int kMaxRangesToInline = 32; // Arbitrary.
if (ranges->length() > kMaxRangesToInline) result->SetDoNotInline();
return result;
@ -510,12 +503,10 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
// i is length or it is the index of an atom.
if (i == length) break;
int first_atom = i;
JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
i++;
while (i < length) {
RegExpTree* alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
if (alternative->AsAtom()->flags() != flags) break;
i++;
}
// Sort atoms to get ones with common prefixes together.
@ -527,7 +518,7 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
DCHECK_LT(first_atom, alternatives->length());
DCHECK_LE(i, alternatives->length());
DCHECK_LE(first_atom, i);
if (IgnoreCase(flags)) {
if (IgnoreCase(compiler->flags())) {
#ifdef V8_INTL_SUPPORT
alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
i - first_atom);
@ -564,7 +555,6 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
continue;
}
RegExpAtom* const atom = alternative->AsAtom();
JSRegExp::Flags flags = atom->flags();
#ifdef V8_INTL_SUPPORT
icu::UnicodeString common_prefix(atom->data().at(0));
#else
@ -577,18 +567,17 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
alternative = alternatives->at(i);
if (!alternative->IsAtom()) break;
RegExpAtom* const atom = alternative->AsAtom();
if (atom->flags() != flags) break;
#ifdef V8_INTL_SUPPORT
icu::UnicodeString new_prefix(atom->data().at(0));
if (new_prefix != common_prefix) {
if (!IgnoreCase(flags)) break;
if (!IgnoreCase(compiler->flags())) break;
if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
break;
}
#else
unibrow::uchar new_prefix = atom->data().at(0);
if (new_prefix != common_prefix) {
if (!IgnoreCase(flags)) break;
if (!IgnoreCase(compiler->flags())) break;
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
compiler->isolate()->regexp_macro_assembler_canonicalize();
new_prefix = Canonical(canonicalize, new_prefix);
@ -617,8 +606,8 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
}
}
}
RegExpAtom* prefix = zone->New<RegExpAtom>(
atom->data().SubVector(0, prefix_length), flags);
RegExpAtom* prefix =
zone->New<RegExpAtom>(atom->data().SubVector(0, prefix_length));
ZoneList<RegExpTree*>* pair = zone->New<ZoneList<RegExpTree*>>(2, zone);
pair->Add(prefix, zone);
ZoneList<RegExpTree*>* suffixes =
@ -631,8 +620,7 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
suffixes->Add(zone->New<RegExpEmpty>(), zone);
} else {
RegExpTree* suffix = zone->New<RegExpAtom>(
old_atom->data().SubVector(prefix_length, old_atom->length()),
flags);
old_atom->data().SubVector(prefix_length, old_atom->length()));
suffixes->Add(suffix, zone);
}
}
@ -670,7 +658,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
i++;
continue;
}
JSRegExp::Flags flags = atom->flags();
const JSRegExp::Flags flags = compiler->flags();
DCHECK_IMPLIES(IsUnicode(flags),
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
bool contains_trail_surrogate =
@ -684,7 +672,6 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (!alternative->IsAtom()) break;
RegExpAtom* const atom = alternative->AsAtom();
if (atom->length() != 1) break;
if (atom->flags() != flags) break;
DCHECK_IMPLIES(IsUnicode(flags),
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
contains_trail_surrogate |=
@ -705,8 +692,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (IsUnicode(flags) && contains_trail_surrogate) {
character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
}
alternatives->at(write_posn++) = zone->New<RegExpCharacterClass>(
zone, ranges, flags, character_class_flags);
alternatives->at(write_posn++) =
zone->New<RegExpCharacterClass>(zone, ranges, character_class_flags);
} else {
// Just copy any trivial alternatives.
for (int j = first_in_run; j < i; j++) {
@ -754,7 +741,7 @@ RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpAssertion::AssertionType type,
JSRegExp::Flags flags) {
DCHECK(NeedsUnicodeCaseEquivalents(flags));
CHECK(NeedsUnicodeCaseEquivalents(flags));
Zone* zone = compiler->zone();
ZoneList<CharacterRange>* word_range =
zone->New<ZoneList<CharacterRange>>(2, zone);
@ -772,13 +759,13 @@ RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
stack_register, position_register);
RegExpNode* backward = TextNode::CreateForCharacterRanges(
zone, word_range, true, lookbehind.on_match_success(), flags);
zone, word_range, true, lookbehind.on_match_success());
// Look to the right.
RegExpLookaround::Builder lookahead(lookahead_for_word,
lookbehind.ForMatch(backward),
stack_register, position_register);
RegExpNode* forward = TextNode::CreateForCharacterRanges(
zone, word_range, false, lookahead.on_match_success(), flags);
zone, word_range, false, lookahead.on_match_success());
result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
}
return result;
@ -796,14 +783,14 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
case START_OF_INPUT:
return AssertionNode::AtStart(on_success);
case BOUNDARY:
return NeedsUnicodeCaseEquivalents(flags_)
return NeedsUnicodeCaseEquivalents(compiler->flags())
? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
flags_)
compiler->flags())
: AssertionNode::AtBoundary(on_success);
case NON_BOUNDARY:
return NeedsUnicodeCaseEquivalents(flags_)
? BoundaryAssertionAsLookaround(compiler, on_success,
NON_BOUNDARY, flags_)
return NeedsUnicodeCaseEquivalents(compiler->flags())
? BoundaryAssertionAsLookaround(
compiler, on_success, NON_BOUNDARY, compiler->flags())
: AssertionNode::AtNonBoundary(on_success);
case END_OF_INPUT:
return AssertionNode::AtEnd(on_success);
@ -819,9 +806,7 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
ZoneList<CharacterRange>* newline_ranges =
zone->New<ZoneList<CharacterRange>>(3, zone);
CharacterRange::AddClassEscape('n', newline_ranges, false, zone);
JSRegExp::Flags default_flags = JSRegExp::Flags();
RegExpCharacterClass* newline_atom =
zone->New<RegExpCharacterClass>('n', default_flags);
RegExpCharacterClass* newline_atom = zone->New<RegExpCharacterClass>('n');
TextNode* newline_matcher =
zone->New<TextNode>(newline_atom, false,
ActionNode::PositiveSubmatchSuccess(
@ -975,16 +960,11 @@ class AssertionSequenceRewriter final {
uint32_t seen_assertions = 0;
STATIC_ASSERT(RegExpAssertion::LAST_TYPE < kUInt32Size * kBitsPerByte);
// Flags must match for folding.
JSRegExp::Flags flags = terms_->at(from)->AsAssertion()->flags();
bool saw_mismatched_flags = false;
for (int i = from; i < to; i++) {
RegExpAssertion* t = terms_->at(i)->AsAssertion();
if (t->flags() != flags) saw_mismatched_flags = true;
const uint32_t bit = 1 << t->assertion_type();
if ((seen_assertions & bit) && !saw_mismatched_flags) {
if (seen_assertions & bit) {
// Fold duplicates.
terms_->Set(i, zone_->New<RegExpEmpty>());
}
@ -1006,8 +986,7 @@ class AssertionSequenceRewriter final {
// negated '*' (everything) range serves the purpose.
ZoneList<CharacterRange>* ranges =
zone_->New<ZoneList<CharacterRange>>(0, zone_);
RegExpCharacterClass* cc =
zone_->New<RegExpCharacterClass>(zone_, ranges, JSRegExp::Flags());
RegExpCharacterClass* cc = zone_->New<RegExpCharacterClass>(zone_, ranges);
terms_->Set(from, cc);
// Zero out the rest.

View File

@ -240,12 +240,13 @@ class RecursionCheck {
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
bool one_byte)
JSRegExp::Flags flags, bool one_byte)
: next_register_(JSRegExp::RegistersForCaptureCount(capture_count)),
unicode_lookaround_stack_register_(kNoRegister),
unicode_lookaround_position_register_(kNoRegister),
work_list_(nullptr),
recursion_depth_(0),
flags_(flags),
one_byte_(one_byte),
reg_exp_too_big_(false),
limiting_recursion_(false),
@ -1585,7 +1586,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
QuickCheckDetails::Position* pos =
details->positions(characters_filled_in);
base::uc16 c = quarks[i];
if (elm.atom()->ignore_case()) {
if (IgnoreCase(compiler->flags())) {
unibrow::uchar chars[4];
int length = GetCaseIndependentLetters(
isolate, c, compiler->one_byte(), chars, 4);
@ -1815,16 +1816,16 @@ class IterationDecrementer {
LoopChoiceNode* node_;
};
RegExpNode* SeqRegExpNode::FilterOneByte(int depth) {
RegExpNode* SeqRegExpNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
VisitMarker marker(info());
return FilterSuccessor(depth - 1);
return FilterSuccessor(depth - 1, flags);
}
RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
RegExpNode* next = on_success_->FilterOneByte(depth - 1);
RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, JSRegExp::Flags flags) {
RegExpNode* next = on_success_->FilterOneByte(depth - 1, flags);
if (next == nullptr) return set_replacement(nullptr);
on_success_ = next;
return set_replacement(this);
@ -1845,7 +1846,7 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
return false;
}
RegExpNode* TextNode::FilterOneByte(int depth) {
RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
@ -1857,7 +1858,7 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
base::Vector<const base::uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) {
base::uc16 c = quarks[j];
if (elm.atom()->ignore_case()) {
if (IgnoreCase(flags)) {
c = unibrow::Latin1::TryConvertToLatin1(c);
}
if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
@ -1876,8 +1877,7 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
if (range_count != 0 && ranges->at(0).from() == 0 &&
ranges->at(0).to() >= String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
if (IgnoreCase(cc->flags()) &&
RangesContainLatin1Equivalents(ranges)) {
if (IgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
continue;
}
return set_replacement(nullptr);
@ -1886,8 +1886,7 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
if (range_count == 0 ||
ranges->at(0).from() > String::kMaxOneByteCharCode) {
// This will be handled in a later filter.
if (IgnoreCase(cc->flags()) &&
RangesContainLatin1Equivalents(ranges)) {
if (IgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
continue;
}
return set_replacement(nullptr);
@ -1895,26 +1894,27 @@ RegExpNode* TextNode::FilterOneByte(int depth) {
}
}
}
return FilterSuccessor(depth - 1);
return FilterSuccessor(depth - 1, flags);
}
RegExpNode* LoopChoiceNode::FilterOneByte(int depth) {
RegExpNode* LoopChoiceNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
{
VisitMarker marker(info());
RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth - 1);
RegExpNode* continue_replacement =
continue_node_->FilterOneByte(depth - 1, flags);
// If we can't continue after the loop then there is no sense in doing the
// loop.
if (continue_replacement == nullptr) return set_replacement(nullptr);
}
return ChoiceNode::FilterOneByte(depth - 1);
return ChoiceNode::FilterOneByte(depth - 1, flags);
}
RegExpNode* ChoiceNode::FilterOneByte(int depth) {
RegExpNode* ChoiceNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@ -1934,7 +1934,8 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth) {
RegExpNode* survivor = nullptr;
for (int i = 0; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->at(i);
RegExpNode* replacement = alternative.node()->FilterOneByte(depth - 1);
RegExpNode* replacement =
alternative.node()->FilterOneByte(depth - 1, flags);
DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
if (replacement != nullptr) {
alternatives_->at(i).set_node(replacement);
@ -1954,7 +1955,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth) {
zone()->New<ZoneList<GuardedAlternative>>(surviving, zone());
for (int i = 0; i < choice_count; i++) {
RegExpNode* replacement =
alternatives_->at(i).node()->FilterOneByte(depth - 1);
alternatives_->at(i).node()->FilterOneByte(depth - 1, flags);
if (replacement != nullptr) {
alternatives_->at(i).set_node(replacement);
new_alternatives->Add(alternatives_->at(i), zone());
@ -1964,7 +1965,8 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth) {
return this;
}
RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
JSRegExp::Flags flags) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
@ -1972,12 +1974,12 @@ RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = continue_node();
RegExpNode* replacement = node->FilterOneByte(depth - 1);
RegExpNode* replacement = node->FilterOneByte(depth - 1, flags);
if (replacement == nullptr) return set_replacement(nullptr);
alternatives_->at(kContinueIndex).set_node(replacement);
RegExpNode* neg_node = lookaround_node();
RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, flags);
// If the negative lookahead is always going to fail then
// we don't need to check it.
if (neg_replacement == nullptr) return set_replacement(replacement);
@ -2316,13 +2318,13 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
TextElement elm = elements()->at(i);
int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
if (elm.text_type() == TextElement::ATOM) {
if (SkipPass(pass, elm.atom()->ignore_case())) continue;
if (SkipPass(pass, IgnoreCase(compiler->flags()))) continue;
base::Vector<const base::uc16> quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
if (first_element_checked && i == 0 && j == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
base::uc16 quark = quarks[j];
if (elm.atom()->ignore_case()) {
if (IgnoreCase(compiler->flags())) {
// Everywhere else we assume that a non-Latin-1 character cannot match
// a Latin-1 character. Avoid the cases where this assumption is
// invalid by using the Latin1 equivalent instead.
@ -2391,29 +2393,27 @@ bool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
ZoneList<CharacterRange>* ranges,
bool read_backward,
RegExpNode* on_success,
JSRegExp::Flags flags) {
RegExpNode* on_success) {
DCHECK_NOT_NULL(ranges);
ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(1, zone);
elms->Add(TextElement::CharClass(
zone->New<RegExpCharacterClass>(zone, ranges, flags)),
zone);
elms->Add(
TextElement::CharClass(zone->New<RegExpCharacterClass>(zone, ranges)),
zone);
return zone->New<TextNode>(elms, read_backward, on_success);
}
TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
CharacterRange trail,
bool read_backward,
RegExpNode* on_success,
JSRegExp::Flags flags) {
RegExpNode* on_success) {
ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
elms->Add(TextElement::CharClass(
zone->New<RegExpCharacterClass>(zone, lead_ranges, flags)),
zone->New<RegExpCharacterClass>(zone, lead_ranges)),
zone);
elms->Add(TextElement::CharClass(
zone->New<RegExpCharacterClass>(zone, trail_ranges, flags)),
zone->New<RegExpCharacterClass>(zone, trail_ranges)),
zone);
return zone->New<TextNode>(elms, read_backward, on_success);
}
@ -2487,26 +2487,23 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
bound_checked_up_to_ = std::max(0, bound_checked_up_to_ - by);
}
void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
JSRegExp::Flags flags) {
if (!IgnoreCase(flags)) return;
#ifdef V8_INTL_SUPPORT
if (NeedsUnicodeCaseEquivalents(flags)) return;
#endif
int element_count = elements()->length();
for (int i = 0; i < element_count; i++) {
TextElement elm = elements()->at(i);
if (elm.text_type() == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.char_class();
#ifdef V8_INTL_SUPPORT
bool case_equivalents_already_added =
NeedsUnicodeCaseEquivalents(cc->flags());
#else
bool case_equivalents_already_added = false;
#endif
if (IgnoreCase(cc->flags()) && !case_equivalents_already_added) {
// None of the standard character classes is different in the case
// independent case and it slows us down if we don't know that.
if (cc->is_standard(zone())) continue;
ZoneList<CharacterRange>* ranges = cc->ranges(zone());
CharacterRange::AddCaseEquivalents(isolate, zone(), ranges,
is_one_byte);
}
// None of the standard character classes is different in the case
// independent case and it slows us down if we don't know that.
if (cc->is_standard(zone())) continue;
ZoneList<CharacterRange>* ranges = cc->ranges(zone());
CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
}
}
}
@ -3634,9 +3631,10 @@ class EatsAtLeastPropagator : public AllStatic {
template <typename... Propagators>
class Analysis : public NodeVisitor {
public:
Analysis(Isolate* isolate, bool is_one_byte)
Analysis(Isolate* isolate, bool is_one_byte, JSRegExp::Flags flags)
: isolate_(isolate),
is_one_byte_(is_one_byte),
flags_(flags),
error_(RegExpError::kNone) {}
void EnsureAnalyzed(RegExpNode* that) {
@ -3677,7 +3675,7 @@ class Analysis : public NodeVisitor {
} while (false)
void VisitText(TextNode* that) override {
that->MakeCaseIndependent(isolate(), is_one_byte_);
that->MakeCaseIndependent(isolate(), is_one_byte_, flags_);
EnsureAnalyzed(that->on_success());
if (has_failed()) return;
that->CalculateOffsets();
@ -3744,16 +3742,17 @@ class Analysis : public NodeVisitor {
private:
Isolate* isolate_;
bool is_one_byte_;
const bool is_one_byte_;
const JSRegExp::Flags flags_;
RegExpError error_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
RegExpNode* node) {
Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(isolate,
is_one_byte);
JSRegExp::Flags flags, RegExpNode* node) {
Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(
isolate, is_one_byte, flags);
DCHECK_EQ(node->info()->been_analyzed, false);
analysis.EnsureAnalyzed(node);
DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone);
@ -3807,7 +3806,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
return;
}
base::uc16 character = atom->data()[j];
if (IgnoreCase(atom->flags())) {
if (IgnoreCase(bm->compiler()->flags())) {
unibrow::uchar chars[4];
int length = GetCaseIndependentLetters(
isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
@ -3846,7 +3845,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
}
RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
RegExpNode* on_success, JSRegExp::Flags flags) {
RegExpNode* on_success) {
DCHECK(!read_backward());
ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
zone(), CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
@ -3858,11 +3857,11 @@ RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
int stack_register = UnicodeLookaroundStackRegister();
int position_register = UnicodeLookaroundPositionRegister();
RegExpNode* step_back = TextNode::CreateForCharacterRanges(
zone(), lead_surrogates, true, on_success, flags);
zone(), lead_surrogates, true, on_success);
RegExpLookaround::Builder builder(true, step_back, stack_register,
position_register);
RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
zone(), trail_surrogates, false, builder.on_match_success(), flags);
zone(), trail_surrogates, false, builder.on_match_success());
optional_step_back->AddAlternative(
GuardedAlternative(builder.ForMatch(match_trail)));
@ -3881,11 +3880,9 @@ RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
if (!data->tree->IsAnchoredAtStart() && !IsSticky(flags)) {
// Add a .*? at the beginning, outside the body capture, unless
// this expression is anchored at the beginning or sticky.
JSRegExp::Flags default_flags = JSRegExp::Flags();
RegExpNode* loop_node = RegExpQuantifier::ToNode(
0, RegExpTree::kInfinity, false,
zone()->New<RegExpCharacterClass>('*', default_flags), this,
captured_body, data->contains_anchor);
0, RegExpTree::kInfinity, false, zone()->New<RegExpCharacterClass>('*'),
this, captured_body, data->contains_anchor);
if (data->contains_anchor) {
// Unroll loop once, to take care of the case that might start
@ -3893,22 +3890,21 @@ RegExpNode* RegExpCompiler::PreprocessRegExp(RegExpCompileData* data,
ChoiceNode* first_step_node = zone()->New<ChoiceNode>(2, zone());
first_step_node->AddAlternative(GuardedAlternative(captured_body));
first_step_node->AddAlternative(GuardedAlternative(zone()->New<TextNode>(
zone()->New<RegExpCharacterClass>('*', default_flags), false,
loop_node)));
zone()->New<RegExpCharacterClass>('*'), false, loop_node)));
node = first_step_node;
} else {
node = loop_node;
}
}
if (is_one_byte) {
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
if (node != nullptr) {
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, flags);
}
} else if (IsUnicode(flags) && (IsGlobal(flags) || IsSticky(flags))) {
node = OptionallyStepBackToLeadSurrogate(node, flags);
node = OptionallyStepBackToLeadSurrogate(node);
}
if (node == nullptr) node = zone()->New<EndNode>(EndNode::BACKTRACK, zone());

View File

@ -424,7 +424,8 @@ struct PreloadState {
// Analysis performs assertion propagation and computes eats_at_least_ values.
// See the comments on AssertionPropagator and EatsAtLeastPropagator for more
// details.
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
JSRegExp::Flags flags, RegExpNode* node);
class FrequencyCollator {
public:
@ -474,7 +475,7 @@ class FrequencyCollator {
class RegExpCompiler {
public:
RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
bool is_one_byte);
JSRegExp::Flags flags, bool is_one_byte);
int AllocateRegister() {
if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
@ -531,8 +532,7 @@ class RegExpCompiler {
// If the regexp matching starts within a surrogate pair, step back to the
// lead surrogate and start matching from there.
RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpNode* on_success,
JSRegExp::Flags flags);
RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpNode* on_success);
inline void AddWork(RegExpNode* node) {
if (!node->on_work_list() && !node->label()->is_bound()) {
@ -553,6 +553,8 @@ class RegExpCompiler {
inline void IncrementRecursionDepth() { recursion_depth_++; }
inline void DecrementRecursionDepth() { recursion_depth_--; }
JSRegExp::Flags flags() const { return flags_; }
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
inline bool one_byte() { return one_byte_; }
@ -583,6 +585,7 @@ class RegExpCompiler {
int unicode_lookaround_position_register_;
ZoneVector<RegExpNode*>* work_list_;
int recursion_depth_;
const JSRegExp::Flags flags_;
RegExpMacroAssembler* macro_assembler_;
bool one_byte_;
bool reg_exp_too_big_;

View File

@ -205,7 +205,9 @@ class RegExpNode : public ZoneObject {
// If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for
// itself, or nullptr if the node can never match.
virtual RegExpNode* FilterOneByte(int depth) { return this; }
virtual RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) {
return this;
}
// Helper for FilterOneByte.
RegExpNode* replacement() {
DCHECK(info()->replacement_calculated);
@ -294,7 +296,7 @@ class SeqRegExpNode : public RegExpNode {
: RegExpNode(on_success->zone()), on_success_(on_success) {}
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
RegExpNode* FilterOneByte(int depth) override;
RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override {
on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
@ -302,7 +304,7 @@ class SeqRegExpNode : public RegExpNode {
}
protected:
RegExpNode* FilterSuccessor(int depth);
RegExpNode* FilterSuccessor(int depth, JSRegExp::Flags flags);
private:
RegExpNode* on_success_;
@ -406,15 +408,13 @@ class TextNode : public SeqRegExpNode {
static TextNode* CreateForCharacterRanges(Zone* zone,
ZoneList<CharacterRange>* ranges,
bool read_backward,
RegExpNode* on_success,
JSRegExp::Flags flags);
RegExpNode* on_success);
// Create TextNode for a surrogate pair with a range given for the
// lead and the trail surrogate each.
static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
CharacterRange trail,
bool read_backward,
RegExpNode* on_success,
JSRegExp::Flags flags);
RegExpNode* on_success);
void Accept(NodeVisitor* visitor) override;
void Emit(RegExpCompiler* compiler, Trace* trace) override;
void GetQuickCheckDetails(QuickCheckDetails* details,
@ -422,14 +422,15 @@ class TextNode : public SeqRegExpNode {
bool not_at_start) override;
ZoneList<TextElement>* elements() { return elms_; }
bool read_backward() { return read_backward_; }
void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
void MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
JSRegExp::Flags flags);
int GreedyLoopTextLength() override;
RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler) override;
void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) override;
void CalculateOffsets();
RegExpNode* FilterOneByte(int depth) override;
RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
int Length();
private:
@ -622,7 +623,7 @@ class ChoiceNode : public RegExpNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true;
}
RegExpNode* FilterOneByte(int depth) override;
RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
virtual bool read_backward() { return false; }
protected:
@ -694,7 +695,7 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
return !is_first;
}
void Accept(NodeVisitor* visitor) override;
RegExpNode* FilterOneByte(int depth) override;
RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
};
class LoopChoiceNode : public ChoiceNode {
@ -727,7 +728,7 @@ class LoopChoiceNode : public ChoiceNode {
int min_loop_iterations() const { return min_loop_iterations_; }
bool read_backward() override { return read_backward_; }
void Accept(NodeVisitor* visitor) override;
RegExpNode* FilterOneByte(int depth) override;
RegExpNode* FilterOneByte(int depth, JSRegExp::Flags flags) override;
private:
// AddAlternative is made private for loop nodes because alternatives

View File

@ -250,14 +250,10 @@ RegExpTree* RegExpParser::ParseDisjunction() {
return ReportError(RegExpError::kNothingToRepeat);
case '^': {
Advance();
if (builder->multiline()) {
builder->AddAssertion(zone()->New<RegExpAssertion>(
RegExpAssertion::START_OF_LINE, builder->flags()));
} else {
builder->AddAssertion(zone()->New<RegExpAssertion>(
RegExpAssertion::START_OF_INPUT, builder->flags()));
set_contains_anchor();
}
builder->AddAssertion(zone()->New<RegExpAssertion>(
builder->multiline() ? RegExpAssertion::START_OF_LINE
: RegExpAssertion::START_OF_INPUT));
set_contains_anchor();
continue;
}
case '$': {
@ -265,8 +261,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
RegExpAssertion::AssertionType assertion_type =
builder->multiline() ? RegExpAssertion::END_OF_LINE
: RegExpAssertion::END_OF_INPUT;
builder->AddAssertion(
zone()->New<RegExpAssertion>(assertion_type, builder->flags()));
builder->AddAssertion(zone()->New<RegExpAssertion>(assertion_type));
continue;
}
case '.': {
@ -283,7 +278,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
RegExpCharacterClass* cc =
zone()->New<RegExpCharacterClass>(zone(), ranges, builder->flags());
zone()->New<RegExpCharacterClass>(zone(), ranges);
builder->AddCharacterClass(cc);
break;
}
@ -305,13 +300,13 @@ RegExpTree* RegExpParser::ParseDisjunction() {
return ReportError(RegExpError::kEscapeAtEndOfPattern);
case 'b':
Advance(2);
builder->AddAssertion(zone()->New<RegExpAssertion>(
RegExpAssertion::BOUNDARY, builder->flags()));
builder->AddAssertion(
zone()->New<RegExpAssertion>(RegExpAssertion::BOUNDARY));
continue;
case 'B':
Advance(2);
builder->AddAssertion(zone()->New<RegExpAssertion>(
RegExpAssertion::NON_BOUNDARY, builder->flags()));
builder->AddAssertion(
zone()->New<RegExpAssertion>(RegExpAssertion::NON_BOUNDARY));
continue;
// AtomEscape ::
// CharacterClassEscape
@ -330,8 +325,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
zone()->New<ZoneList<CharacterRange>>(2, zone());
CharacterRange::AddClassEscape(
c, ranges, unicode() && builder->ignore_case(), zone());
RegExpCharacterClass* cc = zone()->New<RegExpCharacterClass>(
zone(), ranges, builder->flags());
RegExpCharacterClass* cc =
zone()->New<RegExpCharacterClass>(zone(), ranges);
builder->AddCharacterClass(cc);
break;
}
@ -346,8 +341,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
ZoneVector<char> name_2(zone());
if (ParsePropertyClassName(&name_1, &name_2)) {
if (AddPropertyClassRange(ranges, p == 'P', name_1, name_2)) {
RegExpCharacterClass* cc = zone()->New<RegExpCharacterClass>(
zone(), ranges, builder->flags());
RegExpCharacterClass* cc =
zone()->New<RegExpCharacterClass>(zone(), ranges);
builder->AddCharacterClass(cc);
break;
}
@ -605,68 +600,6 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
lookaround_type = RegExpLookaround::LOOKAHEAD;
subexpr_type = NEGATIVE_LOOKAROUND;
break;
case '-':
case 'i':
case 's':
case 'm': {
if (!FLAG_regexp_mode_modifiers) {
ReportError(RegExpError::kInvalidGroup);
return nullptr;
}
Advance();
bool flags_sense = true; // Switching on flags.
while (subexpr_type != GROUPING) {
switch (current()) {
case '-':
if (!flags_sense) {
ReportError(RegExpError::kMultipleFlagDashes);
return nullptr;
}
flags_sense = false;
Advance();
continue;
case 's':
case 'i':
case 'm': {
JSRegExp::Flags bit = JSRegExp::kUnicode;
if (current() == 'i') bit = JSRegExp::kIgnoreCase;
if (current() == 'm') bit = JSRegExp::kMultiline;
if (current() == 's') bit = JSRegExp::kDotAll;
if (((switch_on | switch_off) & bit) != 0) {
ReportError(RegExpError::kRepeatedFlag);
return nullptr;
}
if (flags_sense) {
switch_on |= bit;
} else {
switch_off |= bit;
}
Advance();
continue;
}
case ')': {
Advance();
state->builder()
->FlushText(); // Flush pending text using old flags.
// These (?i)-style flag switches don't put us in a subexpression
// at all, they just modify the flags in the rest of the current
// subexpression.
JSRegExp::Flags flags =
(state->builder()->flags() | switch_on) & ~switch_off;
state->builder()->set_flags(flags);
return state;
}
case ':':
Advance();
subexpr_type = GROUPING; // Will break us out of the outer loop.
continue;
default:
ReportError(RegExpError::kInvalidFlagGroup);
return nullptr;
}
}
break;
}
case '<':
Advance();
if (Next() == '=') {
@ -1493,7 +1426,7 @@ RegExpTree* RegExpParser::GetPropertySequence(const ZoneVector<char>& name_1) {
prefix_ranges->Add(CharacterRange::Singleton('#'), zone());
prefix_ranges->Add(CharacterRange::Singleton('*'), zone());
builder.AddCharacterClass(
zone()->New<RegExpCharacterClass>(zone(), prefix_ranges, flags));
zone()->New<RegExpCharacterClass>(zone(), prefix_ranges));
builder.AddCharacter(0xFE0F);
builder.AddCharacter(0x20E3);
return builder.ToRegExp();
@ -1506,13 +1439,13 @@ RegExpTree* RegExpParser::GetPropertySequence(const ZoneVector<char>& name_1) {
LookupPropertyValueName(UCHAR_EMOJI_MODIFIER_BASE, "Y", false,
modifier_base_ranges, zone());
builder.AddCharacterClass(
zone()->New<RegExpCharacterClass>(zone(), modifier_base_ranges, flags));
zone()->New<RegExpCharacterClass>(zone(), modifier_base_ranges));
ZoneList<CharacterRange>* modifier_ranges =
zone()->New<ZoneList<CharacterRange>>(2, zone());
LookupPropertyValueName(UCHAR_EMOJI_MODIFIER, "Y", false, modifier_ranges,
zone());
builder.AddCharacterClass(
zone()->New<RegExpCharacterClass>(zone(), modifier_ranges, flags));
zone()->New<RegExpCharacterClass>(zone(), modifier_ranges));
return builder.ToRegExp();
}
@ -1780,7 +1713,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
Advance();
RegExpCharacterClass::CharacterClassFlags character_class_flags;
if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED;
return zone()->New<RegExpCharacterClass>(zone(), ranges, builder->flags(),
return zone()->New<RegExpCharacterClass>(zone(), ranges,
character_class_flags);
}
@ -1874,7 +1807,7 @@ void RegExpBuilder::AddTrailSurrogate(base::uc16 trail_surrogate) {
surrogate_pair.Add(lead_surrogate, zone());
surrogate_pair.Add(trail_surrogate, zone());
RegExpAtom* atom =
zone()->New<RegExpAtom>(surrogate_pair.ToConstVector(), flags_);
zone()->New<RegExpAtom>(surrogate_pair.ToConstVector());
AddAtom(atom);
}
} else {
@ -1897,8 +1830,7 @@ void RegExpBuilder::FlushCharacters() {
FlushPendingSurrogate();
pending_empty_ = false;
if (characters_ != nullptr) {
RegExpTree* atom =
zone()->New<RegExpAtom>(characters_->ToConstVector(), flags_);
RegExpTree* atom = zone()->New<RegExpAtom>(characters_->ToConstVector());
characters_ = nullptr;
text_.Add(atom, zone());
LAST(ADD_ATOM);
@ -1972,8 +1904,7 @@ void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
void RegExpBuilder::AddCharacterClassForDesugaring(base::uc32 c) {
AddTerm(zone()->New<RegExpCharacterClass>(
zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c)),
flags_));
zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c))));
}
void RegExpBuilder::AddAtom(RegExpTree* term) {
@ -2083,11 +2014,11 @@ bool RegExpBuilder::AddQuantifierToAtom(
if (num_chars > 1) {
base::Vector<const base::uc16> prefix =
char_vector.SubVector(0, num_chars - 1);
text_.Add(zone()->New<RegExpAtom>(prefix, flags_), zone());
text_.Add(zone()->New<RegExpAtom>(prefix), zone());
char_vector = char_vector.SubVector(num_chars - 1, num_chars);
}
characters_ = nullptr;
atom = zone()->New<RegExpAtom>(char_vector, flags_);
atom = zone()->New<RegExpAtom>(char_vector);
FlushText();
} else if (text_.length() > 0) {
DCHECK(last_added_ == ADD_ATOM);

View File

@ -225,7 +225,7 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
ASSIGN_RETURN_ON_EXCEPTION(
isolate, atom_string,
isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
if (!IgnoreCase(flags) && !HasFewDifferentCharacters(atom_string)) {
RegExpImpl::AtomCompile(isolate, re, pattern, flags, atom_string);
has_been_compiled = true;
}
@ -802,7 +802,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
return false;
}
RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
is_one_byte);
if (compiler.optimize()) {
compiler.set_optimize(!TooMuchRegExpCode(isolate, pattern));
@ -821,7 +822,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
}
data->node = compiler.PreprocessRegExp(data, flags, is_one_byte);
data->error = AnalyzeRegExp(isolate, is_one_byte, data->node);
data->error = AnalyzeRegExp(isolate, is_one_byte, flags, data->node);
if (data->error != RegExpError::kNone) {
return false;
}

View File

@ -416,8 +416,6 @@
'es6/unicode-regexp-ignore-case': [FAIL],
'regress/regress-5036': [FAIL],
'es7/regexp-ui-word': [FAIL],
'regexp-modifiers-i18n': [FAIL],
'regexp-modifiers-autogenerated-i18n': [FAIL],
# Desugaring regexp property class relies on ICU. Anything goes as long as we
# don't crash.

View File

@ -1,81 +0,0 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers
// These regexps are just grepped out of the other tests we already have
// and the syntax changed from out-of-line i flag to inline i flag.
// These tests won't all run on the noi18n build of V8.
assertTrue(/(?i)\u00e5/u.test("\u00c5"));
assertTrue(/(?i)\u00e5/u.test("\u00e5"));
assertTrue(/(?i)\u00c5/u.test("\u00e5"));
assertTrue(/(?i)\u00c5/u.test("\u00c5"));
assertTrue(/(?i)\u212b/u.test("\u212b"));
assertFalse(/(?i)\u00df/u.test("SS"));
assertFalse(/(?i)\u1f8d/u.test("\u1f05\u03b9"));
assertTrue(/(?i)\u1f6b/u.test("\u1f63"));
assertTrue(/(?i)\u00e5/u.test("\u212b"));
assertTrue(/(?i)\u00e5/u.test("\u00c5"));
assertTrue(/(?i)\u00e5/u.test("\u00e5"));
assertTrue(/(?i)\u00e5/u.test("\u212b"));
assertTrue(/(?i)\u00c5/u.test("\u00e5"));
assertTrue(/(?i)\u00c5/u.test("\u212b"));
assertTrue(/(?i)\u00c5/u.test("\u00c5"));
assertTrue(/(?i)\u212b/u.test("\u00c5"));
assertTrue(/(?i)\u212b/u.test("\u00e5"));
assertTrue(/(?i)\u212b/u.test("\u212b"));
assertTrue(/(?i)\u{10400}/u.test("\u{10428}"));
assertTrue(/(?i)\ud801\udc00/u.test("\u{10428}"));
assertTrue(/(?i)[\u{10428}]/u.test("\u{10400}"));
assertTrue(/(?i)[\ud801\udc28]/u.test("\u{10400}"));
assertFalse(/(?i)\u00df/u.test("SS"));
assertFalse(/(?i)\u1f8d/u.test("\u1f05\u03b9"));
assertTrue(/(?i)\u1f8d/u.test("\u1f85"));
assertTrue(/(?i)\u1f6b/u.test("\u1f63"));
assertTrue(/(?i)\u00e5\u00e5\u00e5/u.test("\u212b\u00e5\u00c5"));
assertTrue(/(?i)AB\u{10400}/u.test("ab\u{10428}"));
// With inline (?i) under /u, U+017F and U+212A are expected to count as word
// characters, just like s/S and k/K: \w accepts them and \W rejects them.
assertTrue(/(?i)\w/u.test('\u017F'));
assertTrue(/(?i)\w/u.test('\u212A'));
assertFalse(/(?i)\W/u.test('\u017F'));
assertFalse(/(?i)\W/u.test('\u212A'));
assertFalse(/(?i)\W/u.test('s'));
assertFalse(/(?i)\W/u.test('S'));
assertFalse(/(?i)\W/u.test('K'));
assertFalse(/(?i)\W/u.test('k'));
// Same expectations when the escapes appear inside a character class.
assertTrue(/(?i)[\w]/u.test('\u017F'));
assertTrue(/(?i)[\w]/u.test('\u212A'));
assertFalse(/(?i)[\W]/u.test('\u017F'));
assertFalse(/(?i)[\W]/u.test('\u212A'));
assertFalse(/(?i)[\W]/u.test('s'));
assertFalse(/(?i)[\W]/u.test('S'));
assertFalse(/(?i)[\W]/u.test('K'));
assertFalse(/(?i)[\W]/u.test('k'));
// A one-character subject made of a word character has a word boundary (\b)
// somewhere, and no position where \B can match.
assertTrue(/(?i)\b/u.test('\u017F'));
assertTrue(/(?i)\b/u.test('\u212A'));
assertTrue(/(?i)\b/u.test('s'));
assertTrue(/(?i)\b/u.test('S'));
assertFalse(/(?i)\B/u.test('\u017F'));
assertFalse(/(?i)\B/u.test('\u212A'));
assertFalse(/(?i)\B/u.test('s'));
assertFalse(/(?i)\B/u.test('S'));
assertFalse(/(?i)\B/u.test('K'));
assertFalse(/(?i)\B/u.test('k'));
// With inline (?i) under /u, the case-specific property classes \p{Ll} and
// \p{Lu}, and their negations \P{Ll} and \P{Lu}, are all expected to match
// each of the four subjects below.
assertTrue(/(?i)\p{Ll}/u.test("a"));
assertTrue(/(?i)\p{Ll}/u.test("\u{118D4}"));
assertTrue(/(?i)\p{Ll}/u.test("A"));
assertTrue(/(?i)\p{Ll}/u.test("\u{118B4}"));
assertTrue(/(?i)\P{Ll}/u.test("a"));
assertTrue(/(?i)\P{Ll}/u.test("\u{118D4}"));
assertTrue(/(?i)\P{Ll}/u.test("A"));
assertTrue(/(?i)\P{Ll}/u.test("\u{118B4}"));
assertTrue(/(?i)\p{Lu}/u.test("a"));
assertTrue(/(?i)\p{Lu}/u.test("\u{118D4}"));
assertTrue(/(?i)\p{Lu}/u.test("A"));
assertTrue(/(?i)\p{Lu}/u.test("\u{118B4}"));
assertTrue(/(?i)\P{Lu}/u.test("a"));
assertTrue(/(?i)\P{Lu}/u.test("\u{118D4}"));
assertTrue(/(?i)\P{Lu}/u.test("A"));
assertTrue(/(?i)\P{Lu}/u.test("\u{118B4}"));

View File

@ -1,74 +0,0 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers
// These regexps are just grepped out of the other tests we already have
// and the syntax changed from out-of-line i flag to inline i flag.
assertFalse(/(?i)x(...)\1/.test("x\u03a3\u03c2\u03c3\u03c2\u03c3"));
assertTrue(/(?i)\u03a3((?:))\1\1x/.test("\u03c2x"), "backref-UC16-empty");
assertTrue(/(?i)x(?:...|(...))\1x/.test("x\u03a3\u03c2\u03c3x"));
assertTrue(/(?i)x(?:...|(...))\1x/.test("x\u03c2\u03c3\u039b\u03a3\u03c2\u03bbx"));
assertFalse(/(?i)\xc1/.test('fooA'), "quickcheck-uc16-pattern-ascii-subject");
assertFalse(/(?i)x(...)\1/.test("xaaaaa"), "backref-ASCII-short");
assertTrue(/(?i)x((?:))\1\1x/.test("xx"), "backref-ASCII-empty");
assertTrue(/(?i)x(?:...|(...))\1x/.test("xabcx"), "backref-ASCII-uncaptured");
assertTrue(/(?i)x(?:...|(...))\1x/.test("xabcABCx"), "backref-ASCII-backtrack");
assertFalse(/(?i)f/.test('b'));
assertFalse(/(?i)[abc]f/.test('x'));
assertFalse(/(?i)[abc]f/.test('xa'));
assertFalse(/(?i)[abc]</.test('x'));
assertFalse(/(?i)[abc]</.test('xa'));
assertFalse(/(?i)f[abc]/.test('x'));
assertFalse(/(?i)f[abc]/.test('xa'));
assertFalse(/(?i)<[abc]/.test('x'));
assertFalse(/(?i)<[abc]/.test('xa'));
assertFalse(/(?i)[\u00e5]/.test("\u212b"));
assertFalse(/(?i)[\u212b]/.test("\u00e5\u1234"));
assertFalse(/(?i)[\u212b]/.test("\u00e5"));
assertFalse(/(?i)\u{10400}/.test("\u{10428}"));
assertFalse(/(?i)[\u00e5]/.test("\u212b"));
assertFalse(/(?i)[\u212b]/.test("\u00e5\u1234"));
assertFalse(/(?i)[\u212b]/.test("\u00e5"));
assertFalse(/(?i)\u{10400}/.test("\u{10428}"));
assertTrue(/(?i)[@-A]/.test("a"));
assertTrue(/(?i)[@-A]/.test("A"));
assertTrue(/(?i)[@-A]/.test("@"));
assertFalse(/(?i)[¿-À]/.test('¾'));
assertTrue(/(?i)[¿-À]/.test('¿'));
assertTrue(/(?i)[¿-À]/.test('À'));
assertTrue(/(?i)[¿-À]/.test('à'));
assertFalse(/(?i)[¿-À]/.test('á'));
assertFalse(/(?i)[¿-À]/.test('Á'));
assertFalse(/(?i)[¿-À]/.test('Á'));
assertFalse(/(?i)[Ö-×]/.test('Õ'));
assertTrue(/(?i)[Ö-×]/.test('Ö'));
assertTrue(/(?i)[Ö-×]/.test('ö'));
assertTrue(/(?i)[Ö-×]/.test('×'));
assertFalse(/(?i)[Ö-×]/.test('Ø'));
assertTrue(/(?i)(a[\u1000A])+/.test('aa'));
assertTrue(/(?i)\u0178/.test('\u00ff'));
assertTrue(/(?i)\u039c/.test('\u00b5'));
assertTrue(/(?i)\u039c/.test('\u03bc'));
assertTrue(/(?i)\u00b5/.test('\u03bc'));
assertTrue(/(?i)[\u039b-\u039d]/.test('\u00b5'));
assertFalse(/(?i)[^\u039b-\u039d]/.test('\u00b5'));
// Inline (?m) at the start of the pattern: ^ and $ are expected to match next
// to a line break as well as at the ends of the subject.
assertTrue(/(?m)^bar/.test("bar"));
assertTrue(/(?m)^bar/.test("bar\nfoo"));
assertTrue(/(?m)^bar/.test("foo\nbar"));
assertTrue(/(?m)bar$/.test("bar"));
assertTrue(/(?m)bar$/.test("bar\nfoo"));
assertTrue(/(?m)bar$/.test("foo\nbar"));
// Negative controls: the literal "bxr" never occurs in the subjects.
assertFalse(/(?m)^bxr/.test("bar"));
assertFalse(/(?m)^bxr/.test("bar\nfoo"));
assertFalse(/(?m)^bxr/.test("foo\nbar"));
assertFalse(/(?m)bxr$/.test("bar"));
assertFalse(/(?m)bxr$/.test("bar\nfoo"));
assertFalse(/(?m)bxr$/.test("foo\nbar"));
// Whole-line patterns only need to match one line of a multi-line subject.
assertTrue(/(?m)^.*$/.test("\n"));
assertTrue(/(?m)^([()]|.)*$/.test("()\n()"));
assertTrue(/(?m)^([()]|.)*$/.test("()\n"));
assertTrue(/(?m)^[()]*$/.test("()\n."));

View File

@ -1,27 +0,0 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers
// S flag switches dotall mode on and off. Combine with i flag changes to test
// the parser.
test(/.(?s).(?i-s).a(?-i)a/);
test(/.(?s:.)(?i:.a)a/);
test(/.(?s).(?i-s).a(?-i)a/u);
test(/.(?s:.)(?i:.a)a/u);
// m flag makes no difference
test(/.(?sm).(?i-s).a(?-i)a/);
test(/.(?s:.)(?i:.a)a/);
test(/.(?sm).(?im-s).a(?m-i)a/u);
test(/.(?s:.)(?i:.a)a/u);
// Shared expectations for every pattern passed in by this file: `re` must
// accept the first three subjects and reject the last three. Subjects are
// probed in a fixed order, accepted ones first.
function test(re) {
  const accepted = ["...aa", ".\n.aa", ".\n.Aa"];
  const rejected = ["\n\n.Aa", ".\n\nAa", ".\n.AA"];
  for (const subject of accepted) {
    assertTrue(re.test(subject));
  }
  for (const subject of rejected) {
    assertFalse(re.test(subject));
  }
}

View File

@ -1,138 +0,0 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers
// These tests won't all run on the noi18n build of V8.
aa(/(a)(?i)\1/u);
aa(/([az])(?i)\1/u);
// Expects `re` to accept "aa" and "aA" and to reject "Aa" and "AA": only the
// second letter may differ in case.
function aa(re) {
  const cases = [
    ["aa", true],
    ["aA", true],
    ["Aa", false],
    ["AA", false],
  ];
  for (const [subject, shouldMatch] of cases) {
    const outcome = re.test(subject);
    if (shouldMatch) {
      assertTrue(outcome);
    } else {
      assertFalse(outcome);
    }
  }
}
aai(/(a)(?-i)\1/iu);
aai(/([az])(?-i)\1/iu);
// Expects `re` to accept "aa" and "AA" and to reject the mixed-case subjects
// "aA" and "Aa": both letters must have the same case.
function aai(re) {
  for (const subject of ["aa", "aA", "Aa", "AA"]) {
    const sameCase = subject[0] === subject[1];
    const outcome = re.test(subject);
    if (sameCase) {
      assertTrue(outcome);
    } else {
      assertFalse(outcome);
    }
  }
}
abcd(/a(b(?i)c)d/u);
abcd(/[aw]([bx](?i)[cy])[dz]/u);
// Probes `re` with all 16 upper/lower-case spellings of "abcd", in binary
// counting order with "a" as the most significant bit. Only "abcd" and "abCd"
// are expected to match: "c" is the one letter whose case may vary.
function abcd(re) {
  const letters = "abcd";
  const C_BIT = 2;
  for (let mask = 0; mask < 16; mask++) {
    let subject = "";
    for (let pos = 0; pos < letters.length; pos++) {
      const upper = (mask & (8 >> pos)) !== 0;
      subject += upper ? letters[pos].toUpperCase() : letters[pos];
    }
    // Any upper-cased letter other than "c" must make the match fail.
    if ((mask & ~C_BIT) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abcdei(/a(b(?-i)c)d/iu);
abcdei(/[aw]([bx](?-i)[cy])[dz]/iu);
// Probes `re` with all 16 upper/lower-case spellings of "abcd", in binary
// counting order with "a" as the most significant bit. A spelling is expected
// to match exactly when "c" is lower case; the case of a, b and d is free.
function abcdei(re) {
  const letters = "abcd";
  const C_BIT = 2;
  for (let mask = 0; mask < 16; mask++) {
    let subject = "";
    for (let pos = 0; pos < letters.length; pos++) {
      const upper = (mask & (8 >> pos)) !== 0;
      subject += upper ? letters[pos].toUpperCase() : letters[pos];
    }
    if ((mask & C_BIT) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abc(/a(?i:b)c/u);
abc(/[ax](?i:[by])[cz]/u);
// Probes `re` with all 8 upper/lower-case spellings of "abc", in binary
// counting order with "a" as the most significant bit. Only "abc" and "aBc"
// are expected to match: "b" is the one letter whose case may vary.
function abc(re) {
  const letters = "abc";
  const B_BIT = 2;
  for (let mask = 0; mask < 8; mask++) {
    let subject = "";
    for (let pos = 0; pos < letters.length; pos++) {
      const upper = (mask & (4 >> pos)) !== 0;
      subject += upper ? letters[pos].toUpperCase() : letters[pos];
    }
    if ((mask & ~B_BIT) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abci(/a(?-i:b)c/iu);
abci(/[ax](?-i:[by])[cz]/iu);
// Probes `re` with all 8 upper/lower-case spellings of "abc", in binary
// counting order with "a" as the most significant bit. A spelling is expected
// to match exactly when "b" is lower case; the case of a and c is free.
function abci(re) {
  const letters = "abc";
  const B_BIT = 2;
  for (let mask = 0; mask < 8; mask++) {
    let subject = "";
    for (let pos = 0; pos < letters.length; pos++) {
      const upper = (mask & (4 >> pos)) !== 0;
      subject += upper ? letters[pos].toUpperCase() : letters[pos];
    }
    if ((mask & B_BIT) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
// The following tests are taken from test/mjsunit/es7/regexp-ui-word.js but
// using inline syntax instead of the global /i flag.
assertTrue(/(?i)\w/u.test('\u017F'));
assertTrue(/(?i)\w/u.test('\u212A'));
assertFalse(/(?i)\W/u.test('\u017F'));
assertFalse(/(?i)\W/u.test('\u212A'));
assertFalse(/(?i)\W/u.test('s'));
assertFalse(/(?i)\W/u.test('S'));
assertFalse(/(?i)\W/u.test('K'));
assertFalse(/(?i)\W/u.test('k'));
assertTrue(/(?i)[\w]/u.test('\u017F'));
assertTrue(/(?i)[\w]/u.test('\u212A'));
assertFalse(/(?i)[\W]/u.test('\u017F'));
assertFalse(/(?i)[\W]/u.test('\u212A'));
assertFalse(/(?i)[\W]/u.test('s'));
assertFalse(/(?i)[\W]/u.test('S'));
assertFalse(/(?i)[\W]/u.test('K'));
assertFalse(/(?i)[\W]/u.test('k'));
assertTrue(/(?i)\b/u.test('\u017F'));
assertFalse(/(?i:)\b/u.test('\u017F'));
assertTrue(/(?i)\b/u.test('\u212A'));
assertFalse(/(?i:)\b/u.test('\u212A'));
assertTrue(/(?i)\b/u.test('s'));
assertTrue(/(?i)\b/u.test('S'));
assertFalse(/(?i)\B/u.test('\u017F'));
assertFalse(/(?i)\B/u.test('\u212A'));
assertFalse(/(?i)\B/u.test('s'));
assertFalse(/(?i)\B/u.test('S'));
assertFalse(/(?i)\B/u.test('K'));
assertFalse(/(?i)\B/u.test('k'));
assertEquals(["abcd\u017F", "\u017F"], /a.*?(.)(?i)\b/u.exec('abcd\u017F cd'));
assertEquals(["abcd\u212A", "\u212A"], /a.*?(.)(?i)\b/u.exec('abcd\u212A cd'));
assertEquals(["a\u017F", "\u017F"], /a.*?(?i:\B)(.)/u.exec('a\u017F '));
assertEquals(["a\u212A", "\u212A"], /a.*?(?i:\B)(.)/u.exec('a\u212A '));

View File

@ -1,146 +0,0 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers
aa(/(a)(?i)\1/);
aa(/([az])(?i)\1/);
// Checks that `re` matches "aa" and "aA" but neither "Aa" nor "AA"; the four
// subjects are probed in that order.
function aa(re) {
  const rejected = new Set(["Aa", "AA"]);
  ["aa", "aA", "Aa", "AA"].forEach((subject) => {
    const matched = re.test(subject);
    if (rejected.has(subject)) {
      assertFalse(matched);
    } else {
      assertTrue(matched);
    }
  });
}
aai(/(a)(?-i)\1/i);
aai(/([az])(?-i)\1/i);
// Checks that `re` matches the same-case subjects "aa" and "AA" and rejects
// the mixed-case subjects "aA" and "Aa".
function aai(re) {
  const accepted = new Set(["aa", "AA"]);
  ["aa", "aA", "Aa", "AA"].forEach((subject) => {
    const matched = re.test(subject);
    if (accepted.has(subject)) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  });
}
abcd(/a(b(?i)c)d/);
abcd(/[aw]([bx](?i)[cy])[dz]/);
// Checks `re` against every upper/lower-case spelling of "abcd". Only "abcd"
// and "abCd" are expected to match; the other fourteen must be rejected.
function abcd(re) {
  const accepted = new Set(["abcd", "abCd"]);
  const subjects = [
    "abcd", "abcD", "abCd", "abCD",
    "aBcd", "aBcD", "aBCd", "aBCD",
    "Abcd", "AbcD", "AbCd", "AbCD",
    "ABcd", "ABcD", "ABCd", "ABCD",
  ];
  for (const subject of subjects) {
    const matched = re.test(subject);
    if (accepted.has(subject)) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
abcdei(/a(b(?-i)c)d/i);
abcdei(/[aw]([bx](?-i)[cy])[dz]/i);
// Checks `re` against every upper/lower-case spelling of "abcd". A spelling is
// expected to match exactly when its third letter is a lower-case "c".
function abcdei(re) {
  const subjects = [
    "abcd", "abcD", "abCd", "abCD",
    "aBcd", "aBcD", "aBCd", "aBCD",
    "Abcd", "AbcD", "AbCd", "AbCD",
    "ABcd", "ABcD", "ABCd", "ABCD",
  ];
  for (const subject of subjects) {
    const matched = re.test(subject);
    if (subject[2] === "c") {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
abc(/a(?i:b)c/);
abc(/[ax](?i:[by])[cz]/);
// Checks `re` against every upper/lower-case spelling of "abc". Only "abc" and
// "aBc" are expected to match; the other six must be rejected.
function abc(re) {
  const accepted = new Set(["abc", "aBc"]);
  const subjects = ["abc", "abC", "aBc", "aBC", "Abc", "AbC", "ABc", "ABC"];
  for (const subject of subjects) {
    const matched = re.test(subject);
    if (accepted.has(subject)) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
abci(/a(?-i:b)c/i);
abci(/[ax](?-i:[by])[cz]/i);
// Checks `re` against every upper/lower-case spelling of "abc". A spelling is
// expected to match exactly when its middle letter is a lower-case "b".
function abci(re) {
  const subjects = ["abc", "abC", "aBc", "aBC", "Abc", "AbC", "ABc", "ABC"];
  for (const subject of subjects) {
    const matched = re.test(subject);
    if (subject[1] === "b") {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
// Malformed modifier groups must be rejected at construction time. For both
// the i and the m flag: an unterminated "(?x:" group, a doubled dash, and the
// same flag switched on and off in one group.
assertThrows(() => new RegExp("foo(?i:"));
assertThrows(() => new RegExp("foo(?--i)"));
assertThrows(() => new RegExp("foo(?i-i)"));
assertThrows(() => new RegExp("foo(?m:"));
assertThrows(() => new RegExp("foo(?--m)"));
assertThrows(() => new RegExp("foo(?m-m)"));
// Multiline is switched on by (?m) and off again by (?-m) inside one pattern,
// so the inner ^ and $ are parsed with multiline on and the outer pair with it
// off. The only accepted subject is "\n.\n"; replacing either newline with a
// space, or adding a character at either end, must fail.
var re = /^\s(?m)^.$\s(?-m)$/;
assertTrue(re.test("\n.\n"));
assertFalse(re.test(" .\n"));
assertFalse(re.test("\n. "));
assertFalse(re.test(" . "));
assertFalse(re.test("_\n.\n"));
assertFalse(re.test("\n.\n_"));
assertFalse(re.test("_\n.\n_"));
// Without the /u flag, these results expect U+017F and U+212A to behave as
// non-word characters even after (?i): \b is found right after "d", and \B
// first matches between the code point and the following space.
assertEquals(["abcd", "d"], /a.*?(.)(?i)\b/.exec('abcd\u017F cd'));
assertEquals(["abcd", "d"], /a.*?(.)(?i)\b/.exec('abcd\u212A cd'));
assertEquals(["a\u017F ", " "], /a.*?(?i)\B(.)/.exec('a\u017F '));
assertEquals(["a\u212A ", " "], /a.*?(?i)\B(.)/.exec('a\u212A '));
// Nested flags.
// Each pattern spells "abcdefg" with modifier groups arranged so that b, d and
// f sit inside an ignore-case scope while a, c, e and g do not. The first two
// patterns start case-sensitive and switch i on; the last two carry the /i
// flag and wrap everything in (?-i:...).
var res = [
/^a(?i:b(?-i:c(?i:d)e)f)g$/,
/^a(?i:b(?-i)c(?i)d(?-i)e(?i)f)g$/,
/^(?-i:a(?i:b(?-i:c(?i:d)e)f)g)$/i,
/^(?-i:a(?i:b(?-i)c(?i)d(?-i)e(?i)f)g)$/i,
];
for (var idx = 0; idx < res.length; idx++) {
var re = res[idx];
// Enumerate all 128 upper/lower-case spellings: bit k of i upper-cases the
// k-th letter of "abcdefg".
for (var i = 0; i < 128; i++) {
var s = (i & 1) ? "A" : "a";
s += (i & 2) ? "B" : "b";
s += (i & 4) ? "C" : "c";
s += (i & 8) ? "D" : "d";
s += (i & 16) ? "E" : "e";
s += (i & 32) ? "F" : "f";
s += (i & 64) ? "G" : "g";
// Bits 1, 4, 16 and 64 select A, C, E and G; upper-casing any of those
// letters must make the match fail. The subject doubles as the message.
if ((i & (1 | 4 | 16 | 64)) != 0) {
assertFalse(re.test(s), s);
} else {
assertTrue(re.test(s), s);
}
}
}