RegExp: Add the ability to switch flags on and off within the regexp.
This is a reland of https://chromium-review.googlesource.com/c/v8/v8/+/752522 which was itself a reupload of https://chromium-review.googlesource.com/c/v8/v8/+/571746 where reviews took place.

R=yangguo@chromium.org

Bug:
Change-Id: Ia4dbdd6e9a362e272753ff10dc66b7f72d81ee20
Reviewed-on: https://chromium-review.googlesource.com/753596
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Erik Corry <erikcorry@chromium.org>
Cr-Commit-Position: refs/heads/master@{#49129}
This commit is contained in:
parent 6ebb5641ff
commit e83ee94565
@ -959,6 +959,7 @@ DEFINE_BOOL(serialization_statistics, false,
|
||||
|
||||
// Regexp
|
||||
DEFINE_BOOL(regexp_optimization, true, "generate optimized regexp code")
|
||||
DEFINE_BOOL(regexp_mode_modifiers, false, "enable inline flags in regexp.")
|
||||
|
||||
// Testing flags test/cctest/test-{flags,api,serialization}.cc
|
||||
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
|
||||
|
@ -132,17 +132,17 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
|
||||
|
||||
bool has_been_compiled = false;
|
||||
|
||||
if (parse_result.simple && !(flags & JSRegExp::kIgnoreCase) &&
|
||||
!(flags & JSRegExp::kSticky) &&
|
||||
if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
|
||||
pattern->length() <= kPatternTooShortForBoyerMoore) {
|
||||
// Parse-tree is a single atom that is equal to the pattern.
|
||||
AtomCompile(re, pattern, flags, pattern);
|
||||
has_been_compiled = true;
|
||||
} else if (parse_result.tree->IsAtom() && !(flags & JSRegExp::kIgnoreCase) &&
|
||||
!(flags & JSRegExp::kSticky) && parse_result.capture_count == 0) {
|
||||
} else if (parse_result.tree->IsAtom() && !IsSticky(flags) &&
|
||||
parse_result.capture_count == 0) {
|
||||
RegExpAtom* atom = parse_result.tree->AsAtom();
|
||||
Vector<const uc16> atom_pattern = atom->data();
|
||||
if (atom_pattern.length() <= kPatternTooShortForBoyerMoore) {
|
||||
if (!IgnoreCase(atom->flags()) &&
|
||||
atom_pattern.length() <= kPatternTooShortForBoyerMoore) {
|
||||
Handle<String> atom_string;
|
||||
ASSIGN_RETURN_ON_EXCEPTION(
|
||||
isolate, atom_string,
|
||||
@ -622,7 +622,7 @@ RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
|
||||
}
|
||||
}
|
||||
|
||||
DCHECK_NE(0, regexp->GetFlags() & JSRegExp::kGlobal);
|
||||
DCHECK(IsGlobal(regexp->GetFlags()));
|
||||
if (!interpreted) {
|
||||
register_array_size_ =
|
||||
Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
|
||||
@ -653,8 +653,7 @@ RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
|
||||
}
|
||||
|
||||
int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) {
|
||||
if ((regexp_->GetFlags() & JSRegExp::kUnicode) != 0 &&
|
||||
last_index + 1 < subject_->length() &&
|
||||
if (IsUnicode(regexp_->GetFlags()) && last_index + 1 < subject_->length() &&
|
||||
unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
|
||||
unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) {
|
||||
// Advance over the surrogate pair.
|
||||
@ -916,7 +915,7 @@ class FrequencyCollator {
|
||||
class RegExpCompiler {
|
||||
public:
|
||||
RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
|
||||
JSRegExp::Flags flags, bool is_one_byte);
|
||||
bool is_one_byte);
|
||||
|
||||
int AllocateRegister() {
|
||||
if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
|
||||
@ -968,13 +967,6 @@ class RegExpCompiler {
|
||||
|
||||
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
|
||||
|
||||
inline bool ignore_case() { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
|
||||
inline bool unicode() { return (flags_ & JSRegExp::kUnicode) != 0; }
|
||||
// Both unicode and ignore_case flags are set. We need to use ICU to find
|
||||
// the closure over case equivalents.
|
||||
inline bool needs_unicode_case_equivalents() {
|
||||
return unicode() && ignore_case();
|
||||
}
|
||||
inline bool one_byte() { return one_byte_; }
|
||||
inline bool optimize() { return optimize_; }
|
||||
inline void set_optimize(bool value) { optimize_ = value; }
|
||||
@ -1004,7 +996,6 @@ class RegExpCompiler {
|
||||
std::vector<RegExpNode*>* work_list_;
|
||||
int recursion_depth_;
|
||||
RegExpMacroAssembler* macro_assembler_;
|
||||
JSRegExp::Flags flags_;
|
||||
bool one_byte_;
|
||||
bool reg_exp_too_big_;
|
||||
bool limiting_recursion_;
|
||||
@ -1036,13 +1027,12 @@ static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
|
||||
// Attempts to compile the regexp using an Irregexp code generator. Returns
|
||||
// a fixed array or a null handle depending on whether it succeeded.
|
||||
RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
|
||||
JSRegExp::Flags flags, bool one_byte)
|
||||
bool one_byte)
|
||||
: next_register_(2 * (capture_count + 1)),
|
||||
unicode_lookaround_stack_register_(kNoRegister),
|
||||
unicode_lookaround_position_register_(kNoRegister),
|
||||
work_list_(nullptr),
|
||||
recursion_depth_(0),
|
||||
flags_(flags),
|
||||
one_byte_(one_byte),
|
||||
reg_exp_too_big_(false),
|
||||
limiting_recursion_(false),
|
||||
@ -2503,7 +2493,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
QuickCheckDetails::Position* pos =
|
||||
details->positions(characters_filled_in);
|
||||
uc16 c = quarks[i];
|
||||
if (compiler->ignore_case()) {
|
||||
if (elm.atom()->ignore_case()) {
|
||||
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
||||
int length = GetCaseIndependentLetters(isolate, c,
|
||||
compiler->one_byte(), chars);
|
||||
@ -2711,18 +2701,16 @@ class VisitMarker {
|
||||
NodeInfo* info_;
|
||||
};
|
||||
|
||||
|
||||
RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
RegExpNode* SeqRegExpNode::FilterOneByte(int depth) {
|
||||
if (info()->replacement_calculated) return replacement();
|
||||
if (depth < 0) return this;
|
||||
DCHECK(!info()->visited);
|
||||
VisitMarker marker(info());
|
||||
return FilterSuccessor(depth - 1, ignore_case);
|
||||
return FilterSuccessor(depth - 1);
|
||||
}
|
||||
|
||||
|
||||
RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
|
||||
RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
|
||||
RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
|
||||
RegExpNode* next = on_success_->FilterOneByte(depth - 1);
|
||||
if (next == nullptr) return set_replacement(nullptr);
|
||||
on_success_ = next;
|
||||
return set_replacement(this);
|
||||
@ -2745,8 +2733,7 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
RegExpNode* TextNode::FilterOneByte(int depth) {
|
||||
if (info()->replacement_calculated) return replacement();
|
||||
if (depth < 0) return this;
|
||||
DCHECK(!info()->visited);
|
||||
@ -2759,7 +2746,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
for (int j = 0; j < quarks.length(); j++) {
|
||||
uint16_t c = quarks[j];
|
||||
if (c <= String::kMaxOneByteCharCode) continue;
|
||||
if (!ignore_case) return set_replacement(nullptr);
|
||||
if (!IgnoreCase(elm.atom()->flags())) return set_replacement(nullptr);
|
||||
// Here, we need to check for characters whose upper and lower cases
|
||||
// are outside the Latin-1 range.
|
||||
uint16_t converted = unibrow::Latin1::ConvertNonLatin1ToLatin1(c);
|
||||
@ -2781,42 +2768,41 @@ RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
ranges->at(0).from() == 0 &&
|
||||
ranges->at(0).to() >= String::kMaxOneByteCharCode) {
|
||||
// This will be handled in a later filter.
|
||||
if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
|
||||
if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
|
||||
continue;
|
||||
return set_replacement(nullptr);
|
||||
}
|
||||
} else {
|
||||
if (range_count == 0 ||
|
||||
ranges->at(0).from() > String::kMaxOneByteCharCode) {
|
||||
// This will be handled in a later filter.
|
||||
if (ignore_case && RangesContainLatin1Equivalents(ranges)) continue;
|
||||
if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
|
||||
continue;
|
||||
return set_replacement(nullptr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return FilterSuccessor(depth - 1, ignore_case);
|
||||
return FilterSuccessor(depth - 1);
|
||||
}
|
||||
|
||||
|
||||
RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
RegExpNode* LoopChoiceNode::FilterOneByte(int depth) {
|
||||
if (info()->replacement_calculated) return replacement();
|
||||
if (depth < 0) return this;
|
||||
if (info()->visited) return this;
|
||||
{
|
||||
VisitMarker marker(info());
|
||||
|
||||
RegExpNode* continue_replacement =
|
||||
continue_node_->FilterOneByte(depth - 1, ignore_case);
|
||||
RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth - 1);
|
||||
// If we can't continue after the loop then there is no sense in doing the
|
||||
// loop.
|
||||
if (continue_replacement == nullptr) return set_replacement(nullptr);
|
||||
}
|
||||
|
||||
return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
|
||||
return ChoiceNode::FilterOneByte(depth - 1);
|
||||
}
|
||||
|
||||
|
||||
RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
RegExpNode* ChoiceNode::FilterOneByte(int depth) {
|
||||
if (info()->replacement_calculated) return replacement();
|
||||
if (depth < 0) return this;
|
||||
if (info()->visited) return this;
|
||||
@ -2836,8 +2822,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
RegExpNode* survivor = nullptr;
|
||||
for (int i = 0; i < choice_count; i++) {
|
||||
GuardedAlternative alternative = alternatives_->at(i);
|
||||
RegExpNode* replacement =
|
||||
alternative.node()->FilterOneByte(depth - 1, ignore_case);
|
||||
RegExpNode* replacement = alternative.node()->FilterOneByte(depth - 1);
|
||||
DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
|
||||
if (replacement != nullptr) {
|
||||
alternatives_->at(i).set_node(replacement);
|
||||
@ -2857,7 +2842,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
|
||||
for (int i = 0; i < choice_count; i++) {
|
||||
RegExpNode* replacement =
|
||||
alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
|
||||
alternatives_->at(i).node()->FilterOneByte(depth - 1);
|
||||
if (replacement != nullptr) {
|
||||
alternatives_->at(i).set_node(replacement);
|
||||
new_alternatives->Add(alternatives_->at(i), zone());
|
||||
@ -2867,9 +2852,7 @@ RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
|
||||
bool ignore_case) {
|
||||
RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
|
||||
if (info()->replacement_calculated) return replacement();
|
||||
if (depth < 0) return this;
|
||||
if (info()->visited) return this;
|
||||
@ -2877,12 +2860,12 @@ RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth,
|
||||
// Alternative 0 is the negative lookahead, alternative 1 is what comes
|
||||
// afterwards.
|
||||
RegExpNode* node = alternatives_->at(1).node();
|
||||
RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
|
||||
RegExpNode* replacement = node->FilterOneByte(depth - 1);
|
||||
if (replacement == nullptr) return set_replacement(nullptr);
|
||||
alternatives_->at(1).set_node(replacement);
|
||||
|
||||
RegExpNode* neg_node = alternatives_->at(0).node();
|
||||
RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
|
||||
RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
|
||||
// If the negative lookahead is always going to fail then
|
||||
// we don't need to check it.
|
||||
if (neg_replacement == nullptr) return set_replacement(replacement);
|
||||
@ -3199,6 +3182,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
|
||||
TextElement elm = elements()->at(i);
|
||||
int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
|
||||
if (elm.text_type() == TextElement::ATOM) {
|
||||
if (SkipPass(pass, elm.atom()->ignore_case())) continue;
|
||||
Vector<const uc16> quarks = elm.atom()->data();
|
||||
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
|
||||
if (first_element_checked && i == 0 && j == 0) continue;
|
||||
@ -3254,9 +3238,7 @@ int TextNode::Length() {
|
||||
return elm.cp_offset() + elm.length();
|
||||
}
|
||||
|
||||
|
||||
bool TextNode::SkipPass(int int_pass, bool ignore_case) {
|
||||
TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
|
||||
bool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
|
||||
if (ignore_case) {
|
||||
return pass == SIMPLE_CHARACTER_MATCH;
|
||||
} else {
|
||||
@ -3264,31 +3246,32 @@ bool TextNode::SkipPass(int int_pass, bool ignore_case) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
|
||||
ZoneList<CharacterRange>* ranges,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success) {
|
||||
RegExpNode* on_success,
|
||||
JSRegExp::Flags flags) {
|
||||
DCHECK_NOT_NULL(ranges);
|
||||
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
|
||||
elms->Add(TextElement::CharClass(new (zone) RegExpCharacterClass(ranges)),
|
||||
elms->Add(
|
||||
TextElement::CharClass(new (zone) RegExpCharacterClass(ranges, flags)),
|
||||
zone);
|
||||
return new (zone) TextNode(elms, read_backward, on_success);
|
||||
}
|
||||
|
||||
|
||||
TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
|
||||
CharacterRange trail,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success) {
|
||||
RegExpNode* on_success,
|
||||
JSRegExp::Flags flags) {
|
||||
ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
|
||||
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
|
||||
ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
|
||||
elms->Add(
|
||||
TextElement::CharClass(new (zone) RegExpCharacterClass(lead_ranges)),
|
||||
elms->Add(TextElement::CharClass(
|
||||
new (zone) RegExpCharacterClass(lead_ranges, flags)),
|
||||
zone);
|
||||
elms->Add(
|
||||
TextElement::CharClass(new (zone) RegExpCharacterClass(trail_ranges)),
|
||||
elms->Add(TextElement::CharClass(
|
||||
new (zone) RegExpCharacterClass(trail_ranges, flags)),
|
||||
zone);
|
||||
return new (zone) TextNode(elms, read_backward, on_success);
|
||||
}
|
||||
@ -3323,27 +3306,15 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
// check that now.
|
||||
if (trace->characters_preloaded() == 1) {
|
||||
for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
|
||||
if (!SkipPass(pass, compiler->ignore_case())) {
|
||||
TextEmitPass(compiler,
|
||||
static_cast<TextEmitPassType>(pass),
|
||||
true,
|
||||
trace,
|
||||
false,
|
||||
&bound_checked_to);
|
||||
}
|
||||
TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
|
||||
false, &bound_checked_to);
|
||||
}
|
||||
first_elt_done = true;
|
||||
}
|
||||
|
||||
for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
|
||||
if (!SkipPass(pass, compiler->ignore_case())) {
|
||||
TextEmitPass(compiler,
|
||||
static_cast<TextEmitPassType>(pass),
|
||||
false,
|
||||
trace,
|
||||
first_elt_done,
|
||||
&bound_checked_to);
|
||||
}
|
||||
TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
|
||||
first_elt_done, &bound_checked_to);
|
||||
}
|
||||
|
||||
Trace successor_trace(*trace);
|
||||
@ -3386,11 +3357,15 @@ void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
|
||||
TextElement elm = elements()->at(i);
|
||||
if (elm.text_type() == TextElement::CHAR_CLASS) {
|
||||
RegExpCharacterClass* cc = elm.char_class();
|
||||
if (IgnoreCase(cc->flags()) &&
|
||||
!NeedsUnicodeCaseEquivalents(cc->flags())) {
|
||||
// None of the standard character classes is different in the case
|
||||
// independent case and it slows us down if we don't know that.
|
||||
if (cc->is_standard(zone())) continue;
|
||||
ZoneList<CharacterRange>* ranges = cc->ranges(zone());
|
||||
CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
|
||||
CharacterRange::AddCaseEquivalents(isolate, zone(), ranges,
|
||||
is_one_byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4353,9 +4328,9 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
RecursionCheck rc(compiler);
|
||||
|
||||
DCHECK_EQ(start_reg_ + 1, end_reg_);
|
||||
if (compiler->ignore_case()) {
|
||||
if (IgnoreCase(flags_)) {
|
||||
assembler->CheckNotBackReferenceIgnoreCase(
|
||||
start_reg_, read_backward(), compiler->unicode(), trace->backtrack());
|
||||
start_reg_, read_backward(), IsUnicode(flags_), trace->backtrack());
|
||||
} else {
|
||||
assembler->CheckNotBackReference(start_reg_, read_backward(),
|
||||
trace->backtrack());
|
||||
@ -4364,7 +4339,7 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
|
||||
|
||||
// Check that the back reference does not end inside a surrogate pair.
|
||||
if (compiler->unicode() && !compiler->one_byte()) {
|
||||
if (IsUnicode(flags_) && !compiler->one_byte()) {
|
||||
assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
|
||||
}
|
||||
on_success()->Emit(compiler, trace);
|
||||
@ -4887,24 +4862,24 @@ void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {
|
||||
(*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);
|
||||
}
|
||||
|
||||
|
||||
void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
|
||||
ZoneList<CharacterRange>* bmp = splitter->bmp();
|
||||
if (bmp == nullptr) return;
|
||||
JSRegExp::Flags default_flags = JSRegExp::Flags();
|
||||
result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
|
||||
compiler->zone(), bmp, compiler->read_backward(), on_success)));
|
||||
compiler->zone(), bmp, compiler->read_backward(), on_success,
|
||||
default_flags)));
|
||||
}
|
||||
|
||||
|
||||
void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
RegExpNode* on_success,
|
||||
UnicodeRangeSplitter* splitter) {
|
||||
ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();
|
||||
if (non_bmp == nullptr) return;
|
||||
DCHECK(compiler->unicode());
|
||||
DCHECK(!compiler->one_byte());
|
||||
Zone* zone = compiler->zone();
|
||||
JSRegExp::Flags default_flags = JSRegExp::Flags();
|
||||
CharacterRange::Canonicalize(non_bmp);
|
||||
for (int i = 0; i < non_bmp->length(); i++) {
|
||||
// Match surrogate pair.
|
||||
@ -4924,7 +4899,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
GuardedAlternative(TextNode::CreateForSurrogatePair(
|
||||
zone, CharacterRange::Singleton(from_l),
|
||||
CharacterRange::Range(from_t, to_t), compiler->read_backward(),
|
||||
on_success)));
|
||||
on_success, default_flags)));
|
||||
} else {
|
||||
if (from_t != kTrailSurrogateStart) {
|
||||
// Add [from_l][from_t-\udfff]
|
||||
@ -4932,7 +4907,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
GuardedAlternative(TextNode::CreateForSurrogatePair(
|
||||
zone, CharacterRange::Singleton(from_l),
|
||||
CharacterRange::Range(from_t, kTrailSurrogateEnd),
|
||||
compiler->read_backward(), on_success)));
|
||||
compiler->read_backward(), on_success, default_flags)));
|
||||
from_l++;
|
||||
}
|
||||
if (to_t != kTrailSurrogateEnd) {
|
||||
@ -4941,7 +4916,7 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
GuardedAlternative(TextNode::CreateForSurrogatePair(
|
||||
zone, CharacterRange::Singleton(to_l),
|
||||
CharacterRange::Range(kTrailSurrogateStart, to_t),
|
||||
compiler->read_backward(), on_success)));
|
||||
compiler->read_backward(), on_success, default_flags)));
|
||||
to_l--;
|
||||
}
|
||||
if (from_l <= to_l) {
|
||||
@ -4950,49 +4925,47 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
GuardedAlternative(TextNode::CreateForSurrogatePair(
|
||||
zone, CharacterRange::Range(from_l, to_l),
|
||||
CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
|
||||
compiler->read_backward(), on_success)));
|
||||
compiler->read_backward(), on_success, default_flags)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
|
||||
RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
|
||||
ZoneList<CharacterRange>* match, RegExpNode* on_success,
|
||||
bool read_backward) {
|
||||
ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
|
||||
JSRegExp::Flags flags) {
|
||||
Zone* zone = compiler->zone();
|
||||
RegExpNode* match_node = TextNode::CreateForCharacterRanges(
|
||||
zone, match, read_backward, on_success);
|
||||
zone, match, read_backward, on_success, flags);
|
||||
int stack_register = compiler->UnicodeLookaroundStackRegister();
|
||||
int position_register = compiler->UnicodeLookaroundPositionRegister();
|
||||
RegExpLookaround::Builder lookaround(false, match_node, stack_register,
|
||||
position_register);
|
||||
RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
|
||||
zone, lookbehind, !read_backward, lookaround.on_match_success());
|
||||
zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);
|
||||
return lookaround.ForMatch(negative_match);
|
||||
}
|
||||
|
||||
|
||||
RegExpNode* MatchAndNegativeLookaroundInReadDirection(
|
||||
RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
|
||||
ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
|
||||
bool read_backward) {
|
||||
bool read_backward, JSRegExp::Flags flags) {
|
||||
Zone* zone = compiler->zone();
|
||||
int stack_register = compiler->UnicodeLookaroundStackRegister();
|
||||
int position_register = compiler->UnicodeLookaroundPositionRegister();
|
||||
RegExpLookaround::Builder lookaround(false, on_success, stack_register,
|
||||
position_register);
|
||||
RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
|
||||
zone, lookahead, read_backward, lookaround.on_match_success());
|
||||
zone, lookahead, read_backward, lookaround.on_match_success(), flags);
|
||||
return TextNode::CreateForCharacterRanges(
|
||||
zone, match, read_backward, lookaround.ForMatch(negative_match));
|
||||
zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
|
||||
}
|
||||
|
||||
|
||||
void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
RegExpNode* on_success,
|
||||
UnicodeRangeSplitter* splitter) {
|
||||
JSRegExp::Flags default_flags = JSRegExp::Flags();
|
||||
ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates();
|
||||
if (lead_surrogates == nullptr) return;
|
||||
Zone* zone = compiler->zone();
|
||||
@ -5005,20 +4978,22 @@ void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
// Reading backward. Assert that reading forward, there is no trail
|
||||
// surrogate, and then backward match the lead surrogate.
|
||||
match = NegativeLookaroundAgainstReadDirectionAndMatch(
|
||||
compiler, trail_surrogates, lead_surrogates, on_success, true);
|
||||
compiler, trail_surrogates, lead_surrogates, on_success, true,
|
||||
default_flags);
|
||||
} else {
|
||||
// Reading forward. Forward match the lead surrogate and assert that
|
||||
// no trail surrogate follows.
|
||||
match = MatchAndNegativeLookaroundInReadDirection(
|
||||
compiler, lead_surrogates, trail_surrogates, on_success, false);
|
||||
compiler, lead_surrogates, trail_surrogates, on_success, false,
|
||||
default_flags);
|
||||
}
|
||||
result->AddAlternative(GuardedAlternative(match));
|
||||
}
|
||||
|
||||
|
||||
void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
RegExpNode* on_success,
|
||||
UnicodeRangeSplitter* splitter) {
|
||||
JSRegExp::Flags default_flags = JSRegExp::Flags();
|
||||
ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates();
|
||||
if (trail_surrogates == nullptr) return;
|
||||
Zone* zone = compiler->zone();
|
||||
@ -5031,12 +5006,14 @@ void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
|
||||
// Reading backward. Backward match the trail surrogate and assert that no
|
||||
// lead surrogate precedes it.
|
||||
match = MatchAndNegativeLookaroundInReadDirection(
|
||||
compiler, trail_surrogates, lead_surrogates, on_success, true);
|
||||
compiler, trail_surrogates, lead_surrogates, on_success, true,
|
||||
default_flags);
|
||||
} else {
|
||||
// Reading forward. Assert that reading backward, there is no lead
|
||||
// surrogate, and then forward match the trail surrogate.
|
||||
match = NegativeLookaroundAgainstReadDirectionAndMatch(
|
||||
compiler, lead_surrogates, trail_surrogates, on_success, false);
|
||||
compiler, lead_surrogates, trail_surrogates, on_success, false,
|
||||
default_flags);
|
||||
}
|
||||
result->AddAlternative(GuardedAlternative(match));
|
||||
}
|
||||
@ -5052,7 +5029,9 @@ RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
|
||||
// the associated trail surrogate.
|
||||
ZoneList<CharacterRange>* range = CharacterRange::List(
|
||||
zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
|
||||
return TextNode::CreateForCharacterRanges(zone, range, false, on_success);
|
||||
JSRegExp::Flags default_flags = JSRegExp::Flags();
|
||||
return TextNode::CreateForCharacterRanges(zone, range, false, on_success,
|
||||
default_flags);
|
||||
}
|
||||
|
||||
void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
|
||||
@ -5093,10 +5072,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
||||
set_.Canonicalize();
|
||||
Zone* zone = compiler->zone();
|
||||
ZoneList<CharacterRange>* ranges = this->ranges(zone);
|
||||
if (compiler->needs_unicode_case_equivalents()) {
|
||||
if (NeedsUnicodeCaseEquivalents(flags_)) {
|
||||
AddUnicodeCaseEquivalents(ranges, zone);
|
||||
}
|
||||
if (compiler->unicode() && !compiler->one_byte() &&
|
||||
if (IsUnicode(flags_) && !compiler->one_byte() &&
|
||||
!contains_split_surrogate()) {
|
||||
if (is_negated()) {
|
||||
ZoneList<CharacterRange>* negated =
|
||||
@ -5105,9 +5084,10 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
||||
ranges = negated;
|
||||
}
|
||||
if (ranges->length() == 0) {
|
||||
JSRegExp::Flags default_flags = JSRegExp::Flags();
|
||||
ranges->Add(CharacterRange::Everything(), zone);
|
||||
RegExpCharacterClass* fail =
|
||||
new (zone) RegExpCharacterClass(ranges, NEGATED);
|
||||
new (zone) RegExpCharacterClass(ranges, default_flags, NEGATED);
|
||||
return new (zone) TextNode(fail, compiler->read_backward(), on_success);
|
||||
}
|
||||
if (standard_type() == '*') {
|
||||
@ -5182,10 +5162,12 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
// i is length or it is the index of an atom.
|
||||
if (i == length) break;
|
||||
int first_atom = i;
|
||||
JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
|
||||
i++;
|
||||
while (i < length) {
|
||||
RegExpTree* alternative = alternatives->at(i);
|
||||
if (!alternative->IsAtom()) break;
|
||||
if (alternative->AsAtom()->flags() != flags) break;
|
||||
i++;
|
||||
}
|
||||
// Sort atoms to get ones with common prefixes together.
|
||||
@ -5197,7 +5179,7 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
DCHECK_LT(first_atom, alternatives->length());
|
||||
DCHECK_LE(i, alternatives->length());
|
||||
DCHECK_LE(first_atom, i);
|
||||
if (compiler->ignore_case()) {
|
||||
if (IgnoreCase(flags)) {
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
|
||||
compiler->isolate()->regexp_macro_assembler_canonicalize();
|
||||
auto compare_closure =
|
||||
@ -5229,7 +5211,8 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
RegExpAtom* atom = alternative->AsAtom();
|
||||
RegExpAtom* const atom = alternative->AsAtom();
|
||||
JSRegExp::Flags flags = atom->flags();
|
||||
unibrow::uchar common_prefix = atom->data().at(0);
|
||||
int first_with_prefix = i;
|
||||
int prefix_length = atom->length();
|
||||
@ -5237,10 +5220,11 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
while (i < length) {
|
||||
alternative = alternatives->at(i);
|
||||
if (!alternative->IsAtom()) break;
|
||||
atom = alternative->AsAtom();
|
||||
RegExpAtom* const atom = alternative->AsAtom();
|
||||
if (atom->flags() != flags) break;
|
||||
unibrow::uchar new_prefix = atom->data().at(0);
|
||||
if (new_prefix != common_prefix) {
|
||||
if (!compiler->ignore_case()) break;
|
||||
if (!IgnoreCase(flags)) break;
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
|
||||
compiler->isolate()->regexp_macro_assembler_canonicalize();
|
||||
new_prefix = Canonical(canonicalize, new_prefix);
|
||||
@ -5257,7 +5241,7 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
// common prefix if the terms were similar or presorted in the input.
|
||||
// Find out how long the common prefix is.
|
||||
int run_length = i - first_with_prefix;
|
||||
atom = alternatives->at(first_with_prefix)->AsAtom();
|
||||
RegExpAtom* const atom = alternatives->at(first_with_prefix)->AsAtom();
|
||||
for (int j = 1; j < run_length && prefix_length > 1; j++) {
|
||||
RegExpAtom* old_atom =
|
||||
alternatives->at(j + first_with_prefix)->AsAtom();
|
||||
@ -5268,8 +5252,8 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
}
|
||||
}
|
||||
}
|
||||
RegExpAtom* prefix =
|
||||
new (zone) RegExpAtom(atom->data().SubVector(0, prefix_length));
|
||||
RegExpAtom* prefix = new (zone)
|
||||
RegExpAtom(atom->data().SubVector(0, prefix_length), flags);
|
||||
ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
|
||||
pair->Add(prefix, zone);
|
||||
ZoneList<RegExpTree*>* suffixes =
|
||||
@ -5282,7 +5266,8 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
suffixes->Add(new (zone) RegExpEmpty(), zone);
|
||||
} else {
|
||||
RegExpTree* suffix = new (zone) RegExpAtom(
|
||||
old_atom->data().SubVector(prefix_length, old_atom->length()));
|
||||
old_atom->data().SubVector(prefix_length, old_atom->length()),
|
||||
flags);
|
||||
suffixes->Add(suffix, zone);
|
||||
}
|
||||
}
|
||||
@ -5305,7 +5290,6 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
||||
Zone* zone = compiler->zone();
|
||||
ZoneList<RegExpTree*>* alternatives = this->alternatives();
|
||||
int length = alternatives->length();
|
||||
const bool unicode = compiler->unicode();
|
||||
|
||||
int write_posn = 0;
|
||||
int i = 0;
|
||||
@ -5316,24 +5300,28 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
RegExpAtom* atom = alternative->AsAtom();
|
||||
RegExpAtom* const atom = alternative->AsAtom();
|
||||
if (atom->length() != 1) {
|
||||
alternatives->at(write_posn++) = alternatives->at(i);
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
DCHECK_IMPLIES(unicode,
|
||||
JSRegExp::Flags flags = atom->flags();
|
||||
DCHECK_IMPLIES(IsUnicode(flags),
|
||||
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
||||
bool contains_trail_surrogate =
|
||||
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
||||
int first_in_run = i;
|
||||
i++;
|
||||
// Find a run of single-character atom alternatives that have identical
|
||||
// flags (case independence and unicode-ness).
|
||||
while (i < length) {
|
||||
alternative = alternatives->at(i);
|
||||
if (!alternative->IsAtom()) break;
|
||||
atom = alternative->AsAtom();
|
||||
RegExpAtom* const atom = alternative->AsAtom();
|
||||
if (atom->length() != 1) break;
|
||||
DCHECK_IMPLIES(unicode,
|
||||
if (atom->flags() != flags) break;
|
||||
DCHECK_IMPLIES(IsUnicode(flags),
|
||||
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
||||
contains_trail_surrogate |=
|
||||
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
||||
@ -5349,12 +5337,12 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
||||
DCHECK_EQ(old_atom->length(), 1);
|
||||
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
|
||||
}
|
||||
RegExpCharacterClass::Flags flags;
|
||||
if (unicode && contains_trail_surrogate) {
|
||||
flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
|
||||
RegExpCharacterClass::CharacterClassFlags character_class_flags;
|
||||
if (IsUnicode(flags) && contains_trail_surrogate) {
|
||||
character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
|
||||
}
|
||||
alternatives->at(write_posn++) =
|
||||
new (zone) RegExpCharacterClass(ranges, flags);
|
||||
new (zone) RegExpCharacterClass(ranges, flags, character_class_flags);
|
||||
} else {
|
||||
// Just copy any trivial alternatives.
|
||||
for (int j = first_in_run; j < i; j++) {
|
||||
@ -5586,8 +5574,9 @@ namespace {
|
||||
// \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
|
||||
RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success,
|
||||
RegExpAssertion::AssertionType type) {
|
||||
DCHECK(compiler->needs_unicode_case_equivalents());
|
||||
RegExpAssertion::AssertionType type,
|
||||
JSRegExp::Flags flags) {
|
||||
DCHECK(NeedsUnicodeCaseEquivalents(flags));
|
||||
Zone* zone = compiler->zone();
|
||||
ZoneList<CharacterRange>* word_range =
|
||||
new (zone) ZoneList<CharacterRange>(2, zone);
|
||||
@ -5605,13 +5594,13 @@ RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
|
||||
RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
|
||||
stack_register, position_register);
|
||||
RegExpNode* backward = TextNode::CreateForCharacterRanges(
|
||||
zone, word_range, true, lookbehind.on_match_success());
|
||||
zone, word_range, true, lookbehind.on_match_success(), flags);
|
||||
// Look to the right.
|
||||
RegExpLookaround::Builder lookahead(lookahead_for_word,
|
||||
lookbehind.ForMatch(backward),
|
||||
stack_register, position_register);
|
||||
RegExpNode* forward = TextNode::CreateForCharacterRanges(
|
||||
zone, word_range, false, lookahead.on_match_success());
|
||||
zone, word_range, false, lookahead.on_match_success(), flags);
|
||||
result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
|
||||
}
|
||||
return result;
|
||||
@ -5629,13 +5618,14 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
|
||||
case START_OF_INPUT:
|
||||
return AssertionNode::AtStart(on_success);
|
||||
case BOUNDARY:
|
||||
return compiler->needs_unicode_case_equivalents()
|
||||
? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY)
|
||||
return NeedsUnicodeCaseEquivalents(flags_)
|
||||
? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
|
||||
flags_)
|
||||
: AssertionNode::AtBoundary(on_success);
|
||||
case NON_BOUNDARY:
|
||||
return compiler->needs_unicode_case_equivalents()
|
||||
return NeedsUnicodeCaseEquivalents(flags_)
|
||||
? BoundaryAssertionAsLookaround(compiler, on_success,
|
||||
NON_BOUNDARY)
|
||||
NON_BOUNDARY, flags_)
|
||||
: AssertionNode::AtNonBoundary(on_success);
|
||||
case END_OF_INPUT:
|
||||
return AssertionNode::AtEnd(on_success);
|
||||
@ -5651,7 +5641,9 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
|
||||
ZoneList<CharacterRange>* newline_ranges =
|
||||
new(zone) ZoneList<CharacterRange>(3, zone);
|
||||
CharacterRange::AddClassEscape('n', newline_ranges, false, zone);
|
||||
RegExpCharacterClass* newline_atom = new (zone) RegExpCharacterClass('n');
|
||||
JSRegExp::Flags default_flags = JSRegExp::Flags();
|
||||
RegExpCharacterClass* newline_atom =
|
||||
new (zone) RegExpCharacterClass('n', default_flags);
|
||||
TextNode* newline_matcher = new (zone) TextNode(
|
||||
newline_atom, false, ActionNode::PositiveSubmatchSuccess(
|
||||
stack_pointer_register, position_register,
|
||||
@ -5681,7 +5673,7 @@ RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success) {
|
||||
return new (compiler->zone())
|
||||
BackReferenceNode(RegExpCapture::StartRegister(index()),
|
||||
RegExpCapture::EndRegister(index()),
|
||||
RegExpCapture::EndRegister(index()), flags_,
|
||||
compiler->read_backward(), on_success);
|
||||
}
|
||||
|
||||
@ -6337,9 +6329,7 @@ void TextNode::CalculateOffsets() {
|
||||
|
||||
|
||||
void Analysis::VisitText(TextNode* that) {
|
||||
if (ignore_case()) {
|
||||
that->MakeCaseIndependent(isolate(), is_one_byte_);
|
||||
}
|
||||
EnsureAnalyzed(that->on_success());
|
||||
if (!has_failed()) {
|
||||
that->CalculateOffsets();
|
||||
@ -6450,7 +6440,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
|
||||
return;
|
||||
}
|
||||
uc16 character = atom->data()[j];
|
||||
if (bm->compiler()->ignore_case()) {
|
||||
if (IgnoreCase(atom->flags())) {
|
||||
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
|
||||
int length = GetCaseIndependentLetters(
|
||||
isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
|
||||
@ -6602,9 +6592,9 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) {
|
||||
target->Accept(this);
|
||||
}
|
||||
|
||||
|
||||
RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success) {
|
||||
RegExpNode* on_success,
|
||||
JSRegExp::Flags flags) {
|
||||
// If the regexp matching starts within a surrogate pair, step back
|
||||
// to the lead surrogate and start matching from there.
|
||||
DCHECK(!compiler->read_backward());
|
||||
@ -6619,11 +6609,11 @@ RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
|
||||
int stack_register = compiler->UnicodeLookaroundStackRegister();
|
||||
int position_register = compiler->UnicodeLookaroundPositionRegister();
|
||||
RegExpNode* step_back = TextNode::CreateForCharacterRanges(
|
||||
zone, lead_surrogates, true, on_success);
|
||||
zone, lead_surrogates, true, on_success, flags);
|
||||
RegExpLookaround::Builder builder(true, step_back, stack_register,
|
||||
position_register);
|
||||
RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
|
||||
zone, trail_surrogates, false, builder.on_match_success());
|
||||
zone, trail_surrogates, false, builder.on_match_success(), flags);
|
||||
|
||||
optional_step_back->AddAlternative(
|
||||
GuardedAlternative(builder.ForMatch(match_trail)));
|
||||
@ -6640,12 +6630,10 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
|
||||
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
|
||||
return IrregexpRegExpTooBig(isolate);
|
||||
}
|
||||
bool ignore_case = flags & JSRegExp::kIgnoreCase;
|
||||
bool is_sticky = flags & JSRegExp::kSticky;
|
||||
bool is_global = flags & JSRegExp::kGlobal;
|
||||
bool is_unicode = flags & JSRegExp::kUnicode;
|
||||
RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
|
||||
is_one_byte);
|
||||
bool is_sticky = IsSticky(flags);
|
||||
bool is_global = IsGlobal(flags);
|
||||
bool is_unicode = IsUnicode(flags);
|
||||
RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
|
||||
|
||||
if (compiler.optimize()) compiler.set_optimize(!TooMuchRegExpCode(pattern));
|
||||
|
||||
@ -6673,9 +6661,11 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
|
||||
if (!is_start_anchored && !is_sticky) {
|
||||
// Add a .*? at the beginning, outside the body capture, unless
|
||||
// this expression is anchored at the beginning or sticky.
|
||||
JSRegExp::Flags default_flags = JSRegExp::Flags();
|
||||
RegExpNode* loop_node = RegExpQuantifier::ToNode(
|
||||
0, RegExpTree::kInfinity, false, new (zone) RegExpCharacterClass('*'),
|
||||
&compiler, captured_body, data->contains_anchor);
|
||||
0, RegExpTree::kInfinity, false,
|
||||
new (zone) RegExpCharacterClass('*', default_flags), &compiler,
|
||||
captured_body, data->contains_anchor);
|
||||
|
||||
if (data->contains_anchor) {
|
||||
// Unroll loop once, to take care of the case that might start
|
||||
@ -6683,26 +6673,27 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
|
||||
ChoiceNode* first_step_node = new(zone) ChoiceNode(2, zone);
|
||||
first_step_node->AddAlternative(GuardedAlternative(captured_body));
|
||||
first_step_node->AddAlternative(GuardedAlternative(new (zone) TextNode(
|
||||
new (zone) RegExpCharacterClass('*'), false, loop_node)));
|
||||
new (zone) RegExpCharacterClass('*', default_flags), false,
|
||||
loop_node)));
|
||||
node = first_step_node;
|
||||
} else {
|
||||
node = loop_node;
|
||||
}
|
||||
}
|
||||
if (is_one_byte) {
|
||||
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
|
||||
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
|
||||
// Do it again to propagate the new nodes to places where they were not
|
||||
// put because they had not been calculated yet.
|
||||
if (node != nullptr) {
|
||||
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
|
||||
node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
|
||||
}
|
||||
} else if (compiler.unicode() && (is_global || is_sticky)) {
|
||||
node = OptionallyStepBackToLeadSurrogate(&compiler, node);
|
||||
} else if (is_unicode && (is_global || is_sticky)) {
|
||||
node = OptionallyStepBackToLeadSurrogate(&compiler, node, flags);
|
||||
}
|
||||
|
||||
if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
|
||||
data->node = node;
|
||||
Analysis analysis(isolate, flags, is_one_byte);
|
||||
Analysis analysis(isolate, is_one_byte);
|
||||
analysis.EnsureAnalyzed(node);
|
||||
if (analysis.has_failed()) {
|
||||
const char* error_message = analysis.error_message();
|
||||
|
@ -21,6 +21,36 @@ class RegExpNode;
|
||||
class RegExpTree;
|
||||
class BoyerMooreLookahead;
|
||||
|
||||
// Returns true when the JSRegExp::kIgnoreCase bit is set in |flags|.
inline bool IgnoreCase(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kIgnoreCase) != 0;
|
||||
}
|
||||
|
||||
// Returns true when the JSRegExp::kUnicode bit is set in |flags|.
inline bool IsUnicode(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kUnicode) != 0;
|
||||
}
|
||||
|
||||
// Returns true when the JSRegExp::kSticky bit is set in |flags|.
inline bool IsSticky(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kSticky) != 0;
|
||||
}
|
||||
|
||||
// Returns true when the JSRegExp::kGlobal bit is set in |flags|.
inline bool IsGlobal(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kGlobal) != 0;
|
||||
}
|
||||
|
||||
// Returns true when the JSRegExp::kDotAll bit is set in |flags|.
inline bool DotAll(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kDotAll) != 0;
|
||||
}
|
||||
|
||||
// Returns true when the JSRegExp::kMultiline bit is set in |flags|.
inline bool Multiline(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kMultiline) != 0;
|
||||
}
|
||||
|
||||
// Returns true only when |flags| has both the unicode and the ignore-case
// bits set, i.e. when IsUnicode(flags) and IgnoreCase(flags) both hold.
inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
|
||||
// Both unicode and ignore_case flags are set. We need to use ICU to find
|
||||
// the closure over case equivalents.
|
||||
return IsUnicode(flags) && IgnoreCase(flags);
|
||||
}
|
||||
|
||||
class RegExpImpl {
|
||||
public:
|
||||
// Whether V8 is compiled with native regexp support or not.
|
||||
@ -495,9 +525,7 @@ class RegExpNode: public ZoneObject {
|
||||
// If we know that the input is one-byte then there are some nodes that can
|
||||
// never match. This method returns a node that can be substituted for
|
||||
// itself, or nullptr if the node can never match.
|
||||
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) {
|
||||
return this;
|
||||
}
|
||||
virtual RegExpNode* FilterOneByte(int depth) { return this; }
|
||||
// Helper for FilterOneByte.
|
||||
RegExpNode* replacement() {
|
||||
DCHECK(info()->replacement_calculated);
|
||||
@ -569,7 +597,7 @@ class SeqRegExpNode: public RegExpNode {
|
||||
: RegExpNode(on_success->zone()), on_success_(on_success) { }
|
||||
RegExpNode* on_success() { return on_success_; }
|
||||
void set_on_success(RegExpNode* node) { on_success_ = node; }
|
||||
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterOneByte(int depth);
|
||||
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start) {
|
||||
on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
|
||||
@ -577,7 +605,7 @@ class SeqRegExpNode: public RegExpNode {
|
||||
}
|
||||
|
||||
protected:
|
||||
RegExpNode* FilterSuccessor(int depth, bool ignore_case);
|
||||
RegExpNode* FilterSuccessor(int depth);
|
||||
|
||||
private:
|
||||
RegExpNode* on_success_;
|
||||
@ -682,13 +710,15 @@ class TextNode: public SeqRegExpNode {
|
||||
static TextNode* CreateForCharacterRanges(Zone* zone,
|
||||
ZoneList<CharacterRange>* ranges,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success);
|
||||
RegExpNode* on_success,
|
||||
JSRegExp::Flags flags);
|
||||
// Create TextNode for a surrogate pair with a range given for the
|
||||
// lead and the trail surrogate each.
|
||||
static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
|
||||
CharacterRange trail,
|
||||
bool read_backward,
|
||||
RegExpNode* on_success);
|
||||
RegExpNode* on_success,
|
||||
JSRegExp::Flags flags);
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
|
||||
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
|
||||
@ -705,7 +735,7 @@ class TextNode: public SeqRegExpNode {
|
||||
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
|
||||
BoyerMooreLookahead* bm, bool not_at_start);
|
||||
void CalculateOffsets();
|
||||
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterOneByte(int depth);
|
||||
|
||||
private:
|
||||
enum TextEmitPassType {
|
||||
@ -715,7 +745,7 @@ class TextNode: public SeqRegExpNode {
|
||||
CASE_CHARACTER_MATCH, // Case-independent single character check.
|
||||
CHARACTER_CLASS_MATCH // Character class.
|
||||
};
|
||||
static bool SkipPass(int pass, bool ignore_case);
|
||||
static bool SkipPass(TextEmitPassType pass, bool ignore_case);
|
||||
static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
|
||||
static const int kLastPass = CHARACTER_CLASS_MATCH;
|
||||
void TextEmitPass(RegExpCompiler* compiler,
|
||||
@ -779,11 +809,12 @@ class AssertionNode: public SeqRegExpNode {
|
||||
|
||||
class BackReferenceNode: public SeqRegExpNode {
|
||||
public:
|
||||
BackReferenceNode(int start_reg, int end_reg, bool read_backward,
|
||||
RegExpNode* on_success)
|
||||
BackReferenceNode(int start_reg, int end_reg, JSRegExp::Flags flags,
|
||||
bool read_backward, RegExpNode* on_success)
|
||||
: SeqRegExpNode(on_success),
|
||||
start_reg_(start_reg),
|
||||
end_reg_(end_reg),
|
||||
flags_(flags),
|
||||
read_backward_(read_backward) {}
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
int start_register() { return start_reg_; }
|
||||
@ -805,6 +836,7 @@ class BackReferenceNode: public SeqRegExpNode {
|
||||
private:
|
||||
int start_reg_;
|
||||
int end_reg_;
|
||||
JSRegExp::Flags flags_;
|
||||
bool read_backward_;
|
||||
};
|
||||
|
||||
@ -929,7 +961,7 @@ class ChoiceNode: public RegExpNode {
|
||||
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
|
||||
return true;
|
||||
}
|
||||
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterOneByte(int depth);
|
||||
virtual bool read_backward() { return false; }
|
||||
|
||||
protected:
|
||||
@ -1001,7 +1033,7 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
|
||||
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
|
||||
return !is_first;
|
||||
}
|
||||
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterOneByte(int depth);
|
||||
};
|
||||
|
||||
|
||||
@ -1028,7 +1060,7 @@ class LoopChoiceNode: public ChoiceNode {
|
||||
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
|
||||
virtual bool read_backward() { return read_backward_; }
|
||||
virtual void Accept(NodeVisitor* visitor);
|
||||
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
|
||||
virtual RegExpNode* FilterOneByte(int depth);
|
||||
|
||||
private:
|
||||
// AddAlternative is made private for loop nodes because alternatives
|
||||
@ -1435,11 +1467,8 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
// +-------+ +------------+
|
||||
class Analysis: public NodeVisitor {
|
||||
public:
|
||||
Analysis(Isolate* isolate, JSRegExp::Flags flags, bool is_one_byte)
|
||||
: isolate_(isolate),
|
||||
flags_(flags),
|
||||
is_one_byte_(is_one_byte),
|
||||
error_message_(nullptr) {}
|
||||
Analysis(Isolate* isolate, bool is_one_byte)
|
||||
: isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
|
||||
void EnsureAnalyzed(RegExpNode* node);
|
||||
|
||||
#define DECLARE_VISIT(Type) \
|
||||
@ -1459,12 +1488,8 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
|
||||
|
||||
Isolate* isolate() const { return isolate_; }
|
||||
|
||||
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
|
||||
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
|
||||
|
||||
private:
|
||||
Isolate* isolate_;
|
||||
JSRegExp::Flags flags_;
|
||||
bool is_one_byte_;
|
||||
const char* error_message_;
|
||||
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define V8_REGEXP_REGEXP_AST_H_
|
||||
|
||||
#include "src/objects.h"
|
||||
#include "src/objects/js-regexp.h"
|
||||
#include "src/objects/string.h"
|
||||
#include "src/utils.h"
|
||||
#include "src/zone/zone-containers.h"
|
||||
@ -144,7 +145,7 @@ class CharacterSet final BASE_EMBEDDED {
|
||||
explicit CharacterSet(ZoneList<CharacterRange>* ranges)
|
||||
: ranges_(ranges), standard_set_type_(0) {}
|
||||
ZoneList<CharacterRange>* ranges(Zone* zone);
|
||||
uc16 standard_set_type() { return standard_set_type_; }
|
||||
uc16 standard_set_type() const { return standard_set_type_; }
|
||||
void set_standard_set_type(uc16 special_set_type) {
|
||||
standard_set_type_ = special_set_type;
|
||||
}
|
||||
@ -274,7 +275,8 @@ class RegExpAssertion final : public RegExpTree {
|
||||
BOUNDARY,
|
||||
NON_BOUNDARY
|
||||
};
|
||||
explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {}
|
||||
RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
|
||||
: assertion_type_(type), flags_(flags) {}
|
||||
void* Accept(RegExpVisitor* visitor, void* data) override;
|
||||
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
||||
RegExpAssertion* AsAssertion() override;
|
||||
@ -286,7 +288,8 @@ class RegExpAssertion final : public RegExpTree {
|
||||
AssertionType assertion_type() { return assertion_type_; }
|
||||
|
||||
private:
|
||||
AssertionType assertion_type_;
|
||||
const AssertionType assertion_type_;
|
||||
const JSRegExp::Flags flags_;
|
||||
};
|
||||
|
||||
|
||||
@ -300,12 +303,18 @@ class RegExpCharacterClass final : public RegExpTree {
|
||||
NEGATED = 1 << 0,
|
||||
CONTAINS_SPLIT_SURROGATE = 1 << 1,
|
||||
};
|
||||
typedef base::Flags<Flag> Flags;
|
||||
typedef base::Flags<Flag> CharacterClassFlags;
|
||||
|
||||
explicit RegExpCharacterClass(ZoneList<CharacterRange>* ranges,
|
||||
Flags flags = Flags())
|
||||
: set_(ranges), flags_(flags) {}
|
||||
explicit RegExpCharacterClass(uc16 type) : set_(type), flags_(0) {}
|
||||
RegExpCharacterClass(
|
||||
ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
|
||||
CharacterClassFlags character_class_flags = CharacterClassFlags())
|
||||
: set_(ranges),
|
||||
flags_(flags),
|
||||
character_class_flags_(character_class_flags) {}
|
||||
RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
|
||||
: set_(type),
|
||||
flags_(flags),
|
||||
character_class_flags_(CharacterClassFlags()) {}
|
||||
void* Accept(RegExpVisitor* visitor, void* data) override;
|
||||
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
||||
RegExpCharacterClass* AsCharacterClass() override;
|
||||
@ -332,22 +341,25 @@ class RegExpCharacterClass final : public RegExpTree {
|
||||
// D : non-ASCII digit
|
||||
// . : non-newline
|
||||
// * : All characters, for advancing unanchored regexp
|
||||
uc16 standard_type() { return set_.standard_set_type(); }
|
||||
uc16 standard_type() const { return set_.standard_set_type(); }
|
||||
ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
|
||||
bool is_negated() const { return (flags_ & NEGATED) != 0; }
|
||||
bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
|
||||
JSRegExp::Flags flags() const { return flags_; }
|
||||
bool contains_split_surrogate() const {
|
||||
return (flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
|
||||
return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
|
||||
}
|
||||
|
||||
private:
|
||||
CharacterSet set_;
|
||||
const Flags flags_;
|
||||
const JSRegExp::Flags flags_;
|
||||
const CharacterClassFlags character_class_flags_;
|
||||
};
|
||||
|
||||
|
||||
class RegExpAtom final : public RegExpTree {
|
||||
public:
|
||||
explicit RegExpAtom(Vector<const uc16> data) : data_(data) {}
|
||||
explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags)
|
||||
: data_(data), flags_(flags) {}
|
||||
void* Accept(RegExpVisitor* visitor, void* data) override;
|
||||
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
||||
RegExpAtom* AsAtom() override;
|
||||
@ -358,9 +370,12 @@ class RegExpAtom final : public RegExpTree {
|
||||
void AppendToText(RegExpText* text, Zone* zone) override;
|
||||
Vector<const uc16> data() { return data_; }
|
||||
int length() { return data_.length(); }
|
||||
JSRegExp::Flags flags() const { return flags_; }
|
||||
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
|
||||
|
||||
private:
|
||||
Vector<const uc16> data_;
|
||||
const JSRegExp::Flags flags_;
|
||||
};
|
||||
|
||||
|
||||
@ -532,9 +547,10 @@ class RegExpLookaround final : public RegExpTree {
|
||||
|
||||
class RegExpBackReference final : public RegExpTree {
|
||||
public:
|
||||
RegExpBackReference() : capture_(nullptr), name_(nullptr) {}
|
||||
explicit RegExpBackReference(RegExpCapture* capture)
|
||||
: capture_(capture), name_(nullptr) {}
|
||||
explicit RegExpBackReference(JSRegExp::Flags flags)
|
||||
: capture_(nullptr), name_(nullptr), flags_(flags) {}
|
||||
RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags)
|
||||
: capture_(capture), name_(nullptr), flags_(flags) {}
|
||||
void* Accept(RegExpVisitor* visitor, void* data) override;
|
||||
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
|
||||
RegExpBackReference* AsBackReference() override;
|
||||
@ -552,6 +568,7 @@ class RegExpBackReference final : public RegExpTree {
|
||||
private:
|
||||
RegExpCapture* capture_;
|
||||
const ZoneVector<uc16>* name_;
|
||||
const JSRegExp::Flags flags_;
|
||||
};
|
||||
|
||||
|
||||
|
@ -31,10 +31,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
|
||||
named_back_references_(nullptr),
|
||||
in_(in),
|
||||
current_(kEndMarker),
|
||||
dotall_(flags & JSRegExp::kDotAll),
|
||||
ignore_case_(flags & JSRegExp::kIgnoreCase),
|
||||
multiline_(flags & JSRegExp::kMultiline),
|
||||
unicode_(flags & JSRegExp::kUnicode),
|
||||
top_level_flags_(flags),
|
||||
next_pos_(0),
|
||||
captures_started_(0),
|
||||
capture_count_(0),
|
||||
@ -44,7 +41,6 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
|
||||
is_scanned_for_captures_(false),
|
||||
has_named_captures_(false),
|
||||
failed_(false) {
|
||||
DCHECK_IMPLIES(dotall(), FLAG_harmony_regexp_dotall);
|
||||
Advance();
|
||||
}
|
||||
|
||||
@ -183,7 +179,7 @@ RegExpTree* RegExpParser::ParsePattern() {
|
||||
RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
// Used to store current state while parsing subexpressions.
|
||||
RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
|
||||
0, nullptr, ignore_case(), unicode(), zone());
|
||||
0, nullptr, top_level_flags_, zone());
|
||||
RegExpParserState* state = &initial_state;
|
||||
// Cache the builder in a local variable for quick access.
|
||||
RegExpBuilder* builder = initial_state.builder();
|
||||
@ -253,12 +249,12 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
return ReportError(CStrVector("Nothing to repeat"));
|
||||
case '^': {
|
||||
Advance();
|
||||
if (multiline()) {
|
||||
builder->AddAssertion(
|
||||
new (zone()) RegExpAssertion(RegExpAssertion::START_OF_LINE));
|
||||
if (builder->multiline()) {
|
||||
builder->AddAssertion(new (zone()) RegExpAssertion(
|
||||
RegExpAssertion::START_OF_LINE, builder->flags()));
|
||||
} else {
|
||||
builder->AddAssertion(
|
||||
new (zone()) RegExpAssertion(RegExpAssertion::START_OF_INPUT));
|
||||
builder->AddAssertion(new (zone()) RegExpAssertion(
|
||||
RegExpAssertion::START_OF_INPUT, builder->flags()));
|
||||
set_contains_anchor();
|
||||
}
|
||||
continue;
|
||||
@ -266,9 +262,10 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
case '$': {
|
||||
Advance();
|
||||
RegExpAssertion::AssertionType assertion_type =
|
||||
multiline() ? RegExpAssertion::END_OF_LINE
|
||||
builder->multiline() ? RegExpAssertion::END_OF_LINE
|
||||
: RegExpAssertion::END_OF_INPUT;
|
||||
builder->AddAssertion(new (zone()) RegExpAssertion(assertion_type));
|
||||
builder->AddAssertion(
|
||||
new (zone()) RegExpAssertion(assertion_type, builder->flags()));
|
||||
continue;
|
||||
}
|
||||
case '.': {
|
||||
@ -276,7 +273,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
ZoneList<CharacterRange>* ranges =
|
||||
new (zone()) ZoneList<CharacterRange>(2, zone());
|
||||
|
||||
if (dotall()) {
|
||||
if (builder->dotall()) {
|
||||
// Everything.
|
||||
DCHECK(FLAG_harmony_regexp_dotall);
|
||||
CharacterRange::AddClassEscape('*', ranges, false, zone());
|
||||
@ -285,78 +282,18 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
CharacterRange::AddClassEscape('.', ranges, false, zone());
|
||||
}
|
||||
|
||||
RegExpCharacterClass* cc = new (zone()) RegExpCharacterClass(ranges);
|
||||
RegExpCharacterClass* cc =
|
||||
new (zone()) RegExpCharacterClass(ranges, builder->flags());
|
||||
builder->AddCharacterClass(cc);
|
||||
break;
|
||||
}
|
||||
case '(': {
|
||||
SubexpressionType subexpr_type = CAPTURE;
|
||||
RegExpLookaround::Type lookaround_type = state->lookaround_type();
|
||||
bool is_named_capture = false;
|
||||
Advance();
|
||||
if (current() == '?') {
|
||||
switch (Next()) {
|
||||
case ':':
|
||||
subexpr_type = GROUPING;
|
||||
Advance(2);
|
||||
break;
|
||||
case '=':
|
||||
lookaround_type = RegExpLookaround::LOOKAHEAD;
|
||||
subexpr_type = POSITIVE_LOOKAROUND;
|
||||
Advance(2);
|
||||
break;
|
||||
case '!':
|
||||
lookaround_type = RegExpLookaround::LOOKAHEAD;
|
||||
subexpr_type = NEGATIVE_LOOKAROUND;
|
||||
Advance(2);
|
||||
break;
|
||||
case '<':
|
||||
Advance();
|
||||
if (FLAG_harmony_regexp_lookbehind) {
|
||||
if (Next() == '=') {
|
||||
subexpr_type = POSITIVE_LOOKAROUND;
|
||||
lookaround_type = RegExpLookaround::LOOKBEHIND;
|
||||
Advance(2);
|
||||
break;
|
||||
} else if (Next() == '!') {
|
||||
subexpr_type = NEGATIVE_LOOKAROUND;
|
||||
lookaround_type = RegExpLookaround::LOOKBEHIND;
|
||||
Advance(2);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (FLAG_harmony_regexp_named_captures) {
|
||||
has_named_captures_ = true;
|
||||
is_named_capture = true;
|
||||
Advance();
|
||||
break;
|
||||
}
|
||||
// Fall through.
|
||||
default:
|
||||
return ReportError(CStrVector("Invalid group"));
|
||||
}
|
||||
}
|
||||
|
||||
const ZoneVector<uc16>* capture_name = nullptr;
|
||||
if (subexpr_type == CAPTURE) {
|
||||
if (captures_started_ >= kMaxCaptures) {
|
||||
return ReportError(CStrVector("Too many captures"));
|
||||
}
|
||||
captures_started_++;
|
||||
|
||||
if (is_named_capture) {
|
||||
capture_name = ParseCaptureGroupName(CHECK_FAILED);
|
||||
}
|
||||
}
|
||||
// Store current state and begin new disjunction parsing.
|
||||
state = new (zone()) RegExpParserState(
|
||||
state, subexpr_type, lookaround_type, captures_started_,
|
||||
capture_name, ignore_case(), unicode(), zone());
|
||||
state = ParseOpenParenthesis(state CHECK_FAILED);
|
||||
builder = state->builder();
|
||||
continue;
|
||||
}
|
||||
case '[': {
|
||||
RegExpTree* cc = ParseCharacterClass(CHECK_FAILED);
|
||||
RegExpTree* cc = ParseCharacterClass(builder CHECK_FAILED);
|
||||
builder->AddCharacterClass(cc->AsCharacterClass());
|
||||
break;
|
||||
}
|
||||
@ -368,13 +305,13 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
return ReportError(CStrVector("\\ at end of pattern"));
|
||||
case 'b':
|
||||
Advance(2);
|
||||
builder->AddAssertion(
|
||||
new (zone()) RegExpAssertion(RegExpAssertion::BOUNDARY));
|
||||
builder->AddAssertion(new (zone()) RegExpAssertion(
|
||||
RegExpAssertion::BOUNDARY, builder->flags()));
|
||||
continue;
|
||||
case 'B':
|
||||
Advance(2);
|
||||
builder->AddAssertion(
|
||||
new (zone()) RegExpAssertion(RegExpAssertion::NON_BOUNDARY));
|
||||
builder->AddAssertion(new (zone()) RegExpAssertion(
|
||||
RegExpAssertion::NON_BOUNDARY, builder->flags()));
|
||||
continue;
|
||||
// AtomEscape ::
|
||||
// CharacterClassEscape
|
||||
@ -391,10 +328,10 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
Advance(2);
|
||||
ZoneList<CharacterRange>* ranges =
|
||||
new (zone()) ZoneList<CharacterRange>(2, zone());
|
||||
CharacterRange::AddClassEscape(c, ranges,
|
||||
unicode() && ignore_case(), zone());
|
||||
CharacterRange::AddClassEscape(
|
||||
c, ranges, unicode() && builder->ignore_case(), zone());
|
||||
RegExpCharacterClass* cc =
|
||||
new (zone()) RegExpCharacterClass(ranges);
|
||||
new (zone()) RegExpCharacterClass(ranges, builder->flags());
|
||||
builder->AddCharacterClass(cc);
|
||||
break;
|
||||
}
|
||||
@ -410,7 +347,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
return ReportError(CStrVector("Invalid property name"));
|
||||
}
|
||||
RegExpCharacterClass* cc =
|
||||
new (zone()) RegExpCharacterClass(ranges);
|
||||
new (zone()) RegExpCharacterClass(ranges, builder->flags());
|
||||
builder->AddCharacterClass(cc);
|
||||
} else {
|
||||
// With /u, no identity escapes except for syntax characters
|
||||
@ -443,7 +380,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
builder->AddEmpty();
|
||||
} else {
|
||||
RegExpCapture* capture = GetCapture(index);
|
||||
RegExpTree* atom = new (zone()) RegExpBackReference(capture);
|
||||
RegExpTree* atom =
|
||||
new (zone()) RegExpBackReference(capture, builder->flags());
|
||||
builder->AddAtom(atom);
|
||||
}
|
||||
break;
|
||||
@ -638,6 +576,143 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
}
|
||||
}
|
||||
|
||||
// Handles the text that follows an opening parenthesis and decides what kind
// of group it starts: a plain capture, a (?:...) grouping, a lookahead or
// lookbehind, a named capture, or an inline flag modifier such as (?i) or
// (?i-m:...).
//
// Returns nullptr after calling ReportError() for malformed input. For a bare
// modifier like (?i) no sub-expression is opened: the flags on |state|'s
// builder are updated and |state| itself is returned. In every other case a
// new zone-allocated RegExpParserState is returned with |state| as its
// previous state.
RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
    RegExpParserState* state) {
  SubexpressionType subexpr_type = CAPTURE;
  RegExpLookaround::Type lookaround_type = state->lookaround_type();
  const ZoneVector<uc16>* capture_name = nullptr;
  bool is_named_capture = false;
  // Flags that an inline modifier asks to turn on / turn off.
  JSRegExp::Flags switch_on = JSRegExp::kNone;
  JSRegExp::Flags switch_off = JSRegExp::kNone;

  Advance();
  if (current() == '?') {
    // The character after '?' selects the kind of group.
    const uc32 selector = Next();
    switch (selector) {
      case ':':
        subexpr_type = GROUPING;
        Advance(2);
        break;
      case '=':
      case '!':
        lookaround_type = RegExpLookaround::LOOKAHEAD;
        subexpr_type =
            (selector == '=') ? POSITIVE_LOOKAROUND : NEGATIVE_LOOKAROUND;
        Advance(2);
        break;
      case '-':
      case 'i':
      case 's':
      case 'm': {
        // Inline modifiers are behind a flag; 's' additionally needs dotall.
        const bool dotall_unavailable =
            (selector == 's') && !FLAG_harmony_regexp_dotall;
        if (!FLAG_regexp_mode_modifiers || dotall_unavailable) {
          ReportError(CStrVector("Invalid group"));
          return nullptr;
        }
        Advance();
        // True while flags are being switched on, false once a '-' was seen.
        bool enabling = true;
        // A ':' turns the modifier into a grouping, which ends this loop.
        while (subexpr_type != GROUPING) {
          const uc32 modifier = current();
          switch (modifier) {
            case '-':
              if (!enabling) {
                ReportError(CStrVector("Multiple dashes in flag group"));
                return nullptr;
              }
              enabling = false;
              Advance();
              break;
            case 's':
              if (!FLAG_harmony_regexp_dotall) {
                ReportError(CStrVector("Invalid group"));
                return nullptr;
              }
            // Fall through.
            case 'i':
            case 'm': {
              JSRegExp::Flags bit = JSRegExp::kDotAll;
              if (modifier == 'i') {
                bit = JSRegExp::kIgnoreCase;
              } else if (modifier == 'm') {
                bit = JSRegExp::kMultiline;
              }
              // Each flag may be mentioned at most once per modifier.
              if (((switch_on | switch_off) & bit) != 0) {
                ReportError(CStrVector("Repeated flag in flag group"));
                return nullptr;
              }
              if (!enabling) {
                switch_off |= bit;
              } else {
                switch_on |= bit;
              }
              Advance();
              break;
            }
            case ')': {
              // A (?i)-style switch does not open a subexpression; it only
              // changes the flags for the remainder of the current one.
              RegExpBuilder* builder = state->builder();
              Advance();
              // Pending text still belongs to the old flags, so flush first.
              builder->FlushText();
              builder->set_flags((builder->flags() | switch_on) & ~switch_off);
              return state;
            }
            case ':':
              Advance();
              subexpr_type = GROUPING;  // Terminates the enclosing while loop.
              break;
            default:
              ReportError(CStrVector("Invalid flag group"));
              return nullptr;
          }
        }
        break;
      }
      case '<':
        Advance();
        if (FLAG_harmony_regexp_lookbehind) {
          const uc32 lookbehind_marker = Next();
          if (lookbehind_marker == '=' || lookbehind_marker == '!') {
            Advance(2);
            lookaround_type = RegExpLookaround::LOOKBEHIND;
            subexpr_type = (lookbehind_marker == '=') ? POSITIVE_LOOKAROUND
                                                      : NEGATIVE_LOOKAROUND;
            break;
          }
        }
        if (FLAG_harmony_regexp_named_captures) {
          has_named_captures_ = true;
          is_named_capture = true;
          Advance();
          break;
        }
      // Fall through.
      default:
        ReportError(CStrVector("Invalid group"));
        return nullptr;
    }
  }

  if (subexpr_type == CAPTURE) {
    if (captures_started_ >= kMaxCaptures) {
      ReportError(CStrVector("Too many captures"));
      return nullptr;
    }
    captures_started_++;

    if (is_named_capture) {
      capture_name = ParseCaptureGroupName(CHECK_FAILED);
    }
  }

  // The new subexpression starts from the enclosing flags, adjusted by any
  // (?flags:...) modifier parsed above.
  const JSRegExp::Flags group_flags =
      (state->builder()->flags() | switch_on) & ~switch_off;
  // Store current state and begin new disjunction parsing.
  return new (zone())
      RegExpParserState(state, subexpr_type, lookaround_type, captures_started_,
                        capture_name, group_flags, zone());
}
|
||||
|
||||
#ifdef DEBUG
|
||||
// Currently only used in a DCHECK.
|
||||
@ -855,7 +930,8 @@ bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,
|
||||
if (state->IsInsideCaptureGroup(name)) {
|
||||
builder->AddEmpty();
|
||||
} else {
|
||||
RegExpBackReference* atom = new (zone()) RegExpBackReference();
|
||||
RegExpBackReference* atom =
|
||||
new (zone()) RegExpBackReference(builder->flags());
|
||||
atom->set_name(name);
|
||||
|
||||
builder->AddAtom(atom);
|
||||
@ -1525,7 +1601,7 @@ void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
|
||||
}
|
||||
}
|
||||
|
||||
RegExpTree* RegExpParser::ParseCharacterClass() {
|
||||
RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
|
||||
static const char* kUnterminated = "Unterminated character class";
|
||||
static const char* kRangeInvalid = "Invalid character class";
|
||||
static const char* kRangeOutOfOrder = "Range out of order in character class";
|
||||
@ -1539,7 +1615,7 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
|
||||
}
|
||||
ZoneList<CharacterRange>* ranges =
|
||||
new (zone()) ZoneList<CharacterRange>(2, zone());
|
||||
bool add_unicode_case_equivalents = unicode() && ignore_case();
|
||||
bool add_unicode_case_equivalents = unicode() && builder->ignore_case();
|
||||
while (has_more() && current() != ']') {
|
||||
uc32 char_1, char_2;
|
||||
bool is_class_1, is_class_2;
|
||||
@ -1586,9 +1662,10 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
|
||||
ranges->Add(CharacterRange::Everything(), zone());
|
||||
is_negated = !is_negated;
|
||||
}
|
||||
RegExpCharacterClass::Flags flags;
|
||||
if (is_negated) flags = RegExpCharacterClass::NEGATED;
|
||||
return new (zone()) RegExpCharacterClass(ranges, flags);
|
||||
RegExpCharacterClass::CharacterClassFlags character_class_flags;
|
||||
if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED;
|
||||
return new (zone())
|
||||
RegExpCharacterClass(ranges, builder->flags(), character_class_flags);
|
||||
}
|
||||
|
||||
|
||||
@ -1622,11 +1699,10 @@ bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
|
||||
return !parser.failed();
|
||||
}
|
||||
|
||||
RegExpBuilder::RegExpBuilder(Zone* zone, bool ignore_case, bool unicode)
|
||||
RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags)
|
||||
: zone_(zone),
|
||||
pending_empty_(false),
|
||||
ignore_case_(ignore_case),
|
||||
unicode_(unicode),
|
||||
flags_(flags),
|
||||
characters_(nullptr),
|
||||
pending_surrogate_(kNoPendingSurrogate),
|
||||
terms_(),
|
||||
@ -1662,7 +1738,7 @@ void RegExpBuilder::AddTrailSurrogate(uc16 trail_surrogate) {
|
||||
surrogate_pair.Add(lead_surrogate, zone());
|
||||
surrogate_pair.Add(trail_surrogate, zone());
|
||||
RegExpAtom* atom =
|
||||
new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
|
||||
new (zone()) RegExpAtom(surrogate_pair.ToConstVector(), flags_);
|
||||
AddAtom(atom);
|
||||
}
|
||||
} else {
|
||||
@ -1686,7 +1762,8 @@ void RegExpBuilder::FlushCharacters() {
|
||||
FlushPendingSurrogate();
|
||||
pending_empty_ = false;
|
||||
if (characters_ != nullptr) {
|
||||
RegExpTree* atom = new (zone()) RegExpAtom(characters_->ToConstVector());
|
||||
RegExpTree* atom =
|
||||
new (zone()) RegExpAtom(characters_->ToConstVector(), flags_);
|
||||
characters_ = nullptr;
|
||||
text_.Add(atom, zone());
|
||||
LAST(ADD_ATOM);
|
||||
@ -1762,7 +1839,7 @@ void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
|
||||
|
||||
void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {
|
||||
AddTerm(new (zone()) RegExpCharacterClass(
|
||||
CharacterRange::List(zone(), CharacterRange::Singleton(c))));
|
||||
CharacterRange::List(zone(), CharacterRange::Singleton(c)), flags_));
|
||||
}
|
||||
|
||||
|
||||
@ -1880,11 +1957,11 @@ bool RegExpBuilder::AddQuantifierToAtom(
|
||||
int num_chars = char_vector.length();
|
||||
if (num_chars > 1) {
|
||||
Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
|
||||
text_.Add(new (zone()) RegExpAtom(prefix), zone());
|
||||
text_.Add(new (zone()) RegExpAtom(prefix, flags_), zone());
|
||||
char_vector = char_vector.SubVector(num_chars - 1, num_chars);
|
||||
}
|
||||
characters_ = nullptr;
|
||||
atom = new (zone()) RegExpAtom(char_vector);
|
||||
atom = new (zone()) RegExpAtom(char_vector, flags_);
|
||||
FlushText();
|
||||
} else if (text_.length() > 0) {
|
||||
DCHECK(last_added_ == ADD_ATOM);
|
||||
|
@ -99,7 +99,7 @@ class BufferedZoneList {
|
||||
// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
|
||||
class RegExpBuilder : public ZoneObject {
|
||||
public:
|
||||
RegExpBuilder(Zone* zone, bool ignore_case, bool unicode);
|
||||
RegExpBuilder(Zone* zone, JSRegExp::Flags flags);
|
||||
void AddCharacter(uc16 character);
|
||||
void AddUnicodeCharacter(uc32 character);
|
||||
void AddEscapedUnicodeCharacter(uc32 character);
|
||||
@ -114,7 +114,14 @@ class RegExpBuilder : public ZoneObject {
|
||||
void NewAlternative(); // '|'
|
||||
bool AddQuantifierToAtom(int min, int max,
|
||||
RegExpQuantifier::QuantifierType type);
|
||||
void FlushText();
|
||||
RegExpTree* ToRegExp();
|
||||
JSRegExp::Flags flags() const { return flags_; }
|
||||
void set_flags(JSRegExp::Flags flags) { flags_ = flags; }
|
||||
|
||||
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
|
||||
bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }
|
||||
bool dotall() const { return (flags_ & JSRegExp::kDotAll) != 0; }
|
||||
|
||||
private:
|
||||
static const uc16 kNoPendingSurrogate = 0;
|
||||
@ -122,18 +129,15 @@ class RegExpBuilder : public ZoneObject {
|
||||
void AddTrailSurrogate(uc16 trail_surrogate);
|
||||
void FlushPendingSurrogate();
|
||||
void FlushCharacters();
|
||||
void FlushText();
|
||||
void FlushTerms();
|
||||
bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
|
||||
bool NeedsDesugaringForIgnoreCase(uc32 c);
|
||||
Zone* zone() const { return zone_; }
|
||||
bool ignore_case() const { return ignore_case_; }
|
||||
bool unicode() const { return unicode_; }
|
||||
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
|
||||
|
||||
Zone* zone_;
|
||||
bool pending_empty_;
|
||||
bool ignore_case_;
|
||||
bool unicode_;
|
||||
JSRegExp::Flags flags_;
|
||||
ZoneList<uc16>* characters_;
|
||||
uc16 pending_surrogate_;
|
||||
BufferedZoneList<RegExpTree, 2> terms_;
|
||||
@ -159,7 +163,6 @@ class RegExpParser BASE_EMBEDDED {
|
||||
RegExpTree* ParsePattern();
|
||||
RegExpTree* ParseDisjunction();
|
||||
RegExpTree* ParseGroup();
|
||||
RegExpTree* ParseCharacterClass();
|
||||
|
||||
// Parses a {...,...} quantifier and stores the range in the given
|
||||
// out parameters.
|
||||
@ -175,6 +178,7 @@ class RegExpParser BASE_EMBEDDED {
|
||||
bool ParseUnicodeEscape(uc32* value);
|
||||
bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
|
||||
bool ParsePropertyClass(ZoneList<CharacterRange>* result, bool negate);
|
||||
RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
|
||||
|
||||
uc32 ParseOctalLiteral();
|
||||
|
||||
@ -205,10 +209,9 @@ class RegExpParser BASE_EMBEDDED {
|
||||
int captures_started() { return captures_started_; }
|
||||
int position() { return next_pos_ - 1; }
|
||||
bool failed() { return failed_; }
|
||||
bool dotall() const { return dotall_; }
|
||||
bool ignore_case() const { return ignore_case_; }
|
||||
bool multiline() const { return multiline_; }
|
||||
bool unicode() const { return unicode_; }
|
||||
// The Unicode flag can't be changed using in-regexp syntax, so it's OK to
|
||||
// just read the initial flag value here.
|
||||
bool unicode() const { return (top_level_flags_ & JSRegExp::kUnicode) != 0; }
|
||||
|
||||
static bool IsSyntaxCharacterOrSlash(uc32 c);
|
||||
|
||||
@ -226,34 +229,35 @@ class RegExpParser BASE_EMBEDDED {
|
||||
|
||||
class RegExpParserState : public ZoneObject {
|
||||
public:
|
||||
// Push a state on the stack.
|
||||
RegExpParserState(RegExpParserState* previous_state,
|
||||
SubexpressionType group_type,
|
||||
RegExpLookaround::Type lookaround_type,
|
||||
int disjunction_capture_index,
|
||||
const ZoneVector<uc16>* capture_name, bool ignore_case,
|
||||
bool unicode, Zone* zone)
|
||||
const ZoneVector<uc16>* capture_name,
|
||||
JSRegExp::Flags flags, Zone* zone)
|
||||
: previous_state_(previous_state),
|
||||
builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)),
|
||||
builder_(new (zone) RegExpBuilder(zone, flags)),
|
||||
group_type_(group_type),
|
||||
lookaround_type_(lookaround_type),
|
||||
disjunction_capture_index_(disjunction_capture_index),
|
||||
capture_name_(capture_name) {}
|
||||
// Parser state of containing expression, if any.
|
||||
RegExpParserState* previous_state() { return previous_state_; }
|
||||
RegExpParserState* previous_state() const { return previous_state_; }
|
||||
bool IsSubexpression() { return previous_state_ != nullptr; }
|
||||
// RegExpBuilder building this regexp's AST.
|
||||
RegExpBuilder* builder() { return builder_; }
|
||||
RegExpBuilder* builder() const { return builder_; }
|
||||
// Type of regexp being parsed (parenthesized group or entire regexp).
|
||||
SubexpressionType group_type() { return group_type_; }
|
||||
SubexpressionType group_type() const { return group_type_; }
|
||||
// Lookahead or Lookbehind.
|
||||
RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
|
||||
RegExpLookaround::Type lookaround_type() const { return lookaround_type_; }
|
||||
// Index in captures array of first capture in this sub-expression, if any.
|
||||
// Also the capture index of this sub-expression itself, if group_type
|
||||
// is CAPTURE.
|
||||
int capture_index() { return disjunction_capture_index_; }
|
||||
int capture_index() const { return disjunction_capture_index_; }
|
||||
// The name of the current sub-expression, if group_type is CAPTURE. Only
|
||||
// used for named captures.
|
||||
const ZoneVector<uc16>* capture_name() { return capture_name_; }
|
||||
const ZoneVector<uc16>* capture_name() const { return capture_name_; }
|
||||
|
||||
bool IsNamedCapture() const { return capture_name_ != nullptr; }
|
||||
|
||||
@ -264,17 +268,17 @@ class RegExpParser BASE_EMBEDDED {
|
||||
|
||||
private:
|
||||
// Linked list implementation of stack of states.
|
||||
RegExpParserState* previous_state_;
|
||||
RegExpParserState* const previous_state_;
|
||||
// Builder for the stored disjunction.
|
||||
RegExpBuilder* builder_;
|
||||
RegExpBuilder* const builder_;
|
||||
// Stored disjunction type (capture, look-ahead or grouping), if any.
|
||||
SubexpressionType group_type_;
|
||||
const SubexpressionType group_type_;
|
||||
// Stored read direction.
|
||||
RegExpLookaround::Type lookaround_type_;
|
||||
const RegExpLookaround::Type lookaround_type_;
|
||||
// Stored disjunction's capture index (if any).
|
||||
int disjunction_capture_index_;
|
||||
const int disjunction_capture_index_;
|
||||
// Stored capture name (if any).
|
||||
const ZoneVector<uc16>* capture_name_;
|
||||
const ZoneVector<uc16>* const capture_name_;
|
||||
};
|
||||
|
||||
// Return the 1-indexed RegExpCapture object, allocate if necessary.
|
||||
@ -291,6 +295,7 @@ class RegExpParser BASE_EMBEDDED {
|
||||
|
||||
bool ParseNamedBackReference(RegExpBuilder* builder,
|
||||
RegExpParserState* state);
|
||||
RegExpParserState* ParseOpenParenthesis(RegExpParserState* state);
|
||||
|
||||
// After the initial parsing pass, patch corresponding RegExpCapture objects
|
||||
// into all RegExpBackReferences. This is done after initial parsing in order
|
||||
@ -323,10 +328,10 @@ class RegExpParser BASE_EMBEDDED {
|
||||
ZoneList<RegExpBackReference*>* named_back_references_;
|
||||
FlatStringReader* in_;
|
||||
uc32 current_;
|
||||
bool dotall_;
|
||||
bool ignore_case_;
|
||||
bool multiline_;
|
||||
bool unicode_;
|
||||
// These are the flags specified outside the regexp syntax, i.e., after the
|
||||
// terminating '/' or in the second argument to the constructor. The current
|
||||
// flags are stored on the RegExpBuilder.
|
||||
JSRegExp::Flags top_level_flags_;
|
||||
int next_pos_;
|
||||
int captures_started_;
|
||||
int capture_count_; // Only valid after we have scanned for captures.
|
||||
|
@ -158,6 +158,8 @@
|
||||
'es6/unicode-regexp-ignore-case-noi18n': [FAIL, ['no_i18n == True', PASS]],
|
||||
'regress/regress-5036': [PASS, ['no_i18n == True', FAIL]],
|
||||
'es7/regexp-ui-word': [PASS, ['no_i18n == True', FAIL]],
|
||||
'regexp-modifiers-i18n': [PASS, ['no_i18n == True', FAIL]],
|
||||
'regexp-modifiers-autogenerated-i18n': [PASS, ['no_i18n == True', FAIL]],
|
||||
# desugaring regexp property class relies on ICU.
|
||||
'harmony/regexp-property-*': [PASS, ['no_i18n == True', FAIL]],
|
||||
|
||||
|
81
test/mjsunit/regexp-modifiers-autogenerated-i18n.js
Normal file
81
test/mjsunit/regexp-modifiers-autogenerated-i18n.js
Normal file
@ -0,0 +1,81 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --regexp-mode-modifiers --harmony-regexp-property
|
||||
|
||||
// These regexps are just grepped out of the other tests we already have
|
||||
// and the syntax changed from out-of-line i flag to inline i flag.
|
||||
|
||||
// These tests won't all run on the noi18n build of V8.
|
||||
|
||||
assertTrue(/(?i)\u00e5/u.test("\u00c5"));
|
||||
assertTrue(/(?i)\u00e5/u.test("\u00e5"));
|
||||
assertTrue(/(?i)\u00c5/u.test("\u00e5"));
|
||||
assertTrue(/(?i)\u00c5/u.test("\u00c5"));
|
||||
assertTrue(/(?i)\u212b/u.test("\u212b"));
|
||||
assertFalse(/(?i)\u00df/u.test("SS"));
|
||||
assertFalse(/(?i)\u1f8d/u.test("\u1f05\u03b9"));
|
||||
assertTrue(/(?i)\u1f6b/u.test("\u1f63"));
|
||||
assertTrue(/(?i)\u00e5/u.test("\u212b"));
|
||||
assertTrue(/(?i)\u00e5/u.test("\u00c5"));
|
||||
assertTrue(/(?i)\u00e5/u.test("\u00e5"));
|
||||
assertTrue(/(?i)\u00e5/u.test("\u212b"));
|
||||
assertTrue(/(?i)\u00c5/u.test("\u00e5"));
|
||||
assertTrue(/(?i)\u00c5/u.test("\u212b"));
|
||||
assertTrue(/(?i)\u00c5/u.test("\u00c5"));
|
||||
assertTrue(/(?i)\u212b/u.test("\u00c5"));
|
||||
assertTrue(/(?i)\u212b/u.test("\u00e5"));
|
||||
assertTrue(/(?i)\u212b/u.test("\u212b"));
|
||||
assertTrue(/(?i)\u{10400}/u.test("\u{10428}"));
|
||||
assertTrue(/(?i)\ud801\udc00/u.test("\u{10428}"));
|
||||
assertTrue(/(?i)[\u{10428}]/u.test("\u{10400}"));
|
||||
assertTrue(/(?i)[\ud801\udc28]/u.test("\u{10400}"));
|
||||
assertFalse(/(?i)\u00df/u.test("SS"));
|
||||
assertFalse(/(?i)\u1f8d/u.test("\u1f05\u03b9"));
|
||||
assertTrue(/(?i)\u1f8d/u.test("\u1f85"));
|
||||
assertTrue(/(?i)\u1f6b/u.test("\u1f63"));
|
||||
assertTrue(/(?i)\u00e5\u00e5\u00e5/u.test("\u212b\u00e5\u00c5"));
|
||||
assertTrue(/(?i)AB\u{10400}/u.test("ab\u{10428}"));
|
||||
assertTrue(/(?i)\w/u.test('\u017F'));
|
||||
assertTrue(/(?i)\w/u.test('\u212A'));
|
||||
assertFalse(/(?i)\W/u.test('\u017F'));
|
||||
assertFalse(/(?i)\W/u.test('\u212A'));
|
||||
assertFalse(/(?i)\W/u.test('s'));
|
||||
assertFalse(/(?i)\W/u.test('S'));
|
||||
assertFalse(/(?i)\W/u.test('K'));
|
||||
assertFalse(/(?i)\W/u.test('k'));
|
||||
assertTrue(/(?i)[\w]/u.test('\u017F'));
|
||||
assertTrue(/(?i)[\w]/u.test('\u212A'));
|
||||
assertFalse(/(?i)[\W]/u.test('\u017F'));
|
||||
assertFalse(/(?i)[\W]/u.test('\u212A'));
|
||||
assertFalse(/(?i)[\W]/u.test('s'));
|
||||
assertFalse(/(?i)[\W]/u.test('S'));
|
||||
assertFalse(/(?i)[\W]/u.test('K'));
|
||||
assertFalse(/(?i)[\W]/u.test('k'));
|
||||
assertTrue(/(?i)\b/u.test('\u017F'));
|
||||
assertTrue(/(?i)\b/u.test('\u212A'));
|
||||
assertTrue(/(?i)\b/u.test('s'));
|
||||
assertTrue(/(?i)\b/u.test('S'));
|
||||
assertFalse(/(?i)\B/u.test('\u017F'));
|
||||
assertFalse(/(?i)\B/u.test('\u212A'));
|
||||
assertFalse(/(?i)\B/u.test('s'));
|
||||
assertFalse(/(?i)\B/u.test('S'));
|
||||
assertFalse(/(?i)\B/u.test('K'));
|
||||
assertFalse(/(?i)\B/u.test('k'));
|
||||
assertTrue(/(?i)\p{Ll}/u.test("a"));
|
||||
assertTrue(/(?i)\p{Ll}/u.test("\u{118D4}"));
|
||||
assertTrue(/(?i)\p{Ll}/u.test("A"));
|
||||
assertTrue(/(?i)\p{Ll}/u.test("\u{118B4}"));
|
||||
assertTrue(/(?i)\P{Ll}/u.test("a"));
|
||||
assertTrue(/(?i)\P{Ll}/u.test("\u{118D4}"));
|
||||
assertTrue(/(?i)\P{Ll}/u.test("A"));
|
||||
assertTrue(/(?i)\P{Ll}/u.test("\u{118B4}"));
|
||||
assertTrue(/(?i)\p{Lu}/u.test("a"));
|
||||
assertTrue(/(?i)\p{Lu}/u.test("\u{118D4}"));
|
||||
assertTrue(/(?i)\p{Lu}/u.test("A"));
|
||||
assertTrue(/(?i)\p{Lu}/u.test("\u{118B4}"));
|
||||
assertTrue(/(?i)\P{Lu}/u.test("a"));
|
||||
assertTrue(/(?i)\P{Lu}/u.test("\u{118D4}"));
|
||||
assertTrue(/(?i)\P{Lu}/u.test("A"));
|
||||
assertTrue(/(?i)\P{Lu}/u.test("\u{118B4}"));
|
74
test/mjsunit/regexp-modifiers-autogenerated.js
Normal file
74
test/mjsunit/regexp-modifiers-autogenerated.js
Normal file
@ -0,0 +1,74 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --regexp-mode-modifiers --harmony-regexp-property
|
||||
|
||||
// These regexps are just grepped out of the other tests we already have
|
||||
// and the syntax changed from out-of-line i flag to inline i flag.
|
||||
|
||||
assertFalse(/(?i)x(...)\1/.test("x\u03a3\u03c2\u03c3\u03c2\u03c3"));
|
||||
assertTrue(/(?i)\u03a3((?:))\1\1x/.test("\u03c2x"), "backref-UC16-empty");
|
||||
assertTrue(/(?i)x(?:...|(...))\1x/.test("x\u03a3\u03c2\u03c3x"));
|
||||
assertTrue(/(?i)x(?:...|(...))\1x/.test("x\u03c2\u03c3\u039b\u03a3\u03c2\u03bbx"));
|
||||
assertFalse(/(?i)\xc1/.test('fooA'), "quickcheck-uc16-pattern-ascii-subject");
|
||||
assertFalse(/(?i)x(...)\1/.test("xaaaaa"), "backref-ASCII-short");
|
||||
assertTrue(/(?i)x((?:))\1\1x/.test("xx"), "backref-ASCII-empty");
|
||||
assertTrue(/(?i)x(?:...|(...))\1x/.test("xabcx"), "backref-ASCII-uncaptured");
|
||||
assertTrue(/(?i)x(?:...|(...))\1x/.test("xabcABCx"), "backref-ASCII-backtrack");
|
||||
assertFalse(/(?i)f/.test('b'));
|
||||
assertFalse(/(?i)[abc]f/.test('x'));
|
||||
assertFalse(/(?i)[abc]f/.test('xa'));
|
||||
assertFalse(/(?i)[abc]</.test('x'));
|
||||
assertFalse(/(?i)[abc]</.test('xa'));
|
||||
assertFalse(/(?i)f[abc]/.test('x'));
|
||||
assertFalse(/(?i)f[abc]/.test('xa'));
|
||||
assertFalse(/(?i)<[abc]/.test('x'));
|
||||
assertFalse(/(?i)<[abc]/.test('xa'));
|
||||
assertFalse(/(?i)[\u00e5]/.test("\u212b"));
|
||||
assertFalse(/(?i)[\u212b]/.test("\u00e5\u1234"));
|
||||
assertFalse(/(?i)[\u212b]/.test("\u00e5"));
|
||||
assertFalse(/(?i)\u{10400}/.test("\u{10428}"));
|
||||
assertFalse(/(?i)[\u00e5]/.test("\u212b"));
|
||||
assertFalse(/(?i)[\u212b]/.test("\u00e5\u1234"));
|
||||
assertFalse(/(?i)[\u212b]/.test("\u00e5"));
|
||||
assertFalse(/(?i)\u{10400}/.test("\u{10428}"));
|
||||
assertTrue(/(?i)[@-A]/.test("a"));
|
||||
assertTrue(/(?i)[@-A]/.test("A"));
|
||||
assertTrue(/(?i)[@-A]/.test("@"));
|
||||
assertFalse(/(?i)[¿-À]/.test('¾'));
|
||||
assertTrue(/(?i)[¿-À]/.test('¿'));
|
||||
assertTrue(/(?i)[¿-À]/.test('À'));
|
||||
assertTrue(/(?i)[¿-À]/.test('à'));
|
||||
assertFalse(/(?i)[¿-À]/.test('á'));
|
||||
assertFalse(/(?i)[¿-À]/.test('Á'));
|
||||
assertFalse(/(?i)[¿-À]/.test('Á'));
|
||||
assertFalse(/(?i)[Ö-×]/.test('Õ'));
|
||||
assertTrue(/(?i)[Ö-×]/.test('Ö'));
|
||||
assertTrue(/(?i)[Ö-×]/.test('ö'));
|
||||
assertTrue(/(?i)[Ö-×]/.test('×'));
|
||||
assertFalse(/(?i)[Ö-×]/.test('Ø'));
|
||||
assertTrue(/(?i)(a[\u1000A])+/.test('aa'));
|
||||
assertTrue(/(?i)\u0178/.test('\u00ff'));
|
||||
assertTrue(/(?i)\u039c/.test('\u00b5'));
|
||||
assertTrue(/(?i)\u039c/.test('\u03bc'));
|
||||
assertTrue(/(?i)\u00b5/.test('\u03bc'));
|
||||
assertTrue(/(?i)[\u039b-\u039d]/.test('\u00b5'));
|
||||
assertFalse(/(?i)[^\u039b-\u039d]/.test('\u00b5'));
|
||||
|
||||
assertTrue(/(?m)^bar/.test("bar"));
|
||||
assertTrue(/(?m)^bar/.test("bar\nfoo"));
|
||||
assertTrue(/(?m)^bar/.test("foo\nbar"));
|
||||
assertTrue(/(?m)bar$/.test("bar"));
|
||||
assertTrue(/(?m)bar$/.test("bar\nfoo"));
|
||||
assertTrue(/(?m)bar$/.test("foo\nbar"));
|
||||
assertFalse(/(?m)^bxr/.test("bar"));
|
||||
assertFalse(/(?m)^bxr/.test("bar\nfoo"));
|
||||
assertFalse(/(?m)^bxr/.test("foo\nbar"));
|
||||
assertFalse(/(?m)bxr$/.test("bar"));
|
||||
assertFalse(/(?m)bxr$/.test("bar\nfoo"));
|
||||
assertFalse(/(?m)bxr$/.test("foo\nbar"));
|
||||
assertTrue(/(?m)^.*$/.test("\n"));
|
||||
assertTrue(/(?m)^([()]|.)*$/.test("()\n()"));
|
||||
assertTrue(/(?m)^([()]|.)*$/.test("()\n"));
|
||||
assertTrue(/(?m)^[()]*$/.test("()\n."));
|
27
test/mjsunit/regexp-modifiers-dotall.js
Normal file
27
test/mjsunit/regexp-modifiers-dotall.js
Normal file
@ -0,0 +1,27 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --regexp-mode-modifiers --harmony-regexp-dotall
|
||||
|
||||
// The s flag switches dotall mode on and off. Combine with i flag changes to test
|
||||
// the parser.
|
||||
test(/.(?s).(?i-s).a(?-i)a/);
|
||||
test(/.(?s:.)(?i:.a)a/);
|
||||
test(/.(?s).(?i-s).a(?-i)a/u);
|
||||
test(/.(?s:.)(?i:.a)a/u);
|
||||
|
||||
// m flag makes no difference
|
||||
test(/.(?sm).(?i-s).a(?-i)a/);
|
||||
test(/.(?s:.)(?i:.a)a/);
|
||||
test(/.(?sm).(?im-s).a(?m-i)a/u);
|
||||
test(/.(?s:.)(?i:.a)a/u);
|
||||
|
||||
// Runs |re| against six fixed five-character subjects, in a fixed order:
// the first three must match, the last three must not.
function test(re) {
  const expectations = [
    ["...aa", true],
    [".\n.aa", true],
    [".\n.Aa", true],
    ["\n\n.Aa", false],
    [".\n\nAa", false],
    [".\n.AA", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
138
test/mjsunit/regexp-modifiers-i18n.js
Normal file
138
test/mjsunit/regexp-modifiers-i18n.js
Normal file
@ -0,0 +1,138 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --regexp-mode-modifiers
|
||||
|
||||
// These tests won't all run on the noi18n build of V8.
|
||||
|
||||
aa(/(a)(?i)\1/u);
|
||||
aa(/([az])(?i)\1/u);
|
||||
|
||||
// Asserts that |re| accepts "aa" and "aA" and rejects "Aa" and "AA", i.e.
// only the second letter may differ in case.
function aa(re) {
  const expectations = [
    ["aa", true],
    ["aA", true],
    ["Aa", false],
    ["AA", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
aai(/(a)(?-i)\1/iu);
|
||||
aai(/([az])(?-i)\1/iu);
|
||||
|
||||
// Asserts that |re| accepts "aa" and "AA" and rejects the mixed-case
// subjects "aA" and "Aa".
function aai(re) {
  const expectations = [
    ["aa", true],
    ["aA", false],
    ["Aa", false],
    ["AA", true],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
abcd(/a(b(?i)c)d/u);
|
||||
abcd(/[aw]([bx](?i)[cy])[dz]/u);
|
||||
|
||||
// Walks every upper/lower-case spelling of "abcd" in a fixed order and
// asserts that |re| accepts only "abcd" and "abCd": of the four letters,
// just the third may differ in case.
function abcd(re) {
  const expectations = [
    ["abcd", true],
    ["abcD", false],
    ["abCd", true],
    ["abCD", false],
    ["aBcd", false],
    ["aBcD", false],
    ["aBCd", false],
    ["aBCD", false],
    ["Abcd", false],
    ["AbcD", false],
    ["AbCd", false],
    ["AbCD", false],
    ["ABcd", false],
    ["ABcD", false],
    ["ABCd", false],
    ["ABCD", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
abcdei(/a(b(?-i)c)d/iu);
|
||||
abcdei(/[aw]([bx](?-i)[cy])[dz]/iu);
|
||||
|
||||
// Walks every upper/lower-case spelling of "abcd" in a fixed order and
// asserts that |re| accepts a subject exactly when its third letter is a
// lower-case "c"; the other three letters may be in either case.
function abcdei(re) {
  const expectations = [
    ["abcd", true],
    ["abcD", true],
    ["abCd", false],
    ["abCD", false],
    ["aBcd", true],
    ["aBcD", true],
    ["aBCd", false],
    ["aBCD", false],
    ["Abcd", true],
    ["AbcD", true],
    ["AbCd", false],
    ["AbCD", false],
    ["ABcd", true],
    ["ABcD", true],
    ["ABCd", false],
    ["ABCD", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
abc(/a(?i:b)c/u);
|
||||
abc(/[ax](?i:[by])[cz]/u);
|
||||
|
||||
// Walks every upper/lower-case spelling of "abc" in a fixed order and asserts
// that |re| accepts only "abc" and "aBc": just the middle letter may differ
// in case.
function abc(re) {
  const expectations = [
    ["abc", true],
    ["abC", false],
    ["aBc", true],
    ["aBC", false],
    ["Abc", false],
    ["AbC", false],
    ["ABc", false],
    ["ABC", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
abci(/a(?-i:b)c/iu);
|
||||
abci(/[ax](?-i:[by])[cz]/iu);
|
||||
|
||||
// Walks every upper/lower-case spelling of "abc" in a fixed order and asserts
// that |re| accepts a subject exactly when its middle letter is a lower-case
// "b"; the outer letters may be in either case.
function abci(re) {
  const expectations = [
    ["abc", true],
    ["abC", true],
    ["aBc", false],
    ["aBC", false],
    ["Abc", true],
    ["AbC", true],
    ["ABc", false],
    ["ABC", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
// The following tests are taken from test/mjsunit/es7/regexp-ui-word.js but
|
||||
// using inline syntax instead of the global /i flag.
|
||||
assertTrue(/(?i)\w/u.test('\u017F'));
|
||||
assertTrue(/(?i)\w/u.test('\u212A'));
|
||||
assertFalse(/(?i)\W/u.test('\u017F'));
|
||||
assertFalse(/(?i)\W/u.test('\u212A'));
|
||||
assertFalse(/(?i)\W/u.test('s'));
|
||||
assertFalse(/(?i)\W/u.test('S'));
|
||||
assertFalse(/(?i)\W/u.test('K'));
|
||||
assertFalse(/(?i)\W/u.test('k'));
|
||||
|
||||
assertTrue(/(?i)[\w]/u.test('\u017F'));
|
||||
assertTrue(/(?i)[\w]/u.test('\u212A'));
|
||||
assertFalse(/(?i)[\W]/u.test('\u017F'));
|
||||
assertFalse(/(?i)[\W]/u.test('\u212A'));
|
||||
assertFalse(/(?i)[\W]/u.test('s'));
|
||||
assertFalse(/(?i)[\W]/u.test('S'));
|
||||
assertFalse(/(?i)[\W]/u.test('K'));
|
||||
assertFalse(/(?i)[\W]/u.test('k'));
|
||||
|
||||
assertTrue(/(?i)\b/u.test('\u017F'));
|
||||
assertFalse(/(?i:)\b/u.test('\u017F'));
|
||||
assertTrue(/(?i)\b/u.test('\u212A'));
|
||||
assertFalse(/(?i:)\b/u.test('\u212A'));
|
||||
assertTrue(/(?i)\b/u.test('s'));
|
||||
assertTrue(/(?i)\b/u.test('S'));
|
||||
assertFalse(/(?i)\B/u.test('\u017F'));
|
||||
assertFalse(/(?i)\B/u.test('\u212A'));
|
||||
assertFalse(/(?i)\B/u.test('s'));
|
||||
assertFalse(/(?i)\B/u.test('S'));
|
||||
assertFalse(/(?i)\B/u.test('K'));
|
||||
assertFalse(/(?i)\B/u.test('k'));
|
||||
|
||||
assertEquals(["abcd\u017F", "\u017F"], /a.*?(.)(?i)\b/u.exec('abcd\u017F cd'));
|
||||
assertEquals(["abcd\u212A", "\u212A"], /a.*?(.)(?i)\b/u.exec('abcd\u212A cd'));
|
||||
|
||||
assertEquals(["a\u017F", "\u017F"], /a.*?(?i:\B)(.)/u.exec('a\u017F '));
|
||||
assertEquals(["a\u212A", "\u212A"], /a.*?(?i:\B)(.)/u.exec('a\u212A '));
|
146
test/mjsunit/regexp-modifiers.js
Normal file
146
test/mjsunit/regexp-modifiers.js
Normal file
@ -0,0 +1,146 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// Flags: --regexp-mode-modifiers
|
||||
|
||||
aa(/(a)(?i)\1/);
|
||||
aa(/([az])(?i)\1/);
|
||||
|
||||
// Asserts that |re| accepts "aa" and "aA" and rejects "Aa" and "AA", i.e.
// only the second letter may differ in case.
function aa(re) {
  const expectations = [
    ["aa", true],
    ["aA", true],
    ["Aa", false],
    ["AA", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
aai(/(a)(?-i)\1/i);
|
||||
aai(/([az])(?-i)\1/i);
|
||||
|
||||
// Asserts that |re| accepts "aa" and "AA" and rejects the mixed-case
// subjects "aA" and "Aa".
function aai(re) {
  const expectations = [
    ["aa", true],
    ["aA", false],
    ["Aa", false],
    ["AA", true],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
abcd(/a(b(?i)c)d/);
|
||||
abcd(/[aw]([bx](?i)[cy])[dz]/);
|
||||
|
||||
// Walks every upper/lower-case spelling of "abcd" in a fixed order and
// asserts that |re| accepts only "abcd" and "abCd": of the four letters,
// just the third may differ in case.
function abcd(re) {
  const expectations = [
    ["abcd", true],
    ["abcD", false],
    ["abCd", true],
    ["abCD", false],
    ["aBcd", false],
    ["aBcD", false],
    ["aBCd", false],
    ["aBCD", false],
    ["Abcd", false],
    ["AbcD", false],
    ["AbCd", false],
    ["AbCD", false],
    ["ABcd", false],
    ["ABcD", false],
    ["ABCd", false],
    ["ABCD", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const matched = re.test(subject);
    if (shouldMatch) {
      assertTrue(matched);
    } else {
      assertFalse(matched);
    }
  }
}
|
||||
|
||||
abcdei(/a(b(?-i)c)d/i);
|
||||
abcdei(/[aw]([bx](?-i)[cy])[dz]/i);
|
||||
|
||||
// Checks all 16 case variants of "abcd" against a regexp in which only the
// "c" is case-sensitive: a variant must match exactly when its "c" is lower
// case.
//
// re: any object with a test(string) method returning a boolean.
function abcdei(re) {
  assertTrue(re.test("abcd"));
  assertTrue(re.test("abcD"));
  assertFalse(re.test("abCd"));
  assertFalse(re.test("abCD"));
  assertTrue(re.test("aBcd"));
  assertTrue(re.test("aBcD"));
  assertFalse(re.test("aBCd"));
  assertFalse(re.test("aBCD"));
  assertTrue(re.test("Abcd"));
  assertTrue(re.test("AbcD"));
  assertFalse(re.test("AbCd"));
  assertFalse(re.test("AbCD"));
  assertTrue(re.test("ABcd"));
  assertTrue(re.test("ABcD"));
  assertFalse(re.test("ABCd"));
  assertFalse(re.test("ABCD"));
}
|
||||
|
||||
abc(/a(?i:b)c/);
|
||||
abc(/[ax](?i:[by])[cz]/);
|
||||
|
||||
// Checks all 8 case variants of "abc" against a regexp in which only the "b"
// is case-insensitive: exactly "abc" and "aBc" must match.
//
// re: any object with a test(string) method returning a boolean.
function abc(re) {
  assertTrue(re.test("abc"));
  assertFalse(re.test("abC"));
  assertTrue(re.test("aBc"));
  assertFalse(re.test("aBC"));
  assertFalse(re.test("Abc"));
  assertFalse(re.test("AbC"));
  assertFalse(re.test("ABc"));
  assertFalse(re.test("ABC"));
}
|
||||
|
||||
abci(/a(?-i:b)c/i);
|
||||
abci(/[ax](?-i:[by])[cz]/i);
|
||||
|
||||
// Checks all 8 case variants of "abc" against a regexp in which only the "b"
// is case-sensitive: a variant must match exactly when its "b" is lower case.
//
// re: any object with a test(string) method returning a boolean.
function abci(re) {
  assertTrue(re.test("abc"));
  assertTrue(re.test("abC"));
  assertFalse(re.test("aBc"));
  assertFalse(re.test("aBC"));
  assertTrue(re.test("Abc"));
  assertTrue(re.test("AbC"));
  assertFalse(re.test("ABc"));
  assertFalse(re.test("ABC"));
}
|
||||
|
||||
// Malformed modifiers must be rejected when the pattern is compiled: an
// unterminated scoped group, a doubled "-", and the same flag both switched
// on and off. Checked for the i flag ...
assertThrows(() => new RegExp("foo(?i:"));
assertThrows(() => new RegExp("foo(?--i)"));
assertThrows(() => new RegExp("foo(?i-i)"));

// ... and for the m flag.
assertThrows(() => new RegExp("foo(?m:"));
assertThrows(() => new RegExp("foo(?--m)"));
assertThrows(() => new RegExp("foo(?m-m)"));
|
||||
|
||||
var re = /^\s(?m)^.$\s(?-m)$/;
|
||||
assertTrue(re.test("\n.\n"));
|
||||
assertFalse(re.test(" .\n"));
|
||||
assertFalse(re.test("\n. "));
|
||||
assertFalse(re.test(" . "));
|
||||
assertFalse(re.test("_\n.\n"));
|
||||
assertFalse(re.test("\n.\n_"));
|
||||
assertFalse(re.test("_\n.\n_"));
|
||||
|
||||
assertEquals(["abcd", "d"], /a.*?(.)(?i)\b/.exec('abcd\u017F cd'));
|
||||
assertEquals(["abcd", "d"], /a.*?(.)(?i)\b/.exec('abcd\u212A cd'));
|
||||
|
||||
assertEquals(["a\u017F ", " "], /a.*?(?i)\B(.)/.exec('a\u017F '));
|
||||
assertEquals(["a\u212A ", " "], /a.*?(?i)\B(.)/.exec('a\u212A '));
|
||||
|
||||
// Nested flags.
|
||||
var res = [
|
||||
/^a(?i:b(?-i:c(?i:d)e)f)g$/,
|
||||
/^a(?i:b(?-i)c(?i)d(?-i)e(?i)f)g$/,
|
||||
/^(?-i:a(?i:b(?-i:c(?i:d)e)f)g)$/i,
|
||||
/^(?-i:a(?i:b(?-i)c(?i)d(?-i)e(?i)f)g)$/i,
|
||||
];
|
||||
|
||||
for (var idx = 0; idx < res.length; idx++) {
|
||||
var re = res[idx];
|
||||
for (var i = 0; i < 128; i++) {
|
||||
var s = (i & 1) ? "A" : "a";
|
||||
s += (i & 2) ? "B" : "b";
|
||||
s += (i & 4) ? "C" : "c";
|
||||
s += (i & 8) ? "D" : "d";
|
||||
s += (i & 16) ? "E" : "e";
|
||||
s += (i & 32) ? "F" : "f";
|
||||
s += (i & 64) ? "G" : "g";
|
||||
if ((i & (1 | 4 | 16 | 64)) != 0) {
|
||||
assertFalse(re.test(s), s);
|
||||
} else {
|
||||
assertTrue(re.test(s), s);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user