[regexp] More cleanups
- Anonymous namespaces instead of static functions.
- Comments.
- Reserve enough space in the range ZoneList.

Change-Id: Ie79fda770974796cd590a155dc5fd504472e5bc9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3220341
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Patrick Thier <pthier@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77391}
This commit is contained in:
parent b36b2074ac
commit a2b9710fd8
@ -27,14 +27,16 @@ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
|
|||||||
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
|
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
|
||||||
#undef MAKE_TYPE_CASE
|
#undef MAKE_TYPE_CASE
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
|
Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
|
||||||
Interval result = Interval::Empty();
|
Interval result = Interval::Empty();
|
||||||
for (int i = 0; i < children->length(); i++)
|
for (int i = 0; i < children->length(); i++)
|
||||||
result = result.Union(children->at(i)->CaptureRegisters());
|
result = result.Union(children->at(i)->CaptureRegisters());
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
Interval RegExpAlternative::CaptureRegisters() {
|
Interval RegExpAlternative::CaptureRegisters() {
|
||||||
return ListCaptureRegisters(nodes());
|
return ListCaptureRegisters(nodes());
|
||||||
@ -130,6 +132,7 @@ bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
|
|||||||
|
|
||||||
bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
|
bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// Convert regular expression trees to a simple sexp representation.
|
// Convert regular expression trees to a simple sexp representation.
|
||||||
// This representation should be different from the input grammar
|
// This representation should be different from the input grammar
|
||||||
@ -148,6 +151,7 @@ class RegExpUnparser final : public RegExpVisitor {
|
|||||||
Zone* zone_;
|
Zone* zone_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
|
void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
|
||||||
os_ << "(|";
|
os_ << "(|";
|
||||||
@ -312,8 +316,9 @@ RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
static int IncreaseBy(int previous, int increase) {
|
int IncreaseBy(int previous, int increase) {
|
||||||
if (RegExpTree::kInfinity - previous < increase) {
|
if (RegExpTree::kInfinity - previous < increase) {
|
||||||
return RegExpTree::kInfinity;
|
return RegExpTree::kInfinity;
|
||||||
} else {
|
} else {
|
||||||
@ -321,6 +326,7 @@ static int IncreaseBy(int previous, int increase) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
|
RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
|
||||||
: nodes_(nodes) {
|
: nodes_(nodes) {
|
||||||
|
@ -45,8 +45,10 @@ RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
|
|||||||
on_success);
|
on_success);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
|
namespace {
|
||||||
const int* special_class, int length) {
|
|
||||||
|
bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
|
||||||
|
const int* special_class, int length) {
|
||||||
length--; // Remove final marker.
|
length--; // Remove final marker.
|
||||||
DCHECK_EQ(kRangeEndMarker, special_class[length]);
|
DCHECK_EQ(kRangeEndMarker, special_class[length]);
|
||||||
DCHECK_NE(0, ranges->length());
|
DCHECK_NE(0, ranges->length());
|
||||||
@ -74,8 +76,8 @@ static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool CompareRanges(ZoneList<CharacterRange>* ranges,
|
bool CompareRanges(ZoneList<CharacterRange>* ranges, const int* special_class,
|
||||||
const int* special_class, int length) {
|
int length) {
|
||||||
length--; // Remove final marker.
|
length--; // Remove final marker.
|
||||||
DCHECK_EQ(kRangeEndMarker, special_class[length]);
|
DCHECK_EQ(kRangeEndMarker, special_class[length]);
|
||||||
if (ranges->length() * 2 != length) {
|
if (ranges->length() * 2 != length) {
|
||||||
@ -91,6 +93,8 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
bool RegExpCharacterClass::is_standard(Zone* zone) {
|
bool RegExpCharacterClass::is_standard(Zone* zone) {
|
||||||
// TODO(lrn): Remove need for this function, by not throwing away information
|
// TODO(lrn): Remove need for this function, by not throwing away information
|
||||||
// along the way.
|
// along the way.
|
||||||
@ -442,6 +446,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
|
int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
|
||||||
RegExpAtom* atom1 = (*a)->AsAtom();
|
RegExpAtom* atom1 = (*a)->AsAtom();
|
||||||
RegExpAtom* atom2 = (*b)->AsAtom();
|
RegExpAtom* atom2 = (*b)->AsAtom();
|
||||||
@ -464,7 +470,7 @@ int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) {
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static unibrow::uchar Canonical(
|
unibrow::uchar Canonical(
|
||||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
|
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
|
||||||
unibrow::uchar c) {
|
unibrow::uchar c) {
|
||||||
unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
|
unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
|
||||||
@ -491,6 +497,8 @@ int CompareFirstCharCaseIndependent(
|
|||||||
}
|
}
|
||||||
#endif // V8_INTL_SUPPORT
|
#endif // V8_INTL_SUPPORT
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
// We can stable sort runs of atoms, since the order does not matter if they
|
// We can stable sort runs of atoms, since the order does not matter if they
|
||||||
// start with different characters.
|
// start with different characters.
|
||||||
// Returns true if any consecutive atoms were found.
|
// Returns true if any consecutive atoms were found.
|
||||||
@ -1035,8 +1043,10 @@ RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
|
|||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void AddClass(const int* elmv, int elmc,
|
namespace {
|
||||||
ZoneList<CharacterRange>* ranges, Zone* zone) {
|
|
||||||
|
void AddClass(const int* elmv, int elmc, ZoneList<CharacterRange>* ranges,
|
||||||
|
Zone* zone) {
|
||||||
elmc--;
|
elmc--;
|
||||||
DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
|
DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
|
||||||
for (int i = 0; i < elmc; i += 2) {
|
for (int i = 0; i < elmc; i += 2) {
|
||||||
@ -1045,8 +1055,8 @@ static void AddClass(const int* elmv, int elmc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void AddClassNegated(const int* elmv, int elmc,
|
void AddClassNegated(const int* elmv, int elmc,
|
||||||
ZoneList<CharacterRange>* ranges, Zone* zone) {
|
ZoneList<CharacterRange>* ranges, Zone* zone) {
|
||||||
elmc--;
|
elmc--;
|
||||||
DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
|
DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
|
||||||
DCHECK_NE(0x0000, elmv[0]);
|
DCHECK_NE(0x0000, elmv[0]);
|
||||||
@ -1061,6 +1071,8 @@ static void AddClassNegated(const int* elmv, int elmc,
|
|||||||
ranges->Add(CharacterRange::Range(last, kMaxCodePoint), zone);
|
ranges->Add(CharacterRange::Range(last, kMaxCodePoint), zone);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set,
|
void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set,
|
||||||
ZoneList<CharacterRange>* ranges,
|
ZoneList<CharacterRange>* ranges,
|
||||||
bool add_unicode_case_equivalents,
|
bool add_unicode_case_equivalents,
|
||||||
@ -1268,10 +1280,11 @@ ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
|
|||||||
return ranges_;
|
return ranges_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// Move a number of elements in a zonelist to another position
|
// Move a number of elements in a zonelist to another position
|
||||||
// in the same list. Handles overlapping source and target areas.
|
// in the same list. Handles overlapping source and target areas.
|
||||||
static void MoveRanges(ZoneList<CharacterRange>* list, int from, int to,
|
void MoveRanges(ZoneList<CharacterRange>* list, int from, int to, int count) {
|
||||||
int count) {
|
|
||||||
// Ranges are potentially overlapping.
|
// Ranges are potentially overlapping.
|
||||||
if (from < to) {
|
if (from < to) {
|
||||||
for (int i = count - 1; i >= 0; i--) {
|
for (int i = count - 1; i >= 0; i--) {
|
||||||
@ -1284,8 +1297,8 @@ static void MoveRanges(ZoneList<CharacterRange>* list, int from, int to,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
|
int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
|
||||||
CharacterRange insert) {
|
CharacterRange insert) {
|
||||||
// Inserts a range into list[0..count[, which must be sorted
|
// Inserts a range into list[0..count[, which must be sorted
|
||||||
// by from value and non-overlapping and non-adjacent, using at most
|
// by from value and non-overlapping and non-adjacent, using at most
|
||||||
// list[0..count] for the result. Returns the number of resulting
|
// list[0..count] for the result. Returns the number of resulting
|
||||||
@ -1340,6 +1353,8 @@ static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
|
|||||||
return count - (end_pos - start_pos) + 1;
|
return count - (end_pos - start_pos) + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
void CharacterSet::Canonicalize() {
|
void CharacterSet::Canonicalize() {
|
||||||
// Special/default classes are always considered canonical. The result
|
// Special/default classes are always considered canonical. The result
|
||||||
// of calling ranges() will be sorted.
|
// of calling ranges() will be sorted.
|
||||||
@ -1405,6 +1420,8 @@ void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// Scoped object to keep track of how much we unroll quantifier loops in the
|
// Scoped object to keep track of how much we unroll quantifier loops in the
|
||||||
// regexp graph generator.
|
// regexp graph generator.
|
||||||
class RegExpExpansionLimiter {
|
class RegExpExpansionLimiter {
|
||||||
@ -1442,6 +1459,8 @@ class RegExpExpansionLimiter {
|
|||||||
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
|
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
|
RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
|
||||||
RegExpTree* body, RegExpCompiler* compiler,
|
RegExpTree* body, RegExpCompiler* compiler,
|
||||||
RegExpNode* on_success,
|
RegExpNode* on_success,
|
||||||
|
@ -748,14 +748,11 @@ bool ContainsOnlyUtf16CodeUnits(unibrow::uchar* chars, int length) {
|
|||||||
}
|
}
|
||||||
#endif // DEBUG
|
#endif // DEBUG
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
// Returns the number of characters in the equivalence class, omitting those
|
// Returns the number of characters in the equivalence class, omitting those
|
||||||
// that cannot occur in the source string because it is Latin1.
|
// that cannot occur in the source string because it is Latin1.
|
||||||
static int GetCaseIndependentLetters(Isolate* isolate, base::uc16 character,
|
int GetCaseIndependentLetters(Isolate* isolate, base::uc16 character,
|
||||||
bool one_byte_subject,
|
bool one_byte_subject, unibrow::uchar* letters,
|
||||||
unibrow::uchar* letters,
|
int letter_length) {
|
||||||
int letter_length) {
|
|
||||||
#ifdef V8_INTL_SUPPORT
|
#ifdef V8_INTL_SUPPORT
|
||||||
if (RegExpCaseFolding::IgnoreSet().contains(character)) {
|
if (RegExpCaseFolding::IgnoreSet().contains(character)) {
|
||||||
letters[0] = character;
|
letters[0] = character;
|
||||||
@ -815,10 +812,9 @@ static int GetCaseIndependentLetters(Isolate* isolate, base::uc16 character,
|
|||||||
#endif // V8_INTL_SUPPORT
|
#endif // V8_INTL_SUPPORT
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool EmitSimpleCharacter(Isolate* isolate,
|
inline bool EmitSimpleCharacter(Isolate* isolate, RegExpCompiler* compiler,
|
||||||
RegExpCompiler* compiler, base::uc16 c,
|
base::uc16 c, Label* on_failure, int cp_offset,
|
||||||
Label* on_failure, int cp_offset,
|
bool check, bool preloaded) {
|
||||||
bool check, bool preloaded) {
|
|
||||||
RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
||||||
bool bound_checked = false;
|
bool bound_checked = false;
|
||||||
if (!preloaded) {
|
if (!preloaded) {
|
||||||
@ -831,10 +827,9 @@ static inline bool EmitSimpleCharacter(Isolate* isolate,
|
|||||||
|
|
||||||
// Only emits non-letters (things that don't have case). Only used for case
|
// Only emits non-letters (things that don't have case). Only used for case
|
||||||
// independent matches.
|
// independent matches.
|
||||||
static inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
|
inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
|
||||||
base::uc16 c, Label* on_failure,
|
base::uc16 c, Label* on_failure, int cp_offset,
|
||||||
int cp_offset, bool check,
|
bool check, bool preloaded) {
|
||||||
bool preloaded) {
|
|
||||||
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
||||||
bool one_byte = compiler->one_byte();
|
bool one_byte = compiler->one_byte();
|
||||||
unibrow::uchar chars[4];
|
unibrow::uchar chars[4];
|
||||||
@ -861,9 +856,9 @@ static inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
|
|||||||
return checked;
|
return checked;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
|
bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
|
||||||
bool one_byte, base::uc16 c1,
|
bool one_byte, base::uc16 c1, base::uc16 c2,
|
||||||
base::uc16 c2, Label* on_failure) {
|
Label* on_failure) {
|
||||||
const uint32_t char_mask = CharMask(one_byte);
|
const uint32_t char_mask = CharMask(one_byte);
|
||||||
base::uc16 exor = c1 ^ c2;
|
base::uc16 exor = c1 ^ c2;
|
||||||
// Check whether exor has only one bit set.
|
// Check whether exor has only one bit set.
|
||||||
@ -892,9 +887,9 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
|
|||||||
|
|
||||||
// Only emits letters (things that have case). Only used for case independent
|
// Only emits letters (things that have case). Only used for case independent
|
||||||
// matches.
|
// matches.
|
||||||
static inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
|
inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
|
||||||
base::uc16 c, Label* on_failure,
|
base::uc16 c, Label* on_failure, int cp_offset,
|
||||||
int cp_offset, bool check, bool preloaded) {
|
bool check, bool preloaded) {
|
||||||
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
||||||
bool one_byte = compiler->one_byte();
|
bool one_byte = compiler->one_byte();
|
||||||
unibrow::uchar chars[4];
|
unibrow::uchar chars[4];
|
||||||
@ -932,9 +927,9 @@ static inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
|
void EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
|
||||||
Label* fall_through, Label* above_or_equal,
|
Label* fall_through, Label* above_or_equal,
|
||||||
Label* below) {
|
Label* below) {
|
||||||
if (below != fall_through) {
|
if (below != fall_through) {
|
||||||
masm->CheckCharacterLT(border, below);
|
masm->CheckCharacterLT(border, below);
|
||||||
if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
|
if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
|
||||||
@ -943,9 +938,9 @@ static void EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first,
|
void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first, int last,
|
||||||
int last, Label* fall_through,
|
Label* fall_through, Label* in_range,
|
||||||
Label* in_range, Label* out_of_range) {
|
Label* out_of_range) {
|
||||||
if (in_range == fall_through) {
|
if (in_range == fall_through) {
|
||||||
if (first == last) {
|
if (first == last) {
|
||||||
masm->CheckNotCharacter(first, out_of_range);
|
masm->CheckNotCharacter(first, out_of_range);
|
||||||
@ -964,11 +959,11 @@ static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first,
|
|||||||
|
|
||||||
// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
|
// even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
|
||||||
// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
|
// odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
|
||||||
static void EmitUseLookupTable(RegExpMacroAssembler* masm,
|
void EmitUseLookupTable(RegExpMacroAssembler* masm,
|
||||||
ZoneList<base::uc32>* ranges,
|
ZoneList<base::uc32>* ranges, uint32_t start_index,
|
||||||
uint32_t start_index, uint32_t end_index,
|
uint32_t end_index, base::uc32 min_char,
|
||||||
base::uc32 min_char, Label* fall_through,
|
Label* fall_through, Label* even_label,
|
||||||
Label* even_label, Label* odd_label) {
|
Label* odd_label) {
|
||||||
static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
|
static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
|
||||||
static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
|
static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
|
||||||
|
|
||||||
@ -1019,10 +1014,9 @@ static void EmitUseLookupTable(RegExpMacroAssembler* masm,
|
|||||||
if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
|
if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void CutOutRange(RegExpMacroAssembler* masm,
|
void CutOutRange(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
|
||||||
ZoneList<base::uc32>* ranges, uint32_t start_index,
|
uint32_t start_index, uint32_t end_index, uint32_t cut_index,
|
||||||
uint32_t end_index, uint32_t cut_index,
|
Label* even_label, Label* odd_label) {
|
||||||
Label* even_label, Label* odd_label) {
|
|
||||||
bool odd = (((cut_index - start_index) & 1) == 1);
|
bool odd = (((cut_index - start_index) & 1) == 1);
|
||||||
Label* in_range_label = odd ? odd_label : even_label;
|
Label* in_range_label = odd ? odd_label : even_label;
|
||||||
Label dummy;
|
Label dummy;
|
||||||
@ -1043,9 +1037,9 @@ static void CutOutRange(RegExpMacroAssembler* masm,
|
|||||||
|
|
||||||
// Unicode case. Split the search space into kSize spaces that are handled
|
// Unicode case. Split the search space into kSize spaces that are handled
|
||||||
// with recursion.
|
// with recursion.
|
||||||
static void SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
|
void SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
|
||||||
uint32_t end_index, uint32_t* new_start_index,
|
uint32_t end_index, uint32_t* new_start_index,
|
||||||
uint32_t* new_end_index, base::uc32* border) {
|
uint32_t* new_end_index, base::uc32* border) {
|
||||||
static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
|
static const uint32_t kSize = RegExpMacroAssembler::kTableSize;
|
||||||
static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
|
static const uint32_t kMask = RegExpMacroAssembler::kTableMask;
|
||||||
|
|
||||||
@ -1109,11 +1103,11 @@ static void SplitSearchSpace(ZoneList<base::uc32>* ranges, uint32_t start_index,
|
|||||||
// know that the character is in the range of min_char to max_char inclusive.
|
// know that the character is in the range of min_char to max_char inclusive.
|
||||||
// Either label can be nullptr indicating backtracking. Either label can also
|
// Either label can be nullptr indicating backtracking. Either label can also
|
||||||
// be equal to the fall_through label.
|
// be equal to the fall_through label.
|
||||||
static void GenerateBranches(RegExpMacroAssembler* masm,
|
void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<base::uc32>* ranges,
|
||||||
ZoneList<base::uc32>* ranges, uint32_t start_index,
|
uint32_t start_index, uint32_t end_index,
|
||||||
uint32_t end_index, base::uc32 min_char,
|
base::uc32 min_char, base::uc32 max_char,
|
||||||
base::uc32 max_char, Label* fall_through,
|
Label* fall_through, Label* even_label,
|
||||||
Label* even_label, Label* odd_label) {
|
Label* odd_label) {
|
||||||
DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);
|
DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);
|
||||||
DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);
|
DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);
|
||||||
|
|
||||||
@ -1221,10 +1215,10 @@ static void GenerateBranches(RegExpMacroAssembler* masm,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
|
void EmitCharClass(RegExpMacroAssembler* macro_assembler,
|
||||||
RegExpCharacterClass* cc, bool one_byte,
|
RegExpCharacterClass* cc, bool one_byte, Label* on_failure,
|
||||||
Label* on_failure, int cp_offset, bool check_offset,
|
int cp_offset, bool check_offset, bool preloaded,
|
||||||
bool preloaded, Zone* zone) {
|
Zone* zone) {
|
||||||
ZoneList<CharacterRange>* ranges = cc->ranges(zone);
|
ZoneList<CharacterRange>* ranges = cc->ranges(zone);
|
||||||
CharacterRange::Canonicalize(ranges);
|
CharacterRange::Canonicalize(ranges);
|
||||||
|
|
||||||
@ -1269,14 +1263,11 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// A new list with ascending entries. Each entry is a code unit
|
// Generate a flat list of range boundaries for consumption by
|
||||||
// where there is a boundary between code units that are part of
|
// GenerateBranches. See the comment on that function for how the list should
|
||||||
// the class and code units that are not. Normally we insert an
|
// be structured
|
||||||
// entry at zero which goes to the failure label, but if there
|
|
||||||
// was already one there we fall through for success on that entry.
|
|
||||||
// Subsequent entries have alternating meaning (success/failure).
|
|
||||||
ZoneList<base::uc32>* range_boundaries =
|
ZoneList<base::uc32>* range_boundaries =
|
||||||
zone->New<ZoneList<base::uc32>>(last_valid_range, zone);
|
zone->New<ZoneList<base::uc32>>(last_valid_range * 2, zone);
|
||||||
|
|
||||||
bool zeroth_entry_is_failure = !cc->is_negated();
|
bool zeroth_entry_is_failure = !cc->is_negated();
|
||||||
|
|
||||||
@ -1288,6 +1279,7 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
|
|||||||
} else {
|
} else {
|
||||||
range_boundaries->Add(range.from(), zone);
|
range_boundaries->Add(range.from(), zone);
|
||||||
}
|
}
|
||||||
|
// `+ 1` to convert from inclusive to exclusive `to`.
|
||||||
range_boundaries->Add(range.to() + 1, zone);
|
range_boundaries->Add(range.to() + 1, zone);
|
||||||
}
|
}
|
||||||
int end_index = range_boundaries->length() - 1;
|
int end_index = range_boundaries->length() - 1;
|
||||||
@ -1306,6 +1298,8 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
|
|||||||
macro_assembler->Bind(&fall_through);
|
macro_assembler->Bind(&fall_through);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
RegExpNode::~RegExpNode() = default;
|
RegExpNode::~RegExpNode() = default;
|
||||||
|
|
||||||
RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
|
RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
|
||||||
@ -1393,8 +1387,10 @@ void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
|
|||||||
return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
|
return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// Takes the left-most 1-bit and smears it out, setting all bits to its right.
|
// Takes the left-most 1-bit and smears it out, setting all bits to its right.
|
||||||
static inline uint32_t SmearBitsRight(uint32_t v) {
|
inline uint32_t SmearBitsRight(uint32_t v) {
|
||||||
v |= v >> 1;
|
v |= v >> 1;
|
||||||
v |= v >> 2;
|
v |= v >> 2;
|
||||||
v |= v >> 4;
|
v |= v >> 4;
|
||||||
@ -1403,6 +1399,8 @@ static inline uint32_t SmearBitsRight(uint32_t v) {
|
|||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
bool QuickCheckDetails::Rationalize(bool asc) {
|
bool QuickCheckDetails::Rationalize(bool asc) {
|
||||||
bool found_useful_op = false;
|
bool found_useful_op = false;
|
||||||
const uint32_t char_mask = CharMask(asc);
|
const uint32_t char_mask = CharMask(asc);
|
||||||
@ -1839,7 +1837,9 @@ bool RangeContainsLatin1Equivalents(CharacterRange range) {
|
|||||||
range.Contains(0x0178);
|
range.Contains(0x0178);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
|
namespace {
|
||||||
|
|
||||||
|
bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
|
||||||
for (int i = 0; i < ranges->length(); i++) {
|
for (int i = 0; i < ranges->length(); i++) {
|
||||||
// TODO(dcarney): this could be a lot more efficient.
|
// TODO(dcarney): this could be a lot more efficient.
|
||||||
if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
|
if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
|
||||||
@ -1847,6 +1847,8 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
RegExpNode* TextNode::FilterOneByte(int depth, RegExpFlags flags) {
|
RegExpNode* TextNode::FilterOneByte(int depth, RegExpFlags flags) {
|
||||||
if (info()->replacement_calculated) return replacement();
|
if (info()->replacement_calculated) return replacement();
|
||||||
if (depth < 0) return this;
|
if (depth < 0) return this;
|
||||||
@ -2266,18 +2268,22 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|||||||
on_success()->Emit(compiler, trace);
|
on_success()->Emit(compiler, trace);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
|
namespace {
|
||||||
|
|
||||||
|
bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
|
||||||
if (quick_check == nullptr) return false;
|
if (quick_check == nullptr) return false;
|
||||||
if (offset >= quick_check->characters()) return false;
|
if (offset >= quick_check->characters()) return false;
|
||||||
return quick_check->positions(offset)->determines_perfectly;
|
return quick_check->positions(offset)->determines_perfectly;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void UpdateBoundsCheck(int index, int* checked_up_to) {
|
void UpdateBoundsCheck(int index, int* checked_up_to) {
|
||||||
if (index > *checked_up_to) {
|
if (index > *checked_up_to) {
|
||||||
*checked_up_to = index;
|
*checked_up_to = index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
// We call this repeatedly to generate code for each pass over the text node.
|
// We call this repeatedly to generate code for each pass over the text node.
|
||||||
// The passes are in increasing order of difficulty because we hope one
|
// The passes are in increasing order of difficulty because we hope one
|
||||||
// of the first passes will fail in which case we are saved the work of the
|
// of the first passes will fail in which case we are saved the work of the
|
||||||
@ -2398,11 +2404,10 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
|
|||||||
bool read_backward,
|
bool read_backward,
|
||||||
RegExpNode* on_success) {
|
RegExpNode* on_success) {
|
||||||
DCHECK_NOT_NULL(ranges);
|
DCHECK_NOT_NULL(ranges);
|
||||||
ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(1, zone);
|
// TODO(jgruber): There's no fundamental need to create this
|
||||||
elms->Add(
|
// RegExpCharacterClass; we could refactor to avoid the allocation.
|
||||||
TextElement::CharClass(zone->New<RegExpCharacterClass>(zone, ranges)),
|
return zone->New<TextNode>(zone->New<RegExpCharacterClass>(zone, ranges),
|
||||||
zone);
|
read_backward, on_success);
|
||||||
return zone->New<TextNode>(elms, read_backward, on_success);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
|
TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
|
||||||
|
@ -971,8 +971,9 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
// Currently only used in an DCHECK.
|
namespace {
|
||||||
static bool IsSpecialClassEscape(base::uc32 c) {
|
|
||||||
|
bool IsSpecialClassEscape(base::uc32 c) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'd':
|
case 'd':
|
||||||
case 'D':
|
case 'D':
|
||||||
@ -985,6 +986,8 @@ static bool IsSpecialClassEscape(base::uc32 c) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// In order to know whether an escape is a backreference or not we have to scan
|
// In order to know whether an escape is a backreference or not we have to scan
|
||||||
|
@ -154,9 +154,11 @@ bool RegExp::IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp) {
|
|||||||
return RegExpUtils::IsUnmodifiedRegExp(isolate, regexp);
|
return RegExpUtils::IsUnmodifiedRegExp(isolate, regexp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
// Identifies the sort of regexps where the regexp engine is faster
|
// Identifies the sort of regexps where the regexp engine is faster
|
||||||
// than the code used for atom matches.
|
// than the code used for atom matches.
|
||||||
static bool HasFewDifferentCharacters(Handle<String> pattern) {
|
bool HasFewDifferentCharacters(Handle<String> pattern) {
|
||||||
int length = std::min(kMaxLookaheadForBoyerMoore, pattern->length());
|
int length = std::min(kMaxLookaheadForBoyerMoore, pattern->length());
|
||||||
if (length <= kPatternTooShortForBoyerMoore) return false;
|
if (length <= kPatternTooShortForBoyerMoore) return false;
|
||||||
const int kMod = 128;
|
const int kMod = 128;
|
||||||
@ -176,6 +178,8 @@ static bool HasFewDifferentCharacters(Handle<String> pattern) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
// Generic RegExp methods. Dispatches to implementation specific methods.
|
// Generic RegExp methods. Dispatches to implementation specific methods.
|
||||||
|
|
||||||
// static
|
// static
|
||||||
@ -332,9 +336,11 @@ void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
|
|||||||
re, pattern, JSRegExp::AsJSRegExpFlags(flags), match_pattern);
|
re, pattern, JSRegExp::AsJSRegExpFlags(flags), match_pattern);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetAtomLastCapture(Isolate* isolate,
|
namespace {
|
||||||
Handle<RegExpMatchInfo> last_match_info,
|
|
||||||
String subject, int from, int to) {
|
void SetAtomLastCapture(Isolate* isolate,
|
||||||
|
Handle<RegExpMatchInfo> last_match_info, String subject,
|
||||||
|
int from, int to) {
|
||||||
SealHandleScope shs(isolate);
|
SealHandleScope shs(isolate);
|
||||||
last_match_info->SetNumberOfCaptureRegisters(2);
|
last_match_info->SetNumberOfCaptureRegisters(2);
|
||||||
last_match_info->SetLastSubject(subject);
|
last_match_info->SetLastSubject(subject);
|
||||||
@ -343,6 +349,8 @@ static void SetAtomLastCapture(Isolate* isolate,
|
|||||||
last_match_info->SetCapture(1, to);
|
last_match_info->SetCapture(1, to);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
|
int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
|
||||||
Handle<String> subject, int index, int32_t* output,
|
Handle<String> subject, int index, int32_t* output,
|
||||||
int output_size) {
|
int output_size) {
|
||||||
|
Loading…
Reference in New Issue
Block a user