Some irregexp optimizations around keeping track of when the current character register contains the next n characters.
Review URL: http://codereview.chromium.org/16410

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1014 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2008-12-22 12:48:14 +00:00
parent ed7d9e9c5b
commit 16852b987d
2 changed files with 38 additions and 13 deletions

View File

@ -1423,7 +1423,8 @@ bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
cp_offset_ != 0 ||
backtrack() != NULL ||
characters_preloaded_ != 0 ||
quick_check_performed_.characters() != 0);
quick_check_performed_.characters() != 0 ||
bound_checked_up_to_ != 0);
if (actions_ == NULL && backtrack() == NULL) {
// Here we just have some deferred cp advances to fix and we are back to
@ -1647,16 +1648,23 @@ static inline bool EmitAtomNonLetter(
static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
bool ascii,
uc16 c1,
uc16 c2,
Label* on_failure) {
uc16 char_mask;
if (ascii) {
char_mask = String::kMaxAsciiCharCode;
} else {
char_mask = String::kMaxUC16CharCode;
}
uc16 exor = c1 ^ c2;
// Check whether exor has only one bit set.
if (((exor - 1) & exor) == 0) {
// If c1 and c2 differ only by one bit.
// Ecma262UnCanonicalize always gives the highest number last.
ASSERT(c2 > c1);
uc16 mask = String::kMaxUC16CharCode ^ exor;
uc16 mask = char_mask ^ exor;
macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
return true;
}
@ -1667,7 +1675,7 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
// subtract the difference from the found character, then do the or
// trick. We avoid the theoretical case where negative numbers are
// involved in order to simplify code generation.
uc16 mask = String::kMaxUC16CharCode ^ diff;
uc16 mask = char_mask ^ diff;
macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
diff,
mask,
@ -1682,6 +1690,7 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
// matches.
static inline bool EmitAtomLetter(
RegExpMacroAssembler* macro_assembler,
bool ascii,
uc16 c,
Label* on_failure,
int cp_offset,
@ -1700,6 +1709,7 @@ static inline bool EmitAtomLetter(
switch (length) {
case 2: {
if (ShortCutEmitCharacterPair(macro_assembler,
ascii,
chars[0],
chars[1],
on_failure)) {
@ -2007,6 +2017,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
char_mask = String::kMaxUC16CharCode;
}
if ((mask & char_mask) == char_mask) need_mask = false;
mask &= char_mask;
} else {
// For 2-character preloads in ASCII mode we also use a 16 bit load with
// zero extend.
@ -2323,6 +2334,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
ASSERT(compiler->ignore_case());
bound_checked = EmitAtomLetter(assembler,
compiler->ascii(),
quarks[j],
backtrack,
cp_offset + j,
@ -2403,9 +2415,7 @@ bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
bool first_elt_done = false;
int bound_checked_to = variant->cp_offset() - 1;
QuickCheckDetails* quick_check = variant->quick_check_performed();
bound_checked_to += Max(quick_check->characters(),
variant->characters_preloaded());
bound_checked_to += variant->bound_checked_up_to();
// If a character is preloaded into the current character register then
// check that now.
@ -2472,6 +2482,7 @@ void GenerationVariant::AdvanceVariant(int by, bool ascii) {
// characters by means of mask and compare.
quick_check_performed_.Advance(by, ascii);
cp_offset_ += by;
bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
}
@ -2779,8 +2790,9 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
int first_normal_choice = greedy_loop ? 1 : 0;
int preload_characters = CalculatePreloadCharacters(compiler);
bool preload_is_current = false;
bool preload_has_checked_bounds = false;
bool preload_is_current =
(current_variant->characters_preloaded() == preload_characters);
bool preload_has_checked_bounds = preload_is_current;
AlternativeGenerationList alt_gens(choice_count);
@ -2792,11 +2804,13 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
alt_gen->quick_check_details.set_characters(preload_characters);
ZoneList<Guard*>* guards = alternative.guards();
int guard_count = (guards == NULL) ? 0 : guards->length();
GenerationVariant new_variant(*current_variant);
new_variant.set_characters_preloaded(preload_is_current ?
preload_characters :
0);
if (preload_has_checked_bounds) {
new_variant.set_bound_checked_up_to(preload_characters);
}
new_variant.quick_check_performed()->Clear();
alt_gen->expects_preload = preload_is_current;
bool generate_full_check_inline = false;
@ -2816,19 +2830,25 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
macro_assembler->Bind(&alt_gen->possible_success);
new_variant.set_quick_check_performed(&alt_gen->quick_check_details);
new_variant.set_characters_preloaded(preload_characters);
new_variant.set_bound_checked_up_to(preload_characters);
generate_full_check_inline = true;
}
} else {
// No quick check was generated. Put the full code here.
// If this is not the first choice then there could be slow checks from
// previous cases that go here when they fail. There's no reason to
// insist that they preload characters since the slow check we are about
// to generate probably can't use it.
if (i != first_normal_choice) {
alt_gen->expects_preload = false;
new_variant.set_characters_preloaded(0);
}
if (i < choice_count - 1) {
new_variant.set_backtrack(&alt_gen->after);
}
generate_full_check_inline = true;
}
if (generate_full_check_inline) {
if (preload_is_current) {
new_variant.set_characters_preloaded(preload_characters);
}
for (int j = 0; j < guard_count; j++) {
GenerateGuard(macro_assembler, guards->at(j), &new_variant);
}

View File

@ -1011,7 +1011,8 @@ class GenerationVariant {
backtrack_(NULL),
stop_node_(NULL),
loop_label_(NULL),
characters_preloaded_(0) { }
characters_preloaded_(0),
bound_checked_up_to_(0) { }
bool Flush(RegExpCompiler* compiler, RegExpNode* successor);
int cp_offset() { return cp_offset_; }
DeferredAction* actions() { return actions_; }
@ -1020,12 +1021,14 @@ class GenerationVariant {
actions_ == NULL &&
cp_offset_ == 0 &&
characters_preloaded_ == 0 &&
bound_checked_up_to_ == 0 &&
quick_check_performed_.characters() == 0;
}
Label* backtrack() { return backtrack_; }
Label* loop_label() { return loop_label_; }
RegExpNode* stop_node() { return stop_node_; }
int characters_preloaded() { return characters_preloaded_; }
int bound_checked_up_to() { return bound_checked_up_to_; }
QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
bool mentions_reg(int reg);
// These set methods and AdvanceVariant should be used only on new
@ -1040,6 +1043,7 @@ class GenerationVariant {
void set_stop_node(RegExpNode* node) { stop_node_ = node; }
void set_loop_label(Label* label) { loop_label_ = label; }
void set_characters_preloaded(int cpre) { characters_preloaded_ = cpre; }
void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
void set_quick_check_performed(QuickCheckDetails* d) {
quick_check_performed_ = *d;
}
@ -1063,6 +1067,7 @@ class GenerationVariant {
RegExpNode* stop_node_;
Label* loop_label_;
int characters_preloaded_;
int bound_checked_up_to_;
QuickCheckDetails quick_check_performed_;
};