Switch regexp strategy for regexps that are just plain

strings with a small alphabet.  We already have code
that handles these regexps well, we were just not always
activating it.
Review URL: https://chromiumcodereview.appspot.com/9959096

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11218 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2012-04-03 12:24:55 +00:00
parent d37104f6bc
commit 965fea65c2

View File

@ -108,6 +108,36 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re,
} }
// More makes code generation slower, less makes V8 benchmark score lower.
const int kMaxLookaheadForBoyerMoore = 8;
// In a 3-character pattern you can maximally step forwards 3 characters
// at a time, which is not always enough to pay for the extra logic.
const int kPatternTooShortForBoyerMoore = 2;
// Identifies the sort of regexps where the regexp engine is faster
// than the code used for atom matches.
static bool HasFewDifferentCharacters(Handle<String> pattern) {
int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
if (length <= kPatternTooShortForBoyerMoore) return false;
const int kMod = 128;
bool character_found[kMod];
int different = 0;
memset(&character_found[0], 0, sizeof(character_found));
for (int i = 0; i < length; i++) {
int ch = (pattern->Get(i) & (kMod - 1));
if (!character_found[ch]) {
character_found[ch] = true;
different++;
// We declare a regexp low-alphabet if it has at least 3 times as many
// characters as it has different characters.
if (different * 3 > length) return false;
}
}
return true;
}
// Generic RegExp methods. Dispatches to implementation specific methods. // Generic RegExp methods. Dispatches to implementation specific methods.
@ -141,9 +171,14 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
return Handle<Object>::null(); return Handle<Object>::null();
} }
if (parse_result.simple && !flags.is_ignore_case()) { bool has_been_compiled = false;
if (parse_result.simple &&
!flags.is_ignore_case() &&
!HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern. // Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern); AtomCompile(re, pattern, flags, pattern);
has_been_compiled = true;
} else if (parse_result.tree->IsAtom() && } else if (parse_result.tree->IsAtom() &&
!flags.is_ignore_case() && !flags.is_ignore_case() &&
parse_result.capture_count == 0) { parse_result.capture_count == 0) {
@ -151,8 +186,12 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Vector<const uc16> atom_pattern = atom->data(); Vector<const uc16> atom_pattern = atom->data();
Handle<String> atom_string = Handle<String> atom_string =
isolate->factory()->NewStringFromTwoByte(atom_pattern); isolate->factory()->NewStringFromTwoByte(atom_pattern);
AtomCompile(re, pattern, flags, atom_string); if (!HasFewDifferentCharacters(atom_string)) {
} else { AtomCompile(re, pattern, flags, atom_string);
has_been_compiled = true;
}
}
if (!has_been_compiled) {
IrregexpInitialize(re, pattern, flags, parse_result.capture_count); IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
} }
ASSERT(re->data()->IsFixedArray()); ASSERT(re->data()->IsFixedArray());
@ -3429,6 +3468,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
return true; return true;
} }
/* Code generation for choice nodes. /* Code generation for choice nodes.
* *
* We generate quick checks that do a mask and compare to eliminate a * We generate quick checks that do a mask and compare to eliminate a
@ -3507,7 +3547,6 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
* \______________/ * \______________/
*/ */
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
int choice_count = alternatives_->length(); int choice_count = alternatives_->length();
@ -3578,9 +3617,6 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
bool skip_was_emitted = false; bool skip_was_emitted = false;
// More makes code generation slower, less makes V8 benchmark score lower.
const int kMaxLookaheadForBoyerMoore = 8;
if (!greedy_loop && choice_count == 2) { if (!greedy_loop && choice_count == 2) {
GuardedAlternative alt1 = alternatives_->at(1); GuardedAlternative alt1 = alternatives_->at(1);
if (alt1.guards() == NULL || alt1.guards()->length() == 0) { if (alt1.guards() == NULL || alt1.guards()->length() == 0) {