Switch regexp strategy for regexps that are just plain
strings with a small alphabet. We already have code that handles these regexps well; we were just not always activating it. Review URL: https://chromiumcodereview.appspot.com/9959096 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11218 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
d37104f6bc
commit
965fea65c2
@ -108,6 +108,36 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// A larger lookahead makes code generation slower; a smaller one makes the V8 benchmark score lower.
|
||||||
|
const int kMaxLookaheadForBoyerMoore = 8;
|
||||||
|
// In a 3-character pattern you can maximally step forwards 3 characters
|
||||||
|
// at a time, which is not always enough to pay for the extra logic.
|
||||||
|
const int kPatternTooShortForBoyerMoore = 2;
|
||||||
|
|
||||||
|
|
||||||
|
// Identifies the sort of regexps where the regexp engine is faster
|
||||||
|
// than the code used for atom matches.
|
||||||
|
static bool HasFewDifferentCharacters(Handle<String> pattern) {
|
||||||
|
int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
|
||||||
|
if (length <= kPatternTooShortForBoyerMoore) return false;
|
||||||
|
const int kMod = 128;
|
||||||
|
bool character_found[kMod];
|
||||||
|
int different = 0;
|
||||||
|
memset(&character_found[0], 0, sizeof(character_found));
|
||||||
|
for (int i = 0; i < length; i++) {
|
||||||
|
int ch = (pattern->Get(i) & (kMod - 1));
|
||||||
|
if (!character_found[ch]) {
|
||||||
|
character_found[ch] = true;
|
||||||
|
different++;
|
||||||
|
// We declare a regexp low-alphabet if it has at least 3 times as many
|
||||||
|
// characters as it has different characters.
|
||||||
|
if (different * 3 > length) return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Generic RegExp methods. Dispatches to implementation specific methods.
|
// Generic RegExp methods. Dispatches to implementation specific methods.
|
||||||
|
|
||||||
|
|
||||||
@ -141,9 +171,14 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
|
|||||||
return Handle<Object>::null();
|
return Handle<Object>::null();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (parse_result.simple && !flags.is_ignore_case()) {
|
bool has_been_compiled = false;
|
||||||
|
|
||||||
|
if (parse_result.simple &&
|
||||||
|
!flags.is_ignore_case() &&
|
||||||
|
!HasFewDifferentCharacters(pattern)) {
|
||||||
// Parse-tree is a single atom that is equal to the pattern.
|
// Parse-tree is a single atom that is equal to the pattern.
|
||||||
AtomCompile(re, pattern, flags, pattern);
|
AtomCompile(re, pattern, flags, pattern);
|
||||||
|
has_been_compiled = true;
|
||||||
} else if (parse_result.tree->IsAtom() &&
|
} else if (parse_result.tree->IsAtom() &&
|
||||||
!flags.is_ignore_case() &&
|
!flags.is_ignore_case() &&
|
||||||
parse_result.capture_count == 0) {
|
parse_result.capture_count == 0) {
|
||||||
@ -151,8 +186,12 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
|
|||||||
Vector<const uc16> atom_pattern = atom->data();
|
Vector<const uc16> atom_pattern = atom->data();
|
||||||
Handle<String> atom_string =
|
Handle<String> atom_string =
|
||||||
isolate->factory()->NewStringFromTwoByte(atom_pattern);
|
isolate->factory()->NewStringFromTwoByte(atom_pattern);
|
||||||
AtomCompile(re, pattern, flags, atom_string);
|
if (!HasFewDifferentCharacters(atom_string)) {
|
||||||
} else {
|
AtomCompile(re, pattern, flags, atom_string);
|
||||||
|
has_been_compiled = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!has_been_compiled) {
|
||||||
IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
|
IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
|
||||||
}
|
}
|
||||||
ASSERT(re->data()->IsFixedArray());
|
ASSERT(re->data()->IsFixedArray());
|
||||||
@ -3429,6 +3468,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Code generation for choice nodes.
|
/* Code generation for choice nodes.
|
||||||
*
|
*
|
||||||
* We generate quick checks that do a mask and compare to eliminate a
|
* We generate quick checks that do a mask and compare to eliminate a
|
||||||
@ -3507,7 +3547,6 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
|
|||||||
* \______________/
|
* \______________/
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||||
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
|
||||||
int choice_count = alternatives_->length();
|
int choice_count = alternatives_->length();
|
||||||
@ -3578,9 +3617,6 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
|||||||
|
|
||||||
bool skip_was_emitted = false;
|
bool skip_was_emitted = false;
|
||||||
|
|
||||||
// More makes code generation slower, less makes V8 benchmark score lower.
|
|
||||||
const int kMaxLookaheadForBoyerMoore = 8;
|
|
||||||
|
|
||||||
if (!greedy_loop && choice_count == 2) {
|
if (!greedy_loop && choice_count == 2) {
|
||||||
GuardedAlternative alt1 = alternatives_->at(1);
|
GuardedAlternative alt1 = alternatives_->at(1);
|
||||||
if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
|
if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
|
||||||
|
Loading…
Reference in New Issue
Block a user