From c956219ef42bc46761dcca209e07d7aee684fc9a Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Mon, 26 Jan 2009 13:04:49 +0000 Subject: [PATCH] * Remember to check for end of string even where we know the character class must match. Thanks to Mads and Christian for finding this bug Review URL: http://codereview.chromium.org/18750 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1150 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/jsregexp.cc | 9 +++++---- src/regexp-macro-assembler-ia32.h | 7 +++---- src/regexp-macro-assembler.h | 6 ++++++ test/mjsunit/regexp.js | 4 ++++ 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 14cab72bcb..f1daf81c87 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -1835,6 +1835,9 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, // ASCII optimizations for us. macro_assembler->GoTo(on_failure); } + if (check_offset) { + macro_assembler->CheckPosition(cp_offset, on_failure); + } return; } @@ -1842,10 +1845,8 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, !cc->is_negated() && ranges->at(0).IsEverything(max_char)) { // This is a common case hit by non-anchored expressions. - // TODO(erikcorry): We should have a macro assembler instruction that just - // checks for end of string without loading the character. if (check_offset) { - macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); + macro_assembler->CheckPosition(cp_offset, on_failure); } return; } @@ -2477,7 +2478,7 @@ bool AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { switch (type_) { case AT_END: { Label ok; - assembler->LoadCurrentCharacter(trace->cp_offset(), &ok); + assembler->CheckPosition(trace->cp_offset(), &ok); assembler->GoTo(trace->backtrack()); assembler->Bind(&ok); break; diff --git a/src/regexp-macro-assembler-ia32.h b/src/regexp-macro-assembler-ia32.h index dd74c655f7..116b03db48 100644 --- a/src/regexp-macro-assembler-ia32.h +++ b/src/regexp-macro-assembler-ia32.h @@ -71,6 +71,9 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { uc16 minus, uc16 mask, Label* on_not_equal); + // Checks whether the given offset from the current position is before + // the end of the string. + virtual void CheckPosition(int cp_offset, Label* on_outside_input); virtual bool CheckSpecialCharacterClass(uc16 type, int cp_offset, bool check_offset, @@ -171,10 +174,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { // This function must not trigger a garbage collection. static Address GrowStack(Address stack_top); - // Checks whether the given offset from the current position is before - // the end of the string. - void CheckPosition(int cp_offset, Label* on_outside_input); - // The ebp-relative location of a regexp register. Operand register_location(int register_index); diff --git a/src/regexp-macro-assembler.h b/src/regexp-macro-assembler.h index 74133d587e..0d27fac4bd 100644 --- a/src/regexp-macro-assembler.h +++ b/src/regexp-macro-assembler.h @@ -111,6 +111,12 @@ class RegExpMacroAssembler { virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal) = 0; + + // Checks whether the given offset from the current position is before + // the end of the string. May overwrite the current character. + virtual void CheckPosition(int cp_offset, Label* on_outside_input) { + LoadCurrentCharacter(cp_offset, on_outside_input, true); + } // Check whether a standard/default character class matches the current // character. Returns false if the type of special character class does // not have custom support. diff --git a/test/mjsunit/regexp.js b/test/mjsunit/regexp.js index 716b54be04..705754bef1 100644 --- a/test/mjsunit/regexp.js +++ b/test/mjsunit/regexp.js @@ -332,3 +332,7 @@ assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt5'); assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt6'); assertFalse(/xy([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt7'); assertFalse(/x([0-7]%%%x|[0-6]%%%y)/.test('x7%%%y'), 'qt8'); + + +// Don't hang on this one. +/[^\xfe-\xff]*/.test("");