Implement the $ (end-of-input) assertion for non-multiline regexps in Irregexp.

Review URL: http://codereview.chromium.org/10992

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@862 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2008-11-28 08:53:53 +00:00
parent 2b72eeedfb
commit 4722a886ec
16 changed files with 195 additions and 71 deletions

View File

@ -247,6 +247,16 @@ void IrregexpAssembler::CheckNotBackReferenceNoCase(int capture_index,
}
// Emits a BC_CHECK_NOT_REGS_EQUAL instruction into the bytecode stream.
//
// The emission order is the instruction encoding: the opcode, then reg1,
// then reg2, then the branch target (emitted or linked through EmitOrLink).
// This matches the bytecode table entry "check_not_regs_equal reg1 reg2
// addr32" and the interpreter, which reads the operands at pc[1] and pc[2]
// and the target at pc + 3.
//
// reg1, reg2:    indices of the two registers to compare.
// on_not_equal:  label to branch to when the registers differ.
void IrregexpAssembler::CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) {
Emit(BC_CHECK_NOT_REGS_EQUAL);
Emit(reg1);
Emit(reg2);
EmitOrLink(on_not_equal);
}
void IrregexpAssembler::CheckRegister(int byte_code,
int reg_index,
uint16_t vs,

View File

@ -103,6 +103,7 @@ class IrregexpAssembler {
// on_mismatch label will never be called.
void CheckNotBackReference(int capture_index, Label* on_mismatch);
void CheckNotBackReferenceNoCase(int capture_index, Label* on_mismatch);
void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
// Checks a register for strictly-less-than or greater-than-or-equal.
void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than);

View File

@ -58,12 +58,13 @@ V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 25, 6) /* check_not_back_ref_no_case captu... */ \
V(LOOKUP_MAP1, 26, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 27, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 28, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 29, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 30, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 31, 8) /* check_reg_ge register_index value16 addr32 */
V(CHECK_NOT_REGS_EQUAL, 26, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP1, 27, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 28, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 29, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 30, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 31, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 32, 8) /* check_reg_ge register_index value16 addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;

View File

@ -204,6 +204,7 @@ DEFINE_bool(irregexp, false, "new regular expression code")
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(irregexp_native, false, "use native code Irregexp implementation (IA32 only)")
DEFINE_bool(disable_jscre, false, "abort if JSCRE is used. Only useful with --irregexp")
DEFINE_bool(attempt_multiline_irregexp, false, "attempt to use Irregexp for multiline regexps")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_bool(testing_bool_flag, true, "testing_bool_flag")

View File

@ -323,6 +323,13 @@ static bool RawMatch(const byte* code_base,
pc = code_base + Load32(new_pc);
break;
}
BYTECODE(CHECK_NOT_REGS_EQUAL)
// Operands: pc[1] and pc[2] are the indices of the registers to compare;
// a 32-bit jump target starts at pc + 3.
if (registers[pc[1]] == registers[pc[2]]) {
// Registers are equal: continue with the instruction that follows.
pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
} else {
// Registers differ: jump to the target, which is an offset from code_base.
pc = code_base + Load32(pc + 3);
}
break;
BYTECODE(CHECK_NOT_BACK_REF) {
int from = registers[pc[1]];
int len = registers[pc[1] + 1] - from;

View File

@ -992,8 +992,7 @@ bool RegExpNode::GoTo(RegExpCompiler* compiler) {
// TODO(erikcorry): Implement support.
if (info_.follows_word_interest ||
info_.follows_newline_interest ||
info_.follows_start_interest ||
info_.at_end) {
info_.follows_start_interest) {
return false;
}
if (label_.is_bound()) {
@ -1014,25 +1013,17 @@ bool RegExpNode::GoTo(RegExpCompiler* compiler) {
}
// EndNodes are special. Because they can be very common and they are very
// short we normally inline them. That is, if we are asked to emit a GoTo
// we just emit the entire node. Since they don't have successors this
// works.
bool EndNode::GoTo(RegExpCompiler* compiler) {
if (info()->follows_word_interest ||
info()->follows_newline_interest ||
info()->follows_start_interest ||
info()->at_end) {
info()->follows_start_interest) {
return false;
}
if (!label()->is_bound()) {
Bind(compiler->macro_assembler());
}
switch (action_) {
case ACCEPT:
compiler->macro_assembler()->Succeed();
break;
case BACKTRACK:
compiler->macro_assembler()->Backtrack();
break;
}
return true;
return Emit(compiler);
}
@ -1045,11 +1036,20 @@ bool EndNode::Emit(RegExpCompiler* compiler) {
RegExpMacroAssembler* macro = compiler->macro_assembler();
switch (action_) {
case ACCEPT:
Bind(macro);
if (!label()->is_bound()) Bind(macro);
if (info()->at_end) {
Label succeed;
// LoadCurrentCharacter will go to the label if we are at the end of the
// input string.
macro->LoadCurrentCharacter(0, &succeed);
macro->Backtrack();
macro->Bind(&succeed);
}
macro->Succeed();
return true;
case BACKTRACK:
Bind(macro);
if (!label()->is_bound()) Bind(macro);
ASSERT(!info()->at_end);
macro->Backtrack();
return true;
}
@ -1088,13 +1088,6 @@ ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
}
ActionNode* ActionNode::SavePosition(int reg, RegExpNode* on_success) {
ActionNode* result = new ActionNode(SAVE_POSITION, on_success);
result->data_.u_position_register.reg = reg;
return result;
}
ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
result->data_.u_position_register.reg = reg;
@ -1102,16 +1095,27 @@ ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
}
ActionNode* ActionNode::BeginSubmatch(int reg, RegExpNode* on_success) {
ActionNode* ActionNode::BeginSubmatch(int stack_reg,
int position_reg,
RegExpNode* on_success) {
ActionNode* result = new ActionNode(BEGIN_SUBMATCH, on_success);
result->data_.u_submatch_stack_pointer_register.reg = reg;
result->data_.u_submatch.stack_pointer_register = stack_reg;
result->data_.u_submatch.current_position_register = position_reg;
return result;
}
ActionNode* ActionNode::EscapeSubmatch(int reg, RegExpNode* on_success) {
ActionNode* ActionNode::EscapeSubmatch(int stack_reg,
bool restore_position,
int position_reg,
RegExpNode* on_success) {
ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success);
result->data_.u_submatch_stack_pointer_register.reg = reg;
result->data_.u_submatch.stack_pointer_register = stack_reg;
if (restore_position) {
result->data_.u_submatch.current_position_register = position_reg;
} else {
result->data_.u_submatch.current_position_register = -1;
}
return result;
}
@ -1320,7 +1324,12 @@ bool TextNode::Emit(RegExpCompiler* compiler) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
Bind(macro_assembler);
int element_count = elms_->length();
ASSERT(element_count != 0);
int cp_offset = 0;
if (info()->at_end) {
macro_assembler->Backtrack();
return true;
}
// First, handle straight character matches.
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
@ -1481,21 +1490,31 @@ bool ActionNode::Emit(RegExpCompiler* compiler) {
macro->Backtrack();
break;
}
case SAVE_POSITION:
macro->WriteCurrentPositionToRegister(
data_.u_position_register.reg);
break;
case RESTORE_POSITION:
macro->ReadCurrentPositionFromRegister(
data_.u_position_register.reg);
break;
case BEGIN_SUBMATCH:
macro->WriteCurrentPositionToRegister(
data_.u_submatch.current_position_register);
macro->WriteStackPointerToRegister(
data_.u_submatch_stack_pointer_register.reg);
data_.u_submatch.stack_pointer_register);
break;
case ESCAPE_SUBMATCH:
if (info()->at_end) {
Label at_end;
// Load current character jumps to the label if we are beyond the string
// end.
macro->LoadCurrentCharacter(0, &at_end);
macro->Backtrack();
macro->Bind(&at_end);
}
if (data_.u_submatch.current_position_register != -1) {
macro->ReadCurrentPositionFromRegister(
data_.u_submatch.current_position_register);
}
macro->ReadStackPointerFromRegister(
data_.u_submatch_stack_pointer_register.reg);
data_.u_submatch.stack_pointer_register);
break;
default:
UNREACHABLE();
@ -1513,10 +1532,16 @@ bool BackReferenceNode::Emit(RegExpCompiler* compiler) {
macro->IfRegisterLT(start_reg_, 0, on_success()->label());
macro->IfRegisterLT(end_reg_, 0, on_success()->label());
ASSERT_EQ(start_reg_ + 1, end_reg_);
if (compiler->ignore_case()) {
macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
if (info()->at_end) {
// If we are constrained to match at the end of the input then succeed
// iff the back reference is empty.
macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label());
} else {
macro->CheckNotBackReference(start_reg_, on_failure_->label());
if (compiler->ignore_case()) {
macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
} else {
macro->CheckNotBackReference(start_reg_, on_failure_->label());
}
}
return on_success()->GoTo(compiler);
}
@ -1751,16 +1776,13 @@ void DotPrinter::VisitAction(ActionNode* that) {
stream()->Add("label=\"$%i:=$pos\", shape=octagon",
that->data_.u_position_register.reg);
break;
case ActionNode::SAVE_POSITION:
stream()->Add("label=\"$%i:=$pos\", shape=octagon",
that->data_.u_position_register.reg);
break;
case ActionNode::RESTORE_POSITION:
stream()->Add("label=\"$pos:=$%i\", shape=octagon",
that->data_.u_position_register.reg);
break;
case ActionNode::BEGIN_SUBMATCH:
stream()->Add("label=\"begin\", shape=septagon");
stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
that->data_.u_submatch.current_position_register);
break;
case ActionNode::ESCAPE_SUBMATCH:
stream()->Add("label=\"escape\", shape=septagon");
@ -1991,15 +2013,15 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
// fail
return ActionNode::BeginSubmatch(
stack_pointer_register,
ActionNode::SavePosition(
position_register,
body()->ToNode(
compiler,
ActionNode::RestorePosition(
position_register,
ActionNode::EscapeSubmatch(stack_pointer_register,
on_success)),
on_failure)));
position_register,
body()->ToNode(
compiler,
ActionNode::EscapeSubmatch(
stack_pointer_register,
true, // Also restore input position.
position_register,
on_success),
on_failure));
} else {
// begin submatch scope
// try
@ -2018,14 +2040,16 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
on_success));
RegExpNode* body_node = body()->ToNode(
compiler,
ActionNode::EscapeSubmatch(stack_pointer_register, on_failure),
ActionNode::EscapeSubmatch(stack_pointer_register,
false, // Don't also restore position
0, // Unused arguments.
on_failure),
compiler->backtrack());
GuardedAlternative body_alt(body_node);
try_node->AddAlternative(body_alt);
return ActionNode::BeginSubmatch(stack_pointer_register,
ActionNode::SavePosition(
position_register,
try_node));
position_register,
try_node);
}
}
@ -2270,7 +2294,9 @@ RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
ActionNode* action = new ActionNode(*this);
action->info()->AddFromPreceding(&full_info);
AddSibling(action);
action->set_on_success(action->on_success()->PropagateForward(info));
if (type_ != ESCAPE_SUBMATCH) {
action->set_on_success(action->on_success()->PropagateForward(info));
}
return action;
}
@ -2292,6 +2318,9 @@ RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
alternative.set_node(alternative.node()->PropagateForward(info));
choice->alternatives()->Add(alternative);
}
if (!choice->on_failure_->IsBacktrack()) {
choice->on_failure_ = choice->on_failure_->PropagateForward(info);
}
return choice;
}
@ -2302,7 +2331,21 @@ RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
return PropagateToEndpoint(this, info);
NodeInfo full_info(*this->info());
full_info.AddFromPreceding(info);
RegExpNode* sibling = GetSibling(&full_info);
if (sibling != NULL) return sibling;
EnsureSiblings();
BackReferenceNode* back_ref = new BackReferenceNode(*this);
back_ref->info()->AddFromPreceding(&full_info);
AddSibling(back_ref);
// TODO(erikcorry): A back reference has to have two successors (by default
// the same node). The first is used if the back reference matches a non-
// empty back reference, the second if it matches an empty one. This doesn't
// matter for at_end, which is the only one implemented right now, but it will
// matter for other pieces of info.
back_ref->set_on_success(back_ref->on_success()->PropagateForward(info));
return back_ref;
}
@ -2672,6 +2715,10 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
return Handle<FixedArray>::null();
}
if (is_multiline && !FLAG_attempt_multiline_irregexp) {
return Handle<FixedArray>::null();
}
if (FLAG_irregexp_native) {
#ifdef ARM
UNIMPLEMENTED();

View File

@ -576,7 +576,6 @@ class ActionNode: public SeqRegExpNode {
STORE_REGISTER,
INCREMENT_REGISTER,
STORE_POSITION,
SAVE_POSITION,
RESTORE_POSITION,
BEGIN_SUBMATCH,
ESCAPE_SUBMATCH
@ -584,10 +583,14 @@ class ActionNode: public SeqRegExpNode {
static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg, RegExpNode* on_success);
static ActionNode* SavePosition(int reg, RegExpNode* on_success);
static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
static ActionNode* BeginSubmatch(int reg, RegExpNode* on_success);
static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success);
static ActionNode* BeginSubmatch(int stack_pointer_reg,
int position_reg,
RegExpNode* on_success);
static ActionNode* EscapeSubmatch(int stack_pointer_reg,
bool and_restore_position,
int restore_reg,
RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info);
@ -604,8 +607,9 @@ class ActionNode: public SeqRegExpNode {
int reg;
} u_position_register;
struct {
int reg;
} u_submatch_stack_pointer_register;
int stack_pointer_register;
int current_position_register;
} u_submatch;
} data_;
ActionNode(Type type, RegExpNode* on_success)
: SeqRegExpNode(on_success),

View File

@ -236,6 +236,16 @@ void RegExpMacroAssemblerIA32::CheckNotBackReference(
}
// Generates IA32 code that compares two regexp registers and goes to
// on_not_equal (through BranchOrBacktrack) when their values differ.
//
// reg1, reg2:    indices of the registers to compare.
// on_not_equal:  label passed to BranchOrBacktrack for the not_equal case.
//
// The generated code overwrites eax and ecx.
void RegExpMacroAssemblerIA32::CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) {
// Load both register values; register_location() presumably yields the
// operand where the given regexp register is stored -- confirm.
__ mov(eax, register_location(reg1));
__ mov(ecx, register_location(reg2));
__ cmp(ecx, Operand(eax));
BranchOrBacktrack(not_equal, on_not_equal);
}
void RegExpMacroAssemblerIA32::CheckNotCharacter(uc16 c, Label* on_not_equal) {
__ cmp(edx, c);
BranchOrBacktrack(not_equal, on_not_equal);

View File

@ -52,6 +52,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,

View File

@ -206,6 +206,13 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
}
// Bytecode back end for CheckNotRegistersEqual: forwards the request,
// with its arguments unchanged, to the underlying assembler_.
//
// reg1, reg2:    indices of the registers to compare.
// on_not_equal:  label to go to when the registers differ.
void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) {
assembler_->CheckNotRegistersEqual(reg1, reg2, on_not_equal);
}
void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start,
Label* bitmap,
Label* on_zero) {

View File

@ -67,6 +67,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);

View File

@ -194,6 +194,18 @@ void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
assembler_->CheckNotBackReferenceIgnoreCase(start_reg, on_no_match);
}
// Tracing wrapper for CheckNotRegistersEqual: prints the call and its
// arguments with PrintF, then forwards the call unchanged to the wrapped
// assembler_.
//
// reg1, reg2:    indices of the registers to compare.
// on_not_equal:  label to go to when the registers differ.
void RegExpMacroAssemblerTracer::CheckNotRegistersEqual(int reg1,
int reg2,
Label* on_not_equal) {
// NOTE(review): on_not_equal is a pointer but is formatted with %08x;
// this looks like the tracer's existing convention -- confirm it is
// acceptable on targets where pointers are wider than unsigned int.
PrintF(" CheckNotRegistersEqual(reg1=%d, reg2=%d, label[%08x]);\n",
reg1,
reg2,
on_not_equal);
assembler_->CheckNotRegistersEqual(reg1, reg2, on_not_equal);
}
void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure) {

View File

@ -54,6 +54,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
Label* on_no_match);
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c,
uc16 or_with,

View File

@ -93,6 +93,7 @@ class RegExpMacroAssembler {
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
uc16 minus_then_or_with,
Label* on_not_equal) = 0;
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal) = 0;
// Dispatch after looking the current character up in a byte map. The
// destinations vector has up to 256 labels.
virtual void DispatchByteMap(

View File

@ -1226,5 +1226,5 @@ TEST(CharacterRangeCaseIndependence) {
TEST(Graph) {
V8::Initialize(NULL);
Execute("(?:foo|bar$)", false, true);
Execute("foo$(?!bar)", false, true);
}

View File

@ -244,3 +244,23 @@ assertEquals("bar$00", "foox".replace(re, "bar$00"), "$00");
assertEquals("bar$000", "foox".replace(re, "bar$000"), "$000");
assertEquals("barx", "foox".replace(re, "bar$01"), "$01 2");
assertEquals("barx5", "foox".replace(re, "bar$15"), "$15");
assertFalse(/()foo$\1/.test("football"), "football1");
assertFalse(/foo$(?=ball)/.test("football"), "football2");
assertFalse(/foo$(?!bar)/.test("football"), "football3");
assertTrue(/()foo$\1/.test("foo"), "football4");
assertTrue(/foo$(?=(ball)?)/.test("foo"), "football5");
assertTrue(/()foo$(?!bar)/.test("foo"), "football6");
assertFalse(/(x?)foo$\1/.test("football"), "football7");
assertFalse(/foo$(?=ball)/.test("football"), "football8");
assertFalse(/foo$(?!bar)/.test("football"), "football9");
assertTrue(/(x?)foo$\1/.test("foo"), "football10");
assertTrue(/foo$(?=(ball)?)/.test("foo"), "football11");
assertTrue(/foo$(?!bar)/.test("foo"), "football12");
// Check that the back reference has two successors. See
// BackReferenceNode::PropagateForward.
assertFalse(/f(o)\b\1/.test('foo'));
assertTrue(/f(o)\B\1/.test('foo'));
assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");