Irregexp: Backtrack past look-aheads works correctly.
Allows backtracking to clear registers instead of pushing and popping them to restore state. Redo of 1135 with bug fixed. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1156 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
b7ca939e2f
commit
2de5de495f
14
src/ast.h
14
src/ast.h
@ -1521,9 +1521,15 @@ class RegExpCapture: public RegExpTree {
|
||||
|
||||
class RegExpLookahead: public RegExpTree {
|
||||
public:
|
||||
RegExpLookahead(RegExpTree* body, bool is_positive)
|
||||
RegExpLookahead(RegExpTree* body,
|
||||
bool is_positive,
|
||||
int capture_count,
|
||||
int capture_from)
|
||||
: body_(body),
|
||||
is_positive_(is_positive) { }
|
||||
is_positive_(is_positive),
|
||||
capture_count_(capture_count),
|
||||
capture_from_(capture_from) { }
|
||||
|
||||
virtual void* Accept(RegExpVisitor* visitor, void* data);
|
||||
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success);
|
||||
@ -1535,9 +1541,13 @@ class RegExpLookahead: public RegExpTree {
|
||||
virtual int max_match() { return 0; }
|
||||
RegExpTree* body() { return body_; }
|
||||
bool is_positive() { return is_positive_; }
|
||||
int capture_count() { return capture_count_; }
|
||||
int capture_from() { return capture_from_; }
|
||||
private:
|
||||
RegExpTree* body_;
|
||||
bool is_positive_;
|
||||
int capture_count_;
|
||||
int capture_from_;
|
||||
};
|
||||
|
||||
|
||||
|
187
src/jsregexp.cc
187
src/jsregexp.cc
@ -1361,41 +1361,44 @@ int Trace::FindAffectedRegisters(OutSet* affected_registers) {
|
||||
}
|
||||
|
||||
|
||||
void Trace::PushAffectedRegisters(RegExpMacroAssembler* assembler,
|
||||
int max_register,
|
||||
OutSet& affected_registers) {
|
||||
// Stay safe and check the stack limit once per half the limit's worth of pushes.
|
||||
// (Round up in case the limit is 1).
|
||||
int push_limit = (assembler->stack_limit_slack() + 1) / 2;
|
||||
for (int reg = 0, pushes = 0; reg <= max_register; reg++) {
|
||||
if (affected_registers.Get(reg)) {
|
||||
pushes++;
|
||||
RegExpMacroAssembler::StackCheckFlag check_stack_limit =
|
||||
(pushes % push_limit) == 0 ?
|
||||
RegExpMacroAssembler::kCheckStackLimit :
|
||||
RegExpMacroAssembler::kNoStackLimitCheck;
|
||||
assembler->PushRegister(reg, check_stack_limit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
|
||||
int max_register,
|
||||
OutSet& affected_registers) {
|
||||
OutSet& registers_to_pop,
|
||||
OutSet& registers_to_clear) {
|
||||
for (int reg = max_register; reg >= 0; reg--) {
|
||||
if (affected_registers.Get(reg)) assembler->PopRegister(reg);
|
||||
if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
|
||||
else if (registers_to_clear.Get(reg)) {
|
||||
int clear_to = reg;
|
||||
while (reg > 0 && registers_to_clear.Get(reg - 1)) {
|
||||
reg--;
|
||||
}
|
||||
assembler->ClearRegisters(reg, clear_to);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
||||
int max_register,
|
||||
OutSet& affected_registers) {
|
||||
OutSet& affected_registers,
|
||||
OutSet* registers_to_pop,
|
||||
OutSet* registers_to_clear) {
|
||||
// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
|
||||
const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
|
||||
|
||||
for (int reg = 0; reg <= max_register; reg++) {
|
||||
if (!affected_registers.Get(reg)) {
|
||||
continue;
|
||||
}
|
||||
// Count pushes performed to force a stack limit check occasionally.
|
||||
int pushes = 0;
|
||||
|
||||
// The chronologically first deferred action in the trace
|
||||
// is used to infer the action needed to restore a register
|
||||
// to its previous state (or not, if it's safe to ignore it).
|
||||
enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
|
||||
DeferredActionUndoType undo_action = IGNORE;
|
||||
|
||||
int value = 0;
|
||||
bool absolute = false;
|
||||
bool clear = false;
|
||||
@ -1410,8 +1413,16 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
||||
case ActionNode::SET_REGISTER: {
|
||||
Trace::DeferredSetRegister* psr =
|
||||
static_cast<Trace::DeferredSetRegister*>(action);
|
||||
value += psr->value();
|
||||
absolute = true;
|
||||
if (!absolute) {
|
||||
value += psr->value();
|
||||
absolute = true;
|
||||
}
|
||||
// SET_REGISTER is currently only used for newly introduced loop
|
||||
// counters. They can have a significant previous value if they
|
||||
// occur in a loop. TODO(lrn): Propagate this information, so
|
||||
// we can set undo_action to IGNORE if we know there is no value to
|
||||
// restore.
|
||||
undo_action = RESTORE;
|
||||
ASSERT_EQ(store_position, -1);
|
||||
ASSERT(!clear);
|
||||
break;
|
||||
@ -1422,6 +1433,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
||||
}
|
||||
ASSERT_EQ(store_position, -1);
|
||||
ASSERT(!clear);
|
||||
undo_action = RESTORE;
|
||||
break;
|
||||
case ActionNode::STORE_POSITION: {
|
||||
Trace::DeferredCapture* pc =
|
||||
@ -1429,6 +1441,19 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
||||
if (!clear && store_position == -1) {
|
||||
store_position = pc->cp_offset();
|
||||
}
|
||||
|
||||
// For captures we know that stores and clears alternate.
|
||||
// Other registers are never cleared, and if they occur
|
||||
// inside a loop, they might be assigned more than once.
|
||||
if (reg <= 1) {
|
||||
// Registers zero and one, aka "capture zero", are
|
||||
// always set correctly if we succeed. There is no
|
||||
// need to undo a setting on backtrack, because we
|
||||
// will set it again or fail.
|
||||
undo_action = IGNORE;
|
||||
} else {
|
||||
undo_action = pc->is_capture() ? CLEAR : RESTORE;
|
||||
}
|
||||
ASSERT(!absolute);
|
||||
ASSERT_EQ(value, 0);
|
||||
break;
|
||||
@ -1437,8 +1462,10 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
||||
// Since we're scanning in reverse order, if we've already
|
||||
// set the position we have to ignore historically earlier
|
||||
// clearing operations.
|
||||
if (store_position == -1)
|
||||
if (store_position == -1) {
|
||||
clear = true;
|
||||
}
|
||||
undo_action = RESTORE;
|
||||
ASSERT(!absolute);
|
||||
ASSERT_EQ(value, 0);
|
||||
break;
|
||||
@ -1449,10 +1476,27 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
|
||||
}
|
||||
}
|
||||
}
|
||||
// Prepare for the undo-action (e.g., push if it's going to be popped).
|
||||
if (undo_action == RESTORE) {
|
||||
pushes++;
|
||||
RegExpMacroAssembler::StackCheckFlag stack_check =
|
||||
RegExpMacroAssembler::kNoStackLimitCheck;
|
||||
if (pushes == push_limit) {
|
||||
stack_check = RegExpMacroAssembler::kCheckStackLimit;
|
||||
pushes = 0;
|
||||
}
|
||||
|
||||
assembler->PushRegister(reg, stack_check);
|
||||
registers_to_pop->Set(reg);
|
||||
} else if (undo_action == CLEAR) {
|
||||
registers_to_clear->Set(reg);
|
||||
}
|
||||
// Perform the chronologically last action (or accumulated increment)
|
||||
// for the register.
|
||||
if (store_position != -1) {
|
||||
assembler->WriteCurrentPositionToRegister(reg, store_position);
|
||||
} else if (clear) {
|
||||
assembler->ClearRegister(reg);
|
||||
assembler->ClearRegisters(reg, reg);
|
||||
} else if (absolute) {
|
||||
assembler->SetRegister(reg, value);
|
||||
} else if (value != 0) {
|
||||
@ -1487,9 +1531,15 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
|
||||
|
||||
// Generate deferred actions here along with code to undo them again.
|
||||
OutSet affected_registers;
|
||||
|
||||
int max_register = FindAffectedRegisters(&affected_registers);
|
||||
PushAffectedRegisters(assembler, max_register, affected_registers);
|
||||
PerformDeferredActions(assembler, max_register, affected_registers);
|
||||
OutSet registers_to_pop;
|
||||
OutSet registers_to_clear;
|
||||
PerformDeferredActions(assembler,
|
||||
max_register,
|
||||
affected_registers,
|
||||
&registers_to_pop,
|
||||
&registers_to_clear);
|
||||
if (backtrack() != NULL) {
|
||||
// Here we have a concrete backtrack location. These are set up by choice
|
||||
// nodes and so they indicate that we have a deferred save of the current
|
||||
@ -1512,7 +1562,10 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
|
||||
if (backtrack() != NULL) {
|
||||
assembler->PopCurrentPosition();
|
||||
}
|
||||
RestoreAffectedRegisters(assembler, max_register, affected_registers);
|
||||
RestoreAffectedRegisters(assembler,
|
||||
max_register,
|
||||
registers_to_pop,
|
||||
registers_to_clear);
|
||||
if (backtrack() == NULL) {
|
||||
assembler->Backtrack();
|
||||
} else {
|
||||
@ -1524,15 +1577,26 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
|
||||
|
||||
|
||||
bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
if (!trace->is_trivial()) {
|
||||
return trace->Flush(compiler, this);
|
||||
}
|
||||
RegExpMacroAssembler* assembler = compiler->macro_assembler();
|
||||
|
||||
// Omit flushing the trace. We discard the entire stack frame anyway.
|
||||
|
||||
if (!label()->is_bound()) {
|
||||
// We are completely independent of the trace, since we ignore it,
|
||||
// so this code can be used as the generic version.
|
||||
assembler->Bind(label());
|
||||
}
|
||||
|
||||
// Throw away everything on the backtrack stack since the start
|
||||
// of the negative submatch and restore the character position.
|
||||
assembler->ReadCurrentPositionFromRegister(current_position_register_);
|
||||
assembler->ReadStackPointerFromRegister(stack_pointer_register_);
|
||||
if (clear_capture_count_ > 0) {
|
||||
// Clear any captures that might have been performed during the success
|
||||
// of the body of the negative look-ahead.
|
||||
int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
|
||||
assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
|
||||
}
|
||||
// Now that we have unwound the stack we find at the top of the stack the
|
||||
// backtrack that the BeginSubmatch node got.
|
||||
assembler->Backtrack();
|
||||
@ -1588,9 +1652,12 @@ ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
|
||||
}
|
||||
|
||||
|
||||
ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
|
||||
ActionNode* ActionNode::StorePosition(int reg,
|
||||
bool is_capture,
|
||||
RegExpNode* on_success) {
|
||||
ActionNode* result = new ActionNode(STORE_POSITION, on_success);
|
||||
result->data_.u_position_register.reg = reg;
|
||||
result->data_.u_position_register.is_capture = is_capture;
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1616,10 +1683,14 @@ ActionNode* ActionNode::BeginSubmatch(int stack_reg,
|
||||
|
||||
ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
|
||||
int position_reg,
|
||||
int clear_register_count,
|
||||
int clear_register_from,
|
||||
RegExpNode* on_success) {
|
||||
ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
|
||||
result->data_.u_submatch.stack_pointer_register = stack_reg;
|
||||
result->data_.u_submatch.current_position_register = position_reg;
|
||||
result->data_.u_submatch.clear_register_count = clear_register_count;
|
||||
result->data_.u_submatch.clear_register_from = clear_register_from;
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -3172,7 +3243,9 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
switch (type_) {
|
||||
case STORE_POSITION: {
|
||||
Trace::DeferredCapture
|
||||
new_capture(data_.u_position_register.reg, trace);
|
||||
new_capture(data_.u_position_register.reg,
|
||||
data_.u_position_register.is_capture,
|
||||
trace);
|
||||
Trace new_trace = *trace;
|
||||
new_trace.add_action(&new_capture);
|
||||
return on_success()->Emit(compiler, &new_trace);
|
||||
@ -3237,13 +3310,31 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
assembler->Bind(&skip_empty_check);
|
||||
return on_success()->Emit(compiler, trace);
|
||||
}
|
||||
case POSITIVE_SUBMATCH_SUCCESS:
|
||||
case POSITIVE_SUBMATCH_SUCCESS: {
|
||||
if (!trace->is_trivial()) return trace->Flush(compiler, this);
|
||||
assembler->ReadCurrentPositionFromRegister(
|
||||
data_.u_submatch.current_position_register);
|
||||
assembler->ReadStackPointerFromRegister(
|
||||
data_.u_submatch.stack_pointer_register);
|
||||
return on_success()->Emit(compiler, trace);
|
||||
int clear_register_count = data_.u_submatch.clear_register_count;
|
||||
if (clear_register_count == 0) {
|
||||
return on_success()->Emit(compiler, trace);
|
||||
}
|
||||
int clear_registers_from = data_.u_submatch.clear_register_from;
|
||||
Label clear_registers_backtrack;
|
||||
Trace new_trace = *trace;
|
||||
new_trace.set_backtrack(&clear_registers_backtrack);
|
||||
bool ok = on_success()->Emit(compiler, &new_trace);
|
||||
if (!ok) { return false; }
|
||||
|
||||
assembler->Bind(&clear_registers_backtrack);
|
||||
int clear_registers_to = clear_registers_from + clear_register_count - 1;
|
||||
assembler->ClearRegisters(clear_registers_from, clear_registers_to);
|
||||
|
||||
ASSERT(trace->backtrack() == NULL);
|
||||
assembler->Backtrack();
|
||||
return true;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return false;
|
||||
@ -3861,7 +3952,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
|
||||
if (body_can_be_empty) {
|
||||
// If the body can be empty we need to store the start position
|
||||
// so we can bail out if it was empty.
|
||||
body_node = ActionNode::StorePosition(body_start_reg, body_node);
|
||||
body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
|
||||
}
|
||||
if (needs_capture_clearing) {
|
||||
// Before entering the body of this loop we need to clear captures.
|
||||
@ -3923,6 +4014,8 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
|
||||
newline_atom,
|
||||
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
|
||||
position_register,
|
||||
0, // No captures inside.
|
||||
-1, // Ignored if no captures.
|
||||
on_success));
|
||||
// Create an end-of-input matcher.
|
||||
RegExpNode* end_of_line = ActionNode::BeginSubmatch(
|
||||
@ -3961,16 +4054,26 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
|
||||
RegExpNode* on_success) {
|
||||
int stack_pointer_register = compiler->AllocateRegister();
|
||||
int position_register = compiler->AllocateRegister();
|
||||
|
||||
const int registers_per_capture = 2;
|
||||
const int register_of_first_capture = 2;
|
||||
int register_count = capture_count_ * registers_per_capture;
|
||||
int register_start =
|
||||
register_of_first_capture + capture_from_ * registers_per_capture;
|
||||
|
||||
RegExpNode* success;
|
||||
if (is_positive()) {
|
||||
return ActionNode::BeginSubmatch(
|
||||
RegExpNode* node = ActionNode::BeginSubmatch(
|
||||
stack_pointer_register,
|
||||
position_register,
|
||||
body()->ToNode(
|
||||
compiler,
|
||||
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
|
||||
position_register,
|
||||
register_count,
|
||||
register_start,
|
||||
on_success)));
|
||||
return node;
|
||||
} else {
|
||||
// We use a ChoiceNode for a negative lookahead because it has most of
|
||||
// the characteristics we need. It has the body of the lookahead as its
|
||||
@ -3986,7 +4089,9 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
|
||||
body()->ToNode(
|
||||
compiler,
|
||||
success = new NegativeSubmatchSuccess(stack_pointer_register,
|
||||
position_register)));
|
||||
position_register,
|
||||
register_count,
|
||||
register_start)));
|
||||
ChoiceNode* choice_node =
|
||||
new NegativeLookaheadChoiceNode(body_alt,
|
||||
GuardedAlternative(on_success));
|
||||
@ -4009,9 +4114,9 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
|
||||
RegExpNode* on_success) {
|
||||
int start_reg = RegExpCapture::StartRegister(index);
|
||||
int end_reg = RegExpCapture::EndRegister(index);
|
||||
RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
|
||||
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
|
||||
RegExpNode* body_node = body->ToNode(compiler, store_end);
|
||||
return ActionNode::StorePosition(start_reg, body_node);
|
||||
return ActionNode::StorePosition(start_reg, true, body_node);
|
||||
}
|
||||
|
||||
|
||||
|
@ -719,13 +719,17 @@ class ActionNode: public SeqRegExpNode {
|
||||
};
|
||||
static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
|
||||
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
|
||||
static ActionNode* StorePosition(int reg, RegExpNode* on_success);
|
||||
static ActionNode* StorePosition(int reg,
|
||||
bool is_capture,
|
||||
RegExpNode* on_success);
|
||||
static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
|
||||
static ActionNode* BeginSubmatch(int stack_pointer_reg,
|
||||
int position_reg,
|
||||
RegExpNode* on_success);
|
||||
static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
|
||||
int restore_reg,
|
||||
int clear_capture_count,
|
||||
int clear_capture_from,
|
||||
RegExpNode* on_success);
|
||||
static ActionNode* EmptyMatchCheck(int start_register,
|
||||
int repetition_register,
|
||||
@ -755,10 +759,13 @@ class ActionNode: public SeqRegExpNode {
|
||||
} u_increment_register;
|
||||
struct {
|
||||
int reg;
|
||||
bool is_capture;
|
||||
} u_position_register;
|
||||
struct {
|
||||
int stack_pointer_register;
|
||||
int current_position_register;
|
||||
int clear_register_count;
|
||||
int clear_register_from;
|
||||
} u_submatch;
|
||||
struct {
|
||||
int start_register;
|
||||
@ -913,15 +920,22 @@ class EndNode: public RegExpNode {
|
||||
|
||||
class NegativeSubmatchSuccess: public EndNode {
|
||||
public:
|
||||
NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg)
|
||||
NegativeSubmatchSuccess(int stack_pointer_reg,
|
||||
int position_reg,
|
||||
int clear_capture_count,
|
||||
int clear_capture_start)
|
||||
: EndNode(NEGATIVE_SUBMATCH_SUCCESS),
|
||||
stack_pointer_register_(stack_pointer_reg),
|
||||
current_position_register_(position_reg) { }
|
||||
current_position_register_(position_reg),
|
||||
clear_capture_count_(clear_capture_count),
|
||||
clear_capture_start_(clear_capture_start) { }
|
||||
virtual bool Emit(RegExpCompiler* compiler, Trace* trace);
|
||||
|
||||
private:
|
||||
int stack_pointer_register_;
|
||||
int current_position_register_;
|
||||
int clear_capture_count_;
|
||||
int clear_capture_start_;
|
||||
};
|
||||
|
||||
|
||||
@ -1087,18 +1101,20 @@ class Trace {
|
||||
friend class Trace;
|
||||
};
|
||||
|
||||
class DeferredCapture: public DeferredAction {
|
||||
class DeferredCapture : public DeferredAction {
|
||||
public:
|
||||
DeferredCapture(int reg, Trace* trace)
|
||||
DeferredCapture(int reg, bool is_capture, Trace* trace)
|
||||
: DeferredAction(ActionNode::STORE_POSITION, reg),
|
||||
cp_offset_(trace->cp_offset()) { }
|
||||
int cp_offset() { return cp_offset_; }
|
||||
bool is_capture() { return is_capture_; }
|
||||
private:
|
||||
int cp_offset_;
|
||||
bool is_capture_;
|
||||
void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
|
||||
};
|
||||
|
||||
class DeferredSetRegister :public DeferredAction {
|
||||
class DeferredSetRegister : public DeferredAction {
|
||||
public:
|
||||
DeferredSetRegister(int reg, int value)
|
||||
: DeferredAction(ActionNode::SET_REGISTER, reg),
|
||||
@ -1118,7 +1134,7 @@ class Trace {
|
||||
Interval range_;
|
||||
};
|
||||
|
||||
class DeferredIncrementRegister: public DeferredAction {
|
||||
class DeferredIncrementRegister : public DeferredAction {
|
||||
public:
|
||||
explicit DeferredIncrementRegister(int reg)
|
||||
: DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
|
||||
@ -1189,13 +1205,13 @@ class Trace {
|
||||
int FindAffectedRegisters(OutSet* affected_registers);
|
||||
void PerformDeferredActions(RegExpMacroAssembler* macro,
|
||||
int max_register,
|
||||
OutSet& affected_registers);
|
||||
OutSet& affected_registers,
|
||||
OutSet* registers_to_pop,
|
||||
OutSet* registers_to_clear);
|
||||
void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
|
||||
int max_register,
|
||||
OutSet& affected_registers);
|
||||
void PushAffectedRegisters(RegExpMacroAssembler* macro,
|
||||
int max_register,
|
||||
OutSet& affected_registers);
|
||||
OutSet& registers_to_pop,
|
||||
OutSet& registers_to_clear);
|
||||
int cp_offset_;
|
||||
DeferredAction* actions_;
|
||||
Label* backtrack_;
|
||||
|
@ -4149,7 +4149,10 @@ RegExpTree* RegExpParser::ParseGroup() {
|
||||
} else {
|
||||
ASSERT(type == '=' || type == '!');
|
||||
bool is_positive = (type == '=');
|
||||
return new RegExpLookahead(body, is_positive);
|
||||
return new RegExpLookahead(body,
|
||||
is_positive,
|
||||
end_capture_index - capture_index,
|
||||
capture_index);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -332,15 +332,29 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
|
||||
__ push(ebx);
|
||||
const int four_arguments = 4;
|
||||
FrameAlign(four_arguments, ecx);
|
||||
// Put arguments into allocated stack area.
|
||||
// Put arguments into allocated stack area, last argument highest on stack.
|
||||
// Parameters are
|
||||
// UC16** buffer - really the String** of the input string
|
||||
// int byte_offset1 - byte offset from *buffer of start of capture
|
||||
// int byte_offset2 - byte offset from *buffer of current position
|
||||
// size_t byte_length - length of capture in bytes(!)
|
||||
|
||||
// Set byte_length.
|
||||
__ mov(Operand(esp, 3 * kPointerSize), ebx);
|
||||
// Set byte_offset2.
|
||||
// Found by adding negative string-end offset of current position (edi)
|
||||
// to String** offset of end of string.
|
||||
__ mov(ecx, Operand(ebp, kInputEndOffset));
|
||||
__ add(edi, Operand(ecx));
|
||||
__ mov(Operand(esp, 2 * kPointerSize), edi);
|
||||
// Set byte_offset1.
|
||||
// Start of capture, where eax already holds string-end negative offset.
|
||||
__ add(eax, Operand(ecx));
|
||||
__ mov(Operand(esp, 1 * kPointerSize), eax);
|
||||
// Set buffer. Original String** parameter to regexp code.
|
||||
__ mov(eax, Operand(ebp, kInputBuffer));
|
||||
__ mov(Operand(esp, 0 * kPointerSize), eax);
|
||||
|
||||
Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
|
||||
CallCFunction(function_address, four_arguments);
|
||||
// Pop original values before reacting on result value.
|
||||
@ -946,9 +960,12 @@ void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::ClearRegister(int reg) {
|
||||
void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) {
|
||||
ASSERT(reg_from <= reg_to);
|
||||
__ mov(eax, Operand(ebp, kInputStartMinusOne));
|
||||
__ mov(register_location(reg), eax);
|
||||
for (int reg = reg_from; reg <= reg_to; reg++) {
|
||||
__ mov(register_location(reg), eax);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -987,8 +1004,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Execute(
|
||||
stack_top);
|
||||
|
||||
if (result < 0 && !Top::has_pending_exception()) {
|
||||
// We detected a stack overflow in RegExp code, but haven't created
|
||||
// the exception yet.
|
||||
// We detected a stack overflow (on the backtrack stack) in RegExp code,
|
||||
// but haven't created the exception yet.
|
||||
Top::StackOverflow();
|
||||
}
|
||||
return (result < 0) ? EXCEPTION : (result ? SUCCESS : FAILURE);
|
||||
@ -1170,6 +1187,9 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIA32::FrameAlign(int num_arguments, Register scratch) {
|
||||
// TODO(lrn): Since we no longer use the system stack arbitrarily, we
|
||||
// know the current stack alignment - esp points to the last regexp register.
|
||||
// We can then do this more simply.
|
||||
int frameAlignment = OS::ActivationFrameAlignment();
|
||||
if (frameAlignment != 0) {
|
||||
// Make stack end at alignment and make room for num_arguments words
|
||||
|
@ -110,7 +110,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
|
||||
virtual void SetRegister(int register_index, int to);
|
||||
virtual void Succeed();
|
||||
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
|
||||
virtual void ClearRegister(int reg);
|
||||
virtual void ClearRegisters(int reg_from, int reg_to);
|
||||
virtual void WriteStackPointerToRegister(int reg);
|
||||
|
||||
static Result Execute(Code* code,
|
||||
|
@ -104,8 +104,11 @@ void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerIrregexp::ClearRegister(int reg) {
|
||||
SetRegister(reg, -1);
|
||||
void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) {
|
||||
ASSERT(reg_from <= reg_to);
|
||||
for (int reg = reg_from; reg <= reg_to; reg++) {
|
||||
SetRegister(reg, -1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -66,7 +66,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
|
||||
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
|
||||
virtual void SetRegister(int register_index, int to);
|
||||
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
|
||||
virtual void ClearRegister(int reg);
|
||||
virtual void ClearRegisters(int reg_from, int reg_to);
|
||||
virtual void ReadCurrentPositionFromRegister(int reg);
|
||||
virtual void WriteStackPointerToRegister(int reg);
|
||||
virtual void ReadStackPointerFromRegister(int reg);
|
||||
|
@ -150,9 +150,9 @@ void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
|
||||
}
|
||||
|
||||
|
||||
void RegExpMacroAssemblerTracer::ClearRegister(int reg) {
|
||||
PrintF(" ClearRegister(register=%d);\n", reg);
|
||||
assembler_->ClearRegister(reg);
|
||||
void RegExpMacroAssemblerTracer::ClearRegisters(int reg_from, int reg_to) {
|
||||
PrintF(" ClearRegister(from=%d, to=%d);\n", reg_from, reg_to);
|
||||
assembler_->ClearRegisters(reg_from, reg_to);
|
||||
}
|
||||
|
||||
|
||||
|
@ -107,7 +107,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
|
||||
virtual void SetRegister(int register_index, int to);
|
||||
virtual void Succeed();
|
||||
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
|
||||
virtual void ClearRegister(int reg);
|
||||
virtual void ClearRegisters(int reg_from, int reg_to);
|
||||
virtual void WriteStackPointerToRegister(int reg);
|
||||
private:
|
||||
RegExpMacroAssembler* assembler_;
|
||||
|
@ -174,7 +174,7 @@ class RegExpMacroAssembler {
|
||||
virtual void SetRegister(int register_index, int to) = 0;
|
||||
virtual void Succeed() = 0;
|
||||
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
|
||||
virtual void ClearRegister(int reg) = 0;
|
||||
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
|
||||
virtual void WriteStackPointerToRegister(int reg) = 0;
|
||||
|
||||
private:
|
||||
|
52
test/mjsunit/regexp-capture.js
Executable file
52
test/mjsunit/regexp-capture.js
Executable file
@ -0,0 +1,52 @@
|
||||
// Copyright 2009 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Tests from http://blog.stevenlevithan.com/archives/npcg-javascript
|
||||
|
||||
assertEquals(true, /(x)?\1y/.test("y"));
|
||||
assertEquals(["y", undefined], /(x)?\1y/.exec("y"));
|
||||
assertEquals(["y", undefined], /(x)?y/.exec("y"));
|
||||
assertEquals(["y", undefined], "y".match(/(x)?\1y/));
|
||||
assertEquals(["y", undefined], "y".match(/(x)?y/));
|
||||
assertEquals(["y"], "y".match(/(x)?\1y/g));
|
||||
assertEquals(["", undefined, ""], "y".split(/(x)?\1y/));
|
||||
assertEquals(["", undefined, ""], "y".split(/(x)?y/));
|
||||
assertEquals(0, "y".search(/(x)?\1y/));
|
||||
assertEquals("z", "y".replace(/(x)?\1y/, "z"));
|
||||
assertEquals("", "y".replace(/(x)?y/, "$1"));
|
||||
assertEquals("undefined", "y".replace(/(x)?\1y/,
|
||||
function($0, $1){
|
||||
return String($1);
|
||||
}));
|
||||
assertEquals("undefined", "y".replace(/(x)?y/,
|
||||
function($0, $1){
|
||||
return String($1);
|
||||
}));
|
||||
assertEquals("undefined", "y".replace(/(x)?y/,
|
||||
function($0, $1){
|
||||
return $1;
|
||||
}));
|
166
test/mjsunit/regexp-lookahead.js
Normal file
166
test/mjsunit/regexp-lookahead.js
Normal file
@ -0,0 +1,166 @@
|
||||
// Copyright 2009 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// Tests captures in positive and negative look-ahead in regular expressions.
|
||||
|
||||
/**
 * Converts a string to a double-quoted source literal for use in test names.
 *
 * Every double quote and backslash in the input is prefixed with a backslash,
 * and the result is wrapped in double quotes.
 *
 * @param {string} string The text to quote.
 * @return {string} The quoted and escaped text.
 */
function stringEscape(string) {
  // "$&" stands for the matched character itself. The pattern has no capture
  // group, so "$1" would not be substituted and would end up in the output
  // as the literal text "$1".
  return '"' + string.replace(/["\\]/g, "\\$&") + '"';
}
|
||||
|
||||
/**
 * Asserts that re.test(input) yields the expected boolean outcome.
 *
 * @param {RegExp} re The regular expression under test.
 * @param {string} input The subject string.
 * @param {boolean} expected_result Whether the expression should match.
 */
function testRE(re, input, expected_result) {
  // Label reported by the assertion on failure.
  var label = re + ".test(" + stringEscape(input) +")";
  // Select the assertion that corresponds to the expected outcome.
  var check = expected_result ? assertTrue : assertFalse;
  check(re.test(input), label);
}
|
||||
|
||||
/**
 * Asserts that re.exec(input) produces the expected match result.
 *
 * @param {RegExp} re The regular expression under test.
 * @param {string} input The subject string.
 * @param {Array} expected_result The expected value of re.exec(input).
 */
function execRE(re, input, expected_result) {
  // stringEscape already returns a double-quoted literal, so no extra quotes
  // are added around it; this keeps the label in the same form as testRE's.
  var testName = re + ".exec(" + stringEscape(input) + ")";
  assertEquals(expected_result, re.exec(input), testName);
}
|
||||
|
||||
// Test of simple positive lookahead.
|
||||
|
||||
// Anchored zero-width look-ahead: succeeds only when the input starts with
// "a" and consumes nothing, so the reported match is the empty string.
var re = /^(?=a)/;
|
||||
testRE(re, "a", true);
|
||||
testRE(re, "b", false);
|
||||
execRE(re, "a", [""]);
|
||||
|
||||
// The look-ahead and the pattern following it are both applied at the start
// of the input, so the input must satisfy \woo and f\w at the same position;
// only the two characters consumed by f\w are reported.
re = /^(?=\woo)f\w/;
|
||||
testRE(re, "foo", true);
|
||||
testRE(re, "boo", false);
|
||||
testRE(re, "fao", false);
|
||||
testRE(re, "foa", false);
|
||||
execRE(re, "foo", ["fo"]);
|
||||
|
||||
// Look-aheads before and after a consumed character: matches a word
// character that is immediately followed by a non-word character.
re = /(?=\w).(?=\W)/;
|
||||
testRE(re, ".a! ", true);
|
||||
testRE(re, ".! ", false);
|
||||
testRE(re, ".ab! ", true);
|
||||
execRE(re, ".ab! ", ["b"]);
|
||||
|
||||
// A look-ahead nested inside another look-ahead: requires an "f" that is
// followed by a non-"f" character and then an "o"; two characters starting
// at that "f" are consumed.
re = /(?=f(?=[^f]o))../;
|
||||
testRE(re, ", foo!", true);
|
||||
testRE(re, ", fo!", false);
|
||||
testRE(re, ", ffo", false);
|
||||
execRE(re, ", foo!", ["fo"]);
|
||||
|
||||
// Positive lookahead with captures.
|
||||
// Capture 1 is set inside the look-ahead to the first quote character in the
// input and is then used by back-references outside the look-ahead, so the
// word must be wrapped in that same kind of quote on both sides.
re = /^[^\'\"]*(?=([\'\"])).*\1(\w+)\1/;
|
||||
testRE(re, " 'foo' ", true);
|
||||
testRE(re, ' "foo" ', true);
|
||||
testRE(re, " \" 'foo' ", false);
|
||||
testRE(re, " ' \"foo\" ", false);
|
||||
testRE(re, " 'foo\" ", false);
|
||||
testRE(re, " \"foo' ", false);
|
||||
execRE(re, " 'foo' ", [" 'foo'", "'", "foo"]);
|
||||
execRE(re, ' "foo" ', [' "foo"', '"', 'foo']);
|
||||
|
||||
// Captures are cleared on backtrack past the look-ahead.
|
||||
// When the first alternative fails after its look-ahead has set capture 1,
// the "b" alternative is tried with capture 1 unset again. The back-reference
// \1 then refers to an unset capture, which is why "b" is expected to match
// and "bb" is not.
re = /^(?:(?=(.))a|b)\1$/;
|
||||
testRE(re, "aa", true);
|
||||
testRE(re, "b", true);
|
||||
testRE(re, "bb", false);
|
||||
testRE(re, "a", false);
|
||||
execRE(re, "aa", ["aa", "a"]);
|
||||
execRE(re, "b", ["b", undefined]);
|
||||
|
||||
// Captures set in an outer and in a nested look-ahead are both used by
// back-references inside and after the look-aheads, and both are reported
// in the exec result.
re = /^(?=(.)(?=(.)\1\2)\2\1)\1\2/;
|
||||
testRE(re, "abab", true);
|
||||
testRE(re, "ababxxxxxxxx", true);
|
||||
testRE(re, "aba", false);
|
||||
execRE(re, "abab", ["ab", "a", "b"]);
|
||||
|
||||
// Capture 1 is reported only when the alternative containing the look-ahead
// is the one that matched; when "b" or "c" matches instead, capture 1 is
// expected to be undefined.
re = /^(?:(?=(.))a|b|c)$/;
|
||||
testRE(re, "a", true);
|
||||
testRE(re, "b", true);
|
||||
testRE(re, "c", true);
|
||||
testRE(re, "d", false);
|
||||
execRE(re, "a", ["a", "a"]);
|
||||
execRE(re, "b", ["b", undefined]);
|
||||
execRE(re, "c", ["c", undefined]);
|
||||
|
||||
// Direct exec checks: a capture set by a look-ahead that is part of the
// final match is kept, while captures in look-aheads that did not succeed
// on the matching path are expected to be undefined.
execRE(/^(?=(b))b/, "b", ["b", "b"]);
|
||||
execRE(/^(?:(?=(b))|a)b/, "ab", ["ab", undefined]);
|
||||
execRE(/^(?:(?=(b)(?:(?=(c))|d))|)bd/, "bd", ["bd", "b", undefined]);
|
||||
|
||||
|
||||
|
||||
// Test of Negative Look-Ahead.
|
||||
|
||||
// Negative look-ahead: matches one character provided it is not "x".
re = /(?!x)./;
|
||||
testRE(re, "y", true);
|
||||
testRE(re, "x", false);
|
||||
execRE(re, "y", ["y"]);
|
||||
|
||||
// A capture inside a negative look-ahead is expected to be undefined in the
// result in both cases: when the look-ahead's body matched (so the \d
// alternative is used, input "4") and when the look-ahead itself succeeded
// (empty match, input "x").
re = /(?!(\d))|\d/;
|
||||
testRE(re, "4", true);
|
||||
execRE(re, "4", ["4", undefined]);
|
||||
execRE(re, "x", ["", undefined]);
|
||||
|
||||
|
||||
// Test mixed nested look-ahead with captures.
|
||||
|
||||
// Positive look-ahead nested in a positive look-ahead: "x" must be followed
// by "y", and both captures are reported.
re = /^(?=(x)(?=(y)))/;
|
||||
testRE(re, "xy", true);
|
||||
testRE(re, "xz", false);
|
||||
execRE(re, "xy", ["", "x", "y"]);
|
||||
|
||||
// Negative look-ahead nested in a negative look-ahead: "xy" is accepted and
// "xz" is rejected, and neither capture is reported for the accepted input.
re = /^(?!(x)(?!(y)))/;
|
||||
testRE(re, "xy", true);
|
||||
testRE(re, "xz", false);
|
||||
execRE(re, "xy", ["", undefined, undefined]);
|
||||
|
||||
// Negative look-ahead nested in a positive look-ahead: "x" must not be
// followed by "y"; capture 1 is reported and capture 2 is expected to be
// undefined.
re = /^(?=(x)(?!(y)))/;
testRE(re, "xz", true);
testRE(re, "xy", false);
execRE(re, "xz", ["", "x", undefined]);
|
||||
|
||||
// Positive look-ahead nested in a negative look-ahead: the input is rejected
// when "x" is followed by "y"; no capture from inside the negative
// look-ahead is reported.
re = /^(?!(x)(?=(y)))/;
|
||||
testRE(re, "xz", true);
|
||||
testRE(re, "xy", false);
|
||||
execRE(re, "xz", ["", undefined, undefined]);
|
||||
|
||||
// Three levels (positive, negative, positive): only capture 1, set directly
// in the outermost positive look-ahead, is reported. Captures 2 and 3 sit
// inside the negative look-ahead and are expected to be undefined.
re = /^(?=(x)(?!(y)(?=(z))))/;
|
||||
testRE(re, "xaz", true);
|
||||
testRE(re, "xya", true);
|
||||
testRE(re, "xyz", false);
|
||||
testRE(re, "a", false);
|
||||
execRE(re, "xaz", ["", "x", undefined, undefined]);
|
||||
execRE(re, "xya", ["", "x", undefined, undefined]);
|
||||
|
||||
// Three levels (negative, positive, negative): every group sits inside the
// outer negative look-ahead, so all three captures are expected to be
// undefined for each input the pattern accepts.
re = /^(?!(x)(?=(y)(?!(z))))/;
|
||||
testRE(re, "a", true);
|
||||
testRE(re, "xa", true);
|
||||
testRE(re, "xyz", true);
|
||||
testRE(re, "xya", false);
|
||||
execRE(re, "a", ["", undefined, undefined, undefined]);
|
||||
execRE(re, "xa", ["", undefined, undefined, undefined]);
|
||||
execRE(re, "xyz", ["", undefined, undefined, undefined]);
|
30
test/mjsunit/regress/regress-187.js
Normal file
30
test/mjsunit/regress/regress-187.js
Normal file
@ -0,0 +1,30 @@
|
||||
// Copyright 2008 the V8 project authors. All rights reserved.
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met:
|
||||
//
|
||||
// * Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
// * Redistributions in binary form must reproduce the above
|
||||
// copyright notice, this list of conditions and the following
|
||||
// disclaimer in the documentation and/or other materials provided
|
||||
// with the distribution.
|
||||
// * Neither the name of Google Inc. nor the names of its
|
||||
// contributors may be used to endorse or promote products derived
|
||||
// from this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
// See http://code.google.com/p/v8/issues/detail?id=187
|
||||
|
||||
// The first alternative sets capture 1 inside its look-ahead and then fails
// on "fx"; the empty alternative is used instead, so the match is "f" and
// capture 1 must be unset again.
// NOTE(review): the expected value is the string "f," while match() returns
// an array; presumably the harness compares their string forms. Confirm.
assertEquals("f,", "foo".match(/(?:(?=(f)o)fx|)./));
|
Loading…
Reference in New Issue
Block a user