Irregexp: Backtrack past look-aheads works correctly.

Allows backtracking to clear registers instead of pushing and popping
them to restore state.
Redo of 1135 with bug fixed.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1156 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2009-01-26 14:38:17 +00:00
parent b7ca939e2f
commit 2de5de495f
14 changed files with 475 additions and 70 deletions

View File

@ -1521,9 +1521,15 @@ class RegExpCapture: public RegExpTree {
class RegExpLookahead: public RegExpTree {
public:
RegExpLookahead(RegExpTree* body, bool is_positive)
RegExpLookahead(RegExpTree* body,
bool is_positive,
int capture_count,
int capture_from)
: body_(body),
is_positive_(is_positive) { }
is_positive_(is_positive),
capture_count_(capture_count),
capture_from_(capture_from) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
@ -1535,9 +1541,13 @@ class RegExpLookahead: public RegExpTree {
virtual int max_match() { return 0; }
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
int capture_count() { return capture_count_; }
int capture_from() { return capture_from_; }
private:
RegExpTree* body_;
bool is_positive_;
int capture_count_;
int capture_from_;
};

View File

@ -1361,41 +1361,44 @@ int Trace::FindAffectedRegisters(OutSet* affected_registers) {
}
void Trace::PushAffectedRegisters(RegExpMacroAssembler* assembler,
int max_register,
OutSet& affected_registers) {
// Stay safe and check every half times the limit.
// (Round up in case the limit is 1).
int push_limit = (assembler->stack_limit_slack() + 1) / 2;
for (int reg = 0, pushes = 0; reg <= max_register; reg++) {
if (affected_registers.Get(reg)) {
pushes++;
RegExpMacroAssembler::StackCheckFlag check_stack_limit =
(pushes % push_limit) == 0 ?
RegExpMacroAssembler::kCheckStackLimit :
RegExpMacroAssembler::kNoStackLimitCheck;
assembler->PushRegister(reg, check_stack_limit);
}
}
}
void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
int max_register,
OutSet& affected_registers) {
OutSet& registers_to_pop,
OutSet& registers_to_clear) {
for (int reg = max_register; reg >= 0; reg--) {
if (affected_registers.Get(reg)) assembler->PopRegister(reg);
if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
else if (registers_to_clear.Get(reg)) {
int clear_to = reg;
while (reg > 0 && registers_to_clear.Get(reg - 1)) {
reg--;
}
assembler->ClearRegisters(reg, clear_to);
}
}
}
void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
int max_register,
OutSet& affected_registers) {
OutSet& affected_registers,
OutSet* registers_to_pop,
OutSet* registers_to_clear) {
// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
for (int reg = 0; reg <= max_register; reg++) {
if (!affected_registers.Get(reg)) {
continue;
}
// Count pushes performed to force a stack limit check occasionally.
int pushes = 0;
// The chronologically first deferred action in the trace
// is used to infer the action needed to restore a register
// to its previous state (or not, if it's safe to ignore it).
enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
DeferredActionUndoType undo_action = IGNORE;
int value = 0;
bool absolute = false;
bool clear = false;
@ -1410,8 +1413,16 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
case ActionNode::SET_REGISTER: {
Trace::DeferredSetRegister* psr =
static_cast<Trace::DeferredSetRegister*>(action);
value += psr->value();
absolute = true;
if (!absolute) {
value += psr->value();
absolute = true;
}
// SET_REGISTER is currently only used for newly introduced loop
// counters. They can have a significant previous value if they
// occour in a loop. TODO(lrn): Propagate this information, so
// we can set undo_action to IGNORE if we know there is no value to
// restore.
undo_action = RESTORE;
ASSERT_EQ(store_position, -1);
ASSERT(!clear);
break;
@ -1422,6 +1433,7 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
}
ASSERT_EQ(store_position, -1);
ASSERT(!clear);
undo_action = RESTORE;
break;
case ActionNode::STORE_POSITION: {
Trace::DeferredCapture* pc =
@ -1429,6 +1441,19 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
if (!clear && store_position == -1) {
store_position = pc->cp_offset();
}
// For captures we know that stores and clears alternate.
// Other register, are never cleared, and if the occur
// inside a loop, they might be assigned more than once.
if (reg <= 1) {
// Registers zero and one, aka "capture zero", is
// always set correctly if we succeed. There is no
// need to undo a setting on backtrack, because we
// will set it again or fail.
undo_action = IGNORE;
} else {
undo_action = pc->is_capture() ? CLEAR : RESTORE;
}
ASSERT(!absolute);
ASSERT_EQ(value, 0);
break;
@ -1437,8 +1462,10 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
// Since we're scanning in reverse order, if we've already
// set the position we have to ignore historically earlier
// clearing operations.
if (store_position == -1)
if (store_position == -1) {
clear = true;
}
undo_action = RESTORE;
ASSERT(!absolute);
ASSERT_EQ(value, 0);
break;
@ -1449,10 +1476,27 @@ void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
}
}
}
// Prepare for the undo-action (e.g., push if it's going to be popped).
if (undo_action == RESTORE) {
pushes++;
RegExpMacroAssembler::StackCheckFlag stack_check =
RegExpMacroAssembler::kNoStackLimitCheck;
if (pushes == push_limit) {
stack_check = RegExpMacroAssembler::kCheckStackLimit;
pushes = 0;
}
assembler->PushRegister(reg, stack_check);
registers_to_pop->Set(reg);
} else if (undo_action == CLEAR) {
registers_to_clear->Set(reg);
}
// Perform the chronologically last action (or accumulated increment)
// for the register.
if (store_position != -1) {
assembler->WriteCurrentPositionToRegister(reg, store_position);
} else if (clear) {
assembler->ClearRegister(reg);
assembler->ClearRegisters(reg, reg);
} else if (absolute) {
assembler->SetRegister(reg, value);
} else if (value != 0) {
@ -1487,9 +1531,15 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
// Generate deferred actions here along with code to undo them again.
OutSet affected_registers;
int max_register = FindAffectedRegisters(&affected_registers);
PushAffectedRegisters(assembler, max_register, affected_registers);
PerformDeferredActions(assembler, max_register, affected_registers);
OutSet registers_to_pop;
OutSet registers_to_clear;
PerformDeferredActions(assembler,
max_register,
affected_registers,
&registers_to_pop,
&registers_to_clear);
if (backtrack() != NULL) {
// Here we have a concrete backtrack location. These are set up by choice
// nodes and so they indicate that we have a deferred save of the current
@ -1512,7 +1562,10 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
if (backtrack() != NULL) {
assembler->PopCurrentPosition();
}
RestoreAffectedRegisters(assembler, max_register, affected_registers);
RestoreAffectedRegisters(assembler,
max_register,
registers_to_pop,
registers_to_clear);
if (backtrack() == NULL) {
assembler->Backtrack();
} else {
@ -1524,15 +1577,26 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
if (!trace->is_trivial()) {
return trace->Flush(compiler, this);
}
RegExpMacroAssembler* assembler = compiler->macro_assembler();
// Omit flushing the trace. We discard the entire stack frame anyway.
if (!label()->is_bound()) {
// We are completely independent of the trace, since we ignore it,
// so this code can be used as the generic version.
assembler->Bind(label());
}
// Throw away everything on the backtrack stack since the start
// of the negative submatch and restore the character position.
assembler->ReadCurrentPositionFromRegister(current_position_register_);
assembler->ReadStackPointerFromRegister(stack_pointer_register_);
if (clear_capture_count_ > 0) {
// Clear any captures that might have been performed during the success
// of the body of the negative look-ahead.
int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
}
// Now that we have unwound the stack we find at the top of the stack the
// backtrack that the BeginSubmatch node got.
assembler->Backtrack();
@ -1588,9 +1652,12 @@ ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
}
ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
ActionNode* ActionNode::StorePosition(int reg,
bool is_capture,
RegExpNode* on_success) {
ActionNode* result = new ActionNode(STORE_POSITION, on_success);
result->data_.u_position_register.reg = reg;
result->data_.u_position_register.is_capture = is_capture;
return result;
}
@ -1616,10 +1683,14 @@ ActionNode* ActionNode::BeginSubmatch(int stack_reg,
ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
int position_reg,
int clear_register_count,
int clear_register_from,
RegExpNode* on_success) {
ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
result->data_.u_submatch.stack_pointer_register = stack_reg;
result->data_.u_submatch.current_position_register = position_reg;
result->data_.u_submatch.clear_register_count = clear_register_count;
result->data_.u_submatch.clear_register_from = clear_register_from;
return result;
}
@ -3172,7 +3243,9 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
switch (type_) {
case STORE_POSITION: {
Trace::DeferredCapture
new_capture(data_.u_position_register.reg, trace);
new_capture(data_.u_position_register.reg,
data_.u_position_register.is_capture,
trace);
Trace new_trace = *trace;
new_trace.add_action(&new_capture);
return on_success()->Emit(compiler, &new_trace);
@ -3237,13 +3310,31 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
assembler->Bind(&skip_empty_check);
return on_success()->Emit(compiler, trace);
}
case POSITIVE_SUBMATCH_SUCCESS:
case POSITIVE_SUBMATCH_SUCCESS: {
if (!trace->is_trivial()) return trace->Flush(compiler, this);
assembler->ReadCurrentPositionFromRegister(
data_.u_submatch.current_position_register);
assembler->ReadStackPointerFromRegister(
data_.u_submatch.stack_pointer_register);
return on_success()->Emit(compiler, trace);
int clear_register_count = data_.u_submatch.clear_register_count;
if (clear_register_count == 0) {
return on_success()->Emit(compiler, trace);
}
int clear_registers_from = data_.u_submatch.clear_register_from;
Label clear_registers_backtrack;
Trace new_trace = *trace;
new_trace.set_backtrack(&clear_registers_backtrack);
bool ok = on_success()->Emit(compiler, &new_trace);
if (!ok) { return false; }
assembler->Bind(&clear_registers_backtrack);
int clear_registers_to = clear_registers_from + clear_register_count - 1;
assembler->ClearRegisters(clear_registers_from, clear_registers_to);
ASSERT(trace->backtrack() == NULL);
assembler->Backtrack();
return true;
}
default:
UNREACHABLE();
return false;
@ -3861,7 +3952,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
if (body_can_be_empty) {
// If the body can be empty we need to store the start position
// so we can bail out if it was empty.
body_node = ActionNode::StorePosition(body_start_reg, body_node);
body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
}
if (needs_capture_clearing) {
// Before entering the body of this loop we need to clear captures.
@ -3923,6 +4014,8 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
newline_atom,
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
position_register,
0, // No captures inside.
-1, // Ignored if no captures.
on_success));
// Create an end-of-input matcher.
RegExpNode* end_of_line = ActionNode::BeginSubmatch(
@ -3961,16 +4054,26 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
int stack_pointer_register = compiler->AllocateRegister();
int position_register = compiler->AllocateRegister();
const int registers_per_capture = 2;
const int register_of_first_capture = 2;
int register_count = capture_count_ * registers_per_capture;
int register_start =
register_of_first_capture + capture_from_ * registers_per_capture;
RegExpNode* success;
if (is_positive()) {
return ActionNode::BeginSubmatch(
RegExpNode* node = ActionNode::BeginSubmatch(
stack_pointer_register,
position_register,
body()->ToNode(
compiler,
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
position_register,
register_count,
register_start,
on_success)));
return node;
} else {
// We use a ChoiceNode for a negative lookahead because it has most of
// the characteristics we need. It has the body of the lookahead as its
@ -3986,7 +4089,9 @@ RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
body()->ToNode(
compiler,
success = new NegativeSubmatchSuccess(stack_pointer_register,
position_register)));
position_register,
register_count,
register_start)));
ChoiceNode* choice_node =
new NegativeLookaheadChoiceNode(body_alt,
GuardedAlternative(on_success));
@ -4009,9 +4114,9 @@ RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
RegExpNode* on_success) {
int start_reg = RegExpCapture::StartRegister(index);
int end_reg = RegExpCapture::EndRegister(index);
RegExpNode* store_end = ActionNode::StorePosition(end_reg, on_success);
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
RegExpNode* body_node = body->ToNode(compiler, store_end);
return ActionNode::StorePosition(start_reg, body_node);
return ActionNode::StorePosition(start_reg, true, body_node);
}

View File

@ -719,13 +719,17 @@ class ActionNode: public SeqRegExpNode {
};
static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg,
bool is_capture,
RegExpNode* on_success);
static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
static ActionNode* BeginSubmatch(int stack_pointer_reg,
int position_reg,
RegExpNode* on_success);
static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
int restore_reg,
int clear_capture_count,
int clear_capture_from,
RegExpNode* on_success);
static ActionNode* EmptyMatchCheck(int start_register,
int repetition_register,
@ -755,10 +759,13 @@ class ActionNode: public SeqRegExpNode {
} u_increment_register;
struct {
int reg;
bool is_capture;
} u_position_register;
struct {
int stack_pointer_register;
int current_position_register;
int clear_register_count;
int clear_register_from;
} u_submatch;
struct {
int start_register;
@ -913,15 +920,22 @@ class EndNode: public RegExpNode {
class NegativeSubmatchSuccess: public EndNode {
public:
NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg)
NegativeSubmatchSuccess(int stack_pointer_reg,
int position_reg,
int clear_capture_count,
int clear_capture_start)
: EndNode(NEGATIVE_SUBMATCH_SUCCESS),
stack_pointer_register_(stack_pointer_reg),
current_position_register_(position_reg) { }
current_position_register_(position_reg),
clear_capture_count_(clear_capture_count),
clear_capture_start_(clear_capture_start) { }
virtual bool Emit(RegExpCompiler* compiler, Trace* trace);
private:
int stack_pointer_register_;
int current_position_register_;
int clear_capture_count_;
int clear_capture_start_;
};
@ -1087,18 +1101,20 @@ class Trace {
friend class Trace;
};
class DeferredCapture: public DeferredAction {
class DeferredCapture : public DeferredAction {
public:
DeferredCapture(int reg, Trace* trace)
DeferredCapture(int reg, bool is_capture, Trace* trace)
: DeferredAction(ActionNode::STORE_POSITION, reg),
cp_offset_(trace->cp_offset()) { }
int cp_offset() { return cp_offset_; }
bool is_capture() { return is_capture_; }
private:
int cp_offset_;
bool is_capture_;
void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
};
class DeferredSetRegister :public DeferredAction {
class DeferredSetRegister : public DeferredAction {
public:
DeferredSetRegister(int reg, int value)
: DeferredAction(ActionNode::SET_REGISTER, reg),
@ -1118,7 +1134,7 @@ class Trace {
Interval range_;
};
class DeferredIncrementRegister: public DeferredAction {
class DeferredIncrementRegister : public DeferredAction {
public:
explicit DeferredIncrementRegister(int reg)
: DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
@ -1189,13 +1205,13 @@ class Trace {
int FindAffectedRegisters(OutSet* affected_registers);
void PerformDeferredActions(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
OutSet& affected_registers,
OutSet* registers_to_pop,
OutSet* registers_to_clear);
void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
void PushAffectedRegisters(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
OutSet& registers_to_pop,
OutSet& registers_to_clear);
int cp_offset_;
DeferredAction* actions_;
Label* backtrack_;

View File

@ -4149,7 +4149,10 @@ RegExpTree* RegExpParser::ParseGroup() {
} else {
ASSERT(type == '=' || type == '!');
bool is_positive = (type == '=');
return new RegExpLookahead(body, is_positive);
return new RegExpLookahead(body,
is_positive,
end_capture_index - capture_index,
capture_index);
}
}

View File

@ -332,15 +332,29 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
__ push(ebx);
const int four_arguments = 4;
FrameAlign(four_arguments, ecx);
// Put arguments into allocated stack area.
// Put arguments into allocated stack area, last argument highest on stack.
// Parameters are
// UC16** buffer - really the String** of the input string
// int byte_offset1 - byte offset from *buffer of start of capture
// int byte_offset2 - byte offset from *buffer of current position
// size_t byte_length - length of capture in bytes(!)
// Set byte_length.
__ mov(Operand(esp, 3 * kPointerSize), ebx);
// Set byte_offset2.
// Found by adding negative string-end offset of current position (edi)
// to String** offset of end of string.
__ mov(ecx, Operand(ebp, kInputEndOffset));
__ add(edi, Operand(ecx));
__ mov(Operand(esp, 2 * kPointerSize), edi);
// Set byte_offset1.
// Start of capture, where eax already holds string-end negative offset.
__ add(eax, Operand(ecx));
__ mov(Operand(esp, 1 * kPointerSize), eax);
// Set buffer. Original String** parameter to regexp code.
__ mov(eax, Operand(ebp, kInputBuffer));
__ mov(Operand(esp, 0 * kPointerSize), eax);
Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
CallCFunction(function_address, four_arguments);
// Pop original values before reacting on result value.
@ -946,9 +960,12 @@ void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
}
void RegExpMacroAssemblerIA32::ClearRegister(int reg) {
void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) {
ASSERT(reg_from <= reg_to);
__ mov(eax, Operand(ebp, kInputStartMinusOne));
__ mov(register_location(reg), eax);
for (int reg = reg_from; reg <= reg_to; reg++) {
__ mov(register_location(reg), eax);
}
}
@ -987,8 +1004,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Execute(
stack_top);
if (result < 0 && !Top::has_pending_exception()) {
// We detected a stack overflow in RegExp code, but haven't created
// the exception yet.
// We detected a stack overflow (on the backtrack stack) in RegExp code,
// but haven't created the exception yet.
Top::StackOverflow();
}
return (result < 0) ? EXCEPTION : (result ? SUCCESS : FAILURE);
@ -1170,6 +1187,9 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
void RegExpMacroAssemblerIA32::FrameAlign(int num_arguments, Register scratch) {
// TODO(lrn): Since we no longer use the system stack arbitrarily, we
// know the current stack alignment - esp points to the last regexp register.
// We can do this simpler then.
int frameAlignment = OS::ActivationFrameAlignment();
if (frameAlignment != 0) {
// Make stack end at alignment and make room for num_arguments words

View File

@ -110,7 +110,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegister(int reg);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
static Result Execute(Code* code,

View File

@ -104,8 +104,11 @@ void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
}
void RegExpMacroAssemblerIrregexp::ClearRegister(int reg) {
SetRegister(reg, -1);
void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) {
ASSERT(reg_from <= reg_to);
for (int reg = reg_from; reg <= reg_to; reg++) {
SetRegister(reg, -1);
}
}

View File

@ -66,7 +66,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegister(int reg);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);

View File

@ -150,9 +150,9 @@ void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
}
void RegExpMacroAssemblerTracer::ClearRegister(int reg) {
PrintF(" ClearRegister(register=%d);\n", reg);
assembler_->ClearRegister(reg);
void RegExpMacroAssemblerTracer::ClearRegisters(int reg_from, int reg_to) {
PrintF(" ClearRegister(from=%d, to=%d);\n", reg_from, reg_to);
assembler_->ClearRegisters(reg_from, reg_to);
}

View File

@ -107,7 +107,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegister(int reg);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
private:
RegExpMacroAssembler* assembler_;

View File

@ -174,7 +174,7 @@ class RegExpMacroAssembler {
virtual void SetRegister(int register_index, int to) = 0;
virtual void Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void ClearRegister(int reg) = 0;
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
private:

52
test/mjsunit/regexp-capture.js Executable file
View File

@ -0,0 +1,52 @@
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Tests from http://blog.stevenlevithan.com/archives/npcg-javascript
assertEquals(true, /(x)?\1y/.test("y"));
assertEquals(["y", undefined], /(x)?\1y/.exec("y"));
assertEquals(["y", undefined], /(x)?y/.exec("y"));
assertEquals(["y", undefined], "y".match(/(x)?\1y/));
assertEquals(["y", undefined], "y".match(/(x)?y/));
assertEquals(["y"], "y".match(/(x)?\1y/g));
assertEquals(["", undefined, ""], "y".split(/(x)?\1y/));
assertEquals(["", undefined, ""], "y".split(/(x)?y/));
assertEquals(0, "y".search(/(x)?\1y/));
assertEquals("z", "y".replace(/(x)?\1y/, "z"));
assertEquals("", "y".replace(/(x)?y/, "$1"));
assertEquals("undefined", "y".replace(/(x)?\1y/,
function($0, $1){
return String($1);
}));
assertEquals("undefined", "y".replace(/(x)?y/,
function($0, $1){
return String($1);
}));
assertEquals("undefined", "y".replace(/(x)?y/,
function($0, $1){
return $1;
}));

View File

@ -0,0 +1,166 @@
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Tests captures in positive and negative look-ahead in regular expressions.
function stringEscape(string) {
// Converts string to source literal.
return '"' + string.replace(/["\\]/g, "\\$1") + '"';
}
function testRE(re, input, expected_result) {
var testName = re + ".test(" + stringEscape(input) +")";
if (expected_result) {
assertTrue(re.test(input), testName);
} else {
assertFalse(re.test(input), testName);
}
}
function execRE(re, input, expected_result) {
var testName = re + ".exec('" + stringEscape(input) +"')";
assertEquals(expected_result, re.exec(input), testName);
}
// Test of simple positive lookahead.
var re = /^(?=a)/;
testRE(re, "a", true);
testRE(re, "b", false);
execRE(re, "a", [""]);
re = /^(?=\woo)f\w/;
testRE(re, "foo", true);
testRE(re, "boo", false);
testRE(re, "fao", false);
testRE(re, "foa", false);
execRE(re, "foo", ["fo"]);
re = /(?=\w).(?=\W)/;
testRE(re, ".a! ", true);
testRE(re, ".! ", false);
testRE(re, ".ab! ", true);
execRE(re, ".ab! ", ["b"]);
re = /(?=f(?=[^f]o))../;
testRE(re, ", foo!", true);
testRE(re, ", fo!", false);
testRE(re, ", ffo", false);
execRE(re, ", foo!", ["fo"]);
// Positive lookahead with captures.
re = /^[^\'\"]*(?=([\'\"])).*\1(\w+)\1/;
testRE(re, " 'foo' ", true);
testRE(re, ' "foo" ', true);
testRE(re, " \" 'foo' ", false);
testRE(re, " ' \"foo\" ", false);
testRE(re, " 'foo\" ", false);
testRE(re, " \"foo' ", false);
execRE(re, " 'foo' ", [" 'foo'", "'", "foo"]);
execRE(re, ' "foo" ', [' "foo"', '"', 'foo']);
// Captures are cleared on backtrack past the look-ahead.
re = /^(?:(?=(.))a|b)\1$/;
testRE(re, "aa", true);
testRE(re, "b", true);
testRE(re, "bb", false);
testRE(re, "a", false);
execRE(re, "aa", ["aa", "a"]);
execRE(re, "b", ["b", undefined]);
re = /^(?=(.)(?=(.)\1\2)\2\1)\1\2/;
testRE(re, "abab", true);
testRE(re, "ababxxxxxxxx", true);
testRE(re, "aba", false);
execRE(re, "abab", ["ab", "a", "b"]);
re = /^(?:(?=(.))a|b|c)$/;
testRE(re, "a", true);
testRE(re, "b", true);
testRE(re, "c", true);
testRE(re, "d", false);
execRE(re, "a", ["a", "a"]);
execRE(re, "b", ["b", undefined]);
execRE(re, "c", ["c", undefined]);
execRE(/^(?=(b))b/, "b", ["b", "b"]);
execRE(/^(?:(?=(b))|a)b/, "ab", ["ab", undefined]);
execRE(/^(?:(?=(b)(?:(?=(c))|d))|)bd/, "bd", ["bd", "b", undefined]);
// Test of Negative Look-Ahead.
re = /(?!x)./;
testRE(re, "y", true);
testRE(re, "x", false);
execRE(re, "y", ["y"]);
re = /(?!(\d))|\d/;
testRE(re, "4", true);
execRE(re, "4", ["4", undefined]);
execRE(re, "x", ["", undefined]);
// Test mixed nested look-ahead with captures.
re = /^(?=(x)(?=(y)))/;
testRE(re, "xy", true);
testRE(re, "xz", false);
execRE(re, "xy", ["", "x", "y"]);
re = /^(?!(x)(?!(y)))/;
testRE(re, "xy", true);
testRE(re, "xz", false);
execRE(re, "xy", ["", undefined, undefined]);
re = /^(?=(x)(?!(y)))/;
testRE(re, "xz", true);
testRE(re, "xy", false)
execRE(re, "xz", ["", "x", undefined]);
re = /^(?!(x)(?=(y)))/;
testRE(re, "xz", true);
testRE(re, "xy", false);
execRE(re, "xz", ["", undefined, undefined]);
re = /^(?=(x)(?!(y)(?=(z))))/;
testRE(re, "xaz", true);
testRE(re, "xya", true);
testRE(re, "xyz", false);
testRE(re, "a", false);
execRE(re, "xaz", ["", "x", undefined, undefined]);
execRE(re, "xya", ["", "x", undefined, undefined]);
re = /^(?!(x)(?=(y)(?!(z))))/;
testRE(re, "a", true);
testRE(re, "xa", true);
testRE(re, "xyz", true);
testRE(re, "xya", false);
execRE(re, "a", ["", undefined, undefined, undefined]);
execRE(re, "xa", ["", undefined, undefined, undefined]);
execRE(re, "xyz", ["", undefined, undefined, undefined]);

View File

@ -0,0 +1,30 @@
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// See http://code.google.com/p/v8/issues/detail?id=187
assertEquals("f,", "foo".match(/(?:(?=(f)o)fx|)./));