Irregexp:

* Facility for generating a node several ways.  This allows
  code to be generated for a node knowing where it is trying
  to match relative to the 'current position' and it allows
  code to be generated that knows where to backtrack to.  Both
  allow dramatic reductions in the amount of popping and pushing
  on the stack and the number of indirect jumps.
* Generate special backtracking for greedy quantifiers on
  constant-length atoms.  This allows .* to run in constant
  space relative to input string size.
* When we are checking a long sequence of characters or character
  classes in the input then we do them right to left and only the
  first (rightmost) needs to check for end-of-string.
* Record the pattern in the profile instead of just <CompiledRegExp>.
* Nodes no longer contain an on_failure_ node.  This was only used
  for lookaheads and they are now handled with a choice node instead.
Review URL: http://codereview.chromium.org/12900

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@930 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
erik.corry@gmail.com 2008-12-08 09:22:12 +00:00
parent f306b97855
commit ba09ec5e89
15 changed files with 1231 additions and 544 deletions

View File

@ -1216,8 +1216,7 @@ class RegExpTree: public ZoneObject {
virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure) = 0;
RegExpNode* on_success) = 0;
virtual bool IsTextElement() { return false; }
virtual void AppendToText(RegExpText* text);
SmartPointer<const char> ToString();
@ -1235,8 +1234,7 @@ class RegExpDisjunction: public RegExpTree {
: alternatives_(alternatives) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpDisjunction* AsDisjunction();
virtual bool IsDisjunction();
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
@ -1250,8 +1248,7 @@ class RegExpAlternative: public RegExpTree {
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpAlternative* AsAlternative();
virtual bool IsAlternative();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
@ -1265,8 +1262,7 @@ class RegExpText: public RegExpTree {
RegExpText() : elements_(2) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpText* AsText();
virtual bool IsText();
virtual bool IsTextElement() { return true; }
@ -1291,8 +1287,7 @@ class RegExpAssertion: public RegExpTree {
explicit RegExpAssertion(Type type) : type_(type) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpAssertion* AsAssertion();
virtual bool IsAssertion();
Type type() { return type_; }
@ -1313,8 +1308,7 @@ class RegExpCharacterClass: public RegExpTree {
}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass();
virtual bool IsTextElement() { return true; }
@ -1332,8 +1326,7 @@ class RegExpAtom: public RegExpTree {
explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpAtom* AsAtom();
virtual bool IsAtom();
virtual bool IsTextElement() { return true; }
@ -1353,15 +1346,13 @@ class RegExpQuantifier: public RegExpTree {
body_(body) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
static RegExpNode* ToNode(int min,
int max,
bool is_greedy,
RegExpTree* body,
RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpQuantifier* AsQuantifier();
virtual bool IsQuantifier();
int min() { return min_; }
@ -1391,13 +1382,11 @@ class RegExpCapture: public RegExpTree {
: body_(body), index_(index), available_(CAPTURE_AVAILABLE) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
static RegExpNode* ToNode(RegExpTree* body,
int index,
RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpCapture* AsCapture();
virtual bool IsCapture();
RegExpTree* body() { return body_; }
@ -1422,8 +1411,7 @@ class RegExpLookahead: public RegExpTree {
is_positive_(is_positive) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpLookahead* AsLookahead();
virtual bool IsLookahead();
RegExpTree* body() { return body_; }
@ -1440,8 +1428,7 @@ class RegExpBackReference: public RegExpTree {
: capture_(capture) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
int index() { return capture_->index(); }
@ -1456,8 +1443,7 @@ class RegExpEmpty: public RegExpTree {
RegExpEmpty() { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpEmpty* AsEmpty();
virtual bool IsEmpty();
static RegExpEmpty* GetInstance() { return &kInstance; }

View File

@ -50,22 +50,24 @@ V(SUCCEED, 14, 1) /* succeed */ \
V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \
V(GOTO, 16, 5) /* goto addr32 */ \
V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \
V(CHECK_CHAR, 18, 7) /* check_char uc16 addr32 */ \
V(CHECK_NOT_CHAR, 19, 7) /* check_not_char uc16 addr32 */ \
V(OR_CHECK_NOT_CHAR, 20, 9) /* or_check_not_char uc16 uc16 addr32 */ \
V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 25, 6) /* check_not_back_ref_no_case captu... */ \
V(CHECK_NOT_REGS_EQUAL, 26, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP1, 27, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 28, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 29, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 30, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 31, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 32, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 33, 5) /* check_not_at_start addr32 */
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32 */ \
V(CHECK_CHAR, 19, 7) /* check_char uc16 addr32 */ \
V(CHECK_NOT_CHAR, 20, 7) /* check_not_char uc16 addr32 */ \
V(OR_CHECK_NOT_CHAR, 21, 9) /* or_check_not_char uc16 uc16 addr32 */ \
V(MINUS_OR_CHECK_NOT_CHAR, 22, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_LT, 23, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 24, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 25, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 26, 6) /* check_not_back_ref_no_case captu... */ \
V(CHECK_NOT_REGS_EQUAL, 27, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP1, 28, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 29, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 30, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 31, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 32, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 33, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 34, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 35, 5) /* check_greedy addr32 */
// Expands one bytecode table entry into an integer constant BC_<name> whose
// value is the opcode number.  The length argument is accepted but not used
// by this particular expansion.
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;

View File

@ -191,6 +191,15 @@ static bool RawMatch(const byte* code_base,
BYTECODE(GOTO)
pc = code_base + Load32(pc + 1);
break;
BYTECODE(CHECK_GREEDY)
if (current == backtrack_sp[-1]) {
backtrack_sp--;
backtrack_stack_space++;
pc = code_base + Load32(pc + 1);
} else {
pc += BC_CHECK_GREEDY_LENGTH;
}
break;
BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + Load32(pc + 1);
if (pos >= subject.length()) {
@ -201,6 +210,12 @@ static bool RawMatch(const byte* code_base,
}
break;
}
BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
  // load offset32: read the subject character at (current + offset) without
  // comparing the index against the subject length first.
  current_char = subject[current + Load32(pc + 1)];
  pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
  break;
}
BYTECODE(CHECK_CHAR) {
int c = Load16(pc + 1);
if (c == current_char) {

View File

@ -253,11 +253,6 @@ static void DoForEach(Node* node, Callback* callback) {
}
// Binds this node's label_ at the macro assembler's current position.
void RegExpNode::Bind(RegExpMacroAssembler* macro) {
macro->Bind(&label_);
}
}} // namespace v8::internal

File diff suppressed because it is too large Load Diff

View File

@ -202,6 +202,7 @@ class CharacterRange {
uc16 to() const { return to_; }
void set_to(uc16 value) { to_ = value; }
bool is_valid() { return from_ <= to_; }
bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
bool IsSingleton() { return (from_ == to_); }
void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
static void Split(ZoneList<CharacterRange>* base,
@ -346,6 +347,7 @@ class OutSet: public ZoneObject {
uint32_t first_;
ZoneList<unsigned>* remaining_;
ZoneList<OutSet*>* successors_;
friend class GenerationVariant;
};
@ -432,7 +434,7 @@ class TextElement {
public:
enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
TextElement() : type(UNINITIALIZED) { }
explicit TextElement(Type t) : type(t) { }
explicit TextElement(Type t) : type(t), cp_offset(-1) { }
static TextElement Atom(RegExpAtom* atom);
static TextElement CharClass(RegExpCharacterClass* char_class);
Type type;
@ -440,9 +442,13 @@ class TextElement {
RegExpAtom* u_atom;
RegExpCharacterClass* u_char_class;
} data;
int cp_offset;
};
class GenerationVariant;
struct NodeInfo {
enum TriBool {
UNKNOWN = -1, FALSE = 0, TRUE = 1
@ -607,17 +613,17 @@ class SiblingList {
class RegExpNode: public ZoneObject {
public:
RegExpNode() : variants_generated_(0) { }
virtual ~RegExpNode() { }
virtual void Accept(NodeVisitor* visitor) = 0;
// Generates a goto to this node or actually generates the code at this point.
// Until the implementation is complete we will return true for success and
// false for failure.
virtual bool GoTo(RegExpCompiler* compiler);
Label* label();
// Until the implementation is complete we will return true for success and
// false for failure.
virtual bool Emit(RegExpCompiler* compiler) = 0;
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant) = 0;
static const int kNodeIsTooComplexForGreedyLoops = -1;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
Label* label() { return &label_; }
static const int kMaxVariantsGenerated = 10;
RegExpNode* EnsureExpanded(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0;
@ -630,7 +636,6 @@ class RegExpNode: public ZoneObject {
virtual RegExpNode* PropagateForward(NodeInfo* info) = 0;
NodeInfo* info() { return &info_; }
virtual bool IsBacktrack() { return false; }
void AddSibling(RegExpNode* node) { siblings_.Add(node); }
@ -645,6 +650,9 @@ class RegExpNode: public ZoneObject {
void set_siblings(SiblingList* other) { siblings_ = *other; }
protected:
enum LimitResult { DONE, FAIL, CONTINUE };
LimitResult LimitVersions(RegExpCompiler* compiler,
GenerationVariant* variant);
// Returns a sibling of this node whose interests and assumptions
// match the ones in the given node info. If no sibling exists NULL
@ -663,12 +671,11 @@ class RegExpNode: public ZoneObject {
// processed before it is in a usable state.
virtual RegExpNode* Clone() = 0;
inline void Bind(RegExpMacroAssembler* macro);
private:
Label label_;
NodeInfo info_;
SiblingList siblings_;
int variants_generated_;
};
@ -678,7 +685,6 @@ class SeqRegExpNode: public RegExpNode {
: on_success_(on_success) { }
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual bool Emit(RegExpCompiler* compiler) { return false; }
private:
RegExpNode* on_success_;
};
@ -687,29 +693,31 @@ class SeqRegExpNode: public RegExpNode {
class ActionNode: public SeqRegExpNode {
public:
enum Type {
STORE_REGISTER,
SET_REGISTER,
INCREMENT_REGISTER,
STORE_POSITION,
RESTORE_POSITION,
BEGIN_SUBMATCH,
ESCAPE_SUBMATCH
POSITIVE_SUBMATCH_SUCCESS
};
static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg, RegExpNode* on_success);
static ActionNode* RestorePosition(int reg, RegExpNode* on_success);
static ActionNode* BeginSubmatch(int stack_pointer_reg,
static ActionNode* BeginSubmatch(
int stack_pointer_reg,
int position_reg,
RegExpNode* on_success);
static ActionNode* EscapeSubmatch(int stack_pointer_reg,
bool and_restore_position,
static ActionNode* PositiveSubmatchSuccess(
int stack_pointer_reg,
int restore_reg,
RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
virtual RegExpNode* PropagateForward(NodeInfo* info);
Type type() { return type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops.
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
virtual ActionNode* Clone() { return new ActionNode(*this); }
private:
@ -740,16 +748,12 @@ class ActionNode: public SeqRegExpNode {
class TextNode: public SeqRegExpNode {
public:
TextNode(ZoneList<TextElement>* elms,
RegExpNode* on_success,
RegExpNode* on_failure)
RegExpNode* on_success)
: SeqRegExpNode(on_success),
on_failure_(on_failure),
elms_(elms) { }
TextNode(RegExpCharacterClass* that,
RegExpNode* on_success,
RegExpNode* on_failure)
RegExpNode* on_success)
: SeqRegExpNode(on_success),
on_failure_(on_failure),
elms_(new ZoneList<TextElement>(1)) {
elms_->Add(TextElement::CharClass(that));
}
@ -757,17 +761,20 @@ class TextNode: public SeqRegExpNode {
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
ZoneList<TextElement>* elements() { return elms_; }
void MakeCaseIndependent();
virtual TextNode* Clone() { return new TextNode(*this); }
virtual int GreedyLoopTextLength();
virtual TextNode* Clone() {
TextNode* result = new TextNode(*this);
result->CalculateOffsets();
return result;
}
void CalculateOffsets();
private:
void ExpandAtomChildren(RegExpAtom* that);
void ExpandCharClassChildren(RegExpCharacterClass* that);
RegExpNode* on_failure_;
ZoneList<TextElement>* elms_;
};
@ -776,24 +783,20 @@ class BackReferenceNode: public SeqRegExpNode {
public:
BackReferenceNode(int start_reg,
int end_reg,
RegExpNode* on_success,
RegExpNode* on_failure)
RegExpNode* on_success)
: SeqRegExpNode(on_success),
on_failure_(on_failure),
start_reg_(start_reg),
end_reg_(end_reg) { }
virtual void Accept(NodeVisitor* visitor);
RegExpNode* on_failure() { return on_failure_; }
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
private:
RegExpNode* on_failure_;
int start_reg_;
int end_reg_;
};
@ -801,22 +804,37 @@ class BackReferenceNode: public SeqRegExpNode {
class EndNode: public RegExpNode {
public:
enum Action { ACCEPT, BACKTRACK };
enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
explicit EndNode(Action action) : action_(action) { }
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
virtual bool IsBacktrack() { return action_ == BACKTRACK; }
virtual bool GoTo(RegExpCompiler* compiler);
virtual EndNode* Clone() { return new EndNode(*this); }
protected:
void EmitInfoChecks(RegExpMacroAssembler* macro, GenerationVariant* variant);
private:
Action action_;
};
// An EndNode whose action is NEGATIVE_SUBMATCH_SUCCESS.  It remembers two
// register indices (one for a saved stack pointer, one for a saved input
// position) and supplies its own Emit.
// NOTE(review): presumably this terminates the body of a negative lookahead
// and uses the two registers to unwind state - confirm against the Emit
// implementation, which is not shown here.
class NegativeSubmatchSuccess: public EndNode {
public:
// stack_pointer_reg: register index holding the saved stack pointer.
// position_reg: register index holding the saved current position.
NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg)
: EndNode(NEGATIVE_SUBMATCH_SUCCESS),
stack_pointer_register_(stack_pointer_reg),
current_position_register_(position_reg) { }
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
private:
int stack_pointer_register_;
int current_position_register_;
};
class Guard: public ZoneObject {
public:
enum Relation { LT, GEQ };
@ -851,17 +869,15 @@ class GuardedAlternative {
class ChoiceNode: public RegExpNode {
public:
explicit ChoiceNode(int expected_size, RegExpNode* on_failure)
: on_failure_(on_failure),
alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
explicit ChoiceNode(int expected_size)
: alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
table_(NULL),
being_calculated_(false) { }
virtual void Accept(NodeVisitor* visitor);
void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
DispatchTable* GetTable(bool ignore_case);
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren();
@ -870,19 +886,128 @@ class ChoiceNode: public RegExpNode {
bool being_calculated() { return being_calculated_; }
void set_being_calculated(bool b) { being_calculated_ = b; }
protected:
int GreedyLoopTextLength(GuardedAlternative *alternative);
ZoneList<GuardedAlternative>* alternatives_;
private:
friend class DispatchTableConstructor;
friend class Analysis;
void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard *guard,
Label* on_failure);
RegExpNode* on_failure_;
ZoneList<GuardedAlternative>* alternatives_;
GenerationVariant* variant);
DispatchTable* table_;
bool being_calculated_;
};
// A ChoiceNode with its own Emit implementation.
// NOTE(review): presumably used for the choice at the head of a quantifier
// loop so that greedy loops can get special code - confirm against the Emit
// implementation, which is not shown here.
class LoopChoiceNode: public ChoiceNode {
public:
explicit LoopChoiceNode(int expected_size) : ChoiceNode(expected_size) { }
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
// Copies this node, preserving the dynamic type.
virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); }
};
// There are many ways to generate code for a node.  This class encapsulates
// the current way we should be generating.  In other words it encapsulates
// the current state of the code generator.
//
// The state consists of:
//   - cp_offset_: an offset relative to the current position,
//   - actions_: a singly linked list of deferred actions, most recently
//     added first,
//   - backtrack_: the label to use for backtracking (may be NULL),
//   - stop_node_ and loop_label_: an optional node/label pair.
//     NOTE(review): presumably these are used when generating greedy
//     loops - confirm against the users of stop_node()/loop_label().
class GenerationVariant {
public:
// Base class for an action whose code generation has been postponed.  Each
// action records its kind and the register it affects, and links to the
// next action in the list owned by a GenerationVariant.
class DeferredAction {
public:
DeferredAction(ActionNode::Type type, int reg)
: type_(type), reg_(reg), next_(NULL) { }
DeferredAction* next() { return next_; }
int reg() { return reg_; }
ActionNode::Type type() { return type_; }
private:
ActionNode::Type type_;
int reg_;
DeferredAction* next_;
// GenerationVariant::add_action links actions together through next_.
friend class GenerationVariant;
};
// A deferred STORE_POSITION action.  The cp_offset of the given variant is
// sampled at construction time and stored with the action.
class DeferredCapture: public DeferredAction {
public:
DeferredCapture(int reg, GenerationVariant* variant)
: DeferredAction(ActionNode::STORE_POSITION, reg),
cp_offset_(variant->cp_offset()) { }
int cp_offset() { return cp_offset_; }
private:
int cp_offset_;
void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
};
// A deferred SET_REGISTER action carrying the value to store.
class DeferredSetRegister :public DeferredAction {
public:
DeferredSetRegister(int reg, int value)
: DeferredAction(ActionNode::SET_REGISTER, reg),
value_(value) { }
int value() { return value_; }
private:
int value_;
};
// A deferred INCREMENT_REGISTER action.
class DeferredIncrementRegister: public DeferredAction {
public:
explicit DeferredIncrementRegister(int reg)
: DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
};
// Creates a variant with no deferred actions and a zero cp_offset that
// backtracks to the given label.
explicit GenerationVariant(Label* backtrack)
: cp_offset_(0),
actions_(NULL),
backtrack_(backtrack),
stop_node_(NULL),
loop_label_(NULL) { }
// Creates a variant with every field zero/NULL; is_trivial() is true for it.
GenerationVariant()
: cp_offset_(0),
actions_(NULL),
backtrack_(NULL),
stop_node_(NULL),
loop_label_(NULL) { }
// NOTE(review): defined elsewhere; presumably emits the code required to
// bring the machine state in line with this variant before continuing at
// successor, returning false on failure - confirm against the definition.
bool Flush(RegExpCompiler* compiler, RegExpNode* successor);
int cp_offset() { return cp_offset_; }
DeferredAction* actions() { return actions_; }
// True when there is no backtrack label, no deferred action and a zero
// cp_offset.  stop_node_ and loop_label_ are not consulted.
bool is_trivial() {
return backtrack_ == NULL && actions_ == NULL && cp_offset_ == 0;
}
Label* backtrack() { return backtrack_; }
Label* loop_label() { return loop_label_; }
RegExpNode* stop_node() { return stop_node_; }
// These set methods should be used only on new GenerationVariants - the
// intention is that GenerationVariants are immutable after creation.
// Pushes new_action onto the front of the deferred action list.  The action
// must not already be linked into a list.
void add_action(DeferredAction* new_action) {
ASSERT(new_action->next_ == NULL);
new_action->next_ = actions_;
actions_ = new_action;
}
// The offset may only stay the same or grow.
void set_cp_offset(int new_cp_offset) {
ASSERT(new_cp_offset >= cp_offset_);
cp_offset_ = new_cp_offset;
}
void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
void set_stop_node(RegExpNode* node) { stop_node_ = node; }
void set_loop_label(Label* label) { loop_label_ = label; }
// NOTE(review): defined elsewhere; presumably reports whether any deferred
// action refers to the given register - confirm against the definition.
bool mentions_reg(int reg);
private:
// Helpers defined elsewhere.
// NOTE(review): judging by the names they collect the registers touched by
// the deferred actions and emit the push/perform/restore code for them -
// confirm against the definitions.
int FindAffectedRegisters(OutSet* affected_registers);
void PerformDeferredActions(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
void PushAffectedRegisters(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
int cp_offset_;
DeferredAction* actions_;
Label* backtrack_;
RegExpNode* stop_node_;
Label* loop_label_;
};
class NodeVisitor {
public:
virtual ~NodeVisitor() { }
@ -956,7 +1081,8 @@ class RegExpEngine: public AllStatic {
static Handle<FixedArray> Compile(RegExpParseResult* input,
RegExpNode** node_return,
bool ignore_case,
bool multiline);
bool multiline,
Handle<String> pattern);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};

View File

@ -184,11 +184,14 @@ void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure) {
Label* on_failure,
bool check_end_of_string) {
int byte_length = str.length() * char_size();
int byte_offset = cp_offset * char_size();
if (check_end_of_string) {
__ cmp(Operand(edi), Immediate(-(byte_offset + byte_length)));
BranchOrBacktrack(greater, on_failure);
}
if (str.length() <= kMaxInlineStringTests) {
for (int i = 0; i < str.length(); i++) {
@ -233,10 +236,13 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
}
void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
Label* on_equal) {
__ cmp(edi, register_location(register_index));
BranchOrBacktrack(equal, on_equal);
// Emits code that compares edi with the word on top of the machine stack.
// If they differ, execution continues after this sequence with the stack
// untouched.  If they are equal, the top word is popped and control is
// transferred through BranchOrBacktrack with on_equal as the target.
void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
Label fallthrough;
__ cmp(edi, Operand(esp, 0));
__ j(not_equal, &fallthrough);
__ add(Operand(esp), Immediate(4)); // Pop.
BranchOrBacktrack(no_condition, on_equal);
__ bind(&fallthrough);
}
@ -482,7 +488,7 @@ void RegExpMacroAssemblerIA32::Fail() {
}
Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
// Finalize code - write the entry point code now that we know how many
// registers we need.
@ -521,7 +527,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
Label at_start;
__ cmp(Operand(ebp, kAtStart), Immediate(0));
__ j(not_equal, &at_start);
LoadCurrentCharToRegister(-1); // Load previous char.
LoadCurrentCharacterUnchecked(-1); // Load previous char.
__ jmp(&start_label_);
__ bind(&at_start);
__ mov(current_character(), '\n');
@ -562,7 +568,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
NULL,
Code::ComputeFlags(Code::REGEXP),
self_);
LOG(CodeCreateEvent("RegExp", *code, "(Compiled RegExp)"));
LOG(CodeCreateEvent("RegExp", *code, *(source->ToCString())));
return Handle<Object>::cast(code);
}
@ -600,7 +606,7 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
__ cmp(edi, -cp_offset * char_size());
BranchOrBacktrack(greater_equal, on_end_of_input);
LoadCurrentCharToRegister(cp_offset);
LoadCurrentCharacterUnchecked(cp_offset);
}
@ -651,9 +657,16 @@ void RegExpMacroAssemblerIA32::Succeed() {
}
void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg) {
// Stores the current input position, displaced by cp_offset characters, in
// the given regexp register.
//
// reg:       index of the regexp register to write.
// cp_offset: displacement from the current position, measured in characters.
void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
                                                              int cp_offset) {
  if (cp_offset == 0) {
    // No displacement: store the position register directly.
    __ mov(register_location(reg), edi);
  } else {
    // edi is a byte offset (the other position checks in this assembler
    // compare it against cp_offset * char_size()), so the character count
    // must be scaled to bytes before it is added.  Without the scaling the
    // stored position is wrong whenever char_size() is not 1.
    __ lea(eax, Operand(edi, cp_offset * char_size()));
    __ mov(register_location(reg), eax);
  }
}
void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
__ mov(register_location(reg), esp);
@ -770,7 +783,7 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
}
void RegExpMacroAssemblerIA32::LoadCurrentCharToRegister(int cp_offset) {
void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset) {
if (mode_ == ASCII) {
__ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
return;

View File

@ -47,8 +47,9 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
Label* on_failure,
bool check_end_of_string);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
@ -70,12 +71,14 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail();
virtual Handle<Object> GetCode();
virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
@ -85,7 +88,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void WriteStackPointerToRegister(int reg);
template <typename T>
@ -139,10 +142,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to);
// Read a character from input at the given offset from the current
// position.
void LoadCurrentCharToRegister(int cp_offset);
// Load the address of a "constant buffer" (a slice of a byte array)
// into a register. The address is computed from the ByteArray* address
// and an offset. Uses no extra registers.

View File

@ -68,6 +68,7 @@ void RegExpMacroAssemblerIrregexp::Bind(Label* l) {
void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
if (l == NULL) l = &backtrack_;
if (l->is_bound()) {
Emit32(l->pos());
} else {
@ -95,11 +96,11 @@ void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) {
void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
int register_index) {
int register_index, int cp_offset) {
ASSERT(register_index >= 0);
Emit(BC_SET_REGISTER_TO_CP);
Emit(register_index);
Emit32(0); // Current position offset.
Emit32(cp_offset); // Current position offset.
}
@ -187,11 +188,10 @@ void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
}
void RegExpMacroAssemblerIrregexp::CheckCurrentPosition(
int register_index,
Label* on_equal) {
// TODO(erikcorry): Implement.
UNIMPLEMENTED();
// Emits a CHECK_GREEDY bytecode: the opcode followed by the address of
// on_tos_equals_current_position, emitted (or linked for later patching) by
// EmitOrLink.
void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
Label* on_tos_equals_current_position) {
Emit(BC_CHECK_GREEDY);
EmitOrLink(on_tos_equals_current_position);
}
@ -203,6 +203,13 @@ void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
}
// Emits a LOAD_CURRENT_CHAR_UNCHECKED bytecode: the opcode followed by the
// 32-bit cp_offset.  Unlike the checked load, no failure address is emitted.
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacterUnchecked(
int cp_offset) {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Emit32(cp_offset);
}
void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
Label* on_less) {
Emit(BC_CHECK_LT);
@ -323,11 +330,19 @@ void RegExpMacroAssemblerIrregexp::DispatchHighByteMap(
void RegExpMacroAssemblerIrregexp::CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure) {
Label* on_failure,
bool check_end_of_string) {
// It is vital that this loop is backwards due to the unchecked character
// load below.
for (int i = str.length() - 1; i >= 0; i--) {
if (check_end_of_string && i == str.length() - 1) {
Emit(BC_LOAD_CURRENT_CHAR);
Emit32(cp_offset + i);
EmitOrLink(on_failure);
} else {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Emit32(cp_offset + i);
}
Emit(BC_CHECK_NOT_CHAR);
Emit16(str[i]);
EmitOrLink(on_failure);
@ -357,7 +372,9 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
}
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode() {
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT);
Handle<ByteArray> array = Factory::NewByteArray(length());
Copy(array->GetDataStartAddress());
return array;

View File

@ -62,14 +62,16 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void PushRegister(int register_index);
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
@ -82,8 +84,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
Label* on_failure,
bool check_end_of_string);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map,
@ -98,7 +100,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual IrregexpImplementation Implementation();
virtual Handle<Object> GetCode();
virtual Handle<Object> GetCode(Handle<String> source);
private:
void Expand();
// Code and bitmap emission.
@ -109,14 +111,13 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
int length();
void Copy(Address a);
// The buffer into which code and relocation info are generated.
Vector<byte> buffer_;
// The program counter.
int pc_;
// True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_;
Label backtrack_;
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpMacroAssemblerIrregexp);
};

View File

@ -64,6 +64,12 @@ void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) {
}
// Tracing wrapper for CheckGreedyLoop: prints the call, including the value
// of the label pointer, and then forwards it unchanged to the wrapped
// assembler (assembler_).
void RegExpMacroAssemblerTracer::CheckGreedyLoop(Label* label) {
// The trace line deliberately ends with two newlines, exactly as emitted here.
PrintF(" CheckGreedyLoop(label[%08x]);\n\n", label);
assembler_->CheckGreedyLoop(label);
}
void RegExpMacroAssemblerTracer::PopCurrentPosition() {
PrintF(" PopCurrentPosition();\n");
assembler_->PopCurrentPosition();
@ -130,9 +136,12 @@ void RegExpMacroAssemblerTracer::SetRegister(int register_index, int to) {
}
void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg) {
PrintF(" WriteCurrentPositionToRegister(register=%d);\n", reg);
assembler_->WriteCurrentPositionToRegister(reg);
void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
int cp_offset) {
PrintF(" WriteCurrentPositionToRegister(register=%d,cp_offset=%d);\n",
reg,
cp_offset);
assembler_->WriteCurrentPositionToRegister(reg, cp_offset);
}
@ -156,12 +165,20 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) {
void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input) {
PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n", cp_offset,
PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n",
cp_offset,
on_end_of_input);
assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input);
}
// Tracing wrapper for LoadCurrentCharacterUnchecked: prints the call with its
// cp_offset argument and then forwards it unchanged to the wrapped assembler
// (assembler_). Unlike LoadCurrentCharacter, there is no end-of-input label
// to report.
void RegExpMacroAssemblerTracer::LoadCurrentCharacterUnchecked(int cp_offset) {
PrintF(" LoadCurrentCharacterUnchecked(cp_offset=%d);\n",
cp_offset);
assembler_->LoadCurrentCharacterUnchecked(cp_offset);
}
void RegExpMacroAssemblerTracer::CheckCharacterLT(uc16 limit, Label* on_less) {
PrintF(" CheckCharacterLT(c='u%04x', label[%08x]);\n", limit, on_less);
assembler_->CheckCharacterLT(limit, on_less);
@ -242,21 +259,15 @@ void RegExpMacroAssemblerTracer::CheckNotRegistersEqual(int reg1,
void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure) {
PrintF(" CheckCharacters(str=\"");
Label* on_failure,
bool check_end_of_string) {
PrintF(" %s(str=\"",
check_end_of_string ? "CheckCharacters" : "CheckCharactersUnchecked");
for (int i = 0; i < str.length(); i++) {
PrintF("u%04x", str[i]);
}
PrintF("\", cp_offset=%d, label[%08x])\n", cp_offset, on_failure);
assembler_->CheckCharacters(str, cp_offset, on_failure);
}
void RegExpMacroAssemblerTracer::CheckCurrentPosition(int register_index,
Label* on_equal) {
PrintF(" CheckCurrentPosition(register=%d, label[%08x]);\n", register_index,
on_equal);
assembler_->CheckCurrentPosition(register_index, on_equal);
assembler_->CheckCharacters(str, cp_offset, on_failure, check_end_of_string);
}
@ -334,9 +345,9 @@ RegExpMacroAssembler::IrregexpImplementation
}
Handle<Object> RegExpMacroAssemblerTracer::GetCode() {
PrintF(" GetCode();\n");
return assembler_->GetCode();
Handle<Object> RegExpMacroAssemblerTracer::GetCode(Handle<String> source) {
PrintF(" GetCode(%s);\n", *(source->ToCString()));
return assembler_->GetCode(source);
}
}} // namespace v8::internal

View File

@ -47,10 +47,9 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure);
virtual void CheckCurrentPosition(
int register_index,
Label* on_equal);
Label* on_failure,
bool check_end_of_string);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
@ -77,12 +76,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail();
virtual Handle<Object> GetCode();
virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
@ -92,7 +92,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void WriteStackPointerToRegister(int reg);
private:
RegExpMacroAssembler* assembler_;

View File

@ -62,19 +62,17 @@ class RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
// Check the current character for a match with a literal string. If we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
// fail to match then goto the on_failure label. If check_eos is set then
// the end of input always fails. If check_eos is clear then it is the
// caller's responsibility to ensure that the end of string is not hit.
// If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure) = 0;
// Check the current input position against a register. If the register is
// equal to the current position then go to the label. If the label is NULL
// then backtrack instead.
virtual void CheckCurrentPosition(
int register_index,
Label* on_equal) = 0;
Label* on_failure,
bool check_eos) = 0;
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
@ -115,7 +113,7 @@ class RegExpMacroAssembler {
const Vector<Label*>& destinations) = 0;
virtual void EmitOrLink(Label* label) = 0;
virtual void Fail() = 0;
virtual Handle<Object> GetCode() = 0;
virtual Handle<Object> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0;
// Check whether a register is >= a given constant and go to a label if it
// is. Backtracks instead if the label is NULL.
@ -125,6 +123,7 @@ class RegExpMacroAssembler {
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
virtual IrregexpImplementation Implementation() = 0;
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
virtual void LoadCurrentCharacterUnchecked(int cp_offset) = 0;
virtual void PopCurrentPosition() = 0;
virtual void PopRegister(int register_index) = 0;
virtual void PushBacktrack(Label* label) = 0;
@ -134,7 +133,7 @@ class RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg) = 0;
virtual void SetRegister(int register_index, int to) = 0;
virtual void Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg) = 0;
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
private:

View File

@ -362,7 +362,8 @@ static RegExpNode* Compile(const char* input, bool multiline) {
if (!v8::internal::ParseRegExp(&reader, multiline, &result))
return NULL;
RegExpNode* node = NULL;
RegExpEngine::Compile(&result, &node, false, multiline);
Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
RegExpEngine::Compile(&result, &node, false, multiline, pattern);
return node;
}
@ -520,16 +521,16 @@ TEST(MacroAssembler) {
m.Fail();
m.Bind(&start);
m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail);
m.WriteCurrentPositionToRegister(0);
m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition();
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition();
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3);
m.WriteCurrentPositionToRegister(3, 0);
m.Succeed();
m.Bind(&fail);
@ -542,7 +543,8 @@ TEST(MacroAssembler) {
v8::HandleScope scope;
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode());
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
int captures[5];
Handle<String> f1 =
@ -576,7 +578,8 @@ TEST(MacroAssemblerIA32Success) {
m.Succeed();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector(""));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117};
@ -614,15 +617,16 @@ TEST(MacroAssemblerIA32Simple) {
Vector<const uc16> foo(foo_chars, 3);
Label fail;
m.CheckCharacters(foo, 0, &fail);
m.WriteCurrentPositionToRegister(0);
m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
m.Succeed();
m.Bind(&fail);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117};
@ -675,15 +679,16 @@ TEST(MacroAssemblerIA32SimpleUC16) {
Vector<const uc16> foo(foo_chars, 3);
Label fail;
m.CheckCharacters(foo, 0, &fail);
m.WriteCurrentPositionToRegister(0);
m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
m.Succeed();
m.Bind(&fail);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117};
@ -735,9 +740,6 @@ TEST(MacroAssemblerIA32Backtrack) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label fail;
Label backtrack;
m.LoadCurrentCharacter(10, &fail);
@ -749,7 +751,8 @@ TEST(MacroAssemblerIA32Backtrack) {
m.Bind(&backtrack);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector(".........."));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
@ -778,9 +781,9 @@ TEST(MacroAssemblerIA32BackReference) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);
m.WriteCurrentPositionToRegister(0);
m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
Label nomatch;
m.CheckNotBackReference(0, &nomatch);
m.Fail();
@ -788,12 +791,13 @@ TEST(MacroAssemblerIA32BackReference) {
m.AdvanceCurrentPosition(2);
Label missing_match;
m.CheckNotBackReference(0, &missing_match);
m.WriteCurrentPositionToRegister(2);
m.WriteCurrentPositionToRegister(2, 0);
m.Succeed();
m.Bind(&missing_match);
m.Fail();
Handle<Object> code_object = m.GetCode();
// Pattern text passed to GetCode() to describe the hand-assembled program.
// The backslash must be doubled: a bare "\1" inside a C++ string literal is
// an octal escape for the control character U+0001, not the two characters
// of a regexp back reference.
Handle<String> source =
Factory::NewStringFromAscii(CStrVector("^(..)..\\1"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
@ -826,9 +830,6 @@ TEST(MacroAssemblerIA32AtStart) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label not_at_start, newline, fail;
m.CheckNotAtStart(&not_at_start);
// Check that prevchar = '\n' and current = 'f'.
@ -850,7 +851,8 @@ TEST(MacroAssemblerIA32AtStart) {
m.CheckNotCharacter('b', &fail);
m.Succeed();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector("(^f|ob)"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar"));
@ -893,10 +895,10 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
Label fail, succ;
m.WriteCurrentPositionToRegister(0);
m.WriteCurrentPositionToRegister(2);
m.WriteCurrentPositionToRegister(0, 0);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(3);
m.WriteCurrentPositionToRegister(3, 0);
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
Label expected_fail;
@ -910,10 +912,12 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
m.Fail();
m.Bind(&succ);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
m.Succeed();
Handle<Object> code_object = m.GetCode();
// Pattern text passed to GetCode() to describe the hand-assembled program.
// Every backslash is doubled: a bare "\1" inside a C++ string literal is an
// octal escape for the control character U+0001, not the two characters of
// a regexp back reference.
Handle<String> source =
Factory::NewStringFromAscii(CStrVector("^(abc)\\1\\1(?!\\1)...(?!\\1)"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input =
@ -955,13 +959,13 @@ TEST(MacroAssemblerIA32Registers) {
enum registers { out1, out2, out3, out4, out5, sp, loop_cnt };
Label fail;
Label backtrack;
m.WriteCurrentPositionToRegister(out1); // Output: [0]
m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
m.PushRegister(out1);
m.PushBacktrack(&backtrack);
m.WriteStackPointerToRegister(sp);
// Fill stack and registers
m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(out1);
m.WriteCurrentPositionToRegister(out1, 0);
m.PushRegister(out1);
m.PushBacktrack(&fail);
// Drop backtrack stack frames.
@ -977,7 +981,7 @@ TEST(MacroAssemblerIA32Registers) {
m.PopRegister(out1);
m.ReadCurrentPositionFromRegister(out1);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(out2); // [0,3]
m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
Label loop;
m.SetRegister(loop_cnt, 0); // loop counter
@ -985,7 +989,7 @@ TEST(MacroAssemblerIA32Registers) {
m.AdvanceRegister(loop_cnt, 1);
m.AdvanceCurrentPosition(1);
m.IfRegisterLT(loop_cnt, 3, &loop);
m.WriteCurrentPositionToRegister(out3); // [0,3,6]
m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
Label loop2;
m.SetRegister(loop_cnt, 2); // loop counter
@ -993,24 +997,29 @@ TEST(MacroAssemblerIA32Registers) {
m.AdvanceRegister(loop_cnt, -1);
m.AdvanceCurrentPosition(1);
m.IfRegisterGE(loop_cnt, 0, &loop2);
m.WriteCurrentPositionToRegister(out4); // [0,3,6,9]
m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
Label loop3;
Label exit_loop3;
m.PushRegister(out4);
m.PushRegister(out4);
m.ReadCurrentPositionFromRegister(out3);
m.Bind(&loop3);
m.AdvanceCurrentPosition(1);
m.CheckCurrentPosition(out4, &exit_loop3);
m.CheckGreedyLoop(&exit_loop3);
m.GoTo(&loop3);
m.Bind(&exit_loop3);
m.WriteCurrentPositionToRegister(out5); // [0,3,6,9,9]
m.PopCurrentPosition();
m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9]
m.Succeed();
m.Bind(&fail);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source =
Factory::NewStringFromAscii(CStrVector("<loop test>"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
// String long enough for test (content doesn't matter).
@ -1291,5 +1300,5 @@ TEST(CharClassDifference) {
// Initializes V8 and then runs the Execute helper on two patterns: a word
// boundary followed by a word character, and a lookahead on a character
// class.
// NOTE(review): Execute is defined outside this view; the meaning of its two
// boolean arguments should be confirmed against its definition.
TEST(Graph) {
V8::Initialize(NULL);
Execute("\\b\\w", false, true);
Execute("(?=[d#.])", false, true);
}

View File

@ -286,3 +286,23 @@ for (var i = 0; i < 128; i++) {
}
assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");
// Check that we don't read past the end of the string.
// None of the subjects below contains a match, so every test() call must
// return false. The subjects are only one or two characters long, so a
// two-character pattern runs out of input at some starting position.
// Each case is written once with the letter 'f' and once with '<', which is
// not a letter.

// Single literal, then a character class followed by a literal.
assertFalse(/f/.test('b'));
assertFalse(/[abc]f/.test('x'));
assertFalse(/[abc]f/.test('xa'));
assertFalse(/[abc]</.test('x'));
assertFalse(/[abc]</.test('xa'));
// The same patterns with the ignore-case flag.
assertFalse(/f/i.test('b'));
assertFalse(/[abc]f/i.test('x'));
assertFalse(/[abc]f/i.test('xa'));
assertFalse(/[abc]</i.test('x'));
assertFalse(/[abc]</i.test('xa'));
// A literal followed by a character class.
assertFalse(/f[abc]/.test('x'));
assertFalse(/f[abc]/.test('xa'));
assertFalse(/<[abc]/.test('x'));
assertFalse(/<[abc]/.test('xa'));
// Literal followed by a character class, with the ignore-case flag.
assertFalse(/f[abc]/i.test('x'));
assertFalse(/f[abc]/i.test('xa'));
assertFalse(/<[abc]/i.test('x'));
assertFalse(/<[abc]/i.test('xa'));