Fixed some assertion propagation issues and added non-multiline $

propagation.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@856 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
christian.plesner.hansen@gmail.com 2008-11-27 10:35:06 +00:00
parent ef59fde8de
commit 4213b46b1d
5 changed files with 145 additions and 88 deletions

View File

@ -218,7 +218,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
FlattenString(pattern);
RegExpParseResult parse_result;
FlatStringReader reader(pattern);
if (!ParseRegExp(&reader, &parse_result)) {
if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
// Throw an exception if we fail to parse the pattern.
ThrowRegExpException(re,
pattern,
@ -241,7 +241,8 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Handle<FixedArray> irregexp_data =
RegExpEngine::Compile(&parse_result,
&node,
flags.is_ignore_case());
flags.is_ignore_case(),
flags.is_multiline());
if (irregexp_data.is_null()) {
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
@ -980,7 +981,8 @@ bool RegExpNode::GoTo(RegExpCompiler* compiler) {
// TODO(erikcorry): Implement support.
if (info_.follows_word_interest ||
info_.follows_newline_interest ||
info_.follows_start_interest) {
info_.follows_start_interest ||
info_.at_end) {
return false;
}
if (label_.is_bound()) {
@ -1004,7 +1006,8 @@ bool RegExpNode::GoTo(RegExpCompiler* compiler) {
bool EndNode::GoTo(RegExpCompiler* compiler) {
if (info()->follows_word_interest ||
info()->follows_newline_interest ||
info()->follows_start_interest) {
info()->follows_start_interest ||
info()->at_end) {
return false;
}
if (!label()->is_bound()) {
@ -1629,14 +1632,21 @@ void DotPrinter::PrintAttributes(RegExpNode* that) {
"fontcolor=lightgrey, margin=0.1, fontsize=10, label=\"{",
that);
NodeInfo* info = that->info();
stream()->Add("{NI|%i}|{SI|%i}|{WI|%i}",
stream()->Add("{NI|%i}|{WI|%i}|{SI|%i}",
info->follows_newline_interest,
info->follows_start_interest,
info->follows_word_interest);
stream()->Add("|{DN|%i}|{DS|%i}|{DW|%i}",
info->follows_word_interest,
info->follows_start_interest);
stream()->Add("|{DN|%i}|{DW|%i}|{DS|%i}|{AE|%i}",
info->determine_newline,
info->determine_word,
info->determine_start,
info->determine_word);
info->at_end);
if (info->follows_newline != NodeInfo::UNKNOWN)
stream()->Add("|{FN|%i}", info->follows_newline);
if (info->follows_word != NodeInfo::UNKNOWN)
stream()->Add("|{FW|%i}", info->follows_word);
if (info->follows_start != NodeInfo::UNKNOWN)
stream()->Add("|{FS|%i}", info->follows_start);
Label* label = that->label();
if (label->is_bound())
stream()->Add("|{@|%x}", label->pos());
@ -1924,11 +1934,14 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
case BOUNDARY: case NON_BOUNDARY:
info.follows_word_interest = true;
break;
case END_OF_LINE: case END_OF_INPUT:
case END_OF_INPUT:
info.at_end = true;
break;
case END_OF_LINE:
// This is wrong but has the effect of making the compiler abort.
info.follows_start_interest = true;
info.at_end = true;
}
return on_success->PropagateInterest(&info);
return on_success->PropagateForward(&info);
}
@ -2216,7 +2229,7 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
RegExpNode* RegExpNode::GetSibling(NodeInfo* info) {
for (int i = 0; i < siblings_.length(); i++) {
RegExpNode* sibling = siblings_.Get(i);
if (sibling->info()->SameInterests(info))
if (sibling->info()->HasSameForwardInterests(info))
return sibling;
}
return NULL;
@ -2225,58 +2238,64 @@ RegExpNode* RegExpNode::GetSibling(NodeInfo* info) {
template <class C>
static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
RegExpNode* sibling = node->GetSibling(info);
NodeInfo full_info(*node->info());
full_info.AddFromPreceding(info);
RegExpNode* sibling = node->GetSibling(&full_info);
if (sibling != NULL) return sibling;
node->EnsureSiblings();
sibling = new C(*node);
sibling->info()->AdoptInterests(info);
sibling->info()->AddFromPreceding(&full_info);
node->AddSibling(sibling);
return sibling;
}
RegExpNode* ActionNode::PropagateInterest(NodeInfo* info) {
RegExpNode* sibling = GetSibling(info);
RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
NodeInfo full_info(*this->info());
full_info.AddFromPreceding(info);
RegExpNode* sibling = GetSibling(&full_info);
if (sibling != NULL) return sibling;
EnsureSiblings();
ActionNode* action = new ActionNode(*this);
action->info()->AdoptInterests(info);
action->info()->AddFromPreceding(&full_info);
AddSibling(action);
action->set_on_success(action->on_success()->PropagateInterest(info));
action->set_on_success(action->on_success()->PropagateForward(info));
return action;
}
RegExpNode* ChoiceNode::PropagateInterest(NodeInfo* info) {
RegExpNode* sibling = GetSibling(info);
RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
NodeInfo full_info(*this->info());
full_info.AddFromPreceding(info);
RegExpNode* sibling = GetSibling(&full_info);
if (sibling != NULL) return sibling;
EnsureSiblings();
ChoiceNode* choice = new ChoiceNode(*this);
choice->info()->AdoptInterests(info);
choice->info()->AddFromPreceding(&full_info);
AddSibling(choice);
ZoneList<GuardedAlternative>* old_alternatives = alternatives();
int count = old_alternatives->length();
choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
for (int i = 0; i < count; i++) {
GuardedAlternative alternative = old_alternatives->at(i);
alternative.set_node(alternative.node()->PropagateInterest(info));
alternative.set_node(alternative.node()->PropagateForward(info));
choice->alternatives()->Add(alternative);
}
return choice;
}
RegExpNode* EndNode::PropagateInterest(NodeInfo* info) {
RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
return PropagateToEndpoint(this, info);
}
RegExpNode* BackReferenceNode::PropagateInterest(NodeInfo* info) {
RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
return PropagateToEndpoint(this, info);
}
RegExpNode* TextNode::PropagateInterest(NodeInfo* info) {
RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
return PropagateToEndpoint(this, info);
}
@ -2455,15 +2474,21 @@ void Analysis::VisitText(TextNode* that) {
}
EnsureAnalyzed(that->on_success());
EnsureAnalyzed(that->on_failure());
NodeInfo* info = that->info();
NodeInfo* next_info = that->on_success()->info();
// If the following node is interested in what it follows then this
// node must determine it.
info->determine_newline = next_info->follows_newline_interest;
info->determine_word = next_info->follows_word_interest;
info->determine_start = next_info->follows_start_interest;
}
void Analysis::VisitAction(ActionNode* that) {
RegExpNode* next = that->on_success();
EnsureAnalyzed(next);
that->info()->determine_newline = next->info()->prev_determine_newline();
that->info()->determine_word = next->info()->prev_determine_word();
that->info()->determine_start = next->info()->prev_determine_start();
EnsureAnalyzed(that->on_success());
// If the next node is interested in what it follows then this node
// has to be interested too so it can pass the information on.
that->info()->AddFromFollowing(that->on_success()->info());
}
@ -2472,9 +2497,9 @@ void Analysis::VisitChoice(ChoiceNode* that) {
for (int i = 0; i < that->alternatives()->length(); i++) {
RegExpNode* node = that->alternatives()->at(i).node();
EnsureAnalyzed(node);
info->determine_newline |= node->info()->prev_determine_newline();
info->determine_word |= node->info()->prev_determine_word();
info->determine_start |= node->info()->prev_determine_start();
// Anything the following nodes need to know has to be known by
// this node also, so it can pass it on.
info->AddFromFollowing(node->info());
}
if (!that->table_calculated()) {
DispatchTableConstructor cons(that->table());
@ -2607,7 +2632,8 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) {
Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
RegExpNode** node_return,
bool ignore_case) {
bool ignore_case,
bool is_multiline) {
RegExpCompiler compiler(input->capture_count, ignore_case);
// Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,

View File

@ -437,6 +437,10 @@ class TextElement {
struct NodeInfo {
enum PrecedingInfo {
UNKNOWN = -1, FALSE = 0, TRUE = 1
};
NodeInfo()
: being_analyzed(false),
been_analyzed(false),
@ -445,34 +449,59 @@ struct NodeInfo {
determine_start(false),
follows_word_interest(false),
follows_newline_interest(false),
follows_start_interest(false) { }
bool SameInterests(NodeInfo* that) {
return (follows_word_interest == that->follows_word_interest)
follows_start_interest(false),
at_end(false),
follows_word(UNKNOWN),
follows_newline(UNKNOWN),
follows_start(UNKNOWN) { }
bool HasSameForwardInterests(NodeInfo* that) {
return (at_end == that->at_end)
&& (follows_word_interest == that->follows_word_interest)
&& (follows_newline_interest == that->follows_newline_interest)
&& (follows_start_interest == that->follows_start_interest);
}
void AdoptInterests(NodeInfo* that) {
follows_word_interest = that->follows_word_interest;
follows_newline_interest = that->follows_newline_interest;
follows_start_interest = that->follows_start_interest;
// Updates the interests of this node given the interests of the
// node preceding it.
void AddFromPreceding(NodeInfo* that) {
at_end |= that->at_end;
follows_word_interest |= that->follows_word_interest;
follows_newline_interest |= that->follows_newline_interest;
follows_start_interest |= that->follows_start_interest;
}
bool prev_determine_word() {
return determine_word || follows_word_interest;
}
bool prev_determine_newline() {
return determine_newline || follows_newline_interest;
}
bool prev_determine_start() {
return determine_start || follows_start_interest;
// Sets the interests of this node to include the interests of the
// following node.
void AddFromFollowing(NodeInfo* that) {
follows_word_interest |= that->follows_word_interest;
follows_newline_interest |= that->follows_newline_interest;
follows_start_interest |= that->follows_start_interest;
}
bool being_analyzed: 1;
bool been_analyzed: 1;
// These bits are set if this node must propagate forward information
// about the last character it consumed (or, in the case of 'start',
// if it is at the start of the input).
bool determine_word: 1;
bool determine_newline: 1;
bool determine_start: 1;
// These bits are set of this node has to know what the preceding
// character was.
bool follows_word_interest: 1;
bool follows_newline_interest: 1;
bool follows_start_interest: 1;
bool at_end: 1;
// These bits are set if the node can make assumptions about what
// the previous character was.
PrecedingInfo follows_word: 2;
PrecedingInfo follows_newline: 2;
PrecedingInfo follows_start: 2;
};
@ -511,7 +540,13 @@ class RegExpNode: public ZoneObject {
// Until the implementation is complete we will return true for success and
// false for failure.
virtual bool Emit(RegExpCompiler* compiler) = 0;
virtual RegExpNode* PropagateInterest(NodeInfo* info) = 0;
// Propagates the given interest information forward. When seeing
// \bfoo for instance, the \b is implemented by propagating forward
// to the 'foo' string that it should only succeed if its first
// character is a letter xor the previous character was a letter.
virtual RegExpNode* PropagateForward(NodeInfo* info) = 0;
NodeInfo* info() { return &info_; }
virtual bool IsBacktrack() { return false; }
RegExpNode* GetSibling(NodeInfo* info);
@ -558,7 +593,7 @@ class ActionNode: public SeqRegExpNode {
static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateInterest(NodeInfo* info);
virtual RegExpNode* PropagateForward(NodeInfo* info);
private:
union {
struct {
@ -592,7 +627,7 @@ class TextNode: public SeqRegExpNode {
on_failure_(on_failure),
elms_(elms) { }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* PropagateInterest(NodeInfo* info);
virtual RegExpNode* PropagateForward(NodeInfo* info);
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
ZoneList<TextElement>* elements() { return elms_; }
@ -618,7 +653,7 @@ class BackReferenceNode: public SeqRegExpNode {
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateInterest(NodeInfo* info);
virtual RegExpNode* PropagateForward(NodeInfo* info);
private:
RegExpNode* on_failure_;
int start_reg_;
@ -632,7 +667,7 @@ class EndNode: public RegExpNode {
explicit EndNode(Action action) : action_(action) { }
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateInterest(NodeInfo* info);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual bool IsBacktrack() { return action_ == BACKTRACK; }
virtual bool GoTo(RegExpCompiler* compiler);
private:
@ -683,7 +718,7 @@ class ChoiceNode: public RegExpNode {
DispatchTable* table() { return &table_; }
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateInterest(NodeInfo* info);
virtual RegExpNode* PropagateForward(NodeInfo* info);
bool table_calculated() { return table_calculated_; }
void set_table_calculated(bool b) { table_calculated_ = b; }
bool being_calculated() { return being_calculated_; }
@ -770,7 +805,8 @@ class RegExpEngine: public AllStatic {
public:
static Handle<FixedArray> Compile(RegExpParseResult* input,
RegExpNode** node_return,
bool ignore_case);
bool ignore_case,
bool multiline);
static void DotPrint(const char* label, RegExpNode* node);
};

View File

@ -545,7 +545,7 @@ class RegExpParser {
bool CaptureAvailable(int index);
uc32 current_;
bool has_more_;
bool multiline_mode_;
bool multiline_;
int next_pos_;
FlatStringReader* in_;
Handle<String>* error_;
@ -3499,10 +3499,10 @@ Expression* Parser::NewThrowError(Handle<String> constructor,
RegExpParser::RegExpParser(FlatStringReader* in,
Handle<String>* error,
bool multiline_mode)
bool multiline)
: current_(kEndMarker),
has_more_(true),
multiline_mode_(multiline_mode),
multiline_(multiline),
next_pos_(0),
in_(in),
error_(error),
@ -3617,16 +3617,16 @@ RegExpTree* RegExpParser::ParseDisjunction(bool* ok) {
case '^': {
Advance();
RegExpAssertion::Type type =
multiline_mode_ ? RegExpAssertion::START_OF_LINE :
RegExpAssertion::START_OF_INPUT;
multiline_ ? RegExpAssertion::START_OF_LINE :
RegExpAssertion::START_OF_INPUT;
builder.AddAssertion(new RegExpAssertion(type));
continue;
}
case '$': {
Advance();
RegExpAssertion::Type type =
multiline_mode_ ? RegExpAssertion::END_OF_LINE :
RegExpAssertion::END_OF_INPUT;
multiline_ ? RegExpAssertion::END_OF_LINE :
RegExpAssertion::END_OF_INPUT;
builder.AddAssertion(new RegExpAssertion(type));
continue;
}
@ -4294,10 +4294,11 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
}
bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) {
bool ParseRegExp(FlatStringReader* input,
bool multiline,
RegExpParseResult* result) {
ASSERT(result != NULL);
// TODO(plesner): Get multiline flag somehow
RegExpParser parser(input, &result->error, false);
RegExpParser parser(input, &result->error, multiline);
bool ok = true;
result->tree = parser.ParsePattern(&ok);
if (!ok) {

View File

@ -145,7 +145,9 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
v8::Extension* extension);
bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result);
bool ParseRegExp(FlatStringReader* input,
bool multiline,
RegExpParseResult* result);
// Support for doing lazy compilation. The script is the script containing full

View File

@ -56,7 +56,7 @@ static SmartPointer<const char> Parse(const char* input) {
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
CHECK(v8::internal::ParseRegExp(&reader, &result));
CHECK(v8::internal::ParseRegExp(&reader, false, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
SmartPointer<const char> output = result.tree->ToString();
@ -69,7 +69,7 @@ static bool ParseEscapes(const char* input) {
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
CHECK(v8::internal::ParseRegExp(&reader, &result));
CHECK(v8::internal::ParseRegExp(&reader, false, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
return result.has_character_escapes;
@ -257,7 +257,7 @@ static void ExpectError(const char* input,
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result));
CHECK_EQ(false, v8::internal::ParseRegExp(&reader, false, &result));
CHECK(result.tree == NULL);
CHECK(!result.error.is_null());
SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
@ -373,23 +373,23 @@ TEST(CharacterClassEscapes) {
}
static RegExpNode* Compile(const char* input) {
static RegExpNode* Compile(const char* input, bool multiline) {
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
if (!v8::internal::ParseRegExp(&reader, &result))
if (!v8::internal::ParseRegExp(&reader, multiline, &result))
return NULL;
RegExpNode* node = NULL;
RegExpEngine::Compile(&result, &node, false);
RegExpEngine::Compile(&result, &node, false, multiline);
return node;
}
static void Execute(const char* input,
const char* str,
bool multiline,
bool dot_output = false) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
RegExpNode* node = Compile(input);
RegExpNode* node = Compile(input, multiline);
USE(node);
#ifdef DEBUG
if (dot_output) {
@ -400,14 +400,6 @@ static void Execute(const char* input,
}
TEST(Execution) {
V8::Initialize(NULL);
Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
}
class TestConfig {
public:
typedef int Key;
@ -1043,8 +1035,8 @@ TEST(LatinCanonicalize) {
TEST(SimplePropagation) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
RegExpNode* node = Compile("(a|^b|c)");
CHECK(node->info()->determine_start);
RegExpNode* node = Compile("(a|^b|c)", false);
CHECK(node->info()->follows_start_interest);
}
@ -1061,7 +1053,7 @@ static uc32 CanonRange(uc32 c) {
TEST(RangeCanonicalization) {
ASSERT((CanonRange(0) & CharacterRange::kStartMarker) != 0);
CHECK((CanonRange(0) & CharacterRange::kStartMarker) != 0);
// Check that we arrive at the same result when using the basic
// range canonicalization primitives as when using immediate
// canonicalization.
@ -1175,5 +1167,5 @@ TEST(CharacterRangeCaseIndependence) {
TEST(Graph) {
V8::Initialize(NULL);
Execute("\\w+", "", true);
Execute("(?:foo|bar$)", false, true);
}