From 1a0bb51069881e331d5c6489d2f1b8df4d2845a1 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Mon, 26 Apr 2010 15:10:42 +0000 Subject: [PATCH] Fix bug in word-boundary-lookahead followed by end-of-input assertion. Review URL: http://codereview.chromium.org/1712013 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4504 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/jsregexp.cc | 19 +++++------ src/regexp-macro-assembler-tracer.cc | 4 +-- test/mjsunit/regexp.js | 48 ++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 11 deletions(-) diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 0ae8bd34df..9a1f1f114c 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -4872,17 +4872,18 @@ void Analysis::VisitAssertion(AssertionNode* that) { SetRelation word_relation = CharacterRange::WordCharacterRelation(following_chars); - if (word_relation.ContainedIn()) { - // Following character is definitely a word character. - type = (type == AssertionNode::AT_BOUNDARY) ? - AssertionNode::AFTER_NONWORD_CHARACTER : - AssertionNode::AFTER_WORD_CHARACTER; - that->set_type(type); - } else if (word_relation.Disjoint()) { + if (word_relation.Disjoint()) { + // Includes the case where following_chars is empty (e.g., end-of-input). // Following character is definitely *not* a word character. type = (type == AssertionNode::AT_BOUNDARY) ? - AssertionNode::AFTER_WORD_CHARACTER : - AssertionNode::AFTER_NONWORD_CHARACTER; + AssertionNode::AFTER_WORD_CHARACTER : + AssertionNode::AFTER_NONWORD_CHARACTER; + that->set_type(type); + } else if (word_relation.ContainedIn()) { + // Following character is definitely a word character. + type = (type == AssertionNode::AT_BOUNDARY) ? 
+ AssertionNode::AFTER_NONWORD_CHARACTER : + AssertionNode::AFTER_WORD_CHARACTER; that->set_type(type); } } diff --git a/src/regexp-macro-assembler-tracer.cc b/src/regexp-macro-assembler-tracer.cc index c5c2919c3f..522042262f 100644 --- a/src/regexp-macro-assembler-tracer.cc +++ b/src/regexp-macro-assembler-tracer.cc @@ -37,8 +37,8 @@ RegExpMacroAssemblerTracer::RegExpMacroAssemblerTracer( RegExpMacroAssembler* assembler) : assembler_(assembler) { unsigned int type = assembler->Implementation(); - ASSERT(type < 3); - const char* impl_names[3] = {"IA32", "ARM", "Bytecode"}; + ASSERT(type < 4); + const char* impl_names[4] = {"IA32", "ARM", "X64", "Bytecode"}; PrintF("RegExpMacroAssembler%s();\n", impl_names[type]); } diff --git a/test/mjsunit/regexp.js b/test/mjsunit/regexp.js index c8dcc6fe89..a8891969f4 100644 --- a/test/mjsunit/regexp.js +++ b/test/mjsunit/regexp.js @@ -436,3 +436,51 @@ assertTrue(re.multiline); assertEquals(0, re.lastIndex); assertEquals(37, re.someOtherProperty); assertEquals(37, re[42]); + +// Test boundary-checks. 
+function assertRegExpTest(re, input, test) { + assertEquals(test, re.test(input), "test:" + re + ":" + input); +} + +assertRegExpTest(/b\b/, "b", true); +assertRegExpTest(/b\b$/, "b", true); +assertRegExpTest(/\bb/, "b", true); +assertRegExpTest(/^\bb/, "b", true); +assertRegExpTest(/,\b/, ",", false); +assertRegExpTest(/,\b$/, ",", false); +assertRegExpTest(/\b,/, ",", false); +assertRegExpTest(/^\b,/, ",", false); + +assertRegExpTest(/b\B/, "b", false); +assertRegExpTest(/b\B$/, "b", false); +assertRegExpTest(/\Bb/, "b", false); +assertRegExpTest(/^\Bb/, "b", false); +assertRegExpTest(/,\B/, ",", true); +assertRegExpTest(/,\B$/, ",", true); +assertRegExpTest(/\B,/, ",", true); +assertRegExpTest(/^\B,/, ",", true); + +assertRegExpTest(/b\b/, "b,", true); +assertRegExpTest(/b\b/, "ba", false); +assertRegExpTest(/b\B/, "b,", false); +assertRegExpTest(/b\B/, "ba", true); + +assertRegExpTest(/b\Bb/, "bb", true); +assertRegExpTest(/b\bb/, "bb", false); + +assertRegExpTest(/b\b[,b]/, "bb", false); +assertRegExpTest(/b\B[,b]/, "bb", true); +assertRegExpTest(/b\b[,b]/, "b,", true); +assertRegExpTest(/b\B[,b]/, "b,", false); + +assertRegExpTest(/[,b]\bb/, "bb", false); +assertRegExpTest(/[,b]\Bb/, "bb", true); +assertRegExpTest(/[,b]\bb/, ",b", true); +assertRegExpTest(/[,b]\Bb/, ",b", false); + +assertRegExpTest(/[,b]\b[,b]/, "bb", false); +assertRegExpTest(/[,b]\B[,b]/, "bb", true); +assertRegExpTest(/[,b]\b[,b]/, ",b", true); +assertRegExpTest(/[,b]\B[,b]/, ",b", false); +assertRegExpTest(/[,b]\b[,b]/, "b,", true); +assertRegExpTest(/[,b]\B[,b]/, "b,", false);