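Allow the non-spec [a-\d] (i.e., ending a range with a character class escape) in RegExps.
Such a pattern no longer reports "Illegal character class"; it is parsed as the literal 'a', a literal '-', and the class \d.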

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@877 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2008-12-01 14:29:28 +00:00
parent 2110b71373
commit b07b40b335
2 changed files with 35 additions and 38 deletions

View File

@ -521,8 +521,7 @@ class RegExpParser {
// can be reparsed.
bool ParseBackReferenceIndex(int* index_out);
CharacterRange ParseClassAtom(bool* is_char_class,
ZoneList<CharacterRange>* ranges,
CharacterRange ParseClassAtom(uc16* char_class,
bool* ok);
RegExpTree* ReportError(Vector<const char> message, bool* ok);
void Advance();
@ -4158,19 +4157,15 @@ RegExpTree* RegExpParser::ParseGroup(bool* ok) {
}
CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class,
ZoneList<CharacterRange>* ranges,
bool* ok) {
ASSERT_EQ(false, *is_char_class);
CharacterRange RegExpParser::ParseClassAtom(uc16* char_class, bool* ok) {
ASSERT_EQ(0, *char_class);
uc32 first = current();
if (first == '\\') {
switch (Next()) {
case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
*is_char_class = true;
uc32 c = Next();
CharacterRange::AddClassEscape(c, ranges);
*char_class = Next();
Advance(2);
return NULL;
return CharacterRange::Singleton(0); // Return dummy value.
}
default:
uc32 c = ParseClassCharacterEscape(CHECK_OK);
@ -4185,7 +4180,6 @@ CharacterRange RegExpParser::ParseClassAtom(bool* is_char_class,
RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
static const char* kUnterminated = "Unterminated character class";
static const char* kIllegal = "Illegal character class";
static const char* kRangeOutOfOrder = "Range out of order in character class";
ASSERT_EQ(current(), '[');
@ -4197,32 +4191,36 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
}
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
while (has_more() && current() != ']') {
bool is_char_class = false;
CharacterRange first = ParseClassAtom(&is_char_class, ranges, CHECK_OK);
if (!is_char_class) {
if (current() == '-') {
Advance();
if (current() == kEndMarker) {
// If we reach the end we break out of the loop and let the
// following code report an error.
break;
} else if (current() == ']') {
ranges->Add(first);
ranges->Add(CharacterRange::Singleton('-'));
break;
}
CharacterRange next =
ParseClassAtom(&is_char_class, ranges, CHECK_OK);
if (is_char_class) {
return ReportError(CStrVector(kIllegal), CHECK_OK);
}
if (first.from() > next.to()) {
return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
}
ranges->Add(CharacterRange::Range(first.from(), next.to()));
} else {
uc16 char_class = 0;
CharacterRange first = ParseClassAtom(&char_class, CHECK_OK);
if (char_class) {
CharacterRange::AddClassEscape(char_class, ranges);
continue;
}
if (current() == '-') {
Advance();
if (current() == kEndMarker) {
// If we reach the end we break out of the loop and let the
// following code report an error.
break;
} else if (current() == ']') {
ranges->Add(first);
ranges->Add(CharacterRange::Singleton('-'));
break;
}
CharacterRange next = ParseClassAtom(&char_class, CHECK_OK);
if (char_class) {
ranges->Add(first);
ranges->Add(CharacterRange::Singleton('-'));
CharacterRange::AddClassEscape(char_class, ranges);
continue;
}
if (first.from() > next.to()) {
return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
}
ranges->Add(CharacterRange::Range(first.from(), next.to()));
} else {
ranges->Add(first);
}
}
if (!has_more()) {
@ -4230,7 +4228,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
}
Advance();
if (ranges->length() == 0) {
ranges->Add(CharacterRange::Range(0, 0xffff));
ranges->Add(CharacterRange::Everything());
is_negated = !is_negated;
}
return new RegExpCharacterClass(ranges, is_negated);

View File

@ -126,6 +126,7 @@ TEST(Parser) {
CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
"'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
CHECK_PARSE_EQ("\\c!", "'c!'");
@ -275,8 +276,6 @@ TEST(Errors) {
const char* kUnterminatedCharacterClass = "Unterminated character class";
ExpectError("[", kUnterminatedCharacterClass);
ExpectError("[a-", kUnterminatedCharacterClass);
const char* kIllegalCharacterClass = "Illegal character class";
ExpectError("[a-\\w]", kIllegalCharacterClass);
const char* kEndControl = "\\c at end of pattern";
ExpectError("\\c", kEndControl);
const char* kNothingToRepeat = "Nothing to repeat";