[regexp] Throw for patterns like /[\p{...}-\p{...}]/u.

Bug: v8:4743
Change-Id: Iacb7681e679faa1ece77c577a2585363f6ef87a2
Reviewed-on: https://chromium-review.googlesource.com/582010
Commit-Queue: Yang Guo <yangguo@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#46857}
This commit is contained in:
Yang Guo 2017-07-25 09:38:09 +02:00 committed by Commit Bot
parent dc778a3dc5
commit 7924985f9f
6 changed files with 58 additions and 74 deletions

View File

@ -5843,7 +5843,7 @@ static void AddClassNegated(const int *elmv,
ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
}
void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
bool add_unicode_case_equivalents,
Zone* zone) {
if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
@ -5866,7 +5866,7 @@ void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
AddClassEscape(type, ranges, zone);
}
void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
Zone* zone) {
switch (type) {
case 's':

View File

@ -80,10 +80,10 @@ class CharacterRange {
CharacterRange() : from_(0), to_(0) {}
// For compatibility with the CHECK_OK macro
CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
static void AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
Zone* zone);
// Add class escapes. Add case equivalent closure for \w and \W if necessary.
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
static void AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
bool add_unicode_case_equivalents, Zone* zone);
static Vector<const int> GetWordBounds();
static inline CharacterRange Singleton(uc32 value) {

View File

@ -1476,11 +1476,12 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
return 0;
}
CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
DCHECK_EQ(0, *char_class);
uc32 first = current();
if (first == '\\') {
void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
Zone* zone,
bool add_unicode_case_equivalents,
uc32* char_out, bool* is_class_escape) {
uc32 current_char = current();
if (current_char == '\\') {
switch (Next()) {
case 'w':
case 'W':
@ -1488,57 +1489,37 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
case 'D':
case 's':
case 'S': {
*char_class = Next();
CharacterRange::AddClassEscape(static_cast<char>(Next()), ranges,
add_unicode_case_equivalents, zone);
Advance(2);
return CharacterRange::Singleton(0); // Return dummy value.
*is_class_escape = true;
return;
}
case kEndMarker:
return ReportError(CStrVector("\\ at end of pattern"));
ReportError(CStrVector("\\ at end of pattern"));
return;
case 'p':
case 'P':
if (FLAG_harmony_regexp_property && unicode()) {
bool negate = Next() == 'P';
Advance(2);
if (!ParsePropertyClass(ranges, negate)) {
ReportError(CStrVector("Invalid property name in character class"));
}
*is_class_escape = true;
return;
}
break;
default:
first = ParseClassCharacterEscape(CHECK_FAILED);
break;
}
*char_out = ParseClassCharacterEscape();
*is_class_escape = false;
} else {
Advance();
*char_out = current_char;
*is_class_escape = false;
}
return CharacterRange::Singleton(first);
}
static const uc16 kNoCharClass = 0;
// Adds range or pre-defined character class to character ranges.
// If char_class is not kNoCharClass, it's interpreted as a class
// escape (i.e., 's' means whitespace, from '\s').
static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
uc16 char_class, CharacterRange range,
bool add_unicode_case_equivalents,
Zone* zone) {
if (char_class != kNoCharClass) {
CharacterRange::AddClassEscape(char_class, ranges,
add_unicode_case_equivalents, zone);
} else {
ranges->Add(range, zone);
}
}
bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) {
if (!FLAG_harmony_regexp_property) return false;
if (!unicode()) return false;
if (current() != '\\') return false;
uc32 next = Next();
bool parse_success = false;
if (next == 'p') {
Advance(2);
parse_success = ParsePropertyClass(ranges, false);
} else if (next == 'P') {
Advance(2);
parse_success = ParsePropertyClass(ranges, true);
} else {
return false;
}
if (!parse_success)
ReportError(CStrVector("Invalid property name in character class"));
return parse_success;
}
RegExpTree* RegExpParser::ParseCharacterClass() {
@ -1557,10 +1538,10 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
new (zone()) ZoneList<CharacterRange>(2, zone());
bool add_unicode_case_equivalents = unicode() && ignore_case();
while (has_more() && current() != ']') {
bool parsed_property = ParseClassProperty(ranges CHECK_FAILED);
if (parsed_property) continue;
uc16 char_class = kNoCharClass;
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
uc32 char_1, char_2;
bool is_class_1, is_class_2;
ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_1,
&is_class_1 CHECK_FAILED);
if (current() == '-') {
Advance();
if (current() == kEndMarker) {
@ -1568,34 +1549,30 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
// following code report an error.
break;
} else if (current() == ']') {
AddRangeOrEscape(ranges, char_class, first,
add_unicode_case_equivalents, zone());
if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
ranges->Add(CharacterRange::Singleton('-'), zone());
break;
}
uc16 char_class_2 = kNoCharClass;
CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_2,
&is_class_2 CHECK_FAILED);
if (is_class_1 || is_class_2) {
// Either end is an escaped character class. Treat the '-' verbatim.
if (unicode()) {
// ES2015 21.2.2.15.1 step 1.
return ReportError(CStrVector(kRangeInvalid));
}
AddRangeOrEscape(ranges, char_class, first,
add_unicode_case_equivalents, zone());
if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
ranges->Add(CharacterRange::Singleton('-'), zone());
AddRangeOrEscape(ranges, char_class_2, next,
add_unicode_case_equivalents, zone());
if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2), zone());
continue;
}
// ES2015 21.2.2.15.1 step 6.
if (first.from() > next.to()) {
if (char_1 > char_2) {
return ReportError(CStrVector(kRangeOutOfOrder));
}
ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());
ranges->Add(CharacterRange::Range(char_1, char_2), zone());
} else {
AddRangeOrEscape(ranges, char_class, first, add_unicode_case_equivalents,
zone());
if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
}
}
if (!has_more()) {

View File

@ -184,8 +184,14 @@ class RegExpParser BASE_EMBEDDED {
// can be reparsed.
bool ParseBackReferenceIndex(int* index_out);
bool ParseClassProperty(ZoneList<CharacterRange>* result);
CharacterRange ParseClassAtom(uc16* char_class);
// Parse inside a class. Either add the escaped class to |ranges| and set
// |is_class_escape| to true, or set |is_class_escape| to false and pass the
// parsed single character through |char_out|.
void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone,
bool add_unicode_case_equivalents, uc32* char_out,
bool* is_class_escape);
char ParseClassEscape();
RegExpTree* ReportError(Vector<const char> message);
void Advance();
void Advance(int dist);

View File

@ -222,8 +222,8 @@ void TestRegExpParser(bool lookbehind) {
CheckParseEq("[\\d]", "[0-9]");
CheckParseEq("[x\\dz]", "[x 0-9 z]");
CheckParseEq("[\\d-z]", "[0-9 - z]");
CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
CheckParseEq("[z-\\d]", "[z - 0-9]");
CheckParseEq("[\\d-\\d]", "[0-9 0-9 -]");
CheckParseEq("[z-\\d]", "[0-9 z -]");
// Control character outside character class.
CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
CheckParseEq("\\c!", "'\\c!'");

View File

@ -9,9 +9,10 @@ assertThrows("/[\\p{garbage}]/u");
assertThrows("/[\\p{}]/u");
assertThrows("/[\\p{]/u");
assertThrows("/[\\p}]/u");
assertThrows("/^[\\p{Lu}-\\p{Ll}]+$/u");
assertTrue(/^[\p{Lu}\p{Ll}]+$/u.test("ABCabc"));
assertTrue(/^[\p{Lu}-\p{Ll}]+$/u.test("ABC-abc"));
assertTrue(/^[\p{Lu}-]+$/u.test("ABC-"));
assertFalse(/^[\P{Lu}\p{Ll}]+$/u.test("ABCabc"));
assertTrue(/^[\P{Lu}\p{Ll}]+$/u.test("abc"));
assertTrue(/^[\P{Lu}]+$/u.test("abc123"));