[regexp] Throw for patterns like /[\p{...}-\p{...}]/u.
Bug: v8:4743 Change-Id: Iacb7681e679faa1ece77c577a2585363f6ef87a2 Reviewed-on: https://chromium-review.googlesource.com/582010 Commit-Queue: Yang Guo <yangguo@chromium.org> Reviewed-by: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#46857}
This commit is contained in:
parent
dc778a3dc5
commit
7924985f9f
@ -5843,7 +5843,7 @@ static void AddClassNegated(const int *elmv,
|
||||
ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
|
||||
}
|
||||
|
||||
void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
|
||||
void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
|
||||
bool add_unicode_case_equivalents,
|
||||
Zone* zone) {
|
||||
if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
|
||||
@ -5866,7 +5866,7 @@ void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
|
||||
AddClassEscape(type, ranges, zone);
|
||||
}
|
||||
|
||||
void CharacterRange::AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
|
||||
void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
|
||||
Zone* zone) {
|
||||
switch (type) {
|
||||
case 's':
|
||||
|
@ -80,10 +80,10 @@ class CharacterRange {
|
||||
CharacterRange() : from_(0), to_(0) {}
|
||||
// For compatibility with the CHECK_OK macro
|
||||
CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
|
||||
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
|
||||
static void AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
|
||||
Zone* zone);
|
||||
// Add class escapes. Add case equivalent closure for \w and \W if necessary.
|
||||
static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges,
|
||||
static void AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
|
||||
bool add_unicode_case_equivalents, Zone* zone);
|
||||
static Vector<const int> GetWordBounds();
|
||||
static inline CharacterRange Singleton(uc32 value) {
|
||||
|
@ -1476,11 +1476,12 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
|
||||
DCHECK_EQ(0, *char_class);
|
||||
uc32 first = current();
|
||||
if (first == '\\') {
|
||||
void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
|
||||
Zone* zone,
|
||||
bool add_unicode_case_equivalents,
|
||||
uc32* char_out, bool* is_class_escape) {
|
||||
uc32 current_char = current();
|
||||
if (current_char == '\\') {
|
||||
switch (Next()) {
|
||||
case 'w':
|
||||
case 'W':
|
||||
@ -1488,57 +1489,37 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
|
||||
case 'D':
|
||||
case 's':
|
||||
case 'S': {
|
||||
*char_class = Next();
|
||||
CharacterRange::AddClassEscape(static_cast<char>(Next()), ranges,
|
||||
add_unicode_case_equivalents, zone);
|
||||
Advance(2);
|
||||
return CharacterRange::Singleton(0); // Return dummy value.
|
||||
*is_class_escape = true;
|
||||
return;
|
||||
}
|
||||
case kEndMarker:
|
||||
return ReportError(CStrVector("\\ at end of pattern"));
|
||||
ReportError(CStrVector("\\ at end of pattern"));
|
||||
return;
|
||||
case 'p':
|
||||
case 'P':
|
||||
if (FLAG_harmony_regexp_property && unicode()) {
|
||||
bool negate = Next() == 'P';
|
||||
Advance(2);
|
||||
if (!ParsePropertyClass(ranges, negate)) {
|
||||
ReportError(CStrVector("Invalid property name in character class"));
|
||||
}
|
||||
*is_class_escape = true;
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
first = ParseClassCharacterEscape(CHECK_FAILED);
|
||||
break;
|
||||
}
|
||||
*char_out = ParseClassCharacterEscape();
|
||||
*is_class_escape = false;
|
||||
} else {
|
||||
Advance();
|
||||
*char_out = current_char;
|
||||
*is_class_escape = false;
|
||||
}
|
||||
|
||||
return CharacterRange::Singleton(first);
|
||||
}
|
||||
|
||||
static const uc16 kNoCharClass = 0;
|
||||
|
||||
// Adds range or pre-defined character class to character ranges.
|
||||
// If char_class is not kNoCharClass, it's interpreted as a class
|
||||
// escape (i.e., 's' means whitespace, from '\s').
|
||||
static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
|
||||
uc16 char_class, CharacterRange range,
|
||||
bool add_unicode_case_equivalents,
|
||||
Zone* zone) {
|
||||
if (char_class != kNoCharClass) {
|
||||
CharacterRange::AddClassEscape(char_class, ranges,
|
||||
add_unicode_case_equivalents, zone);
|
||||
} else {
|
||||
ranges->Add(range, zone);
|
||||
}
|
||||
}
|
||||
|
||||
bool RegExpParser::ParseClassProperty(ZoneList<CharacterRange>* ranges) {
|
||||
if (!FLAG_harmony_regexp_property) return false;
|
||||
if (!unicode()) return false;
|
||||
if (current() != '\\') return false;
|
||||
uc32 next = Next();
|
||||
bool parse_success = false;
|
||||
if (next == 'p') {
|
||||
Advance(2);
|
||||
parse_success = ParsePropertyClass(ranges, false);
|
||||
} else if (next == 'P') {
|
||||
Advance(2);
|
||||
parse_success = ParsePropertyClass(ranges, true);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
if (!parse_success)
|
||||
ReportError(CStrVector("Invalid property name in character class"));
|
||||
return parse_success;
|
||||
}
|
||||
|
||||
RegExpTree* RegExpParser::ParseCharacterClass() {
|
||||
@ -1557,10 +1538,10 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
|
||||
new (zone()) ZoneList<CharacterRange>(2, zone());
|
||||
bool add_unicode_case_equivalents = unicode() && ignore_case();
|
||||
while (has_more() && current() != ']') {
|
||||
bool parsed_property = ParseClassProperty(ranges CHECK_FAILED);
|
||||
if (parsed_property) continue;
|
||||
uc16 char_class = kNoCharClass;
|
||||
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
|
||||
uc32 char_1, char_2;
|
||||
bool is_class_1, is_class_2;
|
||||
ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_1,
|
||||
&is_class_1 CHECK_FAILED);
|
||||
if (current() == '-') {
|
||||
Advance();
|
||||
if (current() == kEndMarker) {
|
||||
@ -1568,34 +1549,30 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
|
||||
// following code report an error.
|
||||
break;
|
||||
} else if (current() == ']') {
|
||||
AddRangeOrEscape(ranges, char_class, first,
|
||||
add_unicode_case_equivalents, zone());
|
||||
if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
|
||||
ranges->Add(CharacterRange::Singleton('-'), zone());
|
||||
break;
|
||||
}
|
||||
uc16 char_class_2 = kNoCharClass;
|
||||
CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
|
||||
if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
|
||||
ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_2,
|
||||
&is_class_2 CHECK_FAILED);
|
||||
if (is_class_1 || is_class_2) {
|
||||
// Either end is an escaped character class. Treat the '-' verbatim.
|
||||
if (unicode()) {
|
||||
// ES2015 21.2.2.15.1 step 1.
|
||||
return ReportError(CStrVector(kRangeInvalid));
|
||||
}
|
||||
AddRangeOrEscape(ranges, char_class, first,
|
||||
add_unicode_case_equivalents, zone());
|
||||
if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
|
||||
ranges->Add(CharacterRange::Singleton('-'), zone());
|
||||
AddRangeOrEscape(ranges, char_class_2, next,
|
||||
add_unicode_case_equivalents, zone());
|
||||
if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2), zone());
|
||||
continue;
|
||||
}
|
||||
// ES2015 21.2.2.15.1 step 6.
|
||||
if (first.from() > next.to()) {
|
||||
if (char_1 > char_2) {
|
||||
return ReportError(CStrVector(kRangeOutOfOrder));
|
||||
}
|
||||
ranges->Add(CharacterRange::Range(first.from(), next.to()), zone());
|
||||
ranges->Add(CharacterRange::Range(char_1, char_2), zone());
|
||||
} else {
|
||||
AddRangeOrEscape(ranges, char_class, first, add_unicode_case_equivalents,
|
||||
zone());
|
||||
if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
|
||||
}
|
||||
}
|
||||
if (!has_more()) {
|
||||
|
@ -184,8 +184,14 @@ class RegExpParser BASE_EMBEDDED {
|
||||
// can be reparsed.
|
||||
bool ParseBackReferenceIndex(int* index_out);
|
||||
|
||||
bool ParseClassProperty(ZoneList<CharacterRange>* result);
|
||||
CharacterRange ParseClassAtom(uc16* char_class);
|
||||
// Parse inside a class. Either add the escaped class to |ranges| and set
|
||||
// |is_class_escape| to true, or set it to false and pass the parsed single character through |char_out|.
|
||||
void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone,
|
||||
bool add_unicode_case_equivalents, uc32* char_out,
|
||||
bool* is_class_escape);
|
||||
|
||||
char ParseClassEscape();
|
||||
|
||||
RegExpTree* ReportError(Vector<const char> message);
|
||||
void Advance();
|
||||
void Advance(int dist);
|
||||
|
@ -222,8 +222,8 @@ void TestRegExpParser(bool lookbehind) {
|
||||
CheckParseEq("[\\d]", "[0-9]");
|
||||
CheckParseEq("[x\\dz]", "[x 0-9 z]");
|
||||
CheckParseEq("[\\d-z]", "[0-9 - z]");
|
||||
CheckParseEq("[\\d-\\d]", "[0-9 - 0-9]");
|
||||
CheckParseEq("[z-\\d]", "[z - 0-9]");
|
||||
CheckParseEq("[\\d-\\d]", "[0-9 0-9 -]");
|
||||
CheckParseEq("[z-\\d]", "[0-9 z -]");
|
||||
// Control character outside character class.
|
||||
CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
|
||||
CheckParseEq("\\c!", "'\\c!'");
|
||||
|
@ -9,9 +9,10 @@ assertThrows("/[\\p{garbage}]/u");
|
||||
assertThrows("/[\\p{}]/u");
|
||||
assertThrows("/[\\p{]/u");
|
||||
assertThrows("/[\\p}]/u");
|
||||
assertThrows("/^[\\p{Lu}-\\p{Ll}]+$/u");
|
||||
|
||||
assertTrue(/^[\p{Lu}\p{Ll}]+$/u.test("ABCabc"));
|
||||
assertTrue(/^[\p{Lu}-\p{Ll}]+$/u.test("ABC-abc"));
|
||||
assertTrue(/^[\p{Lu}-]+$/u.test("ABC-"));
|
||||
assertFalse(/^[\P{Lu}\p{Ll}]+$/u.test("ABCabc"));
|
||||
assertTrue(/^[\P{Lu}\p{Ll}]+$/u.test("abc"));
|
||||
assertTrue(/^[\P{Lu}]+$/u.test("abc123"));
|
||||
|
Loading…
Reference in New Issue
Block a user