[regexp] Quantifiers apply to the whole surrogate pair in unicode regexps.
R=rossberg@chromium.org

Review URL: https://codereview.chromium.org/1571563003

Cr-Commit-Position: refs/heads/master@{#33209}
This commit is contained in:
parent
67f99ee102
commit
8645a5ccd0
@ -56,6 +56,16 @@ void RegExpParser::Advance() {
|
||||
} else {
|
||||
current_ = in()->Get(next_pos_);
|
||||
next_pos_++;
|
||||
// Read the whole surrogate pair in case of unicode flag, if possible.
|
||||
if (unicode_ && next_pos_ < in()->length() &&
|
||||
unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(current_))) {
|
||||
uc16 trail = in()->Get(next_pos_);
|
||||
if (unibrow::Utf16::IsTrailSurrogate(trail)) {
|
||||
current_ = unibrow::Utf16::CombineSurrogatePair(
|
||||
static_cast<uc16>(current_), trail);
|
||||
next_pos_++;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
current_ = kEndMarker;
|
||||
@ -417,12 +427,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
Advance(2);
|
||||
uc32 value;
|
||||
if (ParseUnicodeEscape(&value)) {
|
||||
if (value > unibrow::Utf16::kMaxNonSurrogateCharCode) {
|
||||
builder->AddCharacter(unibrow::Utf16::LeadSurrogate(value));
|
||||
builder->AddCharacter(unibrow::Utf16::TrailSurrogate(value));
|
||||
} else {
|
||||
builder->AddCharacter(static_cast<uc16>(value));
|
||||
}
|
||||
builder->AddUnicodeCharacter(value);
|
||||
} else if (!unicode_) {
|
||||
builder->AddCharacter('u');
|
||||
} else {
|
||||
@ -456,7 +461,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
|
||||
// fallthrough
|
||||
}
|
||||
default:
|
||||
builder->AddCharacter(current());
|
||||
builder->AddUnicodeCharacter(current());
|
||||
Advance();
|
||||
break;
|
||||
} // end switch(current())
|
||||
@ -1057,6 +1062,19 @@ void RegExpBuilder::AddCharacter(uc16 c) {
|
||||
}
|
||||
|
||||
|
||||
// Appends the code point |c| to the pattern under construction.
//
// A code point that fits in one UTF-16 code unit is forwarded to
// AddCharacter(). Anything above kMaxNonSurrogateCharCode is split into its
// lead and trail surrogates, and the two code units are added together as a
// single RegExpAtom rather than as two independent characters.
void RegExpBuilder::AddUnicodeCharacter(uc32 c) {
  // Common case: no surrogate pair needed.
  if (c <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
    AddCharacter(static_cast<uc16>(c));
    return;
  }

  // Build the two-unit sequence in zone memory and hand it over as one atom.
  ZoneList<uc16> pair_units(2, zone());
  pair_units.Add(unibrow::Utf16::LeadSurrogate(c), zone());
  pair_units.Add(unibrow::Utf16::TrailSurrogate(c), zone());
  AddAtom(new (zone()) RegExpAtom(pair_units.ToConstVector()));
}
|
||||
|
||||
|
||||
// Notes that an empty expression was encountered by raising the
// pending_empty_ flag; nothing else is modified here.
void RegExpBuilder::AddEmpty() {
  pending_empty_ = true;
}
|
||||
|
||||
|
||||
|
@ -101,6 +101,7 @@ class RegExpBuilder : public ZoneObject {
|
||||
public:
|
||||
explicit RegExpBuilder(Zone* zone);
|
||||
void AddCharacter(uc16 character);
|
||||
void AddUnicodeCharacter(uc32 character);
|
||||
// "Adds" an empty expression. Does nothing except consume a
|
||||
// following quantifier
|
||||
void AddEmpty();
|
||||
|
@ -309,9 +309,12 @@ void TestRegExpParser(bool lookbehind) {
|
||||
|
||||
// Unicode regexps
|
||||
CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
|
||||
CheckParseEq("\\u{12345}\\u{23456}", "'\\ud808\\udf45\\ud84d\\udc56'", true);
|
||||
CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
|
||||
true);
|
||||
CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
|
||||
true);
|
||||
CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
|
||||
CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
|
||||
|
||||
CHECK_SIMPLE("", false);
|
||||
CHECK_SIMPLE("a", true);
|
||||
|
@ -247,3 +247,11 @@ assertTrue(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{12345}"));
|
||||
assertFalse(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
|
||||
"\u{12345}b\u{23456}"));
|
||||
assertFalse(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{23456}"));
|
||||
|
||||
// Quantifier.
// These assertions pin down what a {3} quantifier binds to when the pattern
// contains the astral code point U+12345 (UTF-16: \ud808 \udf45).
|
||||
// With the "u" flag, {3} repeats the whole code point, so three consecutive
// U+12345 characters match (pattern given as a literal character here, and
// as a \u{...} escape in the regexp literal below).
assertTrue(new RegExp("\u{12345}{3}", "u").test("\u{12345}\u{12345}\u{12345}"));
|
||||
assertTrue(/\u{12345}{3}/u.test("\u{12345}\u{12345}\u{12345}"));
|
||||
// Without the "u" flag, {3} is expected to bind to the trail surrogate only:
// the subject is \ud808 followed by three \udf45 code units.
assertTrue(new RegExp("\u{12345}{3}").test("\u{12345}\udf45\udf45"));
|
||||
// When the two halves are written as separate \uXXXX escapes, this assertion
// expects {3} to bind to \udf45 alone even though the "u" flag is set.
assertTrue(/\ud808\udf45{3}/u.test("\u{12345}\udf45\udf45"));
|
||||
// With the "u" flag and the code point written as one unit, a lead surrogate
// followed by three trail surrogates must not match.
assertFalse(new RegExp("\u{12345}{3}", "u").test("\u{12345}\udf45\udf45"));
|
||||
assertFalse(/\u{12345}{3}/u.test("\u{12345}\udf45\udf45"));
|
||||
|
Loading…
Reference in New Issue
Block a user