[regexp] Fix incorrect DCHECK in FixSingleCharacterDisjunctions

The asserted condition (that a single-character atom is never a lead
surrogate) only holds in unicode mode, where any lone surrogates are
desugared into a character class (and will therefore not be considered in
this optimization). Non-unicode mode treats lone surrogates exactly like
any other codepoint, so the DCHECK must not fire there.

BUG=chromium:711092

Review-Url: https://codereview.chromium.org/2808403006
Cr-Commit-Position: refs/heads/master@{#44638}
This commit is contained in:
jgruber 2017-04-13 03:33:08 -07:00 committed by Commit bot
parent b30503387f
commit 876e23c54d
2 changed files with 11 additions and 3 deletions

View File

@ -5352,6 +5352,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
Zone* zone = compiler->zone();
ZoneList<RegExpTree*>* alternatives = this->alternatives();
int length = alternatives->length();
const bool unicode = compiler->unicode();
int write_posn = 0;
int i = 0;
@ -5368,7 +5369,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
i++;
continue;
}
DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
DCHECK_IMPLIES(unicode,
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
bool contains_trail_surrogate =
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
int first_in_run = i;
@ -5378,7 +5380,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (!alternative->IsAtom()) break;
atom = alternative->AsAtom();
if (atom->length() != 1) break;
DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
DCHECK_IMPLIES(unicode,
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
contains_trail_surrogate |=
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
i++;
@ -5394,7 +5397,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
}
RegExpCharacterClass::Flags flags;
if (compiler->unicode() && contains_trail_surrogate) {
if (unicode && contains_trail_surrogate) {
flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
}
alternatives->at(write_posn++) =

View File

@ -803,3 +803,8 @@ assertTrue(/^[\444]*$/.test("\u{24}4"));
assertTrue(/^[\d-X]*$/.test("234-X-432")); // CharacterRangeOrUnion.
assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432"));
assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));
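// Lone leading surrogates as single-character alternatives of a disjunction.
// Just here to exercise specific parsing and compilation code-paths, both
// without and with the unicode flag.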
assertFalse(/\uDB88|\uDBEC|aa/.test(""));
assertFalse(/\uDB88|\uDBEC|aa/u.test(""));