[regexp] Fix incorrect DCHECK in FixSingleCharacterDisjunctions

The asserted condition (a single-character atom is never a lead surrogate)
only holds in unicode mode, where any lone surrogates are desugared into a
character class (and are therefore not considered by this optimization).
Non-unicode mode treats lone surrogates exactly like any other code point.

BUG=chromium:711092

Review-Url: https://codereview.chromium.org/2808403006
Cr-Commit-Position: refs/heads/master@{#44638}
This commit is contained in:
jgruber 2017-04-13 03:33:08 -07:00 committed by Commit bot
parent b30503387f
commit 876e23c54d
2 changed files with 11 additions and 3 deletions

View File

@ -5352,6 +5352,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
Zone* zone = compiler->zone(); Zone* zone = compiler->zone();
ZoneList<RegExpTree*>* alternatives = this->alternatives(); ZoneList<RegExpTree*>* alternatives = this->alternatives();
int length = alternatives->length(); int length = alternatives->length();
const bool unicode = compiler->unicode();
int write_posn = 0; int write_posn = 0;
int i = 0; int i = 0;
@ -5368,7 +5369,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
i++; i++;
continue; continue;
} }
DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); DCHECK_IMPLIES(unicode,
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
bool contains_trail_surrogate = bool contains_trail_surrogate =
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
int first_in_run = i; int first_in_run = i;
@ -5378,7 +5380,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
if (!alternative->IsAtom()) break; if (!alternative->IsAtom()) break;
atom = alternative->AsAtom(); atom = alternative->AsAtom();
if (atom->length() != 1) break; if (atom->length() != 1) break;
DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); DCHECK_IMPLIES(unicode,
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
contains_trail_surrogate |= contains_trail_surrogate |=
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
i++; i++;
@ -5394,7 +5397,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
} }
RegExpCharacterClass::Flags flags; RegExpCharacterClass::Flags flags;
if (compiler->unicode() && contains_trail_surrogate) { if (unicode && contains_trail_surrogate) {
flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE; flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
} }
alternatives->at(write_posn++) = alternatives->at(write_posn++) =

View File

@ -803,3 +803,8 @@ assertTrue(/^[\444]*$/.test("\u{24}4"));
assertTrue(/^[\d-X]*$/.test("234-X-432")); // CharacterRangeOrUnion. assertTrue(/^[\d-X]*$/.test("234-X-432")); // CharacterRangeOrUnion.
assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432")); assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432"));
assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432")); assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));
// Lone leading surrogates. Just here to exercise specific parsing code-paths.
assertFalse(/\uDB88|\uDBEC|aa/.test(""));
assertFalse(/\uDB88|\uDBEC|aa/u.test(""));