[regexp] Fix incorrect DCHECK in FixSingleCharacterDisjunctions
The condition only applies in unicode mode, where any lone surrogates are desugared into a character class (and are therefore never considered by this optimization). Non-unicode mode treats lone surrogates exactly like any other code point.

BUG=chromium:711092
Review-Url: https://codereview.chromium.org/2808403006
Cr-Commit-Position: refs/heads/master@{#44638}
This commit is contained in:
parent
b30503387f
commit
876e23c54d
@ -5352,6 +5352,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
||||
Zone* zone = compiler->zone();
|
||||
ZoneList<RegExpTree*>* alternatives = this->alternatives();
|
||||
int length = alternatives->length();
|
||||
const bool unicode = compiler->unicode();
|
||||
|
||||
int write_posn = 0;
|
||||
int i = 0;
|
||||
@ -5368,7 +5369,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
||||
DCHECK_IMPLIES(unicode,
|
||||
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
||||
bool contains_trail_surrogate =
|
||||
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
||||
int first_in_run = i;
|
||||
@ -5378,7 +5380,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
||||
if (!alternative->IsAtom()) break;
|
||||
atom = alternative->AsAtom();
|
||||
if (atom->length() != 1) break;
|
||||
DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
||||
DCHECK_IMPLIES(unicode,
|
||||
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
||||
contains_trail_surrogate |=
|
||||
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
||||
i++;
|
||||
@ -5394,7 +5397,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
||||
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
|
||||
}
|
||||
RegExpCharacterClass::Flags flags;
|
||||
if (compiler->unicode() && contains_trail_surrogate) {
|
||||
if (unicode && contains_trail_surrogate) {
|
||||
flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
|
||||
}
|
||||
alternatives->at(write_posn++) =
|
||||
|
@ -803,3 +803,8 @@ assertTrue(/^[\444]*$/.test("\u{24}4"));
|
||||
assertTrue(/^[\d-X]*$/.test("234-X-432")); // CharacterRangeOrUnion.
|
||||
assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432"));
|
||||
assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));
|
||||
|
||||
// Lone leading surrogates. Just here to exercise specific parsing code-paths.
|
||||
|
||||
assertFalse(/\uDB88|\uDBEC|aa/.test(""));
|
||||
assertFalse(/\uDB88|\uDBEC|aa/u.test(""));
|
||||
|
Loading…
Reference in New Issue
Block a user