[regexp] Fix incorrect DCHECK in FixSingleCharacterDisjunctions
The asserted condition (that an atom's first character is not a lead surrogate) only holds in unicode mode, where any lone surrogates are desugared into a character class (and are therefore not considered by this optimization). Non-unicode mode treats lone surrogates exactly like any other code point.

BUG=chromium:711092
Review-Url: https://codereview.chromium.org/2808403006
Cr-Commit-Position: refs/heads/master@{#44638}
This commit is contained in:
parent
b30503387f
commit
876e23c54d
@ -5352,6 +5352,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
|||||||
Zone* zone = compiler->zone();
|
Zone* zone = compiler->zone();
|
||||||
ZoneList<RegExpTree*>* alternatives = this->alternatives();
|
ZoneList<RegExpTree*>* alternatives = this->alternatives();
|
||||||
int length = alternatives->length();
|
int length = alternatives->length();
|
||||||
|
const bool unicode = compiler->unicode();
|
||||||
|
|
||||||
int write_posn = 0;
|
int write_posn = 0;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
@ -5368,7 +5369,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
|||||||
i++;
|
i++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
DCHECK_IMPLIES(unicode,
|
||||||
|
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
||||||
bool contains_trail_surrogate =
|
bool contains_trail_surrogate =
|
||||||
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
||||||
int first_in_run = i;
|
int first_in_run = i;
|
||||||
@ -5378,7 +5380,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
|||||||
if (!alternative->IsAtom()) break;
|
if (!alternative->IsAtom()) break;
|
||||||
atom = alternative->AsAtom();
|
atom = alternative->AsAtom();
|
||||||
if (atom->length() != 1) break;
|
if (atom->length() != 1) break;
|
||||||
DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
DCHECK_IMPLIES(unicode,
|
||||||
|
!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
|
||||||
contains_trail_surrogate |=
|
contains_trail_surrogate |=
|
||||||
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
|
||||||
i++;
|
i++;
|
||||||
@ -5394,7 +5397,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
|
|||||||
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
|
ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
|
||||||
}
|
}
|
||||||
RegExpCharacterClass::Flags flags;
|
RegExpCharacterClass::Flags flags;
|
||||||
if (compiler->unicode() && contains_trail_surrogate) {
|
if (unicode && contains_trail_surrogate) {
|
||||||
flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
|
flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
|
||||||
}
|
}
|
||||||
alternatives->at(write_posn++) =
|
alternatives->at(write_posn++) =
|
||||||
|
@ -803,3 +803,8 @@ assertTrue(/^[\444]*$/.test("\u{24}4"));
|
|||||||
assertTrue(/^[\d-X]*$/.test("234-X-432")); // CharacterRangeOrUnion.
|
assertTrue(/^[\d-X]*$/.test("234-X-432")); // CharacterRangeOrUnion.
|
||||||
assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432"));
|
assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432"));
|
||||||
assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));
|
assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));
|
||||||
|
|
||||||
|
// Lone leading surrogates. Just here to exercise specific parsing code-paths.
|
||||||
|
|
||||||
|
assertFalse(/\uDB88|\uDBEC|aa/.test(""));
|
||||||
|
assertFalse(/\uDB88|\uDBEC|aa/u.test(""));
|
||||||
|
Loading…
Reference in New Issue
Block a user