diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc index add1d2006c..df4a15c71a 100644 --- a/src/regexp/jsregexp.cc +++ b/src/regexp/jsregexp.cc @@ -5352,6 +5352,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( Zone* zone = compiler->zone(); ZoneList* alternatives = this->alternatives(); int length = alternatives->length(); + const bool unicode = compiler->unicode(); int write_posn = 0; int i = 0; @@ -5368,7 +5369,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( i++; continue; } - DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); + DCHECK_IMPLIES(unicode, + !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); bool contains_trail_surrogate = unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); int first_in_run = i; @@ -5378,7 +5380,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( if (!alternative->IsAtom()) break; atom = alternative->AsAtom(); if (atom->length() != 1) break; - DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); + DCHECK_IMPLIES(unicode, + !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0))); contains_trail_surrogate |= unibrow::Utf16::IsTrailSurrogate(atom->data().at(0)); i++; @@ -5394,7 +5397,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions( ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone); } RegExpCharacterClass::Flags flags; - if (compiler->unicode() && contains_trail_surrogate) { + if (unicode && contains_trail_surrogate) { flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE; } alternatives->at(write_posn++) = diff --git a/test/mjsunit/regexp.js b/test/mjsunit/regexp.js index 6fb5660c08..dd4832b567 100644 --- a/test/mjsunit/regexp.js +++ b/test/mjsunit/regexp.js @@ -803,3 +803,8 @@ assertTrue(/^[\444]*$/.test("\u{24}4")); assertTrue(/^[\d-X]*$/.test("234-X-432")); // CharacterRangeOrUnion. assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432")); assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432")); + +// Lone leading surrogates. Just here to exercise specific parsing code-paths. + +assertFalse(/\uDB88|\uDBEC|aa/.test("")); +assertFalse(/\uDB88|\uDBEC|aa/u.test(""));