[regexp] Fix incorrect DCHECK in FixSingleCharacterDisjunctions

The condition only applies in unicode mode, where any lone surrogates are
desugared into a character class (and will not be considered in this
optimization). Non-unicode mode treats lone surrogates exactly like any other
codepoint.

BUG=chromium:711092

Review-Url: https://codereview.chromium.org/2808403006
Cr-Commit-Position: refs/heads/master@{#44638}
2017-04-13 03:33:08 -07:00 · 2017-04-13 03:33:08 -07:00 · 876e23c54d
commit 876e23c54d
parent b30503387f
2 changed files with 11 additions and 3 deletions
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@@ -5352,6 +5352,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
  Zone* zone = compiler->zone();
  ZoneList<RegExpTree*>* alternatives = this->alternatives();
  int length = alternatives->length();
+  const bool unicode = compiler->unicode();
  int write_posn = 0;
  int i = 0;
@@ -5368,7 +5369,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
      i++;
      continue;
    }
-    DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
+    DCHECK_IMPLIES(unicode,
+                   !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
    bool contains_trail_surrogate =
        unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
    int first_in_run = i;
@@ -5378,7 +5380,8 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
      if (!alternative->IsAtom()) break;
      atom = alternative->AsAtom();
      if (atom->length() != 1) break;
-      DCHECK(!unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
+      DCHECK_IMPLIES(unicode,
+                     !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
      contains_trail_surrogate |=
          unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
      i++;
@@ -5394,7 +5397,7 @@ void RegExpDisjunction::FixSingleCharacterDisjunctions(
        ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
      }
      RegExpCharacterClass::Flags flags;
-      if (compiler->unicode() && contains_trail_surrogate) {
+      if (unicode && contains_trail_surrogate) {
        flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
      }
      alternatives->at(write_posn++) =
--- a/test/mjsunit/regexp.js
+++ b/test/mjsunit/regexp.js
@@ -803,3 +803,8 @@ assertTrue(/^[\444]*$/.test("\u{24}4"));
 assertTrue(/^[\d-X]*$/.test("234-X-432"));  // CharacterRangeOrUnion.
 assertTrue(/^[\d-X-Z]*$/.test("234-XZ-432"));
 assertFalse(/^[\d-X-Z]*$/.test("234-XYZ-432"));
+// Lone leading surrogates. Just here to exercise specific parsing code-paths.
+assertFalse(/\uDB88|\uDBEC|aa/.test(""));
+assertFalse(/\uDB88|\uDBEC|aa/u.test(""));