[regexp] implement case-insensitive unicode regexps.

BUG=v8:2952 LOG=N Review URL: https://codereview.chromium.org/1599303002 Cr-Commit-Position: refs/heads/master@{#33538}
2016-01-27 00:24:50 -08:00 · 2016-01-27 00:24:50 -08:00 · a2baaaac93
commit a2baaaac93
parent 2a0e4225dd
30 changed files with 520 additions and 237 deletions
--- a/src/isolate.h
+++ b/src/isolate.h
@ -891,7 +891,7 @@ class Isolate {
  unibrow::Mapping<unibrow::Ecma262Canonicalize>*
      interp_canonicalize_mapping() {
-    return &interp_canonicalize_mapping_;
+    return &regexp_macro_assembler_canonicalize_;
  }
  Debug* debug() { return debug_; }
@ -1245,7 +1245,6 @@ class Isolate {
      regexp_macro_assembler_canonicalize_;
  RegExpStack* regexp_stack_;
  DateCache* date_cache_;
  unibrow::Mapping<unibrow::Ecma262Canonicalize> interp_canonicalize_mapping_;
  CallInterfaceDescriptorData* call_descriptor_data_;
  base::RandomNumberGenerator* random_number_generator_;
--- a/src/regexp/arm/regexp-macro-assembler-arm.cc
+++ b/src/regexp/arm/regexp-macro-assembler-arm.cc
@ -210,7 +210,7 @@ void RegExpMacroAssemblerARM::CheckGreedyLoop(Label* on_equal) {
 void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
+    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
  Label fallthrough;
  __ ldr(r0, register_location(start_reg));  // Index of start of capture
  __ ldr(r1, register_location(start_reg + 1));  // Index of end of capture
@ -302,7 +302,7 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
    //   r0: Address byte_offset1 - Address captured substring's start.
    //   r1: Address byte_offset2 - Address of current character position.
    //   r2: size_t byte_length - length of capture in bytes(!)
-    //   r3: Isolate* isolate
+    //   r3: Isolate* isolate or 0 if unicode flag.
    // Address of start of capture.
    __ add(r0, r0, Operand(end_of_input_address()));
@ -316,7 +316,14 @@ void RegExpMacroAssemblerARM::CheckNotBackReferenceIgnoreCase(
      __ sub(r1, r1, r4);
    }
    // Isolate.
-    __ mov(r3, Operand(ExternalReference::isolate_address(isolate())));
+#ifdef V8_I18N_SUPPORT
    if (unicode) {
      __ mov(r3, Operand(0));
    } else  // NOLINT
 #endif      // V8_I18N_SUPPORT
    {
      __ mov(r3, Operand(ExternalReference::isolate_address(isolate())));
    }
    {
      AllowExternalCallThatCantCauseGC scope(masm_);
--- a/src/regexp/arm/regexp-macro-assembler-arm.h
+++ b/src/regexp/arm/regexp-macro-assembler-arm.h
@ -38,7 +38,7 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler {
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match);
  virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
  virtual void CheckNotCharacterAfterAnd(unsigned c,
--- a/src/regexp/arm64/regexp-macro-assembler-arm64.cc
+++ b/src/regexp/arm64/regexp-macro-assembler-arm64.cc
@ -274,7 +274,7 @@ void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
+    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
  Label fallthrough;
  Register capture_start_offset = w10;
@ -388,7 +388,7 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
    //   x0: Address byte_offset1 - Address captured substring's start.
    //   x1: Address byte_offset2 - Address of current character position.
    //   w2: size_t byte_length - length of capture in bytes(!)
-    //   x3: Isolate* isolate
+    //   x3: Isolate* isolate or 0 if unicode flag
    // Address of start of capture.
    __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
@ -400,7 +400,14 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
      __ Sub(x1, x1, Operand(capture_length, SXTW));
    }
    // Isolate.
-    __ Mov(x3, ExternalReference::isolate_address(isolate()));
+#ifdef V8_I18N_SUPPORT
    if (unicode) {
      __ Mov(x3, Operand(0));
    } else  // NOLINT
 #endif      // V8_I18N_SUPPORT
    {
      __ Mov(x3, ExternalReference::isolate_address(isolate()));
    }
    {
      AllowExternalCallThatCantCauseGC scope(masm_);
--- a/src/regexp/arm64/regexp-macro-assembler-arm64.h
+++ b/src/regexp/arm64/regexp-macro-assembler-arm64.h
@ -43,7 +43,7 @@ class RegExpMacroAssemblerARM64: public NativeRegExpMacroAssembler {
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match);
  virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
  virtual void CheckNotCharacterAfterAnd(unsigned c,
--- a/src/regexp/bytecodes-irregexp.h
+++ b/src/regexp/bytecodes-irregexp.h
@ -20,56 +20,58 @@ const unsigned int MAX_FIRST_ARG = 0x7fffffu;
 const int BYTECODE_SHIFT = 8;
 #define BYTECODE_ITERATOR(V)                                                   \
-V(BREAK,              0, 4)   /* bc8                                        */ \
+  V(BREAK, 0, 4)              /* bc8                                        */ \
-V(PUSH_CP,            1, 4)   /* bc8 pad24                                  */ \
+  V(PUSH_CP, 1, 4)            /* bc8 pad24                                  */ \
-V(PUSH_BT,            2, 8)   /* bc8 pad24 offset32                         */ \
+  V(PUSH_BT, 2, 8)            /* bc8 pad24 offset32                         */ \
-V(PUSH_REGISTER,      3, 4)   /* bc8 reg_idx24                              */ \
+  V(PUSH_REGISTER, 3, 4)      /* bc8 reg_idx24                              */ \
-V(SET_REGISTER_TO_CP, 4, 8)   /* bc8 reg_idx24 offset32                     */ \
+  V(SET_REGISTER_TO_CP, 4, 8) /* bc8 reg_idx24 offset32                     */ \
-V(SET_CP_TO_REGISTER, 5, 4)   /* bc8 reg_idx24                              */ \
+  V(SET_CP_TO_REGISTER, 5, 4) /* bc8 reg_idx24                              */ \
-V(SET_REGISTER_TO_SP, 6, 4)   /* bc8 reg_idx24                              */ \
+  V(SET_REGISTER_TO_SP, 6, 4) /* bc8 reg_idx24                              */ \
-V(SET_SP_TO_REGISTER, 7, 4)   /* bc8 reg_idx24                              */ \
+  V(SET_SP_TO_REGISTER, 7, 4) /* bc8 reg_idx24                              */ \
-V(SET_REGISTER,       8, 8)   /* bc8 reg_idx24 value32                      */ \
+  V(SET_REGISTER, 8, 8)       /* bc8 reg_idx24 value32                      */ \
-V(ADVANCE_REGISTER,   9, 8)   /* bc8 reg_idx24 value32                      */ \
+  V(ADVANCE_REGISTER, 9, 8)   /* bc8 reg_idx24 value32                      */ \
-V(POP_CP,            10, 4)   /* bc8 pad24                                  */ \
+  V(POP_CP, 10, 4)            /* bc8 pad24                                  */ \
-V(POP_BT,            11, 4)   /* bc8 pad24                                  */ \
+  V(POP_BT, 11, 4)            /* bc8 pad24                                  */ \
-V(POP_REGISTER,      12, 4)   /* bc8 reg_idx24                              */ \
+  V(POP_REGISTER, 12, 4)      /* bc8 reg_idx24                              */ \
-V(FAIL,              13, 4)   /* bc8 pad24                                  */ \
+  V(FAIL, 13, 4)              /* bc8 pad24                                  */ \
-V(SUCCEED,           14, 4)   /* bc8 pad24                                  */ \
+  V(SUCCEED, 14, 4)           /* bc8 pad24                                  */ \
-V(ADVANCE_CP,        15, 4)   /* bc8 offset24                               */ \
+  V(ADVANCE_CP, 15, 4)        /* bc8 offset24                               */ \
-V(GOTO,              16, 8)   /* bc8 pad24 addr32                           */ \
+  V(GOTO, 16, 8)              /* bc8 pad24 addr32                           */ \
-V(LOAD_CURRENT_CHAR, 17, 8)   /* bc8 offset24 addr32                        */ \
+  V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32                        */ \
-V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24                       */ \
+  V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4)    /* bc8 offset24 */                  \
-V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32                       */ \
+  V(LOAD_2_CURRENT_CHARS, 19, 8)           /* bc8 offset24 addr32 */           \
-V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24                    */ \
+  V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */                  \
-V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32                       */ \
+  V(LOAD_4_CURRENT_CHARS, 21, 8)           /* bc8 offset24 addr32 */           \
-V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24                    */ \
+  V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */                  \
-V(CHECK_4_CHARS,     23, 12)  /* bc8 pad24 uint32 addr32                    */ \
+  V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32                    */    \
-V(CHECK_CHAR,        24, 8)   /* bc8 pad8 uint16 addr32                     */ \
+  V(CHECK_CHAR, 24, 8)     /* bc8 pad8 uint16 addr32                     */    \
-V(CHECK_NOT_4_CHARS, 25, 12)  /* bc8 pad24 uint32 addr32                    */ \
+  V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */                   \
-V(CHECK_NOT_CHAR,    26, 8)   /* bc8 pad8 uint16 addr32                     */ \
+  V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32                     */    \
-V(AND_CHECK_4_CHARS, 27, 16)  /* bc8 pad24 uint32 uint32 addr32             */ \
+  V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */            \
-V(AND_CHECK_CHAR,    28, 12)  /* bc8 pad8 uint16 uint32 addr32              */ \
+  V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32              */   \
-V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32          */ \
+  V(AND_CHECK_NOT_4_CHARS, 29, 16)    /* bc8 pad24 uint32 uint32 addr32 */     \
-V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32              */ \
+  V(AND_CHECK_NOT_CHAR, 30, 12)       /* bc8 pad8 uint16 uint32 addr32 */      \
-V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32       */ \
+  V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */     \
-V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 uc16 uc16 addr32                */ \
+  V(CHECK_CHAR_IN_RANGE, 32, 12)      /* bc8 pad24 uc16 uc16 addr32 */         \
-V(CHECK_CHAR_NOT_IN_RANGE, 33, 12) /* bc8 pad24 uc16 uc16 addr32            */ \
+  V(CHECK_CHAR_NOT_IN_RANGE, 33, 12)  /* bc8 pad24 uc16 uc16 addr32 */         \
-V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128                   */ \
+  V(CHECK_BIT_IN_TABLE, 34, 24)       /* bc8 pad24 addr32 bits128 */           \
-V(CHECK_LT,          35, 8)   /* bc8 pad8 uc16 addr32                       */ \
+  V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32                       */          \
-V(CHECK_GT,          36, 8)   /* bc8 pad8 uc16 addr32                       */ \
+  V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32                       */          \
-V(CHECK_NOT_BACK_REF, 37, 8)  /* bc8 reg_idx24 addr32                       */ \
+  V(CHECK_NOT_BACK_REF, 37, 8)         /* bc8 reg_idx24 addr32 */              \
-V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32                */ \
+  V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */              \
-V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8)         /* bc8 reg_idx24 addr32       */ \
+  V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8)                                 \
-V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32       */ \
+  V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8)         /* bc8 reg_idx24 addr32 */     \
-V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32            */ \
+  V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */     \
-V(CHECK_REGISTER_LT, 42, 12)  /* bc8 reg_idx24 value32 addr32               */ \
+  V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8)                        \
-V(CHECK_REGISTER_GE, 43, 12)  /* bc8 reg_idx24 value32 addr32               */ \
+  V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */          \
-V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32                     */ \
+  V(CHECK_REGISTER_LT, 44, 12)    /* bc8 reg_idx24 value32 addr32 */           \
-V(CHECK_AT_START,    45, 8)   /* bc8 pad24 addr32                           */ \
+  V(CHECK_REGISTER_GE, 45, 12)    /* bc8 reg_idx24 value32 addr32 */           \
-V(CHECK_NOT_AT_START, 46, 8)  /* bc8 offset24 addr32                        */ \
+  V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */                   \
-V(CHECK_GREEDY,      47, 8)   /* bc8 pad24 addr32                           */ \
+  V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32                           */    \
-V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32                        */ \
+  V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */                       \
-V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24                        */
+  V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32                           */      \
  V(ADVANCE_CP_AND_GOTO, 50, 8)           /* bc8 offset24 addr32 */            \
  V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */
 #define DECLARE_BYTECODES(name, code, length) \
  static const int BC_##name = code;
--- a/src/regexp/ia32/regexp-macro-assembler-ia32.cc
+++ b/src/regexp/ia32/regexp-macro-assembler-ia32.cc
@ -189,7 +189,7 @@ void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
 void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
+    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
  Label fallthrough;
  __ mov(edx, register_location(start_reg));  // Index of start of capture
  __ mov(ebx, register_location(start_reg + 1));  // Index of end of capture
@ -296,11 +296,18 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
    //   Address byte_offset1 - Address captured substring's start.
    //   Address byte_offset2 - Address of current character position.
    //   size_t byte_length - length of capture in bytes(!)
-    //   Isolate* isolate
+//   Isolate* isolate or 0 if unicode flag.
    // Set isolate.
-    __ mov(Operand(esp, 3 * kPointerSize),
+#ifdef V8_I18N_SUPPORT
-           Immediate(ExternalReference::isolate_address(isolate())));
+    if (unicode) {
      __ mov(Operand(esp, 3 * kPointerSize), Immediate(0));
    } else  // NOLINT
 #endif      // V8_I18N_SUPPORT
    {
      __ mov(Operand(esp, 3 * kPointerSize),
             Immediate(ExternalReference::isolate_address(isolate())));
    }
    // Set byte_length.
    __ mov(Operand(esp, 2 * kPointerSize), ebx);
    // Set byte_offset2.
--- a/src/regexp/ia32/regexp-macro-assembler-ia32.h
+++ b/src/regexp/ia32/regexp-macro-assembler-ia32.h
@ -37,7 +37,7 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match);
  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
  virtual void CheckNotCharacterAfterAnd(uint32_t c,
--- a/src/regexp/interpreter-irregexp.cc
+++ b/src/regexp/interpreter-irregexp.cc
@ -15,37 +15,32 @@
 #include "src/unicode.h"
 #include "src/utils.h"
 #ifdef V8_I18N_SUPPORT
 #include "unicode/uchar.h"
 #endif  // V8_I18N_SUPPORT
 namespace v8 {
 namespace internal {
 typedef unibrow::Mapping<unibrow::Ecma262Canonicalize> Canonicalize;
-static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
+static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
-                                 int from,
+                                 int len, Vector<const uc16> subject,
-                                 int current,
+                                 bool unicode) {
-                                 int len,
+  Address offset_a =
-                                 Vector<const uc16> subject) {
+      reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
-  for (int i = 0; i < len; i++) {
+  Address offset_b =
-    unibrow::uchar old_char = subject[from++];
+      reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
-    unibrow::uchar new_char = subject[current++];
+  size_t length = len * kUC16Size;
-    if (old_char == new_char) continue;
+  return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
-    unibrow::uchar old_string[1] = { old_char };
+             offset_a, offset_b, length, unicode ? nullptr : isolate) == 1;
    unibrow::uchar new_string[1] = { new_char };
    interp_canonicalize->get(old_char, '\0', old_string);
    interp_canonicalize->get(new_char, '\0', new_string);
    if (old_string[0] != new_string[0]) {
      return false;
    }
  }
  return true;
 }
-static bool BackRefMatchesNoCase(Canonicalize* interp_canonicalize,
+static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
-                                 int from,
+                                 int len, Vector<const uint8_t> subject,
-                                 int current,
+                                 bool unicode) {
-                                 int len,
+  // For Latin1 characters the unicode flag makes no difference.
                                 Vector<const uint8_t> subject) {
  for (int i = 0; i < len; i++) {
    unsigned int old_char = subject[from++];
    unsigned int new_char = subject[current++];
@ -523,13 +518,16 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
        pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
        break;
      }
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
        bool unicode =
            (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
        int from = registers[insn >> BYTECODE_SHIFT];
        int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
        if (from >= 0 && len > 0) {
          if (current + len > subject.length() ||
-              !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
+              !BackRefMatchesNoCase(isolate, from, current, len, subject,
-                                    from, current, len, subject)) {
+                                    unicode)) {
            pc = code_base + Load32Aligned(pc + 4);
            break;
          }
@ -538,13 +536,16 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
        pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
        break;
      }
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
        bool unicode = (insn & BYTECODE_MASK) ==
                       BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
        int from = registers[insn >> BYTECODE_SHIFT];
        int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
        if (from >= 0 && len > 0) {
          if (current - len < 0 ||
-              !BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
+              !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
-                                    from, current - len, len, subject)) {
+                                    unicode)) {
            pc = code_base + Load32Aligned(pc + 4);
            break;
          }
--- a/src/regexp/jsregexp.cc
+++ b/src/regexp/jsregexp.cc
@ -25,6 +25,11 @@
 #include "src/string-search.h"
 #include "src/unicode-decoder.h"
 #ifdef V8_I18N_SUPPORT
 #include "unicode/uset.h"
 #include "unicode/utypes.h"
 #endif  // V8_I18N_SUPPORT
 #ifndef V8_INTERPRETED_REGEXP
 #if V8_TARGET_ARCH_IA32
 #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
@ -3420,10 +3425,7 @@ void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
      // independent case and it slows us down if we don't know that.
      if (cc->is_standard(zone())) continue;
      ZoneList<CharacterRange>* ranges = cc->ranges(zone());
-      int range_count = ranges->length();
+      CharacterRange::AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
      for (int j = 0; j < range_count; j++) {
        ranges->at(j).AddCaseEquivalents(isolate, zone(), ranges, is_one_byte);
      }
    }
  }
 }
@ -3586,13 +3588,6 @@ class AlternativeGenerationList {
  AlternativeGeneration a_few_alt_gens_[kAFew];
 };
 static const uc32 kLeadSurrogateStart = 0xd800;
 static const uc32 kLeadSurrogateEnd = 0xdbff;
 static const uc32 kTrailSurrogateStart = 0xdc00;
 static const uc32 kTrailSurrogateEnd = 0xdfff;
 static const uc32 kNonBmpStart = 0x10000;
 static const uc32 kNonBmpEnd = 0x10ffff;
 static const uc32 kRangeEndMarker = 0x110000;
 // The '2' variant has inclusive from and exclusive to.
@ -4395,8 +4390,8 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
  DCHECK_EQ(start_reg_ + 1, end_reg_);
  if (compiler->ignore_case()) {
-    assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
+    assembler->CheckNotBackReferenceIgnoreCase(
-                                               trace->backtrack());
+        start_reg_, read_backward(), compiler->unicode(), trace->backtrack());
  } else {
    assembler->CheckNotBackReference(start_reg_, read_backward(),
                                     trace->backtrack());
@ -4866,21 +4861,6 @@ bool RegExpCharacterClass::is_standard(Zone* zone) {
 }
 // Reports whether this character class contains anything that cannot be
 // matched one UTF-16 code unit at a time: a range reaching kNonBmpStart or
 // above, or a range overlapping the surrogate code units
 // [kLeadSurrogateStart, kTrailSurrogateEnd].
 // Calls CharacterRange::Canonicalize on the class's range list before
 // inspecting it.
 bool RegExpCharacterClass::NeedsDesugaringForUnicode(Zone* zone) {
  ZoneList<CharacterRange>* ranges = this->ranges(zone);
  CharacterRange::Canonicalize(ranges);
  // Walk from the last range towards the first.
  // NOTE(review): presumably chosen because canonical lists are ordered by
  // `from`, so the highest code points are seen first - confirm.
  for (int i = ranges->length() - 1; i >= 0; i--) {
    uc32 from = ranges->at(i).from();
    uc32 to = ranges->at(i).to();
    // Check for non-BMP characters.
    if (to >= kNonBmpStart) return true;
    // Check for lone surrogates: the range intersects the surrogate block.
    if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
  }
  return false;
 }
 UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
                                           ZoneList<CharacterRange>* base)
    : zone_(zone),
@ -5120,11 +5100,53 @@ void AddUnanchoredAdvance(RegExpCompiler* compiler, ChoiceNode* result,
 }
 // Replaces the contents of |ranges| with its closure under case-insensitive
 // matching and leaves the list canonicalized.
 //
 // With V8_I18N_SUPPORT the closure is computed by ICU (uset_closeOver with
 // USET_CASE_INSENSITIVE) and multi-character results are discarded; without
 // it the function falls back to CharacterRange::AddCaseEquivalents.
 //
 // compiler: supplies the zone used for new list entries and, in the
 //           fallback path, the isolate and the one-byte flag. In the ICU
 //           path it is DCHECKed to have both the unicode and ignore-case
 //           flags set.
 // ranges:   in/out list of character ranges; modified by this call.
 void AddUnicodeCaseEquivalents(RegExpCompiler* compiler,
                               ZoneList<CharacterRange>* ranges) {
 #ifdef V8_I18N_SUPPORT
  // Use ICU to compute the case fold closure over the ranges.
  DCHECK(compiler->unicode());
  DCHECK(compiler->ignore_case());
  // Copy every input range into a fresh ICU set.
  USet* set = uset_openEmpty();
  for (int i = 0; i < ranges->length(); i++) {
    uset_addRange(set, ranges->at(i).from(), ranges->at(i).to());
  }
  // The list is rebuilt from the closed-over set below, so empty it first.
  ranges->Clear();
  uset_closeOver(set, USET_CASE_INSENSITIVE);
  // Full case mappings map single characters to multiple characters.
  // Those are represented as strings in the set. Remove them so that
  // we end up with only simple and common case mappings.
  uset_removeAllStrings(set);
  int item_count = uset_getItemCount(set);
  // Accumulates the return values of uset_getItem; checked against 0 below.
  int item_result = 0;
  UErrorCode ec = U_ZERO_ERROR;
  Zone* zone = compiler->zone();
  // Read each item of the set back as a [start, end] range.
  for (int i = 0; i < item_count; i++) {
    uc32 start = 0;
    uc32 end = 0;
    item_result += uset_getItem(set, i, &start, &end, nullptr, 0, &ec);
    ranges->Add(CharacterRange::Range(start, end), zone);
  }
  // No errors occurred, and everything we collected was a range.
  DCHECK_EQ(U_ZERO_ERROR, ec);
  DCHECK_EQ(0, item_result);
  // Release the ICU set opened above.
  uset_close(set);
 #else
  // Fallback if ICU is not included.
  CharacterRange::AddCaseEquivalents(compiler->isolate(), compiler->zone(),
                                     ranges, compiler->one_byte());
 #endif  // V8_I18N_SUPPORT
  // Both paths finish by canonicalizing the resulting list.
  CharacterRange::Canonicalize(ranges);
 }
 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
                                         RegExpNode* on_success) {
  set_.Canonicalize();
  Zone* zone = compiler->zone();
  ZoneList<CharacterRange>* ranges = this->ranges(zone);
  if (compiler->unicode() && compiler->ignore_case()) {
    AddUnicodeCaseEquivalents(compiler, ranges);
  }
  if (compiler->unicode() && !compiler->one_byte()) {
    if (is_negated()) {
      ZoneList<CharacterRange>* negated =
@ -5853,16 +5875,19 @@ Vector<const int> CharacterRange::GetWordBounds() {
 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
                                        ZoneList<CharacterRange>* ranges,
                                        bool is_one_byte) {
-  uc32 bottom = from();
+  int range_count = ranges->length();
-  uc32 top = to();
+  for (int i = 0; i < range_count; i++) {
-  // Nothing to be done for surrogates.
+    CharacterRange range = ranges->at(i);
-  if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;
+    uc32 bottom = range.from();
-  if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
+    uc32 top = range.to();
-    if (bottom > String::kMaxOneByteCharCode) return;
+    // Nothing to be done for surrogates.
-    if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
+    if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) return;
-  }
+    if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
-  unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+      if (bottom > String::kMaxOneByteCharCode) return;
-  if (top == bottom) {
+      if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
    }
    unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    if (top == bottom) {
    // If this is a singleton we just expand the one character.
    int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
    for (int i = 0; i < length; i++) {
@ -5914,6 +5939,7 @@ void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
      pos = end + 1;
    }
  }
  }
 }
@ -6284,7 +6310,7 @@ void TextNode::CalculateOffsets() {
 void Analysis::VisitText(TextNode* that) {
-  if (ignore_case_) {
+  if (ignore_case()) {
    that->MakeCaseIndependent(isolate(), is_one_byte_);
  }
  EnsureAnalyzed(that->on_success());
@ -6649,7 +6675,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
  if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
  data->node = node;
-  Analysis analysis(isolate, ignore_case, is_one_byte);
+  Analysis analysis(isolate, flags, is_one_byte);
  analysis.EnsureAnalyzed(node);
  if (analysis.has_failed()) {
    const char* error_message = analysis.error_message();
--- a/src/regexp/jsregexp.h
+++ b/src/regexp/jsregexp.h
@ -19,6 +19,15 @@ class RegExpNode;
 class RegExpTree;
 class BoyerMooreLookahead;
 static const uc32 kLeadSurrogateStart = 0xd800;
 static const uc32 kLeadSurrogateEnd = 0xdbff;
 static const uc32 kTrailSurrogateStart = 0xdc00;
 static const uc32 kTrailSurrogateEnd = 0xdfff;
 static const uc32 kNonBmpStart = 0x10000;
 static const uc32 kNonBmpEnd = 0x10ffff;
 class RegExpImpl {
 public:
  // Whether V8 is compiled with native regexp support or not.
@ -1478,9 +1487,9 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
 //   +-------+        +------------+
 class Analysis: public NodeVisitor {
 public:
-  Analysis(Isolate* isolate, bool ignore_case, bool is_one_byte)
+  Analysis(Isolate* isolate, JSRegExp::Flags flags, bool is_one_byte)
      : isolate_(isolate),
-        ignore_case_(ignore_case),
+        flags_(flags),
        is_one_byte_(is_one_byte),
        error_message_(NULL) {}
  void EnsureAnalyzed(RegExpNode* node);
@ -1502,9 +1511,12 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
  Isolate* isolate() const { return isolate_; }
  bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
  bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
 private:
  Isolate* isolate_;
-  bool ignore_case_;
+  JSRegExp::Flags flags_;
  bool is_one_byte_;
  const char* error_message_;
--- a/src/regexp/mips/regexp-macro-assembler-mips.cc
+++ b/src/regexp/mips/regexp-macro-assembler-mips.cc
@ -215,7 +215,7 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
 void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
+    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
  Label fallthrough;
  __ lw(a0, register_location(start_reg));  // Index of start of capture.
  __ lw(a1, register_location(start_reg + 1));  // Index of end of capture.
@ -310,7 +310,7 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
    //   a0: Address byte_offset1 - Address captured substring's start.
    //   a1: Address byte_offset2 - Address of current character position.
    //   a2: size_t byte_length - length of capture in bytes(!).
-    //   a3: Isolate* isolate.
+    //   a3: Isolate* isolate or 0 if unicode flag.
    // Address of start of capture.
    __ Addu(a0, a0, Operand(end_of_input_address()));
@ -324,7 +324,14 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
      __ Subu(a1, a1, Operand(s3));
    }
    // Isolate.
-    __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
+#ifdef V8_I18N_SUPPORT
    if (unicode) {
      __ li(a3, Operand(zero_reg));
    } else  // NOLINT
 #endif      // V8_I18N_SUPPORT
    {
      __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
    }
    {
      AllowExternalCallThatCantCauseGC scope(masm_);
--- a/src/regexp/mips/regexp-macro-assembler-mips.h
+++ b/src/regexp/mips/regexp-macro-assembler-mips.h
@ -37,7 +37,7 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match);
  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
  virtual void CheckNotCharacterAfterAnd(uint32_t c,
--- a/src/regexp/mips64/regexp-macro-assembler-mips64.cc
+++ b/src/regexp/mips64/regexp-macro-assembler-mips64.cc
@ -251,7 +251,7 @@ void RegExpMacroAssemblerMIPS::CheckGreedyLoop(Label* on_equal) {
 void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
+    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
  Label fallthrough;
  __ ld(a0, register_location(start_reg));  // Index of start of capture.
  __ ld(a1, register_location(start_reg + 1));  // Index of end of capture.
@ -346,7 +346,7 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
    //   a0: Address byte_offset1 - Address captured substring's start.
    //   a1: Address byte_offset2 - Address of current character position.
    //   a2: size_t byte_length - length of capture in bytes(!).
-    //   a3: Isolate* isolate.
+    //   a3: Isolate* isolate or 0 if unicode flag.
    // Address of start of capture.
    __ Daddu(a0, a0, Operand(end_of_input_address()));
@ -360,7 +360,14 @@ void RegExpMacroAssemblerMIPS::CheckNotBackReferenceIgnoreCase(
      __ Dsubu(a1, a1, Operand(s3));
    }
    // Isolate.
-    __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
+#ifdef V8_I18N_SUPPORT
    if (unicode) {
      __ li(a3, Operand(zero_reg));
    } else  // NOLINT
 #endif      // V8_I18N_SUPPORT
    {
      __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
    }
    {
      AllowExternalCallThatCantCauseGC scope(masm_);
--- a/src/regexp/mips64/regexp-macro-assembler-mips64.h
+++ b/src/regexp/mips64/regexp-macro-assembler-mips64.h
@ -37,7 +37,7 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match);
  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
  virtual void CheckNotCharacterAfterAnd(uint32_t c,
--- a/src/regexp/regexp-ast.h
+++ b/src/regexp/regexp-ast.h
@ -108,8 +108,9 @@ class CharacterRange {
  bool is_valid() { return from_ <= to_; }
  bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
  bool IsSingleton() { return (from_ == to_); }
-  void AddCaseEquivalents(Isolate* isolate, Zone* zone,
+  static void AddCaseEquivalents(Isolate* isolate, Zone* zone,
-                          ZoneList<CharacterRange>* ranges, bool is_one_byte);
+                                 ZoneList<CharacterRange>* ranges,
                                 bool is_one_byte);
  // Whether a range list is in canonical form: Ranges ordered by from value,
  // and ranges non-overlapping and non-adjacent.
  static bool IsCanonical(ZoneList<CharacterRange>* ranges);
@ -293,7 +294,6 @@ class RegExpCharacterClass final : public RegExpTree {
  RegExpCharacterClass* AsCharacterClass() override;
  bool IsCharacterClass() override;
  bool IsTextElement() override { return true; }
  bool NeedsDesugaringForUnicode(Zone* zone);
  int min_match() override { return 1; }
  int max_match() override { return 1; }
  void AppendToText(RegExpText* text, Zone* zone) override;
@ -310,7 +310,7 @@ class RegExpCharacterClass final : public RegExpTree {
  // W : non-ASCII word character
  // d : ASCII digit
  // D : non-ASCII digit
-  // . : non-unicode non-newline
+  // . : non-newline
  // * : All characters, for advancing unanchored regexp
  uc16 standard_type() { return set_.standard_set_type(); }
  ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
--- a/src/regexp/regexp-macro-assembler-irregexp.cc
+++ b/src/regexp/regexp-macro-assembler-irregexp.cc
@ -381,11 +381,13 @@ void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
 // Emits the bytecode for a case-insensitive back reference to the capture
 // whose registers start at start_reg, followed by the on_not_equal label.
 // The opcode is chosen from the read direction (read_backward) and, in the
 // added lines, from the unicode flag as well.
 void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_not_equal) {
+    int start_reg, bool read_backward, bool unicode, Label* on_not_equal) {
  DCHECK(start_reg >= 0);
  DCHECK(start_reg <= kMaxRegister);
-  Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
+  Emit(read_backward ? (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD
-                     : BC_CHECK_NOT_BACK_REF_NO_CASE,
+                                : BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD)
                     : (unicode ? BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE
                                : BC_CHECK_NOT_BACK_REF_NO_CASE),
       start_reg);
  EmitOrLink(on_not_equal);
 }
--- a/src/regexp/regexp-macro-assembler-irregexp.h
+++ b/src/regexp/regexp-macro-assembler-irregexp.h
@ -82,16 +82,10 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
                                        uc16 to,
                                        Label* on_not_in_range);
  virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
  virtual void CheckPosition(int cp_offset, Label* on_outside_input) {
    LoadCurrentCharacter(cp_offset, on_outside_input, true);
  }
  virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match) {
    return false;  // No custom support for character classes.
  }
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match);
  virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
  virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
--- a/src/regexp/regexp-macro-assembler-tracer.cc
+++ b/src/regexp/regexp-macro-assembler-tracer.cc
@ -360,11 +360,11 @@ void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg,
 // Tracing wrapper: prints the call with its arguments (capture register,
 // read direction, and in the added lines the unicode mode) plus the
 // on_no_match label, then forwards the arguments to the wrapped assembler.
 void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
+    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
-  PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s, label[%08x]);\n",
+  PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s %s, label[%08x]);\n",
         start_reg, read_backward ? "backward" : "forward",
-         LabelToInt(on_no_match));
+         unicode ? "unicode" : "non-unicode", LabelToInt(on_no_match));
-  assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward,
+  assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward, unicode,
                                              on_no_match);
 }
--- a/src/regexp/regexp-macro-assembler-tracer.h
+++ b/src/regexp/regexp-macro-assembler-tracer.h
@ -34,7 +34,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match);
  virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
  virtual void CheckNotCharacterAfterAnd(unsigned c,
--- a/src/regexp/regexp-macro-assembler.cc
+++ b/src/regexp/regexp-macro-assembler.cc
@ -9,6 +9,10 @@
 #include "src/regexp/regexp-stack.h"
 #include "src/simulator.h"
 #ifdef V8_I18N_SUPPORT
 #include "unicode/uchar.h"
 #endif  // V8_I18N_SUPPORT
 namespace v8 {
 namespace internal {
@ -23,6 +27,67 @@ RegExpMacroAssembler::~RegExpMacroAssembler() {
 }
 // Compares two UC16 substrings of equal length case-insensitively.
 // Called from generated RegExp code.
 //
 //   byte_offset1, byte_offset2: addresses of the first character of each
 //                               substring.
 //   byte_length: length of each substring in bytes; must be even.
 //   isolate: the current isolate, or nullptr to request the unicode-mode
 //            comparison (only honored when V8_I18N_SUPPORT is defined).
 //
 // Returns 1 if the substrings match, 0 otherwise.
 int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
                                                      Address byte_offset2,
                                                      size_t byte_length,
                                                      Isolate* isolate) {
  // This function is not allowed to cause a garbage collection.
  // A GC might move the calling generated code and invalidate the
  // return address on the stack.
  DCHECK(byte_length % 2 == 0);
  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
  size_t length = byte_length >> 1;

 #ifdef V8_I18N_SUPPORT
  if (isolate == nullptr) {
    // Unicode mode: compare code point by code point after folding each one
    // with ICU's u_foldCase.
    for (size_t i = 0; i < length; i++) {
      uc32 c1 = substring1[i];
      uc32 c2 = substring2[i];
      if (unibrow::Utf16::IsLeadSurrogate(c1)) {
        // Non-BMP characters do not have case-equivalents in the BMP.
        // Both have to be non-BMP for them to be able to match.
        if (!unibrow::Utf16::IsLeadSurrogate(c2)) return 0;
        if (i + 1 < length) {
          uc16 c1t = substring1[i + 1];
          uc16 c2t = substring2[i + 1];
          if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
              unibrow::Utf16::IsTrailSurrogate(c2t)) {
            // Both sides hold a complete surrogate pair: combine each pair
            // into one code point and skip over the trail units.
            c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
            c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
            i++;
          }
        }
      }
      c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
      c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
      if (c1 != c2) return 0;
    }
    return 1;
  }
 #endif  // V8_I18N_SUPPORT

  DCHECK_NOT_NULL(isolate);
  // Fetch the canonicalization mapping only on this path: on the unicode
  // path above |isolate| is null, and calling a member function through a
  // null pointer is undefined behavior.
  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
      isolate->regexp_macro_assembler_canonicalize();
  for (size_t i = 0; i < length; i++) {
    unibrow::uchar c1 = substring1[i];
    unibrow::uchar c2 = substring2[i];
    if (c1 != c2) {
      // Canonicalize c1 first; c2 is canonicalized only if that alone does
      // not already produce a match.
      unibrow::uchar s1[1] = {c1};
      canonicalize->get(c1, '\0', s1);
      if (s1[0] != c2) {
        unibrow::uchar s2[1] = {c2};
        canonicalize->get(c2, '\0', s2);
        if (s1[0] != s2[0]) {
          return 0;
        }
      }
    }
  }
  return 1;
 }
 #ifndef V8_INTERPRETED_REGEXP  // Avoid unused code, e.g., on ARM.
 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
@ -245,40 +310,6 @@ const byte NativeRegExpMacroAssembler::word_character_map[] = {
 };
 // Compares two UC16 substrings of byte_length bytes each, treating
 // characters as equal when they are identical or when their
 // Ecma262Canonicalize mappings agree. Returns 1 on a match, 0 otherwise.
 // The isolate is dereferenced unconditionally to obtain the mapping.
 int NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16(
    Address byte_offset1,
    Address byte_offset2,
    size_t byte_length,
    Isolate* isolate) {
  unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
      isolate->regexp_macro_assembler_canonicalize();
  // This function is not allowed to cause a garbage collection.
  // A GC might move the calling generated code and invalidate the
  // return address on the stack.
  DCHECK(byte_length % 2 == 0);
  uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
  uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
  // Number of UC16 code units to compare.
  size_t length = byte_length >> 1;
  for (size_t i = 0; i < length; i++) {
    unibrow::uchar c1 = substring1[i];
    unibrow::uchar c2 = substring2[i];
    if (c1 != c2) {
      // Canonicalize c1 first; c2 is canonicalized only if that alone does
      // not already produce a match.
      unibrow::uchar s1[1] = { c1 };
      canonicalize->get(c1, '\0', s1);
      if (s1[0] != c2) {
        unibrow::uchar s2[1] = { c2 };
        canonicalize->get(c2, '\0', s2);
        if (s1[0] != s2[0]) {
          return 0;
        }
      }
    }
  }
  return 1;
 }
 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
                                              Address* stack_base,
                                              Isolate* isolate) {
--- a/src/regexp/regexp-macro-assembler.h
+++ b/src/regexp/regexp-macro-assembler.h
@ -76,7 +76,7 @@ class RegExpMacroAssembler {
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match) = 0;
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match) = 0;
  // Check the current character for a match with a literal character.  If we
  // fail to match then goto the on_failure label.  End of input always
@ -146,6 +146,12 @@ class RegExpMacroAssembler {
  virtual void ClearRegisters(int reg_from, int reg_to) = 0;
  virtual void WriteStackPointerToRegister(int reg) = 0;
  // Compares two-byte strings case insensitively.
  // Called from generated RegExp code.
  static int CaseInsensitiveCompareUC16(Address byte_offset1,
                                        Address byte_offset2,
                                        size_t byte_length, Isolate* isolate);
  // Controls the generation of large inlined constants in the code.
  void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
  bool slow_safe() { return slow_safe_compiler_; }
@ -199,13 +205,6 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
                      int previous_index,
                      Isolate* isolate);
  // Compares two-byte strings case insensitively.
  // Called from generated RegExp code.
  static int CaseInsensitiveCompareUC16(Address byte_offset1,
                                        Address byte_offset2,
                                        size_t byte_length,
                                        Isolate* isolate);
  // Called from RegExp if the backtrack stack limit is hit.
  // Tries to expand the stack. Returns the new stack-pointer if
  // successful, and updates the stack_top address, or returns 0 if unable
--- a/src/regexp/regexp-parser.cc
+++ b/src/regexp/regexp-parser.cc
@ -11,6 +11,10 @@
 #include "src/regexp/jsregexp.h"
 #include "src/utils.h"
 #ifdef V8_I18N_SUPPORT
 #include "unicode/uset.h"
 #endif  // V8_I18N_SUPPORT
 namespace v8 {
 namespace internal {
@ -1064,13 +1068,20 @@ void RegExpBuilder::AddTrailSurrogate(uc16 trail_surrogate) {
  DCHECK(unibrow::Utf16::IsTrailSurrogate(trail_surrogate));
  if (pending_surrogate_ != kNoPendingSurrogate) {
    uc16 lead_surrogate = pending_surrogate_;
    DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
    ZoneList<uc16> surrogate_pair(2, zone());
    surrogate_pair.Add(lead_surrogate, zone());
    surrogate_pair.Add(trail_surrogate, zone());
    RegExpAtom* atom = new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
    pending_surrogate_ = kNoPendingSurrogate;
-    AddAtom(atom);
+    DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
    uc32 combined =
        unibrow::Utf16::CombineSurrogatePair(lead_surrogate, trail_surrogate);
    if (NeedsDesugaringForIgnoreCase(combined)) {
      AddCharacterClass(combined);
    } else {
      ZoneList<uc16> surrogate_pair(2, zone());
      surrogate_pair.Add(lead_surrogate, zone());
      surrogate_pair.Add(trail_surrogate, zone());
      RegExpAtom* atom =
          new (zone()) RegExpAtom(surrogate_pair.ToConstVector());
      AddAtom(atom);
    }
  } else {
    pending_surrogate_ = trail_surrogate;
    FlushPendingSurrogate();
@ -1080,14 +1091,10 @@ void RegExpBuilder::AddTrailSurrogate(uc16 trail_surrogate) {
 void RegExpBuilder::FlushPendingSurrogate() {
  if (pending_surrogate_ != kNoPendingSurrogate) {
    // Use character class to desugar lone surrogate matching.
    RegExpCharacterClass* cc = new (zone()) RegExpCharacterClass(
        CharacterRange::List(zone(),
                             CharacterRange::Singleton(pending_surrogate_)),
        false);
    pending_surrogate_ = kNoPendingSurrogate;
    DCHECK(unicode());
-    AddCharacterClass(cc);
+    uc32 c = pending_surrogate_;
    pending_surrogate_ = kNoPendingSurrogate;
    AddCharacterClass(c);
  }
 }
@ -1123,11 +1130,15 @@ void RegExpBuilder::FlushText() {
 void RegExpBuilder::AddCharacter(uc16 c) {
  FlushPendingSurrogate();
  pending_empty_ = false;
-  if (characters_ == NULL) {
+  if (NeedsDesugaringForIgnoreCase(c)) {
-    characters_ = new (zone()) ZoneList<uc16>(4, zone());
+    AddCharacterClass(c);
  } else {
    if (characters_ == NULL) {
      characters_ = new (zone()) ZoneList<uc16>(4, zone());
    }
    characters_->Add(c, zone());
    LAST(ADD_CHAR);
  }
  characters_->Add(c, zone());
  LAST(ADD_CHAR);
 }
@ -1150,7 +1161,7 @@ void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
 void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
-  if (unicode() && cc->NeedsDesugaringForUnicode(zone())) {
+  if (NeedsDesugaringForUnicode(cc)) {
    // In unicode mode, character class needs to be desugared, so it
    // must be a standalone term instead of being part of a RegExpText.
    AddTerm(cc);
@ -1160,6 +1171,12 @@ void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
 }
 // Convenience overload: wraps the single code point |c| in a non-negated
 // one-element character class and passes it to the
 // RegExpCharacterClass* overload.
 void RegExpBuilder::AddCharacterClass(uc32 c) {
  ZoneList<CharacterRange>* singleton =
      CharacterRange::List(zone(), CharacterRange::Singleton(c));
  RegExpCharacterClass* char_class =
      new (zone()) RegExpCharacterClass(singleton, false);
  AddCharacterClass(char_class);
 }
 void RegExpBuilder::AddAtom(RegExpTree* term) {
  if (term->IsEmpty()) {
    AddEmpty();
@ -1210,6 +1227,47 @@ void RegExpBuilder::FlushTerms() {
 }
 // Returns true if |cc| must be desugared in unicode mode, i.e. if one of
 // its ranges reaches into the non-BMP code points or overlaps the surrogate
 // range. Always false when the unicode flag is not set. Canonicalizes the
 // class's range list as a side effect.
 bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
  if (!unicode()) return false;

  // White space ('s'), ASCII word characters ('w') and ASCII digits ('d')
  // do not need desugaring.
  const uc16 type = cc->standard_type();
  if (type == 's' || type == 'w' || type == 'd') return false;

  ZoneList<CharacterRange>* list = cc->ranges(zone());
  CharacterRange::Canonicalize(list);
  const int count = list->length();
  for (int index = 0; index < count; index++) {
    CharacterRange range = list->at(index);
    // Range reaches non-BMP characters.
    const bool has_non_bmp = range.to() >= kNonBmpStart;
    // Range overlaps the lead..trail surrogate block (lone surrogates).
    const bool spans_surrogates = range.from() <= kTrailSurrogateEnd &&
                                  range.to() >= kLeadSurrogateStart;
    if (has_non_bmp || spans_surrogates) return true;
  }
  return false;
 }
 // Returns true if code point |c| has to be turned into a character class
 // because, with both the unicode and ignore-case flags set, its
 // case-insensitive closure contains more than one code point.
 bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
 #ifdef V8_I18N_SUPPORT
  if (!unicode() || !ignore_case()) return false;
  // Build the case-insensitive closure of {c}, dropping any string
  // (multi-character) entries so that only single code points are counted.
  USet* closure = uset_open(c, c);
  uset_closeOver(closure, USET_CASE_INSENSITIVE);
  uset_removeAllStrings(closure);
  const bool has_equivalents = uset_size(closure) > 1;
  uset_close(closure);
  return has_equivalents;
 #else
  // In the case where ICU is not included, we act as if the unicode flag is
  // not set, and do not desugar.
  return false;
 #endif  // V8_I18N_SUPPORT
 }
 RegExpTree* RegExpBuilder::ToRegExp() {
  FlushTerms();
  int num_alternatives = alternatives_.length();
--- a/src/regexp/regexp-parser.h
+++ b/src/regexp/regexp-parser.h
@ -106,6 +106,7 @@ class RegExpBuilder : public ZoneObject {
  // following quantifier
  void AddEmpty();
  void AddCharacterClass(RegExpCharacterClass* cc);
  void AddCharacterClass(uc32 c);
  void AddAtom(RegExpTree* tree);
  void AddTerm(RegExpTree* tree);
  void AddAssertion(RegExpTree* tree);
@ -122,8 +123,11 @@ class RegExpBuilder : public ZoneObject {
  void FlushCharacters();
  void FlushText();
  void FlushTerms();
  bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
  bool NeedsDesugaringForIgnoreCase(uc32 c);
  Zone* zone() const { return zone_; }
  bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
  bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
  Zone* zone_;
  bool pending_empty_;
--- a/src/regexp/x64/regexp-macro-assembler-x64.cc
+++ b/src/regexp/x64/regexp-macro-assembler-x64.cc
@ -203,7 +203,7 @@ void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
 void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
+    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
  Label fallthrough;
  ReadPositionFromRegister(rdx, start_reg);  // Offset of start of capture
  ReadPositionFromRegister(rbx, start_reg + 1);  // Offset of end of capture
@ -308,8 +308,10 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
    //   Address byte_offset1 - Address captured substring's start.
    //   Address byte_offset2 - Address of current character position.
    //   size_t byte_length - length of capture in bytes(!)
-    //   Isolate* isolate
+//   Isolate* isolate or 0 if unicode flag.
 #ifdef _WIN64
    DCHECK(rcx.is(arg_reg_1));
    DCHECK(rdx.is(arg_reg_2));
    // Compute and set byte_offset1 (start of capture).
    __ leap(rcx, Operand(rsi, rdx, times_1, 0));
    // Set byte_offset2.
@ -317,11 +319,9 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
    if (read_backward) {
      __ subq(rdx, rbx);
    }
    // Set byte_length.
    __ movp(r8, rbx);
    // Isolate.
    __ LoadAddress(r9, ExternalReference::isolate_address(isolate()));
 #else  // AMD64 calling convention
    DCHECK(rdi.is(arg_reg_1));
    DCHECK(rsi.is(arg_reg_2));
    // Compute byte_offset2 (current position = rsi+rdi).
    __ leap(rax, Operand(rsi, rdi, times_1, 0));
    // Compute and set byte_offset1 (start of capture).
@ -331,11 +331,19 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
    if (read_backward) {
      __ subq(rsi, rbx);
    }
 #endif  // _WIN64
    // Set byte_length.
-    __ movp(rdx, rbx);
+    __ movp(arg_reg_3, rbx);
    // Isolate.
-    __ LoadAddress(rcx, ExternalReference::isolate_address(isolate()));
+#ifdef V8_I18N_SUPPORT
-#endif
+    if (unicode) {
      __ movp(arg_reg_4, Immediate(0));
    } else  // NOLINT
 #endif      // V8_I18N_SUPPORT
    {
      __ LoadAddress(arg_reg_4, ExternalReference::isolate_address(isolate()));
    }
    { // NOLINT: Can't find a way to open this scope without confusing the
      // linter.
--- a/src/regexp/x64/regexp-macro-assembler-x64.h
+++ b/src/regexp/x64/regexp-macro-assembler-x64.h
@ -38,7 +38,7 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
  virtual void CheckNotBackReference(int start_reg, bool read_backward,
                                     Label* on_no_match);
  virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
-                                               bool read_backward,
+                                               bool read_backward, bool unicode,
                                               Label* on_no_match);
  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
  virtual void CheckNotCharacterAfterAnd(uint32_t c,
--- a/test/cctest/test-regexp.cc
+++ b/test/cctest/test-regexp.cc
@ -1186,16 +1186,16 @@ TEST(MacroAssemblerNativeBackRefNoCase) {
  m.WriteCurrentPositionToRegister(2, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(3, 0);
-  m.CheckNotBackReferenceIgnoreCase(2, false, &fail);  // Match "AbC".
+  m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "AbC".
-  m.CheckNotBackReferenceIgnoreCase(2, false, &fail);  // Match "ABC".
+  m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "ABC".
  Label expected_fail;
-  m.CheckNotBackReferenceIgnoreCase(2, false, &expected_fail);
+  m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
  m.Bind(&fail);
  m.Fail();
  m.Bind(&expected_fail);
  m.AdvanceCurrentPosition(3);  // Skip "xYz"
-  m.CheckNotBackReferenceIgnoreCase(2, false, &succ);
+  m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
  m.Fail();
  m.Bind(&succ);
@ -1629,7 +1629,9 @@ static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
  int count = expected.length();
  ZoneList<CharacterRange>* list =
      new(&zone) ZoneList<CharacterRange>(count, &zone);
-  input.AddCaseEquivalents(isolate, &zone, list, false);
+  list->Add(input, &zone);
  CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
  list->Remove(0);  // Remove the input before checking results.
  CHECK_EQ(count, list->length());
  for (int i = 0; i < list->length(); i++) {
    CHECK_EQ(expected[i].from(), list->at(i).from());
--- a/test/mjsunit/harmony/unicode-regexp-ignore-case-noi18n.js
+++ b/test/mjsunit/harmony/unicode-regexp-ignore-case-noi18n.js
@ -0,0 +1,52 @@
 // Copyright 2016 the V8 project authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 // Flags: --harmony-unicode-regexps
 // Expectations for builds without i18n support (no ICU). In that
 // configuration the regexp parser does not desugar characters for /ui, so
 // matches that would need ICU case folding are expected to fail.
 // Non-unicode use toUpperCase mappings.
 assertFalse(/[\u00e5]/i.test("\u212b"));
 assertFalse(/[\u212b]/i.test("\u00e5\u1234"));
 assertFalse(/[\u212b]/i.test("\u00e5"));
 assertTrue("\u212b".toLowerCase() == "\u00e5");
 assertTrue("\u00c5".toLowerCase() == "\u00e5");
 assertTrue("\u00e5".toUpperCase() == "\u00c5");
 // Unicode uses case folding mappings.
 // Without ICU, \u212b is not treated as equivalent to \u00e5 or \u00c5;
 // \u00e5 and \u00c5 still match each other.
 assertFalse(/\u00e5/ui.test("\u212b"));
 assertTrue(/\u00e5/ui.test("\u00c5"));
 assertTrue(/\u00e5/ui.test("\u00e5"));
 assertFalse(/\u00e5/ui.test("\u212b"));
 assertTrue(/\u00c5/ui.test("\u00e5"));
 assertFalse(/\u00c5/ui.test("\u212b"));
 assertTrue(/\u00c5/ui.test("\u00c5"));
 assertFalse(/\u212b/ui.test("\u00c5"));
 assertFalse(/\u212b/ui.test("\u00e5"));
 assertTrue(/\u212b/ui.test("\u212b"));
 // Non-BMP.
 // Without ICU, \u{10400} and \u{10428} do not match each other under /ui.
 assertFalse(/\u{10400}/i.test("\u{10428}"));
 assertFalse(/\u{10400}/ui.test("\u{10428}"));
 assertFalse(/\ud801\udc00/ui.test("\u{10428}"));
 assertFalse(/[\u{10428}]/ui.test("\u{10400}"));
 assertFalse(/[\ud801\udc28]/ui.test("\u{10400}"));
 assertEquals(["\uff21\u{10400}"],
             /[\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc"));
 assertEquals(["abc"], /[^\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc\uff23"));
 assertEquals(["\uff53\u24bb"],
             /[\u24d5-\uff33]+/ui.exec("\uff54\uff53\u24bb\u24ba"));
 // Full mappings are ignored.
 assertFalse(/\u00df/ui.test("SS"));
 assertFalse(/\u1f8d/ui.test("\u1f05\u03b9"));
 // Simple mappings.
 assertFalse(/\u1f8d/ui.test("\u1f85"));
 // Common mappings.
 assertTrue(/\u1f6b/ui.test("\u1f63"));
 // Back references.
 // Neither back reference matches a differently-cased repetition here.
 assertNull(/(.)\1\1/ui.exec("\u00e5\u212b\u00c5"));
 assertNull(/(.)\1/ui.exec("\u{118aa}\u{118ca}"));
--- a/test/mjsunit/harmony/unicode-regexp-ignore-case.js
+++ b/test/mjsunit/harmony/unicode-regexp-ignore-case.js
@ -0,0 +1,54 @@
 // Copyright 2016 the V8 project authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 // Flags: --harmony-unicode-regexps
 // Expectations for case-insensitive unicode (/ui) regexps in builds with
 // i18n support, where case mappings are provided by ICU.
 // Non-unicode use toUpperCase mappings.
 assertFalse(/[\u00e5]/i.test("\u212b"));
 assertFalse(/[\u212b]/i.test("\u00e5\u1234"));
 assertFalse(/[\u212b]/i.test("\u00e5"));
 assertTrue("\u212b".toLowerCase() == "\u00e5");
 assertTrue("\u00c5".toLowerCase() == "\u00e5");
 assertTrue("\u00e5".toUpperCase() == "\u00c5");
 // Unicode uses case folding mappings.
 // \u00e5, \u00c5 and \u212b all match one another under /ui.
 assertTrue(/\u00e5/ui.test("\u212b"));
 assertTrue(/\u00e5/ui.test("\u00c5"));
 assertTrue(/\u00e5/ui.test("\u00e5"));
 assertTrue(/\u00e5/ui.test("\u212b"));
 assertTrue(/\u00c5/ui.test("\u00e5"));
 assertTrue(/\u00c5/ui.test("\u212b"));
 assertTrue(/\u00c5/ui.test("\u00c5"));
 assertTrue(/\u212b/ui.test("\u00c5"));
 assertTrue(/\u212b/ui.test("\u00e5"));
 assertTrue(/\u212b/ui.test("\u212b"));
 // Non-BMP.
 // \u{10400} and \u{10428} match each other only with the u flag, whether
 // written as code point escapes or as surrogate pairs.
 assertFalse(/\u{10400}/i.test("\u{10428}"));
 assertTrue(/\u{10400}/ui.test("\u{10428}"));
 assertTrue(/\ud801\udc00/ui.test("\u{10428}"));
 assertTrue(/[\u{10428}]/ui.test("\u{10400}"));
 assertTrue(/[\ud801\udc28]/ui.test("\u{10400}"));
 assertEquals(["\uff21\u{10400}"],
             /[\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc"));
 assertEquals(["abc"], /[^\uff40-\u{10428}]+/ui.exec("\uff21\u{10400}abc\uff23"));
 assertEquals(["\uff53\u24bb"],
             /[\u24d5-\uff33]+/ui.exec("\uff54\uff53\u24bb\u24ba"));
 // Full mappings are ignored.
 assertFalse(/\u00df/ui.test("SS"));
 assertFalse(/\u1f8d/ui.test("\u1f05\u03b9"));
 // Simple mappings work.
 assertTrue(/\u1f8d/ui.test("\u1f85"));
 // Common mappings work.
 assertTrue(/\u1f6b/ui.test("\u1f63"));
 // Back references.
 // A back reference matches case-insensitive equivalents of the capture,
 // including non-BMP characters.
 assertEquals(["\u00e5\u212b\u00c5", "\u00e5"],
             /(.)\1\1/ui.exec("\u00e5\u212b\u00c5"));
 assertEquals(["\u{118aa}\u{118ca}", "\u{118aa}"],
             /(.)\1/ui.exec("\u{118aa}\u{118ca}"));
--- a/test/mjsunit/mjsunit.status
+++ b/test/mjsunit/mjsunit.status
@ -289,6 +289,10 @@
  # TODO(titzer): SSE 4.1 required for asm-wasm test (floor).
  'wasm/asm-wasm': [SKIP],
  # Case-insensitive unicode regexps rely on the case mapping provided by ICU.
  'harmony/unicode-regexp-ignore-case': [PASS, ['no_i18n == True', FAIL]],
  'harmony/unicode-regexp-ignore-case-noi18n': [FAIL, ['no_i18n == True', PASS]],
 }],  # ALWAYS
 ['novfp3 == True', {