diff --git a/src/compiler/x64/code-generator-x64.cc b/src/compiler/x64/code-generator-x64.cc
index f9031fdc2c..265fa06219 100644
--- a/src/compiler/x64/code-generator-x64.cc
+++ b/src/compiler/x64/code-generator-x64.cc
@@ -946,7 +946,7 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       if (instr->InputAt(0)->IsDoubleStackSlot()) {
         __ movl(i.OutputRegister(), i.InputOperand(0));
       } else {
-        __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
+        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
       }
       break;
     case kSSEFloat64ExtractHighWord32:
@@ -972,9 +972,9 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       break;
     case kSSEFloat64LoadLowWord32:
       if (instr->InputAt(0)->IsRegister()) {
-        __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
+        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
       } else {
-        __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
+        __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
       }
       break;
     case kAVXFloat32Cmp: {
@@ -1189,26 +1189,26 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
       if (instr->InputAt(0)->IsDoubleStackSlot()) {
         __ movl(i.OutputRegister(), i.InputOperand(0));
       } else {
-        __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
+        __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
       }
       break;
     case kX64BitcastDL:
       if (instr->InputAt(0)->IsDoubleStackSlot()) {
         __ movq(i.OutputRegister(), i.InputOperand(0));
       } else {
-        __ movq(i.OutputRegister(), i.InputDoubleRegister(0));
+        __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
       }
       break;
     case kX64BitcastIF:
       if (instr->InputAt(0)->IsRegister()) {
-        __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
+        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
       } else {
         __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
       }
       break;
     case kX64BitcastLD:
       if (instr->InputAt(0)->IsRegister()) {
-        __ movq(i.OutputDoubleRegister(), i.InputRegister(0));
+        __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
       } else {
         __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
       }
diff --git a/src/x64/assembler-x64.cc b/src/x64/assembler-x64.cc
index 096bcbd69e..44e0e2babc 100644
--- a/src/x64/assembler-x64.cc
+++ b/src/x64/assembler-x64.cc
@@ -2541,6 +2541,7 @@ void Assembler::divps(XMMRegister dst, const Operand& src) {
 // SSE 2 operations.
 
 void Assembler::movd(XMMRegister dst, Register src) {
+  DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   emit(0x66);
   emit_optional_rex_32(dst, src);
@@ -2551,6 +2552,7 @@ void Assembler::movd(XMMRegister dst, Register src) {
 
 
 void Assembler::movd(XMMRegister dst, const Operand& src) {
+  DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   emit(0x66);
   emit_optional_rex_32(dst, src);
@@ -2561,6 +2563,7 @@ void Assembler::movd(XMMRegister dst, const Operand& src) {
 
 
 void Assembler::movd(Register dst, XMMRegister src) {
+  DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   emit(0x66);
   emit_optional_rex_32(src, dst);
@@ -2571,6 +2574,7 @@ void Assembler::movd(Register dst, XMMRegister src) {
 
 
 void Assembler::movq(XMMRegister dst, Register src) {
+  DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   emit(0x66);
   emit_rex_64(dst, src);
@@ -2581,6 +2585,7 @@ void Assembler::movq(XMMRegister dst, Register src) {
 
 
 void Assembler::movq(Register dst, XMMRegister src) {
+  DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   emit(0x66);
   emit_rex_64(src, dst);
@@ -2591,6 +2596,7 @@ void Assembler::movq(Register dst, XMMRegister src) {
 
 
 void Assembler::movq(XMMRegister dst, XMMRegister src) {
+  DCHECK(!IsEnabled(AVX));
   EnsureSpace ensure_space(this);
   if (dst.low_bits() == 4) {
     // Avoid unnecessary SIB byte.
@@ -3481,6 +3487,64 @@ void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
 }
 
 
+void Assembler::vmovd(XMMRegister dst, Register src) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  XMMRegister isrc = {src.code()};
+  emit_vex_prefix(dst, xmm0, isrc, kL128, k66, k0F, kW0);
+  emit(0x6e);
+  emit_sse_operand(dst, src);
+}
+
+
+void Assembler::vmovd(XMMRegister dst, const Operand& src) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  emit_vex_prefix(dst, xmm0, src, kL128, k66, k0F, kW0);
+  emit(0x6e);
+  emit_sse_operand(dst, src);
+}
+
+
+void Assembler::vmovd(Register dst, XMMRegister src) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  XMMRegister idst = {dst.code()};
+  emit_vex_prefix(src, xmm0, idst, kL128, k66, k0F, kW0);
+  emit(0x7e);
+  emit_sse_operand(src, dst);
+}
+
+
+void Assembler::vmovq(XMMRegister dst, Register src) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  XMMRegister isrc = {src.code()};
+  emit_vex_prefix(dst, xmm0, isrc, kL128, k66, k0F, kW1);
+  emit(0x6e);
+  emit_sse_operand(dst, src);
+}
+
+
+void Assembler::vmovq(XMMRegister dst, const Operand& src) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  emit_vex_prefix(dst, xmm0, src, kL128, k66, k0F, kW1);
+  emit(0x6e);
+  emit_sse_operand(dst, src);
+}
+
+
+void Assembler::vmovq(Register dst, XMMRegister src) {
+  DCHECK(IsEnabled(AVX));
+  EnsureSpace ensure_space(this);
+  XMMRegister idst = {dst.code()};
+  emit_vex_prefix(src, xmm0, idst, kL128, k66, k0F, kW1);
+  emit(0x7e);
+  emit_sse_operand(src, dst);
+}
+
+
 void Assembler::vmovapd(XMMRegister dst, XMMRegister src) {
   DCHECK(IsEnabled(AVX));
   EnsureSpace ensure_space(this);
diff --git a/src/x64/assembler-x64.h b/src/x64/assembler-x64.h
index 3ef0891830..56be6cc4a5 100644
--- a/src/x64/assembler-x64.h
+++ b/src/x64/assembler-x64.h
@@ -1269,6 +1269,13 @@ class Assembler : public AssemblerBase {
   void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
   void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
 
+  void vmovd(XMMRegister dst, Register src);
+  void vmovd(XMMRegister dst, const Operand& src);
+  void vmovd(Register dst, XMMRegister src);
+  void vmovq(XMMRegister dst, Register src);
+  void vmovq(XMMRegister dst, const Operand& src);
+  void vmovq(Register dst, XMMRegister src);
+
   void vmovapd(XMMRegister dst, XMMRegister src);
   void vmovsd(XMMRegister dst, const Operand& src) {
     vsd(0x10, dst, xmm0, src);
diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc
index 058e99bd4c..b1539ab8d7 100644
--- a/src/x64/code-stubs-x64.cc
+++ b/src/x64/code-stubs-x64.cc
@@ -337,7 +337,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
     // Test for 0.5.
     // Load double_scratch with 0.5.
    __ movq(scratch, V8_UINT64_C(0x3FE0000000000000));
-    __ movq(double_scratch, scratch);
+    __ Movq(double_scratch, scratch);
     // Already ruled out NaNs for exponent.
     __ ucomisd(double_scratch, double_exponent);
     __ j(not_equal, &not_plus_half, Label::kNear);
@@ -347,7 +347,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
     // According to IEEE-754, double-precision -Infinity has the highest
     // 12 bits set and the lowest 52 bits cleared.
     __ movq(scratch, V8_UINT64_C(0xFFF0000000000000));
-    __ movq(double_scratch, scratch);
+    __ Movq(double_scratch, scratch);
     __ ucomisd(double_scratch, double_base);
     // Comparing -Infinity with NaN results in "unordered", which sets the
     // zero flag as if both were equal.  However, it also sets the carry flag.
@@ -379,7 +379,7 @@ void MathPowStub::Generate(MacroAssembler* masm) {
     // According to IEEE-754, double-precision -Infinity has the highest
     // 12 bits set and the lowest 52 bits cleared.
     __ movq(scratch, V8_UINT64_C(0xFFF0000000000000));
-    __ movq(double_scratch, scratch);
+    __ Movq(double_scratch, scratch);
     __ ucomisd(double_scratch, double_base);
     // Comparing -Infinity with NaN results in "unordered", which sets the
     // zero flag as if both were equal.  However, it also sets the carry flag.
diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc
index 7ca231e4b5..404ce0484a 100644
--- a/src/x64/codegen-x64.cc
+++ b/src/x64/codegen-x64.cc
@@ -614,7 +614,7 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
   __ Movsd(result, Operand(kScratchRegister, 4 * kDoubleSize));
   __ mulsd(double_scratch, input);
   __ addsd(double_scratch, result);
-  __ movq(temp2, double_scratch);
+  __ Movq(temp2, double_scratch);
   __ subsd(double_scratch, result);
   __ Movsd(result, Operand(kScratchRegister, 6 * kDoubleSize));
   __ leaq(temp1, Operand(temp2, 0x1ff800));
@@ -630,7 +630,7 @@ void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
   __ subsd(result, double_scratch);
   __ mulsd(input, double_scratch);
   __ mulsd(result, input);
-  __ movq(input, temp1);
+  __ Movq(input, temp1);
   __ mulsd(result, Operand(kScratchRegister, 7 * kDoubleSize));
   __ subsd(result, double_scratch);
   __ addsd(result, Operand(kScratchRegister, 8 * kDoubleSize));
diff --git a/src/x64/disasm-x64.cc b/src/x64/disasm-x64.cc
index 557eaf0090..9f453d1617 100644
--- a/src/x64/disasm-x64.cc
+++ b/src/x64/disasm-x64.cc
@@ -351,6 +351,11 @@ class DisassemblerX64 {
 
   bool rex_w() { return (rex_ & 0x08) != 0; }
 
+  bool vex_w() {
+    DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
+    return vex_byte0_ == VEX3_PREFIX ? (vex_byte2_ & 0x80) != 0 : false;
+  }
+
   bool vex_128() {
     DCHECK(vex_byte0_ == VEX3_PREFIX || vex_byte0_ == VEX2_PREFIX);
     byte checked = vex_byte0_ == VEX3_PREFIX ? vex_byte2_ : vex_byte1_;
@@ -1191,6 +1196,16 @@ int DisassemblerX64::AVXInstruction(byte* data) {
                        NameOfXMMRegister(vvvv));
         current += PrintRightXMMOperand(current);
         break;
+      case 0x6e:
+        AppendToBuffer("vmov%c %s,", vex_w() ? 'q' : 'd',
+                       NameOfXMMRegister(regop));
+        current += PrintRightOperand(current);
+        break;
+      case 0x7e:
+        AppendToBuffer("vmov%c ", vex_w() ? 'q' : 'd');
+        current += PrintRightOperand(current);
+        AppendToBuffer(",%s", NameOfXMMRegister(regop));
+        break;
       default:
         UnimplementedInstruction();
     }
diff --git a/src/x64/lithium-codegen-x64.cc b/src/x64/lithium-codegen-x64.cc
index 4014bdd8dd..bbb1225352 100644
--- a/src/x64/lithium-codegen-x64.cc
+++ b/src/x64/lithium-codegen-x64.cc
@@ -3604,7 +3604,7 @@ void LCodeGen::DoMathFloor(LMathFloor* instr) {
     CpuFeatureScope scope(masm(), SSE4_1);
     if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
       // Deoptimize if minus zero.
-      __ movq(output_reg, input_reg);
+      __ Movq(output_reg, input_reg);
       __ subq(output_reg, Immediate(1));
       DeoptimizeIf(overflow, instr, Deoptimizer::kMinusZero);
     }
@@ -3665,7 +3665,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
   Label done, round_to_zero, below_one_half;
   Label::Distance dist = DeoptEveryNTimes() ? Label::kFar : Label::kNear;
   __ movq(kScratchRegister, one_half);
-  __ movq(xmm_scratch, kScratchRegister);
+  __ Movq(xmm_scratch, kScratchRegister);
   __ ucomisd(xmm_scratch, input_reg);
   __ j(above, &below_one_half, Label::kNear);
 
@@ -3679,13 +3679,13 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
 
   __ bind(&below_one_half);
   __ movq(kScratchRegister, minus_one_half);
-  __ movq(xmm_scratch, kScratchRegister);
+  __ Movq(xmm_scratch, kScratchRegister);
   __ ucomisd(xmm_scratch, input_reg);
   __ j(below_equal, &round_to_zero, Label::kNear);
 
   // CVTTSD2SI rounds towards zero, we use ceil(x - (-0.5)) and then
   // compare and compensate.
-  __ movq(input_temp, input_reg);  // Do not alter input_reg.
+  __ Movapd(input_temp, input_reg);  // Do not alter input_reg.
   __ subsd(input_temp, xmm_scratch);
   __ cvttsd2si(output_reg, input_temp);
   // Catch minint due to overflow, and to prevent overflow when compensating.
@@ -3703,7 +3703,7 @@ void LCodeGen::DoMathRound(LMathRound* instr) {
   // We return 0 for the input range [+0, 0.5[, or [-0.5, 0.5[ if
   // we can ignore the difference between a result of -0 and +0.
   if (instr->hydrogen()->CheckFlag(HValue::kBailoutOnMinusZero)) {
-    __ movq(output_reg, input_reg);
+    __ Movq(output_reg, input_reg);
     __ testq(output_reg, output_reg);
     DeoptimizeIf(negative, instr, Deoptimizer::kMinusZero);
   }
@@ -3744,7 +3744,7 @@ void LCodeGen::DoMathPowHalf(LMathPowHalf* instr) {
   // Check base for -Infinity.  According to IEEE-754, double-precision
   // -Infinity has the highest 12 bits set and the lowest 52 bits cleared.
   __ movq(kScratchRegister, V8_INT64_C(0xFFF0000000000000));
-  __ movq(xmm_scratch, kScratchRegister);
+  __ Movq(xmm_scratch, kScratchRegister);
   __ ucomisd(xmm_scratch, input_reg);
   // Comparing -Infinity with NaN results in "unordered", which sets the
   // zero flag as if both were equal.  However, it also sets the carry flag.
@@ -5322,10 +5322,10 @@ void LCodeGen::DoDoubleBits(LDoubleBits* instr) {
   XMMRegister value_reg = ToDoubleRegister(instr->value());
   Register result_reg = ToRegister(instr->result());
   if (instr->hydrogen()->bits() == HDoubleBits::HIGH) {
-    __ movq(result_reg, value_reg);
+    __ Movq(result_reg, value_reg);
     __ shrq(result_reg, Immediate(32));
   } else {
-    __ movd(result_reg, value_reg);
+    __ Movd(result_reg, value_reg);
   }
 }
 
@@ -5335,9 +5335,9 @@ void LCodeGen::DoConstructDouble(LConstructDouble* instr) {
   Register lo_reg = ToRegister(instr->lo());
   XMMRegister result_reg = ToDoubleRegister(instr->result());
   XMMRegister xmm_scratch = double_scratch0();
-  __ movd(result_reg, hi_reg);
+  __ Movd(result_reg, hi_reg);
   __ psllq(result_reg, 32);
-  __ movd(xmm_scratch, lo_reg);
+  __ Movd(xmm_scratch, lo_reg);
   __ orps(result_reg, xmm_scratch);
 }
 
diff --git a/src/x64/lithium-gap-resolver-x64.cc b/src/x64/lithium-gap-resolver-x64.cc
index 914e278685..fbedbfb0ca 100644
--- a/src/x64/lithium-gap-resolver-x64.cc
+++ b/src/x64/lithium-gap-resolver-x64.cc
@@ -192,7 +192,7 @@ void LGapResolver::EmitMove(int index) {
        __ xorps(dst, dst);
       } else {
         __ Set(kScratchRegister, int_val);
-        __ movq(dst, kScratchRegister);
+        __ Movq(dst, kScratchRegister);
       }
     } else {
       DCHECK(destination->IsStackSlot());
diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc
index f0efd0641e..d5abaa0e50 100644
--- a/src/x64/macro-assembler-x64.cc
+++ b/src/x64/macro-assembler-x64.cc
@@ -2413,7 +2413,7 @@ void MacroAssembler::Move(XMMRegister dst, uint32_t src) {
       pcmpeqd(dst, dst);
     } else {
       movl(kScratchRegister, Immediate(src));
-      movq(dst, kScratchRegister);
+      Movq(dst, kScratchRegister);
     }
   }
 }
@@ -2442,7 +2442,7 @@ void MacroAssembler::Move(XMMRegister dst, uint64_t src) {
       Move(dst, lower);
     } else {
       movq(kScratchRegister, src);
-      movq(dst, kScratchRegister);
+      Movq(dst, kScratchRegister);
     }
   }
 }
@@ -2489,6 +2489,56 @@ void MacroAssembler::Movsd(const Operand& dst, XMMRegister src) {
 }
 
 
+void MacroAssembler::Movd(XMMRegister dst, Register src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vmovd(dst, src);
+  } else {
+    movd(dst, src);
+  }
+}
+
+
+void MacroAssembler::Movd(XMMRegister dst, const Operand& src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vmovd(dst, src);
+  } else {
+    movd(dst, src);
+  }
+}
+
+
+void MacroAssembler::Movd(Register dst, XMMRegister src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vmovd(dst, src);
+  } else {
+    movd(dst, src);
+  }
+}
+
+
+void MacroAssembler::Movq(XMMRegister dst, Register src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vmovq(dst, src);
+  } else {
+    movq(dst, src);
+  }
+}
+
+
+void MacroAssembler::Movq(Register dst, XMMRegister src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope scope(this, AVX);
+    vmovq(dst, src);
+  } else {
+    movq(dst, src);
+  }
+}
+
+
 void MacroAssembler::Cmp(Register dst, Handle<Object> source) {
   AllowDeferredHandleDereference smi_check;
   if (source->IsSmi()) {
@@ -2797,7 +2847,7 @@ void MacroAssembler::Call(Handle<Code> code_object,
 
 void MacroAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
   if (imm8 == 0) {
-    movd(dst, src);
+    Movd(dst, src);
     return;
   }
   DCHECK_EQ(1, imm8);
@@ -2817,7 +2867,7 @@ void MacroAssembler::Pinsrd(XMMRegister dst, Register src, int8_t imm8) {
     pinsrd(dst, src, imm8);
     return;
   }
-  movd(xmm0, src);
+  Movd(xmm0, src);
   if (imm8 == 1) {
     punpckldq(dst, xmm0);
   } else {
@@ -2836,7 +2886,7 @@ void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
     pinsrd(dst, src, imm8);
     return;
   }
-  movd(xmm0, src);
+  Movd(xmm0, src);
   if (imm8 == 1) {
     punpckldq(dst, xmm0);
   } else {
diff --git a/src/x64/macro-assembler-x64.h b/src/x64/macro-assembler-x64.h
index c89b1ef7a4..c93281b65f 100644
--- a/src/x64/macro-assembler-x64.h
+++ b/src/x64/macro-assembler-x64.h
@@ -899,6 +899,12 @@ class MacroAssembler: public Assembler {
   void Movsd(XMMRegister dst, const Operand& src);
   void Movsd(const Operand& dst, XMMRegister src);
 
+  void Movd(XMMRegister dst, Register src);
+  void Movd(XMMRegister dst, const Operand& src);
+  void Movd(Register dst, XMMRegister src);
+  void Movq(XMMRegister dst, Register src);
+  void Movq(Register dst, XMMRegister src);
+
   // Control Flow
   void Jump(Address destination, RelocInfo::Mode rmode);
   void Jump(ExternalReference ext);
diff --git a/test/cctest/test-assembler-x64.cc b/test/cctest/test-assembler-x64.cc
index 8da1b7ac3a..d974496ca8 100644
--- a/test/cctest/test-assembler-x64.cc
+++ b/test/cctest/test-assembler-x64.cc
@@ -1366,6 +1366,13 @@ TEST(AssemblerX64AVX_sd) {
     __ vcvtlsi2sd(xmm7, xmm7, Operand(rsp, 0));
     __ vsubsd(xmm7, xmm6, xmm7);  // xmm7 is 1.0
     __ vmulsd(xmm1, xmm1, xmm7);
+
+    __ movq(rdx, V8_INT64_C(0x3ff0000000000000));  // 1.0
+    __ vmovq(xmm7, rdx);
+    __ vmulsd(xmm1, xmm1, xmm7);
+    __ movq(Operand(rsp, 0), rdx);
+    __ vmovq(xmm6, Operand(rsp, 0));
+    __ vmulsd(xmm1, xmm1, xmm6);
     __ addq(rsp, Immediate(kDoubleSize * 2));
 
     __ vucomisd(xmm3, xmm1);
diff --git a/test/cctest/test-disasm-x64.cc b/test/cctest/test-disasm-x64.cc
index d401389a10..83e19ba2a6 100644
--- a/test/cctest/test-disasm-x64.cc
+++ b/test/cctest/test-disasm-x64.cc
@@ -511,6 +511,13 @@ TEST(DisasmX64) {
     __ vucomiss(xmm9, xmm1);
     __ vucomiss(xmm8, Operand(rbx, rdx, times_2, 10981));
 
+    __ vmovd(xmm5, rdi);
+    __ vmovd(xmm9, Operand(rbx, rcx, times_4, 10000));
+    __ vmovd(r9, xmm6);
+    __ vmovq(xmm5, rdi);
+    __ vmovq(xmm9, Operand(rbx, rcx, times_4, 10000));
+    __ vmovq(r9, xmm6);
+
     __ vmovapd(xmm7, xmm0);
     __ vmovsd(xmm6, xmm2);
    __ vmovsd(xmm9, Operand(rbx, rcx, times_4, 10000));
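
Note for context (not part of the patch itself): the new MacroAssembler wrappers keep call sites unchanged while the encoding is chosen at run time. Below is a minimal call-site sketch, assuming the usual V8 include path and namespaces; the helper function name is hypothetical and only illustrates how generated code would use the wrapper.

#include "src/x64/macro-assembler-x64.h"  // assumed include path

namespace v8 {
namespace internal {

// Moves the raw bit pattern of a double from an XMM register into a
// general-purpose register, via the dispatching wrapper added above.
void EmitBitcastDoubleToInt64(MacroAssembler* masm, Register dst,
                              XMMRegister src) {
  // With AVX available, MacroAssembler::Movq emits vmovq dst,src
  // (VEX.128.66.0F.W1 7E /r); otherwise it falls back to the legacy SSE2
  // movq dst,src (66 REX.W 0F 7E /r), matching Assembler::movq in this patch.
  masm->Movq(dst, src);
}

}  // namespace internal
}  // namespace v8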