[x64] Consolidate SSE/AVX Float32/Float64 Abs/Neg
1. Move Abspd and Negpd from MacroAssembler into TurboAssembler so that we can use them in the code generator.
2. Add Absps and Negps (Float32 versions of the instructions in 1).
3. Refactor the SSE/AVX Float32/Float64 abs/neg code to use these macro-assembler helpers.
4. Use these helpers in Liftoff too.

This has the benefit of not requiring the masks to be set up in a temporary register; the constants are loaded via an ExternalReference instead. It does require (in the instruction selector) that the input be in a register, since the ExternalReference is an Operand (and the instruction can only have one Operand input).

Bug: v8:11589
Change-Id: I68fafaf31b19ab05ee391aa3d54c45d547a85b34
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3123635
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76520}
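For context, the whole change boils down to replacing mask materialization (pcmpeqd/psrlq into a temporary XMM register) with a bitwise AND/XOR against a constant loaded from memory. The standalone snippet below only illustrates that bit trick; it is not V8 code, and the mask values shown are the usual IEEE-754 sign-bit masks that the address_of_float_abs_constant/address_of_float_neg_constant references are assumed to point at.

#include <cstdint>
#include <cstdio>
#include <cstring>

// abs clears the sign bit (what Andps with the abs mask does);
// neg flips it (what Xorps with the neg mask does).
float BitwiseAbs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= 0x7FFFFFFFu;  // per-lane float abs mask
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}

float BitwiseNeg(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits ^= 0x80000000u;  // per-lane float neg mask
  std::memcpy(&x, &bits, sizeof(bits));
  return x;
}

int main() {
  // Prints: 1.500000 -2.250000
  std::printf("%f %f\n", BitwiseAbs(-1.5f), BitwiseNeg(2.25f));
}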
This commit is contained in:
parent 5f84d2ce74
commit 9996d8aec9
@@ -1562,16 +1562,6 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t high, uint64_t low) {
 
-// ----------------------------------------------------------------------------
-
-void MacroAssembler::Absps(XMMRegister dst) {
-  Andps(dst, ExternalReferenceAsOperand(
-                 ExternalReference::address_of_float_abs_constant()));
-}
-
-void MacroAssembler::Negps(XMMRegister dst) {
-  Xorps(dst, ExternalReferenceAsOperand(
-                 ExternalReference::address_of_float_neg_constant()));
-}
-
 void MacroAssembler::Cmp(Register dst, Handle<Object> source) {
   if (source->IsSmi()) {
     Cmp(dst, Smi::cast(*source));
@@ -2202,14 +2192,44 @@ void TurboAssembler::Blendvpd(XMMRegister dst, XMMRegister src1,
   }
 }
 
-void TurboAssembler::Abspd(XMMRegister dst) {
-  Andps(dst, ExternalReferenceAsOperand(
-                 ExternalReference::address_of_double_abs_constant()));
-}
-
-void TurboAssembler::Negpd(XMMRegister dst) {
-  Xorps(dst, ExternalReferenceAsOperand(
-                 ExternalReference::address_of_double_neg_constant()));
-}
+void TurboAssembler::Absps(XMMRegister dst, XMMRegister src) {
+  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
+    movaps(dst, src);
+    src = dst;
+  }
+  Andps(dst, src,
+        ExternalReferenceAsOperand(
+            ExternalReference::address_of_float_abs_constant()));
+}
+
+void TurboAssembler::Negps(XMMRegister dst, XMMRegister src) {
+  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
+    movaps(dst, src);
+    src = dst;
+  }
+  Xorps(dst, src,
+        ExternalReferenceAsOperand(
+            ExternalReference::address_of_float_neg_constant()));
+}
+
+void TurboAssembler::Abspd(XMMRegister dst, XMMRegister src) {
+  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
+    movaps(dst, src);
+    src = dst;
+  }
+  Andps(dst, src,
+        ExternalReferenceAsOperand(
+            ExternalReference::address_of_double_abs_constant()));
+}
+
+void TurboAssembler::Negpd(XMMRegister dst, XMMRegister src) {
+  if (!CpuFeatures::IsSupported(AVX) && dst != src) {
+    movaps(dst, src);
+    src = dst;
+  }
+  Xorps(dst, src,
+        ExternalReferenceAsOperand(
+            ExternalReference::address_of_double_neg_constant()));
+}
 
 void TurboAssembler::Lzcntl(Register dst, Register src) {
@@ -470,8 +470,10 @@ class V8_EXPORT_PRIVATE TurboAssembler
   void Blendvpd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
                 XMMRegister mask);
 
-  void Abspd(XMMRegister dst);
-  void Negpd(XMMRegister dst);
+  void Absps(XMMRegister dst, XMMRegister src);
+  void Negps(XMMRegister dst, XMMRegister src);
+  void Abspd(XMMRegister dst, XMMRegister src);
+  void Negpd(XMMRegister dst, XMMRegister src);
 
   void CompareRoot(Register with, RootIndex index);
   void CompareRoot(Operand with, RootIndex index);
@@ -820,10 +822,6 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
   void Pop(Operand dst);
   void PopQuad(Operand dst);
 
-  // ---------------------------------------------------------------------------
-  // SIMD macros.
-  void Absps(XMMRegister dst);
-  void Negps(XMMRegister dst);
 
   // Generates a trampoline to jump to the off-heap instruction stream.
   void JumpToInstructionStream(Address entry);
@@ -1597,22 +1597,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       // when there is a (v)mulss depending on the result.
       __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
       break;
-    case kSSEFloat32Abs: {
-      // TODO(bmeurer): Use RIP relative 128-bit constants.
-      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
-      __ Pcmpeqd(tmp, tmp);
-      __ Psrlq(tmp, byte{33});
-      __ Andps(i.OutputDoubleRegister(), tmp);
-      break;
-    }
-    case kSSEFloat32Neg: {
-      // TODO(bmeurer): Use RIP relative 128-bit constants.
-      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
-      __ Pcmpeqd(tmp, tmp);
-      __ Psllq(tmp, byte{31});
-      __ Xorps(i.OutputDoubleRegister(), tmp);
-      break;
-    }
     case kSSEFloat32Sqrt:
       ASSEMBLE_SSE_UNOP(sqrtss);
       break;
@@ -1809,16 +1793,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ bind(ool->exit());
       break;
     }
-    case kX64F64x2Abs:
-    case kSSEFloat64Abs: {
-      __ Abspd(i.OutputDoubleRegister());
-      break;
-    }
-    case kX64F64x2Neg:
-    case kSSEFloat64Neg: {
-      __ Negpd(i.OutputDoubleRegister());
-      break;
-    }
     case kSSEFloat64Sqrt:
       ASSEMBLE_SSE_UNOP(Sqrtsd);
       break;
@@ -2071,56 +2045,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       // when there is a (v)mulsd depending on the result.
       __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
       break;
-    case kAVXFloat32Abs: {
-      // TODO(bmeurer): Use RIP relative 128-bit constants.
-      CpuFeatureScope avx_scope(tasm(), AVX);
-      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
-      __ vpcmpeqd(tmp, tmp, tmp);
-      __ vpsrlq(tmp, tmp, 33);
-      if (instr->InputAt(0)->IsFPRegister()) {
-        __ vandps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
-      } else {
-        __ vandps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
-      }
+    case kX64Float32Abs: {
+      __ Absps(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
-    case kAVXFloat32Neg: {
-      // TODO(bmeurer): Use RIP relative 128-bit constants.
-      CpuFeatureScope avx_scope(tasm(), AVX);
-      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
-      __ vpcmpeqd(tmp, tmp, tmp);
-      __ vpsllq(tmp, tmp, 31);
-      if (instr->InputAt(0)->IsFPRegister()) {
-        __ vxorps(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
-      } else {
-        __ vxorps(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
-      }
+    case kX64Float32Neg: {
+      __ Negps(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
-    case kAVXFloat64Abs: {
-      // TODO(bmeurer): Use RIP relative 128-bit constants.
-      CpuFeatureScope avx_scope(tasm(), AVX);
-      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
-      __ vpcmpeqd(tmp, tmp, tmp);
-      __ vpsrlq(tmp, tmp, 1);
-      if (instr->InputAt(0)->IsFPRegister()) {
-        __ vandpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
-      } else {
-        __ vandpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
-      }
+    case kX64F64x2Abs:
+    case kX64Float64Abs: {
+      __ Abspd(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
-    case kAVXFloat64Neg: {
-      // TODO(bmeurer): Use RIP relative 128-bit constants.
-      CpuFeatureScope avx_scope(tasm(), AVX);
-      XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
-      __ vpcmpeqd(tmp, tmp, tmp);
-      __ vpsllq(tmp, tmp, 63);
-      if (instr->InputAt(0)->IsFPRegister()) {
-        __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputDoubleRegister(0));
-      } else {
-        __ vxorpd(i.OutputDoubleRegister(), tmp, i.InputOperand(0));
-      }
+    case kX64F64x2Neg:
+    case kX64Float64Neg: {
+      __ Negpd(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
       break;
     }
     case kSSEFloat64SilenceNaN:
@@ -11,392 +11,388 @@ namespace compiler {
 
 // X64-specific opcodes that specify which assembly sequence to emit.
 // Most opcodes specify a single instruction.
 #define TARGET_ARCH_OPCODE_LIST(V) \
   V(X64Add)                        \
   ...
   V(SSEFloat32Mul)                 \
   V(SSEFloat32Div)                 \
-  V(SSEFloat32Abs)                 \
-  V(SSEFloat32Neg)                 \
   V(SSEFloat32Sqrt)                \
   ...
   V(SSEFloat64Div)                 \
   V(SSEFloat64Mod)                 \
-  V(SSEFloat64Abs)                 \
-  V(SSEFloat64Neg)                 \
   V(SSEFloat64Sqrt)                \
   ...
   V(AVXFloat64Mul)                 \
   V(AVXFloat64Div)                 \
-  V(AVXFloat64Abs)                 \
-  V(AVXFloat64Neg)                 \
-  V(AVXFloat32Abs)                 \
-  V(AVXFloat32Neg)                 \
+  V(X64Float64Abs)                 \
+  V(X64Float64Neg)                 \
+  V(X64Float32Abs)                 \
+  V(X64Float32Neg)                 \
   V(X64Movsxbl)                    \
   ...
   V(X64Word64AtomicExchangeUint64) \
   V(X64Word64AtomicCompareExchangeUint64)
 
 // Addressing modes represent the "shape" of inputs to an instruction.
@@ -62,8 +62,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kSSEFloat32Sub:
     case kSSEFloat32Mul:
     case kSSEFloat32Div:
-    case kSSEFloat32Abs:
-    case kSSEFloat32Neg:
     case kSSEFloat32Sqrt:
     case kSSEFloat32Round:
     case kSSEFloat32ToFloat64:
@@ -73,8 +71,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kSSEFloat64Mul:
     case kSSEFloat64Div:
     case kSSEFloat64Mod:
-    case kSSEFloat64Abs:
-    case kSSEFloat64Neg:
     case kSSEFloat64Sqrt:
     case kSSEFloat64Round:
     case kSSEFloat32Max:
@@ -114,10 +110,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kAVXFloat64Sub:
     case kAVXFloat64Mul:
     case kAVXFloat64Div:
-    case kAVXFloat64Abs:
-    case kAVXFloat64Neg:
-    case kAVXFloat32Abs:
-    case kAVXFloat32Neg:
+    case kX64Float64Abs:
+    case kX64Float64Neg:
+    case kX64Float32Abs:
+    case kX64Float32Neg:
     case kX64BitcastFI:
     case kX64BitcastDL:
     case kX64BitcastIF:
@@ -451,18 +447,18 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
     case kX64Imul32:
     case kX64ImulHigh32:
     case kX64UmulHigh32:
+    case kX64Float32Abs:
+    case kX64Float32Neg:
+    case kX64Float64Abs:
+    case kX64Float64Neg:
     case kSSEFloat32Cmp:
     case kSSEFloat32Add:
     case kSSEFloat32Sub:
-    case kSSEFloat32Abs:
-    case kSSEFloat32Neg:
     case kSSEFloat64Cmp:
     case kSSEFloat64Add:
     case kSSEFloat64Sub:
     case kSSEFloat64Max:
     case kSSEFloat64Min:
-    case kSSEFloat64Abs:
-    case kSSEFloat64Neg:
       return 3;
     case kSSEFloat32Mul:
     case kSSEFloat32ToFloat64:
@@ -1679,15 +1679,12 @@ void VisitFloatBinop(InstructionSelector* selector, Node* node,
 }
 
 void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
-                    ArchOpcode avx_opcode, ArchOpcode sse_opcode) {
+                    ArchOpcode opcode) {
   X64OperandGenerator g(selector);
-  InstructionOperand temps[] = {g.TempDoubleRegister()};
   if (selector->IsSupported(AVX)) {
-    selector->Emit(avx_opcode, g.DefineAsRegister(node), g.UseUnique(input),
-                   arraysize(temps), temps);
+    selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(input));
   } else {
-    selector->Emit(sse_opcode, g.DefineSameAsFirst(node), g.UseRegister(input),
-                   arraysize(temps), temps);
+    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(input));
   }
 }
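The selector change above is why the commit message notes that the input must be in a register: the RIP-relative mask constant already occupies the instruction's single memory-operand slot. A rough model of the constraint choice follows (plain C++, not V8 code; the names are made up for illustration):

#include <cstdio>

struct UnopConstraints {
  bool dst_same_as_first;   // SSE andps/xorps overwrite their first operand
  bool input_in_register;   // the mask constant uses the one memory operand
};

UnopConstraints FloatUnopConstraints(bool has_avx) {
  // AVX has three-operand vandps/vxorps, so dst can be any register;
  // plain SSE is destructive, so dst must alias the register input.
  return {/*dst_same_as_first=*/!has_avx, /*input_in_register=*/true};
}

int main() {
  std::printf("SSE same-as-first: %d\n", FloatUnopConstraints(false).dst_same_as_first);
  std::printf("AVX same-as-first: %d\n", FloatUnopConstraints(true).dst_same_as_first);
}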
@@ -1827,7 +1824,7 @@ void InstructionSelector::VisitFloat32Div(Node* node) {
 }
 
 void InstructionSelector::VisitFloat32Abs(Node* node) {
-  VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat32Abs, kSSEFloat32Abs);
+  VisitFloatUnop(this, node, node->InputAt(0), kX64Float32Abs);
 }
 
 void InstructionSelector::VisitFloat32Max(Node* node) {
@@ -1871,7 +1868,7 @@ void InstructionSelector::VisitFloat64Min(Node* node) {
 }
 
 void InstructionSelector::VisitFloat64Abs(Node* node) {
-  VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat64Abs, kSSEFloat64Abs);
+  VisitFloatUnop(this, node, node->InputAt(0), kX64Float64Abs);
 }
 
 void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
@@ -1879,11 +1876,11 @@ void InstructionSelector::VisitFloat64RoundTiesAway(Node* node) {
 }
 
 void InstructionSelector::VisitFloat32Neg(Node* node) {
-  VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat32Neg, kSSEFloat32Neg);
+  VisitFloatUnop(this, node, node->InputAt(0), kX64Float32Neg);
 }
 
 void InstructionSelector::VisitFloat64Neg(Node* node) {
-  VisitFloatUnop(this, node, node->InputAt(0), kAVXFloat64Neg, kSSEFloat64Neg);
+  VisitFloatUnop(this, node, node->InputAt(0), kX64Float64Neg);
 }
 
 void InstructionSelector::VisitFloat64Ieee754Binop(Node* node,
@@ -3285,15 +3282,11 @@ void InstructionSelector::VisitS128AndNot(Node* node) {
 }
 
 void InstructionSelector::VisitF64x2Abs(Node* node) {
-  X64OperandGenerator g(this);
-  Emit(kX64F64x2Abs, g.DefineSameAsFirst(node),
-       g.UseRegister(node->InputAt(0)));
+  VisitFloatUnop(this, node, node->InputAt(0), kX64F64x2Abs);
 }
 
 void InstructionSelector::VisitF64x2Neg(Node* node) {
-  X64OperandGenerator g(this);
-  Emit(kX64F64x2Neg, g.DefineSameAsFirst(node),
-       g.UseRegister(node->InputAt(0)));
+  VisitFloatUnop(this, node, node->InputAt(0), kX64F64x2Neg);
 }
 
 void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
@@ -3484,28 +3484,12 @@ void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
 
 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
                                       LiftoffRegister src) {
-  if (dst.fp() == src.fp()) {
-    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-    Psrld(kScratchDoubleReg, static_cast<byte>(1));
-    Andps(dst.fp(), kScratchDoubleReg);
-  } else {
-    Pcmpeqd(dst.fp(), dst.fp());
-    Psrld(dst.fp(), static_cast<byte>(1));
-    Andps(dst.fp(), src.fp());
-  }
+  Absps(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_f32x4_neg(LiftoffRegister dst,
                                       LiftoffRegister src) {
-  if (dst.fp() == src.fp()) {
-    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-    Pslld(kScratchDoubleReg, byte{31});
-    Xorps(dst.fp(), kScratchDoubleReg);
-  } else {
-    Pcmpeqd(dst.fp(), dst.fp());
-    Pslld(dst.fp(), byte{31});
-    Xorps(dst.fp(), src.fp());
-  }
+  Negps(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_f32x4_sqrt(LiftoffRegister dst,
@@ -3640,28 +3624,12 @@ void LiftoffAssembler::emit_f32x4_pmax(LiftoffRegister dst, LiftoffRegister lhs,
 
 void LiftoffAssembler::emit_f64x2_abs(LiftoffRegister dst,
                                       LiftoffRegister src) {
-  if (dst.fp() == src.fp()) {
-    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-    Psrlq(kScratchDoubleReg, byte{1});
-    Andpd(dst.fp(), kScratchDoubleReg);
-  } else {
-    Pcmpeqd(dst.fp(), dst.fp());
-    Psrlq(dst.fp(), byte{1});
-    Andpd(dst.fp(), src.fp());
-  }
+  Abspd(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_f64x2_neg(LiftoffRegister dst,
                                       LiftoffRegister src) {
-  if (dst.fp() == src.fp()) {
-    Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-    Psllq(kScratchDoubleReg, static_cast<byte>(63));
-    Xorpd(dst.fp(), kScratchDoubleReg);
-  } else {
-    Pcmpeqd(dst.fp(), dst.fp());
-    Psllq(dst.fp(), static_cast<byte>(63));
-    Xorpd(dst.fp(), src.fp());
-  }
+  Negpd(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_f64x2_sqrt(LiftoffRegister dst,
@@ -893,7 +893,7 @@ void TestFloat32x4Abs(MacroAssembler* masm, Label* exit, float x, float y,
   __ Movss(Operand(rsp, 3 * kFloatSize), xmm4);
   __ Movups(xmm0, Operand(rsp, 0));
 
-  __ Absps(xmm0);
+  __ Absps(xmm0, xmm0);
   __ Movups(Operand(rsp, 0), xmm0);
 
   __ incq(rax);
@@ -930,7 +930,7 @@ void TestFloat32x4Neg(MacroAssembler* masm, Label* exit, float x, float y,
   __ Movss(Operand(rsp, 3 * kFloatSize), xmm4);
   __ Movups(xmm0, Operand(rsp, 0));
 
-  __ Negps(xmm0);
+  __ Negps(xmm0, xmm0);
   __ Movups(Operand(rsp, 0), xmm0);
 
   __ incq(rax);
@@ -962,7 +962,7 @@ void TestFloat64x2Abs(MacroAssembler* masm, Label* exit, double x, double y) {
   __ Movsd(Operand(rsp, 1 * kDoubleSize), xmm2);
   __ movupd(xmm0, Operand(rsp, 0));
 
-  __ Abspd(xmm0);
+  __ Abspd(xmm0, xmm0);
   __ movupd(Operand(rsp, 0), xmm0);
 
   __ incq(rax);
@@ -986,7 +986,7 @@ void TestFloat64x2Neg(MacroAssembler* masm, Label* exit, double x, double y) {
   __ Movsd(Operand(rsp, 1 * kDoubleSize), xmm2);
   __ movupd(xmm0, Operand(rsp, 0));
 
-  __ Negpd(xmm0);
+  __ Negpd(xmm0, xmm0);
   __ movupd(Operand(rsp, 0), xmm0);
 
   __ incq(rax);
@@ -1579,7 +1579,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) {
   m.Return(n);
   Stream s = m.Build();
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kSSEFloat32Abs, s[0]->arch_opcode());
+  EXPECT_EQ(kX64Float32Abs, s[0]->arch_opcode());
   ASSERT_EQ(1U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   ASSERT_EQ(1U, s[0]->OutputCount());
@@ -1594,7 +1594,7 @@ TEST_F(InstructionSelectorTest, Float32Abs) {
   m.Return(n);
   Stream s = m.Build(AVX);
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kAVXFloat32Abs, s[0]->arch_opcode());
+  EXPECT_EQ(kX64Float32Abs, s[0]->arch_opcode());
   ASSERT_EQ(1U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   ASSERT_EQ(1U, s[0]->OutputCount());
@@ -1612,7 +1612,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
   m.Return(n);
   Stream s = m.Build();
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kSSEFloat64Abs, s[0]->arch_opcode());
+  EXPECT_EQ(kX64Float64Abs, s[0]->arch_opcode());
   ASSERT_EQ(1U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   ASSERT_EQ(1U, s[0]->OutputCount());
@@ -1627,7 +1627,7 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
   m.Return(n);
   Stream s = m.Build(AVX);
   ASSERT_EQ(1U, s.size());
-  EXPECT_EQ(kAVXFloat64Abs, s[0]->arch_opcode());
+  EXPECT_EQ(kX64Float64Abs, s[0]->arch_opcode());
   ASSERT_EQ(1U, s[0]->InputCount());
   EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
   ASSERT_EQ(1U, s[0]->OutputCount());