[riscv64] Add tests for RVV VI VF instructions

Implement `LiftoffAssembler::emit_i16x8_sconvert_i32x4` for riscv.
Add tests for rvv integer and floating-point instructions.
Add simulator support for rvv instructions, e.g. `vfmadd`, `vnclip`.
Fix the order of operands for `vfdiv.vv`.

Bug: v8:11976
Change-Id: I0691ac66771468533c5994be1fc8a86b09d3c738
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3225319
Reviewed-by: Yahan Lu <yahan@iscas.ac.cn>
Reviewed-by: Michael Lippautz <mlippautz@chromium.org>
Commit-Queue: Yahan Lu <yahan@iscas.ac.cn>
Cr-Commit-Position: refs/heads/main@{#77595}
This commit is contained in:
Yujie Wang 2021-10-28 14:33:02 +01:00 committed by V8 LUCI CQ
parent bddb7b02d3
commit 4240985a1e
11 changed files with 1020 additions and 45 deletions

View File

@ -251,6 +251,7 @@ Yi Wang <wangyi8848@gmail.com>
Yong Wang <ccyongwang@tencent.com>
Youfeng Hao <ajihyf@gmail.com>
Yu Yin <xwafish@gmail.com>
Yujie Wang <hex6770@gmail.com>
Yuri Iozzelli <yuri@leaningtech.com>
Yusif Khudhur <yusif.khudhur@gmail.com>
Zac Hansen <xaxxon@gmail.com>

View File

@ -1172,6 +1172,17 @@ void Assembler::GenInstrV(uint8_t funct6, Opcode opcode, Register rd,
emit(instr);
}
// OPFVV
void Assembler::GenInstrV(uint8_t funct6, Opcode opcode, FPURegister fd,
VRegister vs1, VRegister vs2, MaskType mask) {
DCHECK(opcode == OP_FVV);
Instr instr = (funct6 << kRvvFunct6Shift) | opcode | (mask << kRvvVmShift) |
((fd.code() & 0x1F) << kRvvVdShift) |
((vs1.code() & 0x1F) << kRvvVs1Shift) |
((vs2.code() & 0x1F) << kRvvVs2Shift);
emit(instr);
}
// OPIVX OPMVX
void Assembler::GenInstrV(uint8_t funct6, Opcode opcode, VRegister vd,
Register rs1, VRegister vs2, MaskType mask) {
@ -2561,6 +2572,26 @@ void Assembler::vrgather_vx(VRegister vd, VRegister vs2, Register rs1,
GenInstrV(funct6, OP_FVF, vd, fs1, vs2, mask); \
}
// Generates the `name`_vv assembler method for an OPFVV fused multiply-add
// instruction. FMA forms pass (vd, vs1, vs2) because vd is both an input
// (addend or multiplicand) and the destination.
#define DEFINE_OPFVV_FMA(name, funct6) \
  void Assembler::name##_vv(VRegister vd, VRegister vs1, VRegister vs2, \
                            MaskType mask) { \
    GenInstrV(funct6, OP_FVV, vd, vs1, vs2, mask); \
  }
// Generates the `name`_vf assembler method for an OPFVF fused multiply-add
// instruction: the scalar FP register fs1 supplies one multiplicand while vd
// is simultaneously a source and the destination.
#define DEFINE_OPFVF_FMA(name, funct6) \
  void Assembler::name##_vf(VRegister vd, FPURegister fs1, VRegister vs2, \
                            MaskType mask) { \
    GenInstrV(funct6, OP_FVF, vd, fs1, vs2, mask); \
  }
// vfmv.v.f: splat the scalar FP register fs1 into every element of vd.
// Encoded as VMV with vs2 = v0 per the OP-FVF encoding of this instruction.
void Assembler::vfmv_vf(VRegister vd, FPURegister fs1, MaskType mask) {
  GenInstrV(VMV_FUNCT6, OP_FVF, vd, fs1, v0, mask);
}
// vfmv.f.s: copy element 0 of vector vs2 into the scalar FP register fd.
// Encoded under VWFUNARY0 with the vs1 field fixed to 0 (here v0).
void Assembler::vfmv_fs(FPURegister fd, VRegister vs2, MaskType mask) {
  GenInstrV(VWFUNARY0_FUNCT6, OP_FVV, fd, v0, vs2, mask);
}
DEFINE_OPIVV(vadd, VADD_FUNCT6)
DEFINE_OPIVX(vadd, VADD_FUNCT6)
DEFINE_OPIVI(vadd, VADD_FUNCT6)
@ -2663,11 +2694,40 @@ DEFINE_OPFVV(vfsngjn, VFSGNJN_FUNCT6)
DEFINE_OPFVF(vfsngjn, VFSGNJN_FUNCT6)
DEFINE_OPFVV(vfsngjx, VFSGNJX_FUNCT6)
DEFINE_OPFVF(vfsngjx, VFSGNJX_FUNCT6)
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
DEFINE_OPFVV_FMA(vfmadd, VFMADD_FUNCT6)
DEFINE_OPFVF_FMA(vfmadd, VFMADD_FUNCT6)
DEFINE_OPFVV_FMA(vfmsub, VFMSUB_FUNCT6)
DEFINE_OPFVF_FMA(vfmsub, VFMSUB_FUNCT6)
DEFINE_OPFVV_FMA(vfmacc, VFMACC_FUNCT6)
DEFINE_OPFVF_FMA(vfmacc, VFMACC_FUNCT6)
DEFINE_OPFVV_FMA(vfmsac, VFMSAC_FUNCT6)
DEFINE_OPFVF_FMA(vfmsac, VFMSAC_FUNCT6)
DEFINE_OPFVV_FMA(vfnmadd, VFNMADD_FUNCT6)
DEFINE_OPFVF_FMA(vfnmadd, VFNMADD_FUNCT6)
DEFINE_OPFVV_FMA(vfnmsub, VFNMSUB_FUNCT6)
DEFINE_OPFVF_FMA(vfnmsub, VFNMSUB_FUNCT6)
DEFINE_OPFVV_FMA(vfnmacc, VFNMACC_FUNCT6)
DEFINE_OPFVF_FMA(vfnmacc, VFNMACC_FUNCT6)
DEFINE_OPFVV_FMA(vfnmsac, VFNMSAC_FUNCT6)
DEFINE_OPFVF_FMA(vfnmsac, VFNMSAC_FUNCT6)
// Vector Narrowing Fixed-Point Clip Instructions
DEFINE_OPIVV(vnclip, VNCLIP_FUNCT6)
DEFINE_OPIVX(vnclip, VNCLIP_FUNCT6)
DEFINE_OPIVI(vnclip, VNCLIP_FUNCT6)
DEFINE_OPIVV(vnclipu, VNCLIPU_FUNCT6)
DEFINE_OPIVX(vnclipu, VNCLIPU_FUNCT6)
DEFINE_OPIVI(vnclipu, VNCLIPU_FUNCT6)
#undef DEFINE_OPIVI
#undef DEFINE_OPIVV
#undef DEFINE_OPIVX
#undef DEFINE_OPFVV
#undef DEFINE_OPFVF
#undef DEFINE_OPFVV_FMA
#undef DEFINE_OPFVF_FMA
void Assembler::vsetvli(Register rd, Register rs1, VSew vsew, Vlmul vlmul,
TailAgnosticType tail, MaskAgnosticType mask) {

View File

@ -746,6 +746,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vmadc_vx(VRegister vd, Register rs1, VRegister vs2);
void vmadc_vi(VRegister vd, uint8_t imm5, VRegister vs2);
void vfmv_vf(VRegister vd, FPURegister fs1, MaskType mask = NoMask);
void vfmv_fs(FPURegister fd, VRegister vs2, MaskType mask = NoMask);
#define DEFINE_OPIVV(name, funct6) \
void name##_vv(VRegister vd, VRegister vs2, VRegister vs1, \
MaskType mask = NoMask);
@ -774,6 +777,14 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void name##_vf(VRegister vd, VRegister vs2, FPURegister fs1, \
MaskType mask = NoMask);
#define DEFINE_OPFVV_FMA(name, funct6) \
void name##_vv(VRegister vd, VRegister vs1, VRegister vs2, \
MaskType mask = NoMask);
#define DEFINE_OPFVF_FMA(name, funct6) \
void name##_vf(VRegister vd, FPURegister fs1, VRegister vs2, \
MaskType mask = NoMask);
DEFINE_OPIVV(vadd, VADD_FUNCT6)
DEFINE_OPIVX(vadd, VADD_FUNCT6)
DEFINE_OPIVI(vadd, VADD_FUNCT6)
@ -881,6 +892,32 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
DEFINE_OPFVV(vfsngjx, VFSGNJX_FUNCT6)
DEFINE_OPFVF(vfsngjx, VFSGNJX_FUNCT6)
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
DEFINE_OPFVV_FMA(vfmadd, VFMADD_FUNCT6)
DEFINE_OPFVF_FMA(vfmadd, VFMADD_FUNCT6)
DEFINE_OPFVV_FMA(vfmsub, VFMSUB_FUNCT6)
DEFINE_OPFVF_FMA(vfmsub, VFMSUB_FUNCT6)
DEFINE_OPFVV_FMA(vfmacc, VFMACC_FUNCT6)
DEFINE_OPFVF_FMA(vfmacc, VFMACC_FUNCT6)
DEFINE_OPFVV_FMA(vfmsac, VFMSAC_FUNCT6)
DEFINE_OPFVF_FMA(vfmsac, VFMSAC_FUNCT6)
DEFINE_OPFVV_FMA(vfnmadd, VFNMADD_FUNCT6)
DEFINE_OPFVF_FMA(vfnmadd, VFNMADD_FUNCT6)
DEFINE_OPFVV_FMA(vfnmsub, VFNMSUB_FUNCT6)
DEFINE_OPFVF_FMA(vfnmsub, VFNMSUB_FUNCT6)
DEFINE_OPFVV_FMA(vfnmacc, VFNMACC_FUNCT6)
DEFINE_OPFVF_FMA(vfnmacc, VFNMACC_FUNCT6)
DEFINE_OPFVV_FMA(vfnmsac, VFNMSAC_FUNCT6)
DEFINE_OPFVF_FMA(vfnmsac, VFNMSAC_FUNCT6)
// Vector Narrowing Fixed-Point Clip Instructions
DEFINE_OPIVV(vnclip, VNCLIP_FUNCT6)
DEFINE_OPIVX(vnclip, VNCLIP_FUNCT6)
DEFINE_OPIVI(vnclip, VNCLIP_FUNCT6)
DEFINE_OPIVV(vnclipu, VNCLIPU_FUNCT6)
DEFINE_OPIVX(vnclipu, VNCLIPU_FUNCT6)
DEFINE_OPIVI(vnclipu, VNCLIPU_FUNCT6)
#undef DEFINE_OPIVI
#undef DEFINE_OPIVV
#undef DEFINE_OPIVX
@ -888,6 +925,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
#undef DEFINE_OPMVX
#undef DEFINE_OPFVV
#undef DEFINE_OPFVF
#undef DEFINE_OPFVV_FMA
#undef DEFINE_OPFVF_FMA
#define DEFINE_VFUNARY(name, funct6, vs1) \
void name(VRegister vd, VRegister vs2, MaskType mask = NoMask) { \
@ -1497,6 +1536,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// OPMVV OPFVV
void GenInstrV(uint8_t funct6, Opcode opcode, Register rd, VRegister vs1,
VRegister vs2, MaskType mask = NoMask);
// OPFVV
void GenInstrV(uint8_t funct6, Opcode opcode, FPURegister fd, VRegister vs1,
VRegister vs2, MaskType mask = NoMask);
// OPIVX OPMVX
void GenInstrV(uint8_t funct6, Opcode opcode, VRegister vd, Register rs1,

View File

@ -774,6 +774,7 @@ enum Opcode : uint32_t {
RO_V_VMV_VI = OP_IVI | (VMV_FUNCT6 << kRvvFunct6Shift),
RO_V_VMV_VV = OP_IVV | (VMV_FUNCT6 << kRvvFunct6Shift),
RO_V_VMV_VX = OP_IVX | (VMV_FUNCT6 << kRvvFunct6Shift),
RO_V_VFMV_VF = OP_FVF | (VMV_FUNCT6 << kRvvFunct6Shift),
RO_V_VMERGE_VI = RO_V_VMV_VI,
RO_V_VMERGE_VV = RO_V_VMV_VV,
@ -849,6 +850,9 @@ enum Opcode : uint32_t {
RO_V_VWXUNARY0 = OP_MVV | (VWXUNARY0_FUNCT6 << kRvvFunct6Shift),
RO_V_VRXUNARY0 = OP_MVX | (VRXUNARY0_FUNCT6 << kRvvFunct6Shift),
VWFUNARY0_FUNCT6 = 0b010000,
RO_V_VFMV_FS = OP_FVV | (VWFUNARY0_FUNCT6 << kRvvFunct6Shift),
VREDMAXU_FUNCT6 = 0b000110,
RO_V_VREDMAXU = OP_MVV | (VREDMAXU_FUNCT6 << kRvvFunct6Shift),
VREDMAX_FUNCT6 = 0b000111,
@ -929,6 +933,48 @@ enum Opcode : uint32_t {
VFSGNJX_FUNCT6 = 0b001010,
RO_V_VFSGNJX_VV = OP_FVV | (VFSGNJX_FUNCT6 << kRvvFunct6Shift),
RO_V_VFSGNJX_VF = OP_FVF | (VFSGNJX_FUNCT6 << kRvvFunct6Shift),
VFMADD_FUNCT6 = 0b101000,
RO_V_VFMADD_VV = OP_FVV | (VFMADD_FUNCT6 << kRvvFunct6Shift),
RO_V_VFMADD_VF = OP_FVF | (VFMADD_FUNCT6 << kRvvFunct6Shift),
VFNMADD_FUNCT6 = 0b101001,
RO_V_VFNMADD_VV = OP_FVV | (VFNMADD_FUNCT6 << kRvvFunct6Shift),
RO_V_VFNMADD_VF = OP_FVF | (VFNMADD_FUNCT6 << kRvvFunct6Shift),
VFMSUB_FUNCT6 = 0b101010,
RO_V_VFMSUB_VV = OP_FVV | (VFMSUB_FUNCT6 << kRvvFunct6Shift),
RO_V_VFMSUB_VF = OP_FVF | (VFMSUB_FUNCT6 << kRvvFunct6Shift),
VFNMSUB_FUNCT6 = 0b101011,
RO_V_VFNMSUB_VV = OP_FVV | (VFNMSUB_FUNCT6 << kRvvFunct6Shift),
RO_V_VFNMSUB_VF = OP_FVF | (VFNMSUB_FUNCT6 << kRvvFunct6Shift),
VFMACC_FUNCT6 = 0b101100,
RO_V_VFMACC_VV = OP_FVV | (VFMACC_FUNCT6 << kRvvFunct6Shift),
RO_V_VFMACC_VF = OP_FVF | (VFMACC_FUNCT6 << kRvvFunct6Shift),
VFNMACC_FUNCT6 = 0b101101,
RO_V_VFNMACC_VV = OP_FVV | (VFNMACC_FUNCT6 << kRvvFunct6Shift),
RO_V_VFNMACC_VF = OP_FVF | (VFNMACC_FUNCT6 << kRvvFunct6Shift),
VFMSAC_FUNCT6 = 0b101110,
RO_V_VFMSAC_VV = OP_FVV | (VFMSAC_FUNCT6 << kRvvFunct6Shift),
RO_V_VFMSAC_VF = OP_FVF | (VFMSAC_FUNCT6 << kRvvFunct6Shift),
VFNMSAC_FUNCT6 = 0b101111,
RO_V_VFNMSAC_VV = OP_FVV | (VFNMSAC_FUNCT6 << kRvvFunct6Shift),
RO_V_VFNMSAC_VF = OP_FVF | (VFNMSAC_FUNCT6 << kRvvFunct6Shift),
VNCLIP_FUNCT6 = 0b101111,
RO_V_VNCLIP_WV = OP_IVV | (VNCLIP_FUNCT6 << kRvvFunct6Shift),
RO_V_VNCLIP_WX = OP_IVX | (VNCLIP_FUNCT6 << kRvvFunct6Shift),
RO_V_VNCLIP_WI = OP_IVI | (VNCLIP_FUNCT6 << kRvvFunct6Shift),
VNCLIPU_FUNCT6 = 0b101110,
RO_V_VNCLIPU_WV = OP_IVV | (VNCLIPU_FUNCT6 << kRvvFunct6Shift),
RO_V_VNCLIPU_WX = OP_IVX | (VNCLIPU_FUNCT6 << kRvvFunct6Shift),
RO_V_VNCLIPU_WI = OP_IVI | (VNCLIPU_FUNCT6 << kRvvFunct6Shift),
};
// ----- Emulated conditions.

View File

@ -1986,6 +1986,12 @@ void Decoder::DecodeRvvIVV(Instruction* instr) {
UNREACHABLE();
}
break;
case RO_V_VNCLIP_WV:
Format(instr, "vnclip.wv 'vd, 'vs2, 'vs1");
break;
case RO_V_VNCLIPU_WV:
Format(instr, "vnclipu.wv 'vd, 'vs2, 'vs1");
break;
default:
UNSUPPORTED_RISCV();
break;
@ -2067,6 +2073,12 @@ void Decoder::DecodeRvvIVI(Instruction* instr) {
UNREACHABLE();
}
break;
case RO_V_VNCLIP_WI:
Format(instr, "vnclip.wi 'vd, 'vs2, 'uimm5");
break;
case RO_V_VNCLIPU_WI:
Format(instr, "vnclipu.wi 'vd, 'vs2, 'uimm5");
break;
default:
UNSUPPORTED_RISCV();
break;
@ -2172,6 +2184,12 @@ void Decoder::DecodeRvvIVX(Instruction* instr) {
case RO_V_VSRL_VX:
Format(instr, "vsrl.vx 'vd, 'vs2, 'rs1");
break;
case RO_V_VNCLIP_WX:
Format(instr, "vnclip.wx 'vd, 'vs2, 'rs1");
break;
case RO_V_VNCLIPU_WX:
Format(instr, "vnclipu.wx 'vd, 'vs2, 'rs1");
break;
default:
UNSUPPORTED_RISCV();
break;
@ -2303,6 +2321,37 @@ void Decoder::DecodeRvvFVV(Instruction* instr) {
case RO_V_VFMUL_VV:
Format(instr, "vfmul.vv 'vd, 'vs2, 'vs1'vm");
break;
case RO_V_VFMADD_VV:
Format(instr, "vfmadd.vv 'vd, 'vs1, 'vs2'vm");
break;
case RO_V_VFNMADD_VV:
Format(instr, "vfnmadd.vv 'vd, 'vs1, 'vs2'vm");
break;
case RO_V_VFMSUB_VV:
Format(instr, "vfmsub.vv 'vd, 'vs1, 'vs2'vm");
break;
case RO_V_VFNMSUB_VV:
Format(instr, "vfnmsub.vv 'vd, 'vs1, 'vs2'vm");
break;
case RO_V_VFMACC_VV:
Format(instr, "vfmacc.vv 'vd, 'vs1, 'vs2'vm");
break;
case RO_V_VFNMACC_VV:
Format(instr, "vfnmacc.vv 'vd, 'vs1, 'vs2'vm");
break;
case RO_V_VFMSAC_VV:
Format(instr, "vfmsac.vv 'vd, 'vs1, 'vs2'vm");
break;
case RO_V_VFNMSAC_VV:
Format(instr, "vfnmsac.vv 'vd, 'vs1, 'vs2'vm");
break;
case RO_V_VFMV_FS:
if (instr->Vs1Value() == 0x0) {
Format(instr, "vfmv.f.s 'fd, 'vs2");
} else {
UNSUPPORTED_RISCV();
}
break;
default:
UNSUPPORTED_RISCV();
break;
@ -2321,6 +2370,33 @@ void Decoder::DecodeRvvFVF(Instruction* instr) {
case RO_V_VFSGNJX_VF:
Format(instr, "vfsgnjn.vf 'vd, 'vs2, 'fs1'vm");
break;
case RO_V_VFMV_VF:
Format(instr, "vfmv.v.f 'vd, 'fs1");
break;
case RO_V_VFMADD_VF:
Format(instr, "vfmadd.vf 'vd, 'fs1, 'vs2'vm");
break;
case RO_V_VFNMADD_VF:
Format(instr, "vfnmadd.vf 'vd, 'fs1, 'vs2'vm");
break;
case RO_V_VFMSUB_VF:
Format(instr, "vfmsub.vf 'vd, 'fs1, 'vs2'vm");
break;
case RO_V_VFNMSUB_VF:
Format(instr, "vfnmsub.vf 'vd, 'fs1, 'vs2'vm");
break;
case RO_V_VFMACC_VF:
Format(instr, "vfmacc.vf 'vd, 'fs1, 'vs2'vm");
break;
case RO_V_VFNMACC_VF:
Format(instr, "vfnmacc.vf 'vd, 'fs1, 'vs2'vm");
break;
case RO_V_VFMSAC_VF:
Format(instr, "vfmsac.vf 'vd, 'fs1, 'vs2'vm");
break;
case RO_V_VFNMSAC_VF:
Format(instr, "vfnmsac.vf 'vd, 'fs1, 'vs2'vm");
break;
default:
UNSUPPORTED_RISCV();
break;
@ -2345,7 +2421,7 @@ void Decoder::DecodeVType(Instruction* instr) {
DecodeRvvIVX(instr);
return;
case OP_FVF:
UNSUPPORTED_RISCV();
DecodeRvvFVF(instr);
return;
case OP_MVX:
DecodeRvvMVX(instr);

View File

@ -59,6 +59,7 @@
#include "src/heap/combined-heap.h"
#include "src/runtime/runtime-utils.h"
#include "src/utils/ostreams.h"
#include "src/utils/utils.h"
// The following code about RVV was based from:
// https://github.com/riscv/riscv-isa-sim
@ -470,31 +471,31 @@
} \
set_rvv_vstart(0);
#define RVV_VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \
RVV_VI_VFP_LOOP_BASE \
switch (rvv_vsew()) { \
case E16: { \
UNIMPLEMENTED(); \
} \
case E32: { \
float& vd = Rvvelt<float>(rvv_vd_reg(), i, true); \
float fs1 = static_cast<float>(get_fpu_register(rs1_reg())); \
float vs2 = Rvvelt<float>(rvv_vs2_reg(), i); \
BODY32; \
break; \
} \
case E64: { \
double& vd = Rvvelt<double>(rvv_vd_reg(), i, true); \
double fs1 = static_cast<double>(get_fpu_register(rs1_reg())); \
double vs2 = Rvvelt<double>(rvv_vs2_reg(), i); \
BODY64; \
break; \
} \
default: \
UNREACHABLE(); \
break; \
} \
RVV_VI_VFP_LOOP_END \
// Element loop for vector-scalar (.vf) single-width FP instructions. Per
// active element, BODY32/BODY64 see `vd` (writable destination element),
// `fs1` (the scalar FP operand, read at the element width) and `vs2` (the
// vector source element). E16 aborts via UNIMPLEMENTED(), so the missing
// `break` cannot actually fall through into the E32 arm.
#define RVV_VI_VFP_VF_LOOP(BODY16, BODY32, BODY64)        \
  RVV_VI_VFP_LOOP_BASE                                    \
  switch (rvv_vsew()) {                                   \
    case E16: {                                           \
      UNIMPLEMENTED();                                    \
    }                                                     \
    case E32: {                                           \
      float& vd = Rvvelt<float>(rvv_vd_reg(), i, true);   \
      float fs1 = get_fpu_register_float(rs1_reg());      \
      float vs2 = Rvvelt<float>(rvv_vs2_reg(), i);        \
      BODY32;                                             \
      break;                                              \
    }                                                     \
    case E64: {                                           \
      double& vd = Rvvelt<double>(rvv_vd_reg(), i, true); \
      double fs1 = get_fpu_register_double(rs1_reg());    \
      double vs2 = Rvvelt<double>(rvv_vs2_reg(), i);      \
      BODY64;                                             \
      break;                                              \
    }                                                     \
    default:                                              \
      UNREACHABLE();                                      \
      break;                                              \
  }                                                       \
  RVV_VI_VFP_LOOP_END                                     \
  rvv_trace_vd();
#define RVV_VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \
@ -525,6 +526,64 @@
RVV_VI_VFP_LOOP_END \
rvv_trace_vd();
// Computes vd = fma(_f1, _f2, _a) with NaN canonicalization: the result of
// std::fma is run through CanonicalizeFPUOpFMA so that any NaN input or
// output yields a quiet NaN (and a signaling NaN raises invalid-operation).
#define RVV_VI_VFP_FMA(type, _f1, _f2, _a) \
  auto fn = [](type f1, type f2, type a) { return std::fma(f1, f2, a); }; \
  vd = CanonicalizeFPUOpFMA<type>(fn, _f1, _f2, _a);
// Element loop for vector-vector (.vv) single-width FP fused multiply-add.
// BODY32/BODY64 see `vd` (destination element, also an FMA source), `vs1`
// and `vs2`. E16 aborts via UNIMPLEMENTED(); note the default arm uses
// require(0), unlike the UNREACHABLE() used by the non-FMA FP loops.
#define RVV_VI_VFP_FMA_VV_LOOP(BODY32, BODY64)            \
  RVV_VI_VFP_LOOP_BASE                                    \
  switch (rvv_vsew()) {                                   \
    case E16: {                                           \
      UNIMPLEMENTED();                                    \
    }                                                     \
    case E32: {                                           \
      float& vd = Rvvelt<float>(rvv_vd_reg(), i, true);   \
      float vs1 = Rvvelt<float>(rvv_vs1_reg(), i);        \
      float vs2 = Rvvelt<float>(rvv_vs2_reg(), i);        \
      BODY32;                                             \
      break;                                              \
    }                                                     \
    case E64: {                                           \
      double& vd = Rvvelt<double>(rvv_vd_reg(), i, true); \
      double vs1 = Rvvelt<double>(rvv_vs1_reg(), i);      \
      double vs2 = Rvvelt<double>(rvv_vs2_reg(), i);      \
      BODY64;                                             \
      break;                                              \
    }                                                     \
    default:                                              \
      require(0);                                         \
      break;                                              \
  }                                                       \
  RVV_VI_VFP_LOOP_END                                     \
  rvv_trace_vd();
// Element loop for vector-scalar (.vf) single-width FP fused multiply-add.
// BODY32/BODY64 see `vd` (destination element, also an FMA source), `fs1`
// (the scalar FP operand) and `vs2` (the vector source element). E16 aborts
// via UNIMPLEMENTED().
// Fix: the E64 arm previously declared `float fs1 =
// get_fpu_register_float(rs1_reg());`, reading the scalar operand at the
// wrong width/type for double-precision FMA. It now reads a double, matching
// the E64 arm of RVV_VI_VFP_VF_LOOP.
#define RVV_VI_VFP_FMA_VF_LOOP(BODY32, BODY64)            \
  RVV_VI_VFP_LOOP_BASE                                    \
  switch (rvv_vsew()) {                                   \
    case E16: {                                           \
      UNIMPLEMENTED();                                    \
    }                                                     \
    case E32: {                                           \
      float& vd = Rvvelt<float>(rvv_vd_reg(), i, true);   \
      float fs1 = get_fpu_register_float(rs1_reg());      \
      float vs2 = Rvvelt<float>(rvv_vs2_reg(), i);        \
      BODY32;                                             \
      break;                                              \
    }                                                     \
    case E64: {                                           \
      double& vd = Rvvelt<double>(rvv_vd_reg(), i, true); \
      double fs1 = get_fpu_register_double(rs1_reg());    \
      double vs2 = Rvvelt<double>(rvv_vs2_reg(), i);      \
      BODY64;                                             \
      break;                                              \
    }                                                     \
    default:                                              \
      require(0);                                         \
      break;                                              \
  }                                                       \
  RVV_VI_VFP_LOOP_END                                     \
  rvv_trace_vd();
#define RVV_VI_VFP_LOOP_CMP_BASE \
for (reg_t i = rvv_vstart(); i < rvv_vl(); ++i) { \
RVV_VI_LOOP_MASK_SKIP(); \
@ -569,7 +628,7 @@
default: \
UNREACHABLE(); \
break; \
} \
} \
RVV_VI_VFP_LOOP_CMP_END
// reduction loop - signed
@ -742,6 +801,103 @@
} \
rvv_trace_vd();
// Calculates the rounding increment ("r") applied before a right shift of
// `v` by `shift` bits, according to the fixed-point rounding mode `vxrm`:
//   0 = rnu (round-to-nearest-up), 1 = rne (round-to-nearest-even),
//   2 = rdn (truncate), 3 = rod (round-to-odd / "jam").
// Fix: the original extracted bit `shift` of `v` BEFORE the range guard, so
// shift >= 64 performed an undefined 64-bit shift inside
// unsigned_bitextract_64. The guard now runs first and rejects shift >= 64
// (a full-width extraction at bit 64 is never representable anyway).
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) {
  if (shift == 0 || shift >= 64) {
    return 0;
  }
  // d: the bit that becomes the LSB after shifting; d1: the most significant
  // shifted-out bit; D1/D2: the shifted-out tails used for tie detection.
  uint8_t d = v8::internal::unsigned_bitextract_64(shift, shift, v);
  uint8_t d1 = v8::internal::unsigned_bitextract_64(shift - 1, shift - 1, v);
  uint64_t D1 = v8::internal::unsigned_bitextract_64(shift - 1, 0, v);
  if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
    return d1;
  } else if (vxrm == 1) { /* round-to-nearest-even */
    if (shift > 1) {
      uint64_t D2 = v8::internal::unsigned_bitextract_64(shift - 2, 0, v);
      return d1 & ((D2 != 0) | d);
    } else {
      return d1 & d;
    }
  } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
    return !d & (D1 != 0);
  }
  return 0; /* round-down (truncate) */
}
template <typename Src, typename Dst>
// Clamps `v` into the range of an n-bit two's-complement integer
// [-2^(n-1), 2^(n-1)-1] and returns it as Dst. Requires 1 <= n <= 64.
inline Dst signed_saturation(Src v, uint n) {
  Dst smax = (Dst)(INT64_MAX >> (64 - n));
  Dst smin = (Dst)(INT64_MIN >> (64 - n));
  return (v > smax) ? smax : ((v < smin) ? smin : (Dst)v);
}
// Clamps `v` to the unsigned n-bit range [0, 2^n - 1] and returns it as
// Dst. Requires 1 <= n <= 64; negative inputs saturate to zero.
template <typename Src, typename Dst>
inline Dst unsigned_saturation(Src v, uint n) {
  const Dst umax = (Dst)(UINT64_MAX >> (64 - n));
  if (v > umax) {
    return umax;
  }
  if (v < 0) {
    return 0;
  }
  return (Dst)v;
}
// vnclipu.wi: per active element, right-shift the 2*SEW-wide unsigned source
// by uimm5, add the vxrm-controlled rounding increment from get_round, and
// narrow to SEW with unsigned saturation.
// NOTE(review): the E8 arm executes UNREACHABLE() before its narrowing code,
// so a 16->8 narrow currently aborts and the statements after it are dead —
// confirm whether E8 support was intentionally disabled.
#define RVV_VN_CLIPU_VI_LOOP()                                \
  RVV_VI_GENERAL_LOOP_BASE                                    \
  RVV_VI_LOOP_MASK_SKIP()                                     \
  if (rvv_vsew() == E8) {                                     \
    UNREACHABLE();                                            \
    VN_UPARAMS(16);                                           \
    vd = unsigned_saturation<uint16_t, uint8_t>(              \
        (static_cast<uint16_t>(vs2) >> uimm5) +               \
            get_round(static_cast<int>(rvv_vxrm()), vs2, uimm5), \
        8);                                                   \
  } else if (rvv_vsew() == E16) {                             \
    VN_UPARAMS(32);                                           \
    vd = unsigned_saturation<uint32_t, uint16_t>(             \
        (static_cast<uint32_t>(vs2) >> uimm5) +               \
            get_round(static_cast<int>(rvv_vxrm()), vs2, uimm5), \
        16);                                                  \
  } else if (rvv_vsew() == E32) {                             \
    VN_UPARAMS(64);                                           \
    vd = unsigned_saturation<uint64_t, uint32_t>(             \
        (static_cast<uint64_t>(vs2) >> uimm5) +               \
            get_round(static_cast<int>(rvv_vxrm()), vs2, uimm5), \
        32);                                                  \
  } else if (rvv_vsew() == E64) {                             \
    UNREACHABLE();                                            \
  } else {                                                    \
    UNREACHABLE();                                            \
  }                                                           \
  RVV_VI_LOOP_END                                             \
  rvv_trace_vd();
// vnclip.wi: per active element, arithmetic-right-shift the 2*SEW-wide
// signed source by uimm5, add the vxrm rounding increment from get_round,
// and narrow to SEW with signed saturation.
// NOTE(review): as in the unsigned variant, the E8 arm hits UNREACHABLE()
// before its (dead) narrowing code — confirm intended.
#define RVV_VN_CLIP_VI_LOOP()                                              \
  RVV_VI_GENERAL_LOOP_BASE                                                 \
  RVV_VI_LOOP_MASK_SKIP()                                                  \
  if (rvv_vsew() == E8) {                                                  \
    UNREACHABLE();                                                         \
    VN_PARAMS(16);                                                         \
    vd = signed_saturation<int16_t, int8_t>(                               \
        (vs2 >> uimm5) + get_round(static_cast<int>(rvv_vxrm()), vs2, uimm5), \
        8);                                                                \
  } else if (rvv_vsew() == E16) {                                          \
    VN_PARAMS(32);                                                         \
    vd = signed_saturation<int32_t, int16_t>(                              \
        (vs2 >> uimm5) + get_round(static_cast<int>(rvv_vxrm()), vs2, uimm5), \
        16);                                                               \
  } else if (rvv_vsew() == E32) {                                          \
    VN_PARAMS(64);                                                         \
    vd = signed_saturation<int64_t, int32_t>(                              \
        (vs2 >> uimm5) + get_round(static_cast<int>(rvv_vxrm()), vs2, uimm5), \
        32);                                                               \
  } else if (rvv_vsew() == E64) {                                          \
    UNREACHABLE();                                                         \
  } else {                                                                 \
    UNREACHABLE();                                                         \
  }                                                                        \
  RVV_VI_LOOP_END                                                          \
  rvv_trace_vd();
namespace v8 {
namespace internal {
@ -3560,7 +3716,11 @@ bool Simulator::DecodeRvvVL() {
break;
}
case 16: {
UNIMPLEMENTED_RISCV();
RVV_VI_LD(0, (i * nf + fn), int16, false);
break;
}
case 32: {
RVV_VI_LD(0, (i * nf + fn), int32, false);
break;
}
default:
@ -3617,7 +3777,11 @@ bool Simulator::DecodeRvvVS() {
break;
}
case 16: {
UNIMPLEMENTED_RISCV();
RVV_VI_ST(0, (i * nf + fn), uint16, false);
break;
}
case 32: {
RVV_VI_ST(0, (i * nf + fn), uint32, false);
break;
}
default:
@ -4561,7 +4725,7 @@ void Simulator::DecodeRvvIVI() {
RVV_VI_LOOP_END
break;
}
case RO_V_VSADDU_VI:{
case RO_V_VSADDU_VI: {
RVV_VI_VI_ULOOP({
vd = vs2 + uimm5;
vd |= -(vd < vs2);
@ -4664,6 +4828,12 @@ void Simulator::DecodeRvvIVI() {
UNREACHABLE();
}
break;
case RO_V_VNCLIP_WI:
RVV_VN_CLIP_VI_LOOP()
break;
case RO_V_VNCLIPU_WI:
RVV_VN_CLIPU_VI_LOOP()
break;
default:
UNIMPLEMENTED_RISCV();
break;
@ -4990,13 +5160,13 @@ void Simulator::DecodeRvvFVV() {
if (is_invalid_fdiv(vs1, vs2)) {
this->set_fflags(kInvalidOperation);
return std::numeric_limits<float>::quiet_NaN();
} else if (vs2 == 0.0f) {
} else if (vs1 == 0.0f) {
this->set_fflags(kDivideByZero);
return (std::signbit(vs1) == std::signbit(vs2)
? std::numeric_limits<float>::infinity()
: -std::numeric_limits<float>::infinity());
} else {
return vs1 / vs2;
return vs2 / vs1;
}
};
auto alu_out = fn(vs1, vs2);
@ -5316,6 +5486,59 @@ void Simulator::DecodeRvvFVV() {
vd = alu_out;
})
break;
case RO_V_VFMADD_VV:
RVV_VI_VFP_FMA_VV_LOOP({RVV_VI_VFP_FMA(float, vd, vs1, vs2)},
{RVV_VI_VFP_FMA(double, vd, vs1, vs2)})
break;
case RO_V_VFNMADD_VV:
RVV_VI_VFP_FMA_VV_LOOP({RVV_VI_VFP_FMA(float, -vd, vs1, -vs2)},
{RVV_VI_VFP_FMA(double, -vd, vs1, -vs2)})
break;
case RO_V_VFMSUB_VV:
RVV_VI_VFP_FMA_VV_LOOP({RVV_VI_VFP_FMA(float, vd, vs1, -vs2)},
{RVV_VI_VFP_FMA(double, vd, vs1, -vs2)})
break;
case RO_V_VFNMSUB_VV:
RVV_VI_VFP_FMA_VV_LOOP({RVV_VI_VFP_FMA(float, -vd, vs1, +vs2)},
{RVV_VI_VFP_FMA(double, -vd, vs1, +vs2)})
break;
case RO_V_VFMACC_VV:
RVV_VI_VFP_FMA_VV_LOOP({RVV_VI_VFP_FMA(float, vs2, vs1, vd)},
{RVV_VI_VFP_FMA(double, vs2, vs1, vd)})
break;
case RO_V_VFNMACC_VV:
RVV_VI_VFP_FMA_VV_LOOP({RVV_VI_VFP_FMA(float, -vs2, vs1, -vd)},
{RVV_VI_VFP_FMA(double, -vs2, vs1, -vd)})
break;
case RO_V_VFMSAC_VV:
RVV_VI_VFP_FMA_VV_LOOP({RVV_VI_VFP_FMA(float, vs2, vs1, -vd)},
{RVV_VI_VFP_FMA(double, vs2, vs1, -vd)})
break;
case RO_V_VFNMSAC_VV:
RVV_VI_VFP_FMA_VV_LOOP({RVV_VI_VFP_FMA(float, -vs2, vs1, +vd)},
{RVV_VI_VFP_FMA(double, -vs2, vs1, +vd)})
break;
case RO_V_VFMV_FS:
switch (rvv_vsew()) {
case E16: {
UNIMPLEMENTED();
}
case E32: {
float fs2 = Rvvelt<float>(rvv_vs2_reg(), 0);
set_fpu_register_float(rd_reg(), fs2);
break;
}
case E64: {
double fs2 = Rvvelt<double>(rvv_vs2_reg(), 0);
set_fpu_register_double(rd_reg(), fs2);
break;
}
default:
require(0);
break;
}
rvv_trace_vd();
break;
default:
UNSUPPORTED_RISCV();
break;
@ -5340,6 +5563,50 @@ void Simulator::DecodeRvvFVF() {
{}, { vd = fsgnj32(vs2, fs1, false, true); },
{ vd = fsgnj64(vs2, fs1, false, true); })
break;
case RO_V_VFMV_VF:
RVV_VI_VFP_VF_LOOP(
{},
{
vd = fs1;
USE(vs2);
},
{
vd = fs1;
USE(vs2);
})
break;
case RO_V_VFMADD_VF:
RVV_VI_VFP_FMA_VF_LOOP({RVV_VI_VFP_FMA(float, vd, fs1, vs2)},
{RVV_VI_VFP_FMA(double, vd, fs1, vs2)})
break;
case RO_V_VFNMADD_VF:
RVV_VI_VFP_FMA_VF_LOOP({RVV_VI_VFP_FMA(float, -vd, fs1, -vs2)},
{RVV_VI_VFP_FMA(double, -vd, fs1, -vs2)})
break;
case RO_V_VFMSUB_VF:
RVV_VI_VFP_FMA_VF_LOOP({RVV_VI_VFP_FMA(float, vd, fs1, -vs2)},
{RVV_VI_VFP_FMA(double, vd, fs1, -vs2)})
break;
case RO_V_VFNMSUB_VF:
RVV_VI_VFP_FMA_VF_LOOP({RVV_VI_VFP_FMA(float, -vd, fs1, vs2)},
{RVV_VI_VFP_FMA(double, -vd, fs1, vs2)})
break;
case RO_V_VFMACC_VF:
RVV_VI_VFP_FMA_VF_LOOP({RVV_VI_VFP_FMA(float, vs2, fs1, vd)},
{RVV_VI_VFP_FMA(double, vs2, fs1, vd)})
break;
case RO_V_VFNMACC_VF:
RVV_VI_VFP_FMA_VF_LOOP({RVV_VI_VFP_FMA(float, -vs2, fs1, -vd)},
{RVV_VI_VFP_FMA(double, -vs2, fs1, -vd)})
break;
case RO_V_VFMSAC_VF:
RVV_VI_VFP_FMA_VF_LOOP({RVV_VI_VFP_FMA(float, vs2, fs1, -vd)},
{RVV_VI_VFP_FMA(double, vs2, fs1, -vd)})
break;
case RO_V_VFNMSAC_VF:
RVV_VI_VFP_FMA_VF_LOOP({RVV_VI_VFP_FMA(float, -vs2, fs1, vd)},
{RVV_VI_VFP_FMA(double, -vs2, fs1, vd)})
break;
default:
UNSUPPORTED_RISCV();
break;
@ -5363,7 +5630,7 @@ void Simulator::DecodeVType() {
DecodeRvvIVX();
return;
case OP_FVF:
UNIMPLEMENTED_RISCV();
DecodeRvvFVF();
return;
case OP_MVX:
DecodeRvvMVX();
@ -5398,9 +5665,9 @@ void Simulator::DecodeVType() {
} else {
avl = rvv_vl();
}
avl = avl <= rvv_vlmax() ? avl
: avl < (rvv_vlmax() * 2) ? avl / 2
: rvv_vlmax();
avl = avl <= rvv_vlmax()
? avl
: avl < (rvv_vlmax() * 2) ? avl / 2 : rvv_vlmax();
set_rvv_vl(avl);
set_rd(rvv_vl());
rvv_trace_status();
@ -5411,9 +5678,9 @@ void Simulator::DecodeVType() {
uint64_t avl;
set_rvv_vtype(rvv_zimm());
avl = instr_.Rvvuimm();
avl = avl <= rvv_vlmax() ? avl
: avl < (rvv_vlmax() * 2) ? avl / 2
: rvv_vlmax();
avl = avl <= rvv_vlmax()
? avl
: avl < (rvv_vlmax() * 2) ? avl / 2 : rvv_vlmax();
set_rvv_vl(avl);
set_rd(rvv_vl());
rvv_trace_status();

View File

@ -762,6 +762,20 @@ class Simulator : public SimulatorBase {
type_usew_t<x>::type uimm5 = (type_usew_t<x>::type)(instr_.RvvUimm5()); \
type_usew_t<x>::type vs2 = Rvvelt<type_usew_t<x>::type>(rvv_vs2_reg(), i);
// Binds the operands of a signed narrowing (.wi) op with source width x:
// `vd` is the half-width destination element (writable), `uimm5` the
// immediate shift amount, and `vs2` the full-width signed source element.
#define VN_PARAMS(x) \
  constexpr int half_x = x >> 1; \
  type_sew_t<half_x>::type& vd = \
      Rvvelt<type_sew_t<half_x>::type>(rvv_vd_reg(), i, true); \
  type_sew_t<x>::type uimm5 = (type_sew_t<x>::type)(instr_.RvvUimm5()); \
  type_sew_t<x>::type vs2 = Rvvelt<type_sew_t<x>::type>(rvv_vs2_reg(), i);
// Unsigned variant of VN_PARAMS: `vd` and `uimm5` use the unsigned element
// types. NOTE(review): `vs2` is still read through the *signed* type_sew_t;
// the CLIPU loop re-casts it to unsigned before shifting, so only the
// sign-extended high bits differ — confirm this matches the intended
// vnclipu source interpretation.
#define VN_UPARAMS(x) \
  constexpr int half_x = x >> 1; \
  type_usew_t<half_x>::type& vd = \
      Rvvelt<type_usew_t<half_x>::type>(rvv_vd_reg(), i, true); \
  type_usew_t<x>::type uimm5 = (type_usew_t<x>::type)(instr_.RvvUimm5()); \
  type_sew_t<x>::type vs2 = Rvvelt<type_sew_t<x>::type>(rvv_vs2_reg(), i);
#define VXI_PARAMS(x) \
type_sew_t<x>::type& vd = \
Rvvelt<type_sew_t<x>::type>(rvv_vd_reg(), i, true); \
@ -873,9 +887,24 @@ class Simulator : public SimulatorBase {
vlenb_ = value;
}
template <typename T, typename Func>
inline T CanonicalizeFPUOpFMA(Func fn, T dst, T src1, T src2) {
static_assert(std::is_floating_point<T>::value);
auto alu_out = fn(dst, src1, src2);
// if any input or result is NaN, the result is quiet_NaN
if (std::isnan(alu_out) || std::isnan(src1) || std::isnan(src2) ||
std::isnan(dst)) {
// signaling_nan sets kInvalidOperation bit
if (isSnan(alu_out) || isSnan(src1) || isSnan(src2) || isSnan(dst))
set_fflags(kInvalidOperation);
alu_out = std::numeric_limits<T>::quiet_NaN();
}
return alu_out;
}
template <typename T, typename Func>
inline T CanonicalizeFPUOp3(Func fn) {
DCHECK(std::is_floating_point<T>::value);
static_assert(std::is_floating_point<T>::value);
T src1 = std::is_same<float, T>::value ? frs1() : drs1();
T src2 = std::is_same<float, T>::value ? frs2() : drs2();
T src3 = std::is_same<float, T>::value ? frs3() : drs3();
@ -893,7 +922,7 @@ class Simulator : public SimulatorBase {
template <typename T, typename Func>
inline T CanonicalizeFPUOp2(Func fn) {
DCHECK(std::is_floating_point<T>::value);
static_assert(std::is_floating_point<T>::value);
T src1 = std::is_same<float, T>::value ? frs1() : drs1();
T src2 = std::is_same<float, T>::value ? frs2() : drs2();
auto alu_out = fn(src1, src2);
@ -909,7 +938,7 @@ class Simulator : public SimulatorBase {
template <typename T, typename Func>
inline T CanonicalizeFPUOp1(Func fn) {
DCHECK(std::is_floating_point<T>::value);
static_assert(std::is_floating_point<T>::value);
T src1 = std::is_same<float, T>::value ? frs1() : drs1();
auto alu_out = fn(src1);
// if any input or result is NaN, the result is quiet_NaN

View File

@ -2767,13 +2767,39 @@ void LiftoffAssembler::emit_i8x16_uconvert_i16x8(LiftoffRegister dst,
// i16x8.sconvert_i32x4: narrow two i32x4 inputs into one i16x8 with signed
// saturation. The sources are copied into an adjacent register pair
// (tmp_lo, tmp_hi) so they can be read as one LMUL=2 wide operand, then
// vnclip.wi narrows the pair into dst with RNE rounding and a 0-bit shift.
void LiftoffAssembler::emit_i16x8_sconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  VRegister dst_v = dst.fp().toV();
  VRegister lhs_v = lhs.fp().toV();
  VRegister rhs_v = rhs.fp().toV();
  // Allocate the scratch under E32/m2 so tmp_lo gets a register-pair base;
  // tmp_hi is its successor. NOTE(review): dst is not excluded from the
  // allocation and tmp_lo+1 is assumed valid/aligned for m2 — confirm.
  VU.set(kScratchReg, E32, m2);
  VRegister tmp_lo =
      GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp().toV();
  VRegister tmp_hi = VRegister::from_code(tmp_lo.code() + 1);
  VU.set(kScratchReg, E32, m1);
  // NOTE(review): rhs lands in the low half of the wide group and lhs in the
  // high half — verify this matches the wasm lane ordering (lhs -> lanes
  // 0..3 of the result).
  vmv_vv(tmp_lo, rhs_v);
  vmv_vv(tmp_hi, lhs_v);
  VU.set(kScratchReg, E16, m1);
  VU.set(RoundingMode::RNE);
  vnclip_vi(dst_v, tmp_lo, 0);
}
// i16x8.uconvert_i32x4: narrow two i32x4 inputs into one i16x8 with
// unsigned saturation. Sources are gathered into the adjacent pair
// (tmp_lo, tmp_hi); vmax.vx against zero_reg clamps negative lanes to zero,
// then vnclipu.wi narrows the LMUL=2 group into dst (RNE rounding, shift 0).
void LiftoffAssembler::emit_i16x8_uconvert_i32x4(LiftoffRegister dst,
                                                 LiftoffRegister lhs,
                                                 LiftoffRegister rhs) {
  VRegister dst_v = dst.fp().toV();
  VRegister lhs_v = lhs.fp().toV();
  VRegister rhs_v = rhs.fp().toV();
  // Allocate the scratch under E32/m2 so tmp_lo/tmp_hi form a register
  // pair. NOTE(review): dst is not excluded from the allocation — confirm
  // dst cannot alias tmp_lo/tmp_hi.
  VU.set(kScratchReg, E32, m2);
  VRegister tmp_lo =
      GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(lhs, rhs)).fp().toV();
  VRegister tmp_hi = VRegister::from_code(tmp_lo.code() + 1);
  VU.set(kScratchReg, E32, m1);
  // NOTE(review): rhs -> low half, lhs -> high half; verify lane ordering.
  vmv_vv(tmp_lo, rhs_v);
  vmv_vv(tmp_hi, lhs_v);
  VU.set(kScratchReg, E32, m2);
  vmax_vx(tmp_lo, tmp_lo, zero_reg);  // clamp negative lanes to 0
  VU.set(kScratchReg, E16, m1);
  VU.set(RoundingMode::RNE);
  vnclipu_vi(dst_v, tmp_lo, 0);
}
void LiftoffAssembler::emit_i16x8_sconvert_i8x16_low(LiftoffRegister dst,
@ -2863,7 +2889,6 @@ void LiftoffAssembler::emit_i16x8_extadd_pairwise_i8x16_u(LiftoffRegister dst,
bailout(kSimd, "i16x8.extadd_pairwise_i8x16_u");
}
void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
VU.set(kScratchReg, E32, m1);

View File

@ -2008,6 +2008,396 @@ TEST(RVV_VSETIVLI) {
};
GenAndRunTest(fn);
}
// Tests vfmv.v.f: broadcasts a scalar float into an E32/m2 vector group and
// checks all 8 stored lanes equal the scalar. NOTE(review): assumes the m2
// group holds exactly 8 E32 lanes (VLEN = 128) — confirm for other VLENs.
// memcmp compares lanes bitwise, so NaN inputs are compared exactly.
TEST(RVV_VFMV) {
  CcTest::InitializeVM();
  Isolate* isolate = CcTest::i_isolate();
  HandleScope scope(isolate);
  for (float a : compiler::ValueHelper::GetVector<float>()) {
    float src = a;
    float dst[8] = {0};
    float ref[8] = {a, a, a, a, a, a, a, a};
    auto fn = [](MacroAssembler& assm) {
      __ VU.set(t0, VSew::E32, Vlmul::m2);
      __ flw(fa1, a0, 0);           // load the scalar from *a0
      __ vfmv_vf(v2, fa1);          // splat into the v2/v3 group
      __ vs(v2, a1, 0, VSew::E32);  // store the group to *a1
    };
    GenAndRunTest<int32_t, int64_t>((int64_t)&src, (int64_t)dst, fn);
    CHECK(!memcmp(ref, dst, sizeof(ref)));
  }
}
// Saturates a 32-bit value into the signed 5-bit immediate range [-16, 15],
// mirroring the clamping applied to a simm5 instruction operand.
inline int32_t ToImm5(int32_t v) {
  constexpr int32_t kImm5Max = (int32_t)(INT64_MAX >> (64 - 5));
  constexpr int32_t kImm5Min = (int32_t)(INT64_MIN >> (64 - 5));
  if (v > kImm5Max) {
    return kImm5Max;
  }
  if (v < kImm5Min) {
    return kImm5Min;
  }
  return v;
}
// Tests for vector integer arithmetic instructions between vector and vector
// (.vv form): splats rs1_val/rs2_val into v0/v1, runs the instruction, reads
// back lane 0 with vmv.x.s and compares against `expect_res` (an expression
// over rs1_val/rs2_val evaluated at the given element width).
#define UTEST_RVV_VI_VV_FORM_WITH_RES(instr_name, width, array, expect_res) \
  TEST(RISCV_UTEST_##instr_name##_##width) { \
    CcTest::InitializeVM(); \
    auto fn = [](MacroAssembler& assm) { \
      __ VU.set(t0, VSew::E##width, Vlmul::m1); \
      __ vmv_vx(v0, a0); \
      __ vmv_vx(v1, a1); \
      __ instr_name(v0, v0, v1); \
      __ vmv_xs(a0, v0); \
    }; \
    for (int##width##_t rs1_val : array) { \
      for (int##width##_t rs2_val : array) { \
        auto res = GenAndRunTest<int32_t, int32_t>(rs1_val, rs2_val, fn); \
        CHECK_EQ(static_cast<int##width##_t>(expect_res), res); \
      } \
    } \
  }
// Tests for vector integer arithmetic instructions between vector and scalar
// (.vx form): splats rs1_val into v0, passes rs2_val in a1 as the scalar
// operand, then checks lane 0 against `expect_res`.
#define UTEST_RVV_VI_VX_FORM_WITH_RES(instr_name, width, array, expect_res) \
  TEST(RISCV_UTEST_##instr_name##_##width) { \
    CcTest::InitializeVM(); \
    auto fn = [](MacroAssembler& assm) { \
      __ VU.set(t0, VSew::E##width, Vlmul::m1); \
      __ vmv_vx(v0, a0); \
      __ instr_name(v0, v0, a1); \
      __ vmv_xs(a0, v0); \
    }; \
    for (int##width##_t rs1_val : array) { \
      for (int##width##_t rs2_val : array) { \
        auto res = GenAndRunTest<int32_t, int32_t>(rs1_val, rs2_val, fn); \
        CHECK_EQ(static_cast<int##width##_t>(expect_res), res); \
      } \
    } \
  }
// Tests for vector integer arithmetic instructions between vector and 5-bit
// immediate (.vi form): rs2_val is clamped to simm5 range with ToImm5 and
// baked into the generated code (hence the lambda captures it and is built
// inside the loop), then lane 0 is checked against `expect_res`.
#define UTEST_RVV_VI_VI_FORM_WITH_RES(instr_name, width, array, expect_res) \
  TEST(RISCV_UTEST_##instr_name##_##width) { \
    CcTest::InitializeVM(); \
    for (int##width##_t rs1_val : array) { \
      for (int##width##_t rs2_val : array) { \
        auto fn = [rs2_val](MacroAssembler& assm) { \
          __ VU.set(t0, VSew::E##width, Vlmul::m1); \
          __ vmv_vx(v0, a0); \
          __ instr_name(v0, v0, ToImm5(rs2_val)); \
          __ vmv_xs(a0, v0); \
        }; \
        auto res = GenAndRunTest<int32_t, int32_t>(rs1_val, fn); \
        CHECK_EQ(static_cast<int##width##_t>(expect_res), res); \
      } \
    } \
  }
// Convenience wrappers: derive the expected result from a C operator applied
// to the two inputs (vector-vector and vector-scalar forms are defined
// earlier in this file).
#define UTEST_RVV_VI_VV_FORM_WITH_OP(instr_name, width, array, tested_op) \
  UTEST_RVV_VI_VV_FORM_WITH_RES(instr_name, width, array,                 \
                                (int##width##_t)((rs1_val)tested_op(rs2_val)))
#define UTEST_RVV_VI_VX_FORM_WITH_OP(instr_name, width, array, tested_op) \
  UTEST_RVV_VI_VX_FORM_WITH_RES(instr_name, width, array,                 \
                                (int##width##_t)((rs1_val)tested_op(rs2_val)))
// Immediate form: rs2_val is squeezed through ToImm5 on both the emitted
// instruction and the reference computation so they stay in sync.
#define UTEST_RVV_VI_VI_FORM_WITH_OP(instr_name, width, array, tested_op) \
  UTEST_RVV_VI_VI_FORM_WITH_RES(                                          \
      instr_name, width, array,                                           \
      (int##width##_t)((rs1_val)tested_op(ToImm5(rs2_val))))
// Convenience wrappers: derive the expected result from a callable (e.g.
// std::max<int8_t>) applied to the two inputs.
#define UTEST_RVV_VI_VV_FORM_WITH_FN(instr_name, width, array, tested_fn) \
  UTEST_RVV_VI_VV_FORM_WITH_RES(instr_name, width, array,                 \
                                tested_fn(rs1_val, rs2_val))
#define UTEST_RVV_VI_VX_FORM_WITH_FN(instr_name, width, array, tested_fn) \
  UTEST_RVV_VI_VX_FORM_WITH_RES(instr_name, width, array,                 \
                                tested_fn(rs1_val, rs2_val))
// Shared input set for all integer tests below.
#define ARRAY_INT32 compiler::ValueHelper::GetVector<int32_t>()
// Instantiate a test per element width (E8/E16/E32) for each addressing
// form: vector-vector, vector-scalar, vector-immediate.
#define VV(instr_name, array, tested_op)                         \
  UTEST_RVV_VI_VV_FORM_WITH_OP(instr_name, 8, array, tested_op)  \
  UTEST_RVV_VI_VV_FORM_WITH_OP(instr_name, 16, array, tested_op) \
  UTEST_RVV_VI_VV_FORM_WITH_OP(instr_name, 32, array, tested_op)
#define VX(instr_name, array, tested_op)                         \
  UTEST_RVV_VI_VX_FORM_WITH_OP(instr_name, 8, array, tested_op)  \
  UTEST_RVV_VI_VX_FORM_WITH_OP(instr_name, 16, array, tested_op) \
  UTEST_RVV_VI_VX_FORM_WITH_OP(instr_name, 32, array, tested_op)
#define VI(instr_name, array, tested_op)                         \
  UTEST_RVV_VI_VI_FORM_WITH_OP(instr_name, 8, array, tested_op)  \
  UTEST_RVV_VI_VI_FORM_WITH_OP(instr_name, 16, array, tested_op) \
  UTEST_RVV_VI_VI_FORM_WITH_OP(instr_name, 32, array, tested_op)
// Vector single-width integer add/subtract (no vsub.vi exists in RVV; the
// immediate form is covered by vadd.vi with a negated immediate).
VV(vadd_vv, ARRAY_INT32, +)
VX(vadd_vx, ARRAY_INT32, +)
VI(vadd_vi, ARRAY_INT32, +)
VV(vsub_vv, ARRAY_INT32, -)
VX(vsub_vx, ARRAY_INT32, -)
// Vector bitwise logical instructions.
VV(vand_vv, ARRAY_INT32, &)
VX(vand_vx, ARRAY_INT32, &)
VI(vand_vi, ARRAY_INT32, &)
VV(vor_vv, ARRAY_INT32, |)
VX(vor_vx, ARRAY_INT32, |)
VI(vor_vi, ARRAY_INT32, |)
VV(vxor_vv, ARRAY_INT32, ^)
VX(vxor_vx, ARRAY_INT32, ^)
VI(vxor_vi, ARRAY_INT32, ^)
// Vector signed min/max; the reference result is computed with std::max/
// std::min instantiated at the matching signed element width.
UTEST_RVV_VI_VV_FORM_WITH_FN(vmax_vv, 8, ARRAY_INT32, std::max<int8_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmax_vx, 8, ARRAY_INT32, std::max<int8_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vmax_vv, 16, ARRAY_INT32, std::max<int16_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmax_vx, 16, ARRAY_INT32, std::max<int16_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vmax_vv, 32, ARRAY_INT32, std::max<int32_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmax_vx, 32, ARRAY_INT32, std::max<int32_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vmin_vv, 8, ARRAY_INT32, std::min<int8_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmin_vx, 8, ARRAY_INT32, std::min<int8_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vmin_vv, 16, ARRAY_INT32, std::min<int16_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmin_vx, 16, ARRAY_INT32, std::min<int16_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vmin_vv, 32, ARRAY_INT32, std::min<int32_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmin_vx, 32, ARRAY_INT32, std::min<int32_t>)
// Vector unsigned min/max; inputs are reinterpreted at the matching
// unsigned element width for the reference computation.
UTEST_RVV_VI_VV_FORM_WITH_FN(vmaxu_vv, 8, ARRAY_INT32, std::max<uint8_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmaxu_vx, 8, ARRAY_INT32, std::max<uint8_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vmaxu_vv, 16, ARRAY_INT32, std::max<uint16_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmaxu_vx, 16, ARRAY_INT32, std::max<uint16_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vmaxu_vv, 32, ARRAY_INT32, std::max<uint32_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vmaxu_vx, 32, ARRAY_INT32, std::max<uint32_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vminu_vv, 8, ARRAY_INT32, std::min<uint8_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vminu_vx, 8, ARRAY_INT32, std::min<uint8_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vminu_vv, 16, ARRAY_INT32, std::min<uint16_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vminu_vx, 16, ARRAY_INT32, std::min<uint16_t>)
UTEST_RVV_VI_VV_FORM_WITH_FN(vminu_vv, 32, ARRAY_INT32, std::min<uint32_t>)
UTEST_RVV_VI_VX_FORM_WITH_FN(vminu_vx, 32, ARRAY_INT32, std::min<uint32_t>)
// Tear down the integer-test helper macros now that every VI/VX/VV test has
// been instantiated, so they do not leak into the rest of this file.
// The macros defined above end in _WITH_RES; the previous `#undef
// UTEST_RVV_VI_*_FORM` lines targeted names that were never defined and
// left the _WITH_RES macros alive.
#undef ARRAY_INT32
#undef VV
#undef VX
#undef VI
#undef UTEST_RVV_VI_VV_FORM_WITH_FN
#undef UTEST_RVV_VI_VX_FORM_WITH_FN
#undef UTEST_RVV_VI_VI_FORM_WITH_OP
#undef UTEST_RVV_VI_VX_FORM_WITH_OP
#undef UTEST_RVV_VI_VV_FORM_WITH_OP
#undef UTEST_RVV_VI_VI_FORM_WITH_RES
#undef UTEST_RVV_VI_VX_FORM_WITH_RES
#undef UTEST_RVV_VI_VV_FORM_WITH_RES
// Tests for vector single-width floating-point arithmetic instructions between
// vector and vector
//
// Broadcasts rs1_fval into v0 and rs2_fval into v1 (arriving in fa0/fa1),
// executes `instr_name v0, v0, v1` at E32/m1, reads element 0 back through
// fa0 and compares it with `expect_res`. The expected value is passed
// through UseCanonicalNan so any NaN result compares equal regardless of
// its payload bits.
#define UTEST_RVV_VF_VV_FORM_WITH_RES(instr_name, array, expect_res)      \
  TEST(RISCV_UTEST_##instr_name) {                                        \
    CcTest::InitializeVM();                                               \
    auto fn = [](MacroAssembler& assm) {                                  \
      __ VU.set(t0, VSew::E32, Vlmul::m1);                                \
      __ vfmv_vf(v0, fa0);                                                \
      __ vfmv_vf(v1, fa1);                                                \
      __ instr_name(v0, v0, v1);                                          \
      __ vfmv_fs(fa0, v0);                                                \
    };                                                                    \
    for (float rs1_fval : array) {                                        \
      for (float rs2_fval : array) {                                      \
        auto res = GenAndRunTest<float, float>(rs1_fval, rs2_fval, fn);   \
        CHECK_FLOAT_EQ(UseCanonicalNan<float>(expect_res), res);          \
      }                                                                   \
    }                                                                     \
  }
// Tests for vector single-width floating-point arithmetic instructions between
// vector and scalar
//
// Same contract as the vector-vector form above, but the second operand is
// the scalar FPU register fa1 (vector-scalar .vf encoding) instead of a
// broadcast vector register.
#define UTEST_RVV_VF_VF_FORM_WITH_RES(instr_name, array, expect_res)      \
  TEST(RISCV_UTEST_##instr_name) {                                        \
    CcTest::InitializeVM();                                               \
    auto fn = [](MacroAssembler& assm) {                                  \
      __ VU.set(t0, VSew::E32, Vlmul::m1);                                \
      __ vfmv_vf(v0, fa0);                                                \
      __ instr_name(v0, v0, fa1);                                         \
      __ vfmv_fs(fa0, v0);                                                \
    };                                                                    \
    for (float rs1_fval : array) {                                        \
      for (float rs2_fval : array) {                                      \
        auto res = GenAndRunTest<float, float>(rs1_fval, rs2_fval, fn);   \
        CHECK_FLOAT_EQ(UseCanonicalNan<float>(expect_res), res);          \
      }                                                                   \
    }                                                                     \
  }
// Convenience wrappers: derive the expected result from a C operator applied
// to the two float inputs.
#define UTEST_RVV_VF_VV_FORM_WITH_OP(instr_name, array, tested_op) \
  UTEST_RVV_VF_VV_FORM_WITH_RES(instr_name, array,                 \
                                ((rs1_fval)tested_op(rs2_fval)))
#define UTEST_RVV_VF_VF_FORM_WITH_OP(instr_name, array, tested_op) \
  UTEST_RVV_VF_VF_FORM_WITH_RES(instr_name, array,                 \
                                ((rs1_fval)tested_op(rs2_fval)))
#define ARRAY_FLOAT compiler::ValueHelper::GetVector<float>()
// Vector single-width floating-point add/sub/mul/div, vector-vector form.
// NOTE(review): the .vf (vector-scalar) variants are left commented out —
// presumably pending assembler/simulator support; confirm before enabling.
UTEST_RVV_VF_VV_FORM_WITH_OP(vfadd_vv, ARRAY_FLOAT, +)
// UTEST_RVV_VF_VF_FORM_WITH_OP(vfadd_vf, ARRAY_FLOAT, +)
UTEST_RVV_VF_VV_FORM_WITH_OP(vfsub_vv, ARRAY_FLOAT, -)
// UTEST_RVV_VF_VF_FORM_WITH_OP(vfsub_vf, ARRAY_FLOAT, -)
UTEST_RVV_VF_VV_FORM_WITH_OP(vfmul_vv, ARRAY_FLOAT, *)
// UTEST_RVV_VF_VF_FORM_WITH_OP(vfmul_vf, ARRAY_FLOAT, *)
UTEST_RVV_VF_VV_FORM_WITH_OP(vfdiv_vv, ARRAY_FLOAT, /)
// UTEST_RVV_VF_VF_FORM_WITH_OP(vfdiv_vf, ARRAY_FLOAT, /)
// Tear down the floating-point arithmetic helper macros. The macros defined
// above end in _WITH_RES; the previous `#undef UTEST_RVV_VF_*_FORM` lines
// targeted names that were never defined and left the _WITH_RES macros
// alive.
#undef ARRAY_FLOAT
#undef UTEST_RVV_VF_VV_FORM_WITH_OP
#undef UTEST_RVV_VF_VF_FORM_WITH_OP
#undef UTEST_RVV_VF_VV_FORM_WITH_RES
#undef UTEST_RVV_VF_VF_FORM_WITH_RES
// Tests for vector single-width floating-point fused multiply-add Instructions
// between vectors
#define UTEST_RVV_FMA_VV_FORM_WITH_RES(instr_name, array, expect_res) \
TEST(RISCV_UTEST_##instr_name) { \
CcTest::InitializeVM(); \
auto fn = [](MacroAssembler& assm) { \
__ VU.set(t0, VSew::E32, Vlmul::m1); \
__ vfmv_vf(v0, fa0); \
__ vfmv_vf(v1, fa1); \
__ vfmv_vf(v2, fa2); \
__ instr_name(v0, v1, v2); \
__ vfmv_fs(fa0, v0); \
}; \
for (float rs1_fval : array) { \
for (float rs2_fval : array) { \
for (float rs3_fval : array) { \
auto res = \
GenAndRunTest<float, float>(rs1_fval, rs2_fval, rs3_fval, fn); \
CHECK_FLOAT_EQ(expect_res, res); \
} \
} \
} \
}
// Tests for vector single-width floating-point fused multiply-add Instructions
// between vectors and scalar
//
// Same contract as the vector-vector FMA form above, but the multiplier
// operand is the scalar FPU register fa1 (.vf encoding); only v0
// (destination/addend) and v2 are loaded from vectors.
#define UTEST_RVV_FMA_VF_FORM_WITH_RES(instr_name, array, expect_res)     \
  TEST(RISCV_UTEST_##instr_name) {                                        \
    CcTest::InitializeVM();                                               \
    auto fn = [](MacroAssembler& assm) {                                  \
      __ VU.set(t0, VSew::E32, Vlmul::m1);                                \
      __ vfmv_vf(v0, fa0);                                                \
      __ vfmv_vf(v2, fa2);                                                \
      __ instr_name(v0, fa1, v2);                                         \
      __ vfmv_fs(fa0, v0);                                                \
    };                                                                    \
    for (float rs1_fval : array) {                                        \
      for (float rs2_fval : array) {                                      \
        for (float rs3_fval : array) {                                    \
          auto res =                                                      \
              GenAndRunTest<float, float>(rs1_fval, rs2_fval, rs3_fval, fn); \
          CHECK_FLOAT_EQ(expect_res, res);                                \
        }                                                                 \
      }                                                                   \
    }                                                                     \
  }
#define ARRAY_FLOAT compiler::ValueHelper::GetVector<float>()
// Expected values are std::fma over the three scalar inputs; the operand
// order and sign placement in each expression encode which variant is under
// test (madd/msub vs macc/msac, and their negated nm* counterparts).
UTEST_RVV_FMA_VV_FORM_WITH_RES(vfmadd_vv, ARRAY_FLOAT,
                               std::fma(rs2_fval, rs1_fval, rs3_fval))
UTEST_RVV_FMA_VF_FORM_WITH_RES(vfmadd_vf, ARRAY_FLOAT,
                               std::fma(rs2_fval, rs1_fval, rs3_fval))
UTEST_RVV_FMA_VV_FORM_WITH_RES(vfnmadd_vv, ARRAY_FLOAT,
                               std::fma(rs2_fval, -rs1_fval, -rs3_fval))
UTEST_RVV_FMA_VF_FORM_WITH_RES(vfnmadd_vf, ARRAY_FLOAT,
                               std::fma(rs2_fval, -rs1_fval, -rs3_fval))
UTEST_RVV_FMA_VV_FORM_WITH_RES(vfmsub_vv, ARRAY_FLOAT,
                               std::fma(rs2_fval, rs1_fval, -rs3_fval))
UTEST_RVV_FMA_VF_FORM_WITH_RES(vfmsub_vf, ARRAY_FLOAT,
                               std::fma(rs2_fval, rs1_fval, -rs3_fval))
UTEST_RVV_FMA_VV_FORM_WITH_RES(vfnmsub_vv, ARRAY_FLOAT,
                               std::fma(rs2_fval, -rs1_fval, rs3_fval))
UTEST_RVV_FMA_VF_FORM_WITH_RES(vfnmsub_vf, ARRAY_FLOAT,
                               std::fma(rs2_fval, -rs1_fval, rs3_fval))
// The *acc/*sac forms accumulate into the destination, so rs1 (loaded into
// the destination register) is the addend rather than a multiplicand.
UTEST_RVV_FMA_VV_FORM_WITH_RES(vfmacc_vv, ARRAY_FLOAT,
                               std::fma(rs2_fval, rs3_fval, rs1_fval))
UTEST_RVV_FMA_VF_FORM_WITH_RES(vfmacc_vf, ARRAY_FLOAT,
                               std::fma(rs2_fval, rs3_fval, rs1_fval))
UTEST_RVV_FMA_VV_FORM_WITH_RES(vfnmacc_vv, ARRAY_FLOAT,
                               std::fma(rs2_fval, -rs3_fval, -rs1_fval))
UTEST_RVV_FMA_VF_FORM_WITH_RES(vfnmacc_vf, ARRAY_FLOAT,
                               std::fma(rs2_fval, -rs3_fval, -rs1_fval))
UTEST_RVV_FMA_VV_FORM_WITH_RES(vfmsac_vv, ARRAY_FLOAT,
                               std::fma(rs2_fval, rs3_fval, -rs1_fval))
UTEST_RVV_FMA_VF_FORM_WITH_RES(vfmsac_vf, ARRAY_FLOAT,
                               std::fma(rs2_fval, rs3_fval, -rs1_fval))
UTEST_RVV_FMA_VV_FORM_WITH_RES(vfnmsac_vv, ARRAY_FLOAT,
                               std::fma(rs2_fval, -rs3_fval, rs1_fval))
UTEST_RVV_FMA_VF_FORM_WITH_RES(vfnmsac_vf, ARRAY_FLOAT,
                               std::fma(rs2_fval, -rs3_fval, rs1_fval))
// Tear down the FMA helper macros. The macros defined above end in
// _WITH_RES; the previous `#undef UTEST_RVV_FMA_*_FORM` lines targeted
// names that were never defined and left the _WITH_RES macros alive.
#undef ARRAY_FLOAT
#undef UTEST_RVV_FMA_VV_FORM_WITH_RES
#undef UTEST_RVV_FMA_VF_FORM_WITH_RES
// Calculates the rounding increment "r" used by RVV fixed-point rounding:
// for a right shift by `shift`, the rounded result is (v >> shift) + r,
// where r depends on the rounding mode in `vxrm` (0 = round-to-nearest-up,
// 1 = round-to-nearest-even, 2 = round-down, 3 = round-to-odd) and on the
// bits of `v` that are shifted out.
static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift) {
  // Guard first: for shift == 0 nothing is shifted out, and shifts wider
  // than the operand round to zero. Checking before the bit extractions
  // keeps their indices in range for shift > 64 (previously the extraction
  // below ran first, performing an out-of-range shift before returning 0).
  // NOTE(review): shift == 64 would still extract bit 64 below, which is
  // out of range for a 64-bit value; callers only use shift < 32 — confirm
  // before widening the usage.
  if (shift == 0 || shift > 64) {
    return 0;
  }
  // d: the bit that becomes the LSB of the shifted result.
  // (was: extract64(v, shift, 1))
  uint8_t d = unsigned_bitextract_64(shift, shift, v);
  // d1: the most significant bit shifted out.
  // (was: extract64(v, shift - 1, 1))
  uint8_t d1 = unsigned_bitextract_64(shift - 1, shift - 1, v);
  // D1: all `shift` bits shifted out.
  // (was: extract64(v, 0, shift))
  uint64_t D1 = unsigned_bitextract_64(shift - 1, 0, v);
  if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
    return d1;
  } else if (vxrm == 1) { /* round-to-nearest-even */
    if (shift > 1) {
      // D2: the shifted-out bits below the most significant one.
      // (was: extract64(v, 0, shift - 1))
      uint64_t D2 = unsigned_bitextract_64(shift - 2, 0, v);
      return d1 & ((D2 != 0) | d);
    } else {
      return d1 & d;
    }
  } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
    return !d & (D1 != 0);
  }
  return 0; /* round-down (truncate) */
}
// Tests for vector narrowing fixed-point clip instructions (vnclip/vnclipu):
// each E32 source element (group m2) is right-shifted with vxrm rounding and
// saturated to E16 (group m1). The reference result per element is
// (x >> shift) + get_round(vxrm, x, shift), saturated to 16 bits.
//
// Bug fix: the fill loops previously iterated by value (`for (auto src :
// t.src)`), so src/ref were never written and the test compared all-zero
// buffers — making it pass vacuously. They must bind by reference.
#define UTEST_RVV_VNCLIP_E32M2_E16M1(instr_name, sign)                       \
  TEST(RISCV_UTEST_##instr_name##_E32M2_E16M1) {                             \
    constexpr RoundingMode vxrm = RNE;                                       \
    CcTest::InitializeVM();                                                  \
    Isolate* isolate = CcTest::i_isolate();                                  \
    HandleScope scope(isolate);                                              \
    for (int32_t x : compiler::ValueHelper::GetVector<int>()) {              \
      for (uint8_t shift = 0; shift < 32; shift++) {                         \
        auto fn = [shift](MacroAssembler& assm) {                            \
          __ VU.set(vxrm);                                                   \
          __ vsetvli(t0, zero_reg, VSew::E32, Vlmul::m2);                    \
          __ vl(v2, a0, 0, VSew::E32);                                       \
          __ vsetvli(t0, zero_reg, VSew::E16, Vlmul::m1);                    \
          __ instr_name(v4, v2, shift);                                      \
          __ vs(v4, a1, 0, VSew::E16);                                       \
        };                                                                   \
        struct T {                                                           \
          sign##int32_t src[8] = {0};                                        \
          sign##int16_t dst[8] = {0};                                        \
          sign##int16_t ref[8] = {0};                                        \
        } t;                                                                 \
        for (auto& src : t.src) src = static_cast<sign##int32_t>(x);         \
        for (auto& ref : t.ref)                                              \
          ref = base::saturated_cast<sign##int16_t>(                         \
              (static_cast<sign##int32_t>(x) >> shift) +                     \
              get_round(vxrm, x, shift));                                    \
        GenAndRunTest<int32_t, int64_t>((int64_t)t.src, (int64_t)t.dst, fn); \
        CHECK(!memcmp(t.dst, t.ref, sizeof(t.ref)));                         \
      }                                                                      \
    }                                                                        \
  }
// Instantiate the narrowing clip tests for the unsigned (vnclipu) and
// signed (vnclip) immediate-shift forms; `sign` expands to the `u` prefix
// of uint32_t/uint16_t or to nothing for the signed variant.
UTEST_RVV_VNCLIP_E32M2_E16M1(vnclipu_vi, u)
UTEST_RVV_VNCLIP_E32M2_E16M1(vnclip_vi, )
#undef UTEST_RVV_VNCLIP_E32M2_E16M1
#endif
#undef __

View File

@ -580,6 +580,40 @@ TEST(RVV) {
COMPARE(vadc_vv(v7, v9, v6), "406483d7 vadc.vvm v7, v6, v9");
COMPARE(vadc_vx(v7, t6, v9), "409fc3d7 vadc.vxm v7, v9, t6");
COMPARE(vadc_vi(v7, 5, v9), "4092b3d7 vadc.vim v7, v9, 5");
COMPARE(vfadd_vv(v17, v14, v28), "02ee18d7 vfadd.vv v17, v14, v28");
COMPARE(vfsub_vv(v17, v14, v28), "0aee18d7 vfsub.vv v17, v14, v28");
COMPARE(vfmul_vv(v17, v14, v28), "92ee18d7 vfmul.vv v17, v14, v28");
COMPARE(vfdiv_vv(v17, v14, v28), "82ee18d7 vfdiv.vv v17, v14, v28");
COMPARE(vfmv_vf(v17, fa5), "5e07d8d7 vfmv.v.f v17, fa5");
COMPARE(vfmv_fs(fa5, v17), "431017d7 vfmv.f.s fa5, v17");
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
COMPARE(vfmadd_vv(v17, v14, v28), "a3c718d7 vfmadd.vv v17, v14, v28");
COMPARE(vfnmadd_vv(v17, v14, v28), "a7c718d7 vfnmadd.vv v17, v14, v28");
COMPARE(vfmsub_vv(v17, v14, v28), "abc718d7 vfmsub.vv v17, v14, v28");
COMPARE(vfnmsub_vv(v17, v14, v28), "afc718d7 vfnmsub.vv v17, v14, v28");
COMPARE(vfmacc_vv(v17, v14, v28), "b3c718d7 vfmacc.vv v17, v14, v28");
COMPARE(vfnmacc_vv(v17, v14, v28), "b7c718d7 vfnmacc.vv v17, v14, v28");
COMPARE(vfmsac_vv(v17, v14, v28), "bbc718d7 vfmsac.vv v17, v14, v28");
COMPARE(vfnmsac_vv(v17, v14, v28), "bfc718d7 vfnmsac.vv v17, v14, v28");
COMPARE(vfmadd_vf(v17, fa5, v28), "a3c7d8d7 vfmadd.vf v17, fa5, v28");
COMPARE(vfnmadd_vf(v17, fa5, v28), "a7c7d8d7 vfnmadd.vf v17, fa5, v28");
COMPARE(vfmsub_vf(v17, fa5, v28), "abc7d8d7 vfmsub.vf v17, fa5, v28");
COMPARE(vfnmsub_vf(v17, fa5, v28), "afc7d8d7 vfnmsub.vf v17, fa5, v28");
COMPARE(vfmacc_vf(v17, fa5, v28), "b3c7d8d7 vfmacc.vf v17, fa5, v28");
COMPARE(vfnmacc_vf(v17, fa5, v28), "b7c7d8d7 vfnmacc.vf v17, fa5, v28");
COMPARE(vfmsac_vf(v17, fa5, v28), "bbc7d8d7 vfmsac.vf v17, fa5, v28");
COMPARE(vfnmsac_vf(v17, fa5, v28), "bfc7d8d7 vfnmsac.vf v17, fa5, v28");
// Vector Narrowing Fixed-Point Clip Instructions
COMPARE(vnclip_vi(v17, v14, 5), "bee2b8d7 vnclip.wi v17, v14, 5");
COMPARE(vnclip_vx(v17, v14, a5), "bee7c8d7 vnclip.wx v17, v14, a5");
COMPARE(vnclip_vv(v17, v14, v28), "beee08d7 vnclip.wv v17, v14, v28");
COMPARE(vnclipu_vi(v17, v14, 5), "bae2b8d7 vnclipu.wi v17, v14, 5");
COMPARE(vnclipu_vx(v17, v14, a5), "bae7c8d7 vnclipu.wx v17, v14, a5");
COMPARE(vnclipu_vv(v17, v14, v28), "baee08d7 vnclipu.wv v17, v14, v28");
VERIFY_RUN();
}
#endif

View File

@ -328,6 +328,11 @@ GeneratedCode<Signature> AssembleCode(Func assemble) {
return GeneratedCode<Signature>::FromCode(*AssembleCodeImpl(assemble));
}
// Maps any NaN (quiet or signaling, any payload) to the canonical quiet NaN
// so float comparisons in tests do not depend on NaN bit patterns; non-NaN
// values (including infinities and signed zeros) pass through unchanged.
// Uses std::isnan explicitly: the unqualified `isnan` relied on the C macro
// or a transitive declaration, which is not portable for float/double
// overload resolution.
template <typename T>
T UseCanonicalNan(T x) {
  return std::isnan(x) ? std::numeric_limits<T>::quiet_NaN() : x;
}
} // namespace internal
} // namespace v8