diff --git a/riscv/decode.h b/riscv/decode.h index be310c3b..c3feec3e 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -282,21 +282,29 @@ class wait_for_interrupt_t {}; #define invalid_pc(pc) ((pc) & 1) /* Convenience wrappers to simplify softfloat code sequences */ +#define isBoxedF16(r) (isBoxedF32(r) && ((uint64_t)((r.v[0] >> 16) + 1) == ((uint64_t)1 << 48))) +#define unboxF16(r) (isBoxedF16(r) ? (uint32_t)r.v[0] : defaultNaNF16UI) #define isBoxedF32(r) (isBoxedF64(r) && ((uint32_t)((r.v[0] >> 32) + 1) == 0)) #define unboxF32(r) (isBoxedF32(r) ? (uint32_t)r.v[0] : defaultNaNF32UI) #define isBoxedF64(r) ((r.v[1] + 1) == 0) #define unboxF64(r) (isBoxedF64(r) ? r.v[0] : defaultNaNF64UI) typedef float128_t freg_t; +inline float16_t f16(uint16_t v) { return { v }; } inline float32_t f32(uint32_t v) { return { v }; } inline float64_t f64(uint64_t v) { return { v }; } +inline float16_t f16(freg_t r) { return f16(unboxF16(r)); } inline float32_t f32(freg_t r) { return f32(unboxF32(r)); } inline float64_t f64(freg_t r) { return f64(unboxF64(r)); } inline float128_t f128(freg_t r) { return r; } +inline freg_t freg(float16_t f) { return { ((uint64_t)-1 << 16) | f.v, (uint64_t)-1 }; } inline freg_t freg(float32_t f) { return { ((uint64_t)-1 << 32) | f.v, (uint64_t)-1 }; } inline freg_t freg(float64_t f) { return { f.v, (uint64_t)-1 }; } inline freg_t freg(float128_t f) { return f; } +#define F16_SIGN ((uint16_t)1 << 15) #define F32_SIGN ((uint32_t)1 << 31) #define F64_SIGN ((uint64_t)1 << 63) +#define fsgnj16(a, b, n, x) \ + f16((f16(a).v & ~F16_SIGN) | ((((x) ? f16(a).v : (n) ? F16_SIGN : 0) ^ f16(b).v) & F16_SIGN)) #define fsgnj32(a, b, n, x) \ f32((f32(a).v & ~F32_SIGN) | ((((x) ? f32(a).v : (n) ? 
F32_SIGN : 0) ^ f32(b).v) & F32_SIGN)) #define fsgnj64(a, b, n, x) \ @@ -1654,7 +1662,8 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ // #define VI_VFP_COMMON \ require_fp; \ - require((P.VU.vsew == e32 && p->supports_extension('F')) || \ + require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || \ + (P.VU.vsew == e32 && p->supports_extension('F')) || \ (P.VU.vsew == e64 && p->supports_extension('D'))); \ require_vector;\ reg_t vl = P.VU.vl; \ @@ -1698,11 +1707,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ } \ P.VU.vstart = 0; \ -#define VI_VFP_LOOP_WIDE_END \ - } \ - P.VU.vstart = 0; \ - set_fp_exceptions; - #define VI_VFP_LOOP_REDUCTION_END(x) \ } \ P.VU.vstart = 0; \ @@ -1712,24 +1716,31 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ #define VI_VFP_LOOP_CMP_END \ switch(P.VU.vsew) { \ + case e16: \ case e32: \ case e64: { \ vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ break; \ } \ - case e16: \ default: \ require(0); \ break; \ }; \ } \ - P.VU.vstart = 0; \ - set_fp_exceptions; + P.VU.vstart = 0; -#define VI_VFP_VV_LOOP(BODY32, BODY64) \ +#define VI_VFP_VV_LOOP(BODY16, BODY32, BODY64) \ VI_CHECK_SSS(true); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t vs1 = P.VU.elt(rs1_num, i); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float32_t &vd = P.VU.elt(rd_num, i, true); \ float32_t vs1 = P.VU.elt(rs1_num, i); \ @@ -1746,7 +1757,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ set_fp_exceptions; \ break; \ }\ - case e16: \ default: \ require(0); \ break; \ @@ -1754,10 +1764,17 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_END -#define VI_VFP_VV_LOOP_REDUCTION(BODY32, BODY64) \ +#define VI_VFP_VV_LOOP_REDUCTION(BODY16, BODY32, BODY64) \ VI_CHECK_REDUCTION(false) \ VI_VFP_COMMON \ switch(P.VU.vsew) { \ + case e16: {\ + 
VI_VFP_LOOP_REDUCTION_BASE(16) \ + BODY16; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e16) \ + break; \ + }\ case e32: {\ VI_VFP_LOOP_REDUCTION_BASE(32) \ BODY32; \ @@ -1772,24 +1789,54 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ VI_VFP_LOOP_REDUCTION_END(e64) \ break; \ }\ - case e16: \ default: \ require(0); \ break; \ }; \ -#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \ - VI_VFP_LOOP_WIDE_REDUCTION_BASE \ - float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ - BODY; \ - set_fp_exceptions; \ - DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_REDUCTION_END(e64) +#define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY16, BODY32) \ + VI_CHECK_REDUCTION(true) \ + VI_VFP_COMMON \ + require((P.VU.vsew == e16 && p->supports_extension('F')) || \ + (P.VU.vsew == e32 && p->supports_extension('D'))); \ + switch(P.VU.vsew) { \ + case e16: {\ + float32_t vd_0 = P.VU.elt(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i(rs2_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e32) \ + break; \ + }\ + case e32: {\ + float64_t vd_0 = P.VU.elt(rs1_num, 0); \ + for (reg_t i=P.VU.vstart; i(rs2_num, i)); \ + BODY32; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e64) \ + break; \ + }\ + default: \ + require(0); \ + break; \ + }; \ -#define VI_VFP_VF_LOOP(BODY32, BODY64) \ +#define VI_VFP_VF_LOOP(BODY16, BODY32, BODY64) \ VI_CHECK_SSS(false); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float16_t &vd = P.VU.elt(rd_num, i, true); \ + float16_t rs1 = f16(READ_FREG(rs1_num)); \ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float32_t &vd = P.VU.elt(rd_num, i, true); \ float32_t rs1 = f32(READ_FREG(rs1_num)); \ @@ -1806,8 +1853,6 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); \ break; \ @@ -1815,10 +1860,18 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ DEBUG_RVV_FP_VF; \ VI_VFP_LOOP_END -#define 
VI_VFP_LOOP_CMP(BODY32, BODY64, is_vs1) \ +#define VI_VFP_LOOP_CMP(BODY16, BODY32, BODY64, is_vs1) \ VI_CHECK_MSS(is_vs1); \ VI_VFP_LOOP_CMP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float16_t vs2 = P.VU.elt(rs2_num, i); \ + float16_t vs1 = P.VU.elt(rs1_num, i); \ + float16_t rs1 = f16(READ_FREG(rs1_num)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float32_t vs2 = P.VU.elt(rs2_num, i); \ float32_t vs1 = P.VU.elt(rs1_num, i); \ @@ -1835,96 +1888,146 @@ for (reg_t i = 0; i < vlmax && P.VU.vl != 0; ++i) { \ set_fp_exceptions; \ break; \ }\ - case e16: \ default: \ require(0); \ break; \ }; \ VI_VFP_LOOP_CMP_END \ -#define VI_VFP_VF_LOOP_WIDE(BODY) \ +#define VI_VFP_VF_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DSS(false); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: { \ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = f16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + } \ case e32: {\ float64_t &vd = P.VU.elt(rd_num, i, true); \ float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ - BODY; \ + BODY32; \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); \ break; \ }; \ DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_WIDE_END + VI_VFP_LOOP_END -#define VI_VFP_VV_LOOP_WIDE(BODY) \ +#define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DSS(true); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = f16_to_f32(P.VU.elt(rs2_num, i)); \ + float32_t vs1 = f16_to_f32(P.VU.elt(rs1_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float64_t &vd = P.VU.elt(rd_num, i, true); \ float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ float64_t vs1 = f32_to_f64(P.VU.elt(rs1_num, i)); \ - BODY; \ + BODY32; \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); 
\ break; \ }; \ DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_WIDE_END + VI_VFP_LOOP_END -#define VI_VFP_WF_LOOP_WIDE(BODY) \ +#define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DDS(false); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t rs1 = f16_to_f32(f16(READ_FREG(rs1_num))); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float64_t &vd = P.VU.elt(rd_num, i, true); \ float64_t vs2 = P.VU.elt(rs2_num, i); \ float64_t rs1 = f32_to_f64(f32(READ_FREG(rs1_num))); \ - BODY; \ + BODY32; \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); \ }; \ DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_WIDE_END + VI_VFP_LOOP_END -#define VI_VFP_WV_LOOP_WIDE(BODY) \ +#define VI_VFP_WV_LOOP_WIDE(BODY16, BODY32) \ VI_CHECK_DDS(true); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ + case e16: {\ + float32_t &vd = P.VU.elt(rd_num, i, true); \ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t vs1 = f16_to_f32(P.VU.elt(rs1_num, i)); \ + BODY16; \ + set_fp_exceptions; \ + break; \ + }\ case e32: {\ float64_t &vd = P.VU.elt(rd_num, i, true); \ float64_t vs2 = P.VU.elt(rs2_num, i); \ float64_t vs1 = f32_to_f64(P.VU.elt(rs1_num, i)); \ - BODY; \ + BODY32; \ set_fp_exceptions; \ break; \ }\ - case e16: \ - case e8: \ default: \ require(0); \ }; \ DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_WIDE_END + VI_VFP_LOOP_END +#define VI_VFP_CVT_SCALE(BODY16, BODY32, is_widen) \ + if (is_widen) { \ + VI_CHECK_DSS(false);\ + } else { \ + VI_CHECK_SDS(false); \ + } \ + require((P.VU.vsew == e16 && p->supports_extension('F')) || \ + (P.VU.vsew == e32 && p->supports_extension('D'))); \ + switch(P.VU.vsew) { \ + case e16: {\ + VI_VFP_LOOP_BASE \ + BODY16 \ + set_fp_exceptions; \ + VI_VFP_LOOP_END \ + } \ + break; \ + case e32: {\ + VI_VFP_LOOP_BASE \ + BODY32 \ + set_fp_exceptions; \ + VI_VFP_LOOP_END \ + } \ + break; \ + default: \ + require(0); \ + break; \ + } #define 
DEBUG_START 0x0 #define DEBUG_END (0x1000 - 1) diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h index bdb7f75c..2b808e0c 100644 --- a/riscv/insns/vfadd_vf.h +++ b/riscv/insns/vfadd_vf.h @@ -1,6 +1,9 @@ // vfadd.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_add(rs1, vs2); +}, +{ vd = f32_add(rs1, vs2); }, { diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h index b333a8a3..ce94921d 100644 --- a/riscv/insns/vfadd_vv.h +++ b/riscv/insns/vfadd_vv.h @@ -1,6 +1,9 @@ // vfadd.vv vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_add(vs1, vs2); +}, +{ vd = f32_add(vs1, vs2); }, { diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h index 8ee092f5..1bd5f5ff 100644 --- a/riscv/insns/vfclass_v.h +++ b/riscv/insns/vfclass_v.h @@ -1,6 +1,9 @@ // vfclass.v vd, vs2, vm VI_VFP_VV_LOOP ({ + vd.v = f16_classify(vs2); +}, +{ vd.v = f32_classify(vs2); }, { diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h index fdaa697f..c53b0e1f 100644 --- a/riscv/insns/vfcvt_f_x_v.h +++ b/riscv/insns/vfcvt_f_x_v.h @@ -1,6 +1,10 @@ // vfcvt.f.x.v vd, vd2, vm VI_VFP_VF_LOOP ({ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i32_to_f16(vs2_i); +}, +{ auto vs2_i = P.VU.elt(rs2_num, i); vd = i32_to_f32(vs2_i); }, diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h index 01ea61ca..bd03768d 100644 --- a/riscv/insns/vfcvt_f_xu_v.h +++ b/riscv/insns/vfcvt_f_xu_v.h @@ -1,6 +1,10 @@ // vfcvt.f.xu.v vd, vd2, vm VI_VFP_VF_LOOP ({ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui32_to_f16(vs2_u); +}, +{ auto vs2_u = P.VU.elt(rs2_num, i); vd = ui32_to_f32(vs2_u); }, diff --git a/riscv/insns/vfcvt_rtz_x_f_v.h b/riscv/insns/vfcvt_rtz_x_f_v.h index 89c88edb..e7241bd0 100644 --- a/riscv/insns/vfcvt_rtz_x_f_v.h +++ b/riscv/insns/vfcvt_rtz_x_f_v.h @@ -1,6 +1,9 @@ -// vfcvt.x.f.v vd, vd2, vm +// vfcvt.rtz.x.f.v vd, vd2, vm VI_VFP_VF_LOOP ({ + P.VU.elt(rd_num, i) = f16_to_i16(vs2, softfloat_round_minMag, true); +}, +{ P.VU.elt(rd_num, i) = f32_to_i32(vs2, 
softfloat_round_minMag, true); }, { diff --git a/riscv/insns/vfcvt_rtz_xu_f_v.h b/riscv/insns/vfcvt_rtz_xu_f_v.h index fd75fd0c..d3d266d0 100644 --- a/riscv/insns/vfcvt_rtz_xu_f_v.h +++ b/riscv/insns/vfcvt_rtz_xu_f_v.h @@ -1,6 +1,9 @@ -// vfcvt.xu.f.v vd, vd2, vm +// vfcvt.rtz.xu.f.v vd, vd2, vm VI_VFP_VF_LOOP ({ + P.VU.elt(rd_num, i) = f16_to_ui16(vs2, softfloat_round_minMag, true); +}, +{ P.VU.elt(rd_num, i) = f32_to_ui32(vs2, softfloat_round_minMag, true); }, { diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h index 96bc481d..01e5ca17 100644 --- a/riscv/insns/vfcvt_x_f_v.h +++ b/riscv/insns/vfcvt_x_f_v.h @@ -1,6 +1,9 @@ // vfcvt.x.f.v vd, vd2, vm VI_VFP_VF_LOOP ({ + P.VU.elt(rd_num, i) = f16_to_i16(vs2, STATE.frm, true); +}, +{ P.VU.elt(rd_num, i) = f32_to_i32(vs2, STATE.frm, true); }, { diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h index 5f19f900..725cbda2 100644 --- a/riscv/insns/vfcvt_xu_f_v.h +++ b/riscv/insns/vfcvt_xu_f_v.h @@ -1,6 +1,9 @@ // vfcvt.xu.f.v vd, vd2, vm VI_VFP_VV_LOOP ({ + P.VU.elt(rd_num, i) = f16_to_ui16(vs2, STATE.frm, true); +}, +{ P.VU.elt(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true); }, { diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h index ce217304..a703ef02 100644 --- a/riscv/insns/vfdiv_vf.h +++ b/riscv/insns/vfdiv_vf.h @@ -1,6 +1,9 @@ // vfdiv.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_div(vs2, rs1); +}, +{ vd = f32_div(vs2, rs1); }, { diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h index 8a49a917..c66d7516 100644 --- a/riscv/insns/vfdiv_vv.h +++ b/riscv/insns/vfdiv_vv.h @@ -1,6 +1,9 @@ // vfdiv.vv vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_div(vs2, vs1); +}, +{ vd = f32_div(vs2, vs1); }, { diff --git a/riscv/insns/vfdot_vv.h b/riscv/insns/vfdot_vv.h index 85d0b8ac..8f5225ac 100644 --- a/riscv/insns/vfdot_vv.h +++ b/riscv/insns/vfdot_vv.h @@ -1,6 +1,9 @@ // vfdot.vv vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_add(vd, f16_mul(vs2, vs1)); +}, +{ vd = f32_add(vd, 
f32_mul(vs2, vs1)); }, { diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h index fca41840..61578d33 100644 --- a/riscv/insns/vfmacc_vf.h +++ b/riscv/insns/vfmacc_vf.h @@ -1,6 +1,9 @@ // vfmacc.vf vd, rs1, vs2, vm # vd[i] = +(vs2[i] * x[rs1]) + vd[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(rs1, vs2, vd); +}, +{ vd = f32_mulAdd(rs1, vs2, vd); }, { diff --git a/riscv/insns/vfmacc_vv.h b/riscv/insns/vfmacc_vv.h index f1caf33f..499b1d4d 100644 --- a/riscv/insns/vfmacc_vv.h +++ b/riscv/insns/vfmacc_vv.h @@ -1,6 +1,9 @@ // vfmacc.vv vd, rs1, vs2, vm # vd[i] = +(vs2[i] * vs1[i]) + vd[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(vs1, vs2, vd); +}, +{ vd = f32_mulAdd(vs1, vs2, vd); }, { diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h index 7707daec..2a014295 100644 --- a/riscv/insns/vfmadd_vf.h +++ b/riscv/insns/vfmadd_vf.h @@ -1,6 +1,9 @@ // vfmadd: vd[i] = +(vd[i] * f[rs1]) + vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(vd, rs1, vs2); +}, +{ vd = f32_mulAdd(vd, rs1, vs2); }, { diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h index a095c38d..7ef734f8 100644 --- a/riscv/insns/vfmadd_vv.h +++ b/riscv/insns/vfmadd_vv.h @@ -1,6 +1,9 @@ // vfmadd: vd[i] = +(vd[i] * vs1[i]) + vs2[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(vd, vs1, vs2); +}, +{ vd = f32_mulAdd(vd, vs1, vs2); }, { diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h index a8df880f..c4b74cbd 100644 --- a/riscv/insns/vfmax_vf.h +++ b/riscv/insns/vfmax_vf.h @@ -1,6 +1,9 @@ // vfmax VI_VFP_VF_LOOP ({ + vd = f16_max(vs2, rs1); +}, +{ vd = f32_max(vs2, rs1); }, { diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h index 2329e746..6439c899 100644 --- a/riscv/insns/vfmax_vv.h +++ b/riscv/insns/vfmax_vv.h @@ -1,6 +1,9 @@ // vfmax VI_VFP_VV_LOOP ({ + vd = f16_max(vs2, vs1); +}, +{ vd = f32_max(vs2, vs1); }, { diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h index 33dacfae..bd00e326 100644 --- a/riscv/insns/vfmerge_vfm.h +++ b/riscv/insns/vfmerge_vfm.h @@ -3,7 +3,20 
@@ VI_CHECK_SSS(false); VI_VFP_COMMON; switch(P.VU.vsew) { - case 32: + case e16: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f16(READ_FREG(rs1_num)); + auto vs2 = P.VU.elt(rs2_num, i); + + int midx = (P.VU.vmlen * i) / 64; + int mpos = (P.VU.vmlen * i) % 64; + bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1; + + vd = use_first ? rs1 : vs2; + } + break; + case e32: for (reg_t i=P.VU.vstart; i(rd_num, i, true); auto rs1 = f32(READ_FREG(rs1_num)); @@ -16,7 +29,7 @@ switch(P.VU.vsew) { vd = use_first ? rs1 : vs2; } break; - case 64: + case e64: for (reg_t i=P.VU.vstart; i(rd_num, i, true); auto rs1 = f64(READ_FREG(rs1_num)); diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h index a55462b6..1560cdf7 100644 --- a/riscv/insns/vfmin_vf.h +++ b/riscv/insns/vfmin_vf.h @@ -1,6 +1,9 @@ // vfmin vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_min(vs2, rs1); +}, +{ vd = f32_min(vs2, rs1); }, { diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h index 399b5631..882a7740 100644 --- a/riscv/insns/vfmin_vv.h +++ b/riscv/insns/vfmin_vv.h @@ -1,6 +1,9 @@ // vfmin vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_min(vs2, vs1); +}, +{ vd = f32_min(vs2, vs1); }, { diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h index 0f42560e..8af397b9 100644 --- a/riscv/insns/vfmsac_vf.h +++ b/riscv/insns/vfmsac_vf.h @@ -1,6 +1,9 @@ // vfmsac: vd[i] = +(f[rs1] * vs2[i]) - vd[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(rs1, vs2, f16(vd.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h index 9b4ed9f1..3bb50e50 100644 --- a/riscv/insns/vfmsac_vv.h +++ b/riscv/insns/vfmsac_vv.h @@ -1,6 +1,9 @@ // vfmsac: vd[i] = +(vs1[i] * vs2[i]) - vd[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(vs1, vs2, f16(vd.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h index bd968e3d..ab77b4c6 100644 --- 
a/riscv/insns/vfmsub_vf.h +++ b/riscv/insns/vfmsub_vf.h @@ -1,6 +1,9 @@ // vfmsub: vd[i] = +(vd[i] * f[rs1]) - vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(vd, rs1, f16(vs2.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h index f8e0b3dc..3cac937f 100644 --- a/riscv/insns/vfmsub_vv.h +++ b/riscv/insns/vfmsub_vv.h @@ -1,6 +1,9 @@ // vfmsub: vd[i] = +(vd[i] * vs1[i]) - vs2[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(vd, vs1, f16(vs2.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h index 9e7d481a..f5f63e49 100644 --- a/riscv/insns/vfmul_vf.h +++ b/riscv/insns/vfmul_vf.h @@ -1,6 +1,9 @@ // vfmul.vf vd, vs2, rs1, vm VI_VFP_VF_LOOP ({ + vd = f16_mul(vs2, rs1); +}, +{ vd = f32_mul(vs2, rs1); }, { diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h index 0e4d499e..7930fd03 100644 --- a/riscv/insns/vfmul_vv.h +++ b/riscv/insns/vfmul_vv.h @@ -1,6 +1,9 @@ // vfmul.vv vd, vs1, vs2, vm VI_VFP_VV_LOOP ({ + vd = f16_mul(vs1, vs2); +}, +{ vd = f32_mul(vs1, vs2); }, { diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h index 586b80ee..2f82ce8e 100644 --- a/riscv/insns/vfmv_f_s.h +++ b/riscv/insns/vfmv_f_s.h @@ -1,24 +1,31 @@ // vfmv_f_s: rd = vs2[0] (rs1=0) require_vector; require_fp; -require_extension('F'); -require(P.VU.vsew == e32 || P.VU.vsew == e64); +require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || + (P.VU.vsew == e32 && p->supports_extension('F')) || + (P.VU.vsew == e64 && p->supports_extension('D'))); reg_t rs2_num = insn.rs2(); uint64_t vs2_0 = 0; const reg_t sew = P.VU.vsew; switch(sew) { -case e32: - vs2_0 = P.VU.elt(rs2_num, 0); - break; -default: - vs2_0 = P.VU.elt(rs2_num, 0); - break; + case e16: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + case e32: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + case e64: + vs2_0 = P.VU.elt(rs2_num, 0); + break; + default: + require(0); 
+ break; } // nan_extened if (FLEN > sew) { - vs2_0 = vs2_0 | ~((uint64_t(1) << sew) - 1); + vs2_0 = vs2_0 | (UINT64_MAX << sew); } if (FLEN == 64) { diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h index 84c5a3f1..d29e2457 100644 --- a/riscv/insns/vfmv_s_f.h +++ b/riscv/insns/vfmv_s_f.h @@ -1,21 +1,29 @@ // vfmv_s_f: vd[0] = rs1 (vs2=0) require_vector; require_fp; -require_extension('F'); -require(P.VU.vsew >= e32 && P.VU.vsew <= 64); +require((P.VU.vsew == e16 && p->supports_extension(EXT_ZFH)) || + (P.VU.vsew == e32 && p->supports_extension('F')) || + (P.VU.vsew == e64 && p->supports_extension('D'))); + reg_t vl = P.VU.vl; if (vl > 0 && P.VU.vstart < vl) { reg_t rd_num = insn.rd(); switch(P.VU.vsew) { - case 32: + case e16: + if (FLEN == 64) + P.VU.elt(rd_num, 0, true) = f64(FRS1).v; + else + P.VU.elt(rd_num, 0, true) = f32(FRS1).v; + break; + case e32: if (FLEN == 64) P.VU.elt(rd_num, 0, true) = f64(FRS1).v; else P.VU.elt(rd_num, 0, true) = f32(FRS1).v; break; - case 64: + case e64: if (FLEN == 64) P.VU.elt(rd_num, 0, true) = f64(FRS1).v; else diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h index f85a26a6..e4cdec4c 100644 --- a/riscv/insns/vfmv_v_f.h +++ b/riscv/insns/vfmv_v_f.h @@ -2,6 +2,14 @@ require((insn.rd() & (P.VU.vlmul - 1)) == 0); VI_VFP_COMMON switch(P.VU.vsew) { + case e16: + for (reg_t i=P.VU.vstart; i(rd_num, i, true); + auto rs1 = f16(READ_FREG(rs1_num)); + + vd = rs1; + } + break; case e32: for (reg_t i=P.VU.vstart; i(rd_num, i, true); diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h index 3a9ead32..d5d5de63 100644 --- a/riscv/insns/vfncvt_f_f_w.h +++ b/riscv/insns/vfncvt_f_f_w.h @@ -1,10 +1,11 @@ // vfncvt.f.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_f16(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = 
f64_to_f32(vs2); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) + diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h index c3283953..25faa1e0 100644 --- a/riscv/insns/vfncvt_f_x_w.h +++ b/riscv/insns/vfncvt_f_x_w.h @@ -1,10 +1,10 @@ // vfncvt.f.x.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f16(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = i64_to_f32(vs2); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h index c6746440..a8b62fec 100644 --- a/riscv/insns/vfncvt_f_xu_w.h +++ b/riscv/insns/vfncvt_f_xu_w.h @@ -1,10 +1,10 @@ // vfncvt.f.xu.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f16(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = ui64_to_f32(vs2); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h index 0eae343f..864b7846 100644 --- a/riscv/insns/vfncvt_rod_f_f_w.h +++ b/riscv/insns/vfncvt_rod_f_f_w.h @@ -1,11 +1,12 @@ -// vfncvt.f.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.rod.f.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + softfloat_roundingMode = softfloat_round_odd; + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_f16(vs2); +}, +{ softfloat_roundingMode = softfloat_round_odd; auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_f32(vs2); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_rtz_x_f_w.h b/riscv/insns/vfncvt_rtz_x_f_w.h index 2fc8edf2..70c66d19 100644 --- 
a/riscv/insns/vfncvt_rtz_x_f_w.h +++ b/riscv/insns/vfncvt_rtz_x_f_w.h @@ -1,10 +1,10 @@ -// vfncvt.x.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.rtz.x.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i16(vs2, softfloat_round_minMag, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_i32(vs2, softfloat_round_minMag, true); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_rtz_xu_f_w.h b/riscv/insns/vfncvt_rtz_xu_f_w.h index 8cd68e29..98f1c4b6 100644 --- a/riscv/insns/vfncvt_rtz_xu_f_w.h +++ b/riscv/insns/vfncvt_rtz_xu_f_w.h @@ -1,10 +1,10 @@ -// vfncvt.xu.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.rtz.xu.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui16(vs2, softfloat_round_minMag, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_ui32(vs2, softfloat_round_minMag, true); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h index d6728bd1..3ddbb875 100644 --- a/riscv/insns/vfncvt_x_f_w.h +++ b/riscv/insns/vfncvt_x_f_w.h @@ -1,10 +1,10 @@ -// vfncvt.x.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.x.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_i16(vs2, STATE.frm, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_i32(vs2, STATE.frm, true); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h index f2cce241..3b7d4735 100644 --- a/riscv/insns/vfncvt_xu_f_w.h +++ 
b/riscv/insns/vfncvt_xu_f_w.h @@ -1,10 +1,10 @@ -// vfncvt.xu.f.v vd, vs2, vm -VI_CHECK_SDS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfncvt.xu.f.w vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f32_to_ui16(vs2, STATE.frm, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f64_to_ui32(vs2, STATE.frm, true); - set_fp_exceptions; -VI_VFP_LOOP_END +}, false) diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h index da58d3aa..1b99302c 100644 --- a/riscv/insns/vfnmacc_vf.h +++ b/riscv/insns/vfnmacc_vf.h @@ -1,6 +1,9 @@ // vfnmacc: vd[i] = -(f[rs1] * vs2[i]) - vd[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), f16(vd.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h index 62a14861..7200e063 100644 --- a/riscv/insns/vfnmacc_vv.h +++ b/riscv/insns/vfnmacc_vv.h @@ -1,6 +1,9 @@ // vfnmacc: vd[i] = -(vs1[i] * vs2[i]) - vd[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(f16(vs2.v ^ F16_SIGN), vs1, f16(vd.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h index b26f3775..cb9c217f 100644 --- a/riscv/insns/vfnmadd_vf.h +++ b/riscv/insns/vfnmadd_vf.h @@ -1,6 +1,9 @@ // vfnmadd: vd[i] = -(vd[i] * f[rs1]) - vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, f16(vs2.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h index fc705743..7160ed7d 100644 --- a/riscv/insns/vfnmadd_vv.h +++ b/riscv/insns/vfnmadd_vv.h @@ -1,6 +1,9 @@ // vfnmadd: vd[i] = -(vd[i] * vs1[i]) - vs2[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, f16(vs2.v ^ F16_SIGN)); +}, +{ vd = f32_mulAdd(f32(vd.v ^ 
F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN)); }, { diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h index b78d0cac..aa6baa30 100644 --- a/riscv/insns/vfnmsac_vf.h +++ b/riscv/insns/vfnmsac_vf.h @@ -1,6 +1,9 @@ // vfnmsac: vd[i] = -(f[rs1] * vs2[i]) + vd[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(rs1, f16(vs2.v ^ F16_SIGN), vd); +}, +{ vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd); }, { diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h index 795dc384..47db61d2 100644 --- a/riscv/insns/vfnmsac_vv.h +++ b/riscv/insns/vfnmsac_vv.h @@ -1,6 +1,9 @@ // vfnmsac.vv vd, vs1, vs2, vm # vd[i] = -(vs2[i] * vs1[i]) + vd[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(f16(vs1.v ^ F16_SIGN), vs2, vd); +}, +{ vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); }, { diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h index 6c6dc27d..43aa9e26 100644 --- a/riscv/insns/vfnmsub_vf.h +++ b/riscv/insns/vfnmsub_vf.h @@ -1,6 +1,9 @@ // vfnmsub: vd[i] = -(vd[i] * f[rs1]) + vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), rs1, vs2); +}, +{ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2); }, { diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h index ff4a9b59..2a45c8fc 100644 --- a/riscv/insns/vfnmsub_vv.h +++ b/riscv/insns/vfnmsub_vv.h @@ -1,6 +1,9 @@ // vfnmsub: vd[i] = -(vd[i] * vs1[i]) + vs2[i] VI_VFP_VV_LOOP ({ + vd = f16_mulAdd(f16(vd.v ^ F16_SIGN), vs1, vs2); +}, +{ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2); }, { diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h index 73ec5346..b283343c 100644 --- a/riscv/insns/vfrdiv_vf.h +++ b/riscv/insns/vfrdiv_vf.h @@ -1,6 +1,9 @@ // vfrdiv.vf vd, vs2, rs1, vm # scalar-vector, vd[i] = f[rs1]/vs2[i] VI_VFP_VF_LOOP ({ + vd = f16_div(rs1, vs2); +}, +{ vd = f32_div(rs1, vs2); }, { diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h index cb03dbb0..1ab856b0 100644 --- a/riscv/insns/vfredmax_vs.h +++ b/riscv/insns/vfredmax_vs.h @@ -1,6 +1,9 @@ // vfredmax vd, vs2, vs1 
VI_VFP_VV_LOOP_REDUCTION ({ + vd_0 = f16_max(vd_0, vs2); +}, +{ vd_0 = f32_max(vd_0, vs2); }, { diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h index 51c0bcb0..37256cbc 100644 --- a/riscv/insns/vfredmin_vs.h +++ b/riscv/insns/vfredmin_vs.h @@ -1,6 +1,9 @@ // vfredmin vd, vs2, vs1 VI_VFP_VV_LOOP_REDUCTION ({ + vd_0 = f16_min(vd_0, vs2); +}, +{ vd_0 = f32_min(vd_0, vs2); }, { diff --git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h index 7de6dbb4..4564f9c6 100644 --- a/riscv/insns/vfredosum_vs.h +++ b/riscv/insns/vfredosum_vs.h @@ -1,6 +1,9 @@ // vfredosum: vd[0] = sum( vs2[*] , vs1[0] ) VI_VFP_VV_LOOP_REDUCTION ({ + vd_0 = f16_add(vd_0, vs2); +}, +{ vd_0 = f32_add(vd_0, vs2); }, { diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h index 7b5cccce..d18b63ed 100644 --- a/riscv/insns/vfredsum_vs.h +++ b/riscv/insns/vfredsum_vs.h @@ -1,6 +1,9 @@ // vfredsum: vd[0] = sum( vs2[*] , vs1[0] ) VI_VFP_VV_LOOP_REDUCTION ({ + vd_0 = f16_add(vd_0, vs2); +}, +{ vd_0 = f32_add(vd_0, vs2); }, { diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h index d9a19863..7fb26a5b 100644 --- a/riscv/insns/vfrsub_vf.h +++ b/riscv/insns/vfrsub_vf.h @@ -1,6 +1,9 @@ // vfsub.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_sub(rs1, vs2); +}, +{ vd = f32_sub(rs1, vs2); }, { diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h index c7f731ba..ce06185e 100644 --- a/riscv/insns/vfsgnj_vf.h +++ b/riscv/insns/vfsgnj_vf.h @@ -1,6 +1,9 @@ // vfsgnj vd, vs2, vs1 VI_VFP_VF_LOOP ({ + vd = fsgnj16(vs2.v, rs1.v, false, false); +}, +{ vd = fsgnj32(vs2.v, rs1.v, false, false); }, { diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h index 12d3d437..722cb29c 100644 --- a/riscv/insns/vfsgnj_vv.h +++ b/riscv/insns/vfsgnj_vv.h @@ -1,6 +1,9 @@ // vfsgnj VI_VFP_VV_LOOP ({ + vd = fsgnj16(vs2.v, vs1.v, false, false); +}, +{ vd = fsgnj32(vs2.v, vs1.v, false, false); }, { diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h index 
45117481..e4894124 100644 --- a/riscv/insns/vfsgnjn_vf.h +++ b/riscv/insns/vfsgnjn_vf.h @@ -1,6 +1,9 @@ // vfsgnn VI_VFP_VF_LOOP ({ + vd = fsgnj16(vs2.v, rs1.v, true, false); +}, +{ vd = fsgnj32(vs2.v, rs1.v, true, false); }, { diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h index a16acf7a..1d91f691 100644 --- a/riscv/insns/vfsgnjn_vv.h +++ b/riscv/insns/vfsgnjn_vv.h @@ -1,6 +1,9 @@ // vfsgnn VI_VFP_VV_LOOP ({ + vd = fsgnj16(vs2.v, vs1.v, true, false); +}, +{ vd = fsgnj32(vs2.v, vs1.v, true, false); }, { diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h index c4230600..7be164c7 100644 --- a/riscv/insns/vfsgnjx_vf.h +++ b/riscv/insns/vfsgnjx_vf.h @@ -1,6 +1,9 @@ // vfsgnx VI_VFP_VF_LOOP ({ + vd = fsgnj16(vs2.v, rs1.v, false, true); +}, +{ vd = fsgnj32(vs2.v, rs1.v, false, true); }, { diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h index 9dbe0780..b04b8454 100644 --- a/riscv/insns/vfsgnjx_vv.h +++ b/riscv/insns/vfsgnjx_vv.h @@ -1,6 +1,9 @@ // vfsgnx VI_VFP_VV_LOOP ({ + vd = fsgnj16(vs2.v, vs1.v, false, true); +}, +{ vd = fsgnj32(vs2.v, vs1.v, false, true); }, { diff --git a/riscv/insns/vfslide1down_vf.h b/riscv/insns/vfslide1down_vf.h index 43fedb0d..b2ae3457 100644 --- a/riscv/insns/vfslide1down_vf.h +++ b/riscv/insns/vfslide1down_vf.h @@ -4,6 +4,11 @@ VI_CHECK_SLIDE(false); VI_VFP_LOOP_BASE if (i != vl - 1) { switch (P.VU.vsew) { + case e16: { + VI_XI_SLIDEDOWN_PARAMS(e16, 1); + vd = vs2; + } + break; case e32: { VI_XI_SLIDEDOWN_PARAMS(e32, 1); vd = vs2; @@ -17,6 +22,9 @@ if (i != vl - 1) { } } else { switch (P.VU.vsew) { + case e16: + P.VU.elt(rd_num, vl - 1) = f16(FRS1); + break; case e32: P.VU.elt(rd_num, vl - 1, true) = f32(FRS1); break; diff --git a/riscv/insns/vfslide1up_vf.h b/riscv/insns/vfslide1up_vf.h index e0174d65..7012fc1e 100644 --- a/riscv/insns/vfslide1up_vf.h +++ b/riscv/insns/vfslide1up_vf.h @@ -4,6 +4,11 @@ VI_CHECK_SLIDE(true); VI_VFP_LOOP_BASE if (i != 0) { switch (P.VU.vsew) { + case e16: { + 
VI_XI_SLIDEUP_PARAMS(e16, 1); + vd = vs2; + } + break; case e32: { VI_XI_SLIDEUP_PARAMS(e32, 1); vd = vs2; @@ -17,6 +22,9 @@ if (i != 0) { } } else { switch (P.VU.vsew) { + case e16: + P.VU.elt(rd_num, 0, true) = f16(FRS1); + break; case e32: P.VU.elt(rd_num, 0, true) = f32(FRS1); break; diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h index 4a36932e..f1213088 100644 --- a/riscv/insns/vfsqrt_v.h +++ b/riscv/insns/vfsqrt_v.h @@ -1,6 +1,9 @@ // vsqrt.v vd, vd2, vm VI_VFP_VV_LOOP ({ + vd = f16_sqrt(vs2); +}, +{ vd = f32_sqrt(vs2); }, { diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h index a4702d04..fc6877ca 100644 --- a/riscv/insns/vfsub_vf.h +++ b/riscv/insns/vfsub_vf.h @@ -1,6 +1,9 @@ // vfsub.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ + vd = f16_sub(vs2, rs1); +}, +{ vd = f32_sub(vs2, rs1); }, { diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h index 40545fb6..b0403f11 100644 --- a/riscv/insns/vfsub_vv.h +++ b/riscv/insns/vfsub_vv.h @@ -1,6 +1,9 @@ // vfsub.vv vd, vs2, vs1 VI_VFP_VV_LOOP ({ + vd = f16_sub(vs2, vs1); +}, +{ vd = f32_sub(vs2, vs1); }, { diff --git a/riscv/insns/vfwadd_vf.h b/riscv/insns/vfwadd_vf.h index ecac2029..b8249001 100644 --- a/riscv/insns/vfwadd_vf.h +++ b/riscv/insns/vfwadd_vf.h @@ -1,5 +1,8 @@ // vfwadd.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_add(vs2, rs1); +}, +{ vd = f64_add(vs2, rs1); }) diff --git a/riscv/insns/vfwadd_vv.h b/riscv/insns/vfwadd_vv.h index 0665cdcd..7255a50e 100644 --- a/riscv/insns/vfwadd_vv.h +++ b/riscv/insns/vfwadd_vv.h @@ -1,5 +1,8 @@ // vfwadd.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_add(vs2, vs1); +}, +{ vd = f64_add(vs2, vs1); }) diff --git a/riscv/insns/vfwadd_wf.h b/riscv/insns/vfwadd_wf.h index eb38d0db..021b17f0 100644 --- a/riscv/insns/vfwadd_wf.h +++ b/riscv/insns/vfwadd_wf.h @@ -1,5 +1,8 @@ // vfwadd.wf vd, vs2, vs1 VI_VFP_WF_LOOP_WIDE ({ + vd = f32_add(vs2, rs1); +}, +{ vd = f64_add(vs2, rs1); }) diff --git a/riscv/insns/vfwadd_wv.h b/riscv/insns/vfwadd_wv.h index 
675ef228..c1ed0389 100644 --- a/riscv/insns/vfwadd_wv.h +++ b/riscv/insns/vfwadd_wv.h @@ -1,5 +1,8 @@ // vfwadd.wv vd, vs2, vs1 VI_VFP_WV_LOOP_WIDE ({ + vd = f32_add(vs2, vs1); +}, +{ vd = f64_add(vs2, vs1); }) diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h index 424f0f41..abb68a42 100644 --- a/riscv/insns/vfwcvt_f_f_v.h +++ b/riscv/insns/vfwcvt_f_f_v.h @@ -1,10 +1,10 @@ // vfwcvt.f.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_f32(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_f64(vs2); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h index 1c05ab75..62cd8e8f 100644 --- a/riscv/insns/vfwcvt_f_x_v.h +++ b/riscv/insns/vfwcvt_f_x_v.h @@ -1,10 +1,10 @@ // vfwcvt.f.x.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = i32_to_f32(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = i32_to_f64(vs2); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h index fcb8c0c2..36a81edb 100644 --- a/riscv/insns/vfwcvt_f_xu_v.h +++ b/riscv/insns/vfwcvt_f_xu_v.h @@ -1,10 +1,10 @@ // vfwcvt.f.xu.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = ui32_to_f32(vs2); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = ui32_to_f64(vs2); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_rtz_x_f_v.h b/riscv/insns/vfwcvt_rtz_x_f_v.h index afbe939f..ad3a90d4 
100644 --- a/riscv/insns/vfwcvt_rtz_x_f_v.h +++ b/riscv/insns/vfwcvt_rtz_x_f_v.h @@ -1,10 +1,10 @@ -// vfwcvt.x.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfwcvt.rtz.x.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i32(vs2, softfloat_round_minMag, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_i64(vs2, softfloat_round_minMag, true); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_rtz_xu_f_v.h b/riscv/insns/vfwcvt_rtz_xu_f_v.h index e3e78fff..297008f8 100644 --- a/riscv/insns/vfwcvt_rtz_xu_f_v.h +++ b/riscv/insns/vfwcvt_rtz_xu_f_v.h @@ -1,10 +1,10 @@ -// vfwcvt.xu.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +// vfwcvt.rtz.xu.f.v vd, vs2, vm +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui32(vs2, softfloat_round_minMag, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_ui64(vs2, softfloat_round_minMag, true); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h index 3df8256b..9b798939 100644 --- a/riscv/insns/vfwcvt_x_f_v.h +++ b/riscv/insns/vfwcvt_x_f_v.h @@ -1,10 +1,10 @@ // vfwcvt.x.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_i32(vs2, STATE.frm, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_i64(vs2, STATE.frm, true); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h index 6e39b7dc..bfe16ff0 100644 --- a/riscv/insns/vfwcvt_xu_f_v.h +++ 
b/riscv/insns/vfwcvt_xu_f_v.h @@ -1,10 +1,10 @@ // vfwcvt.xu.f.v vd, vs2, vm -VI_CHECK_DSS(false); -if (P.VU.vsew == e32) - require(p->supports_extension('D')); - -VI_VFP_LOOP_BASE +VI_VFP_CVT_SCALE +({ + auto vs2 = P.VU.elt(rs2_num, i); + P.VU.elt(rd_num, i, true) = f16_to_ui32(vs2, STATE.frm, true); +}, +{ auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i, true) = f32_to_ui64(vs2, STATE.frm, true); - set_fp_exceptions; -VI_VFP_LOOP_WIDE_END +}, true) diff --git a/riscv/insns/vfwmacc_vf.h b/riscv/insns/vfwmacc_vf.h index 6ee011e7..441fa0a7 100644 --- a/riscv/insns/vfwmacc_vf.h +++ b/riscv/insns/vfwmacc_vf.h @@ -1,5 +1,8 @@ // vfwmacc.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mulAdd(rs1, vs2, vd); +}, +{ vd = f64_mulAdd(rs1, vs2, vd); }) diff --git a/riscv/insns/vfwmacc_vv.h b/riscv/insns/vfwmacc_vv.h index 99839afc..a654198b 100644 --- a/riscv/insns/vfwmacc_vv.h +++ b/riscv/insns/vfwmacc_vv.h @@ -1,5 +1,8 @@ // vfwmacc.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mulAdd(vs1, vs2, vd); +}, +{ vd = f64_mulAdd(vs1, vs2, vd); }) diff --git a/riscv/insns/vfwmsac_vf.h b/riscv/insns/vfwmsac_vf.h index ea8f0500..18010ff4 100644 --- a/riscv/insns/vfwmsac_vf.h +++ b/riscv/insns/vfwmsac_vf.h @@ -1,5 +1,8 @@ // vfwmsac.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfwmsac_vv.h b/riscv/insns/vfwmsac_vv.h index 81571706..9dc4073f 100644 --- a/riscv/insns/vfwmsac_vv.h +++ b/riscv/insns/vfwmsac_vv.h @@ -1,5 +1,8 @@ // vfwmsac.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfwmul_vf.h b/riscv/insns/vfwmul_vf.h index 884e66fe..2bb543f6 100644 --- a/riscv/insns/vfwmul_vf.h +++ b/riscv/insns/vfwmul_vf.h @@ -1,5 +1,8 @@ // vfwmul.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mul(vs2, rs1); +}, +{ vd = f64_mul(vs2, 
rs1); }) diff --git a/riscv/insns/vfwmul_vv.h b/riscv/insns/vfwmul_vv.h index f8e717e9..2ce38e62 100644 --- a/riscv/insns/vfwmul_vv.h +++ b/riscv/insns/vfwmul_vv.h @@ -1,5 +1,8 @@ // vfwmul.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mul(vs2, vs1); +}, +{ vd = f64_mul(vs2, vs1); }) diff --git a/riscv/insns/vfwnmacc_vf.h b/riscv/insns/vfwnmacc_vf.h index bccc24ff..038bda08 100644 --- a/riscv/insns/vfwnmacc_vf.h +++ b/riscv/insns/vfwnmacc_vf.h @@ -1,5 +1,8 @@ // vfwnmacc.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN)); +}, +{ vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfwnmacc_vv.h b/riscv/insns/vfwnmacc_vv.h index 3dcba1d7..bf863e04 100644 --- a/riscv/insns/vfwnmacc_vv.h +++ b/riscv/insns/vfwnmacc_vv.h @@ -1,5 +1,8 @@ // vfwnmacc.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, f32(vd.v ^ F32_SIGN)); +}, +{ vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfwnmsac_vf.h b/riscv/insns/vfwnmsac_vf.h index 32ef6241..1e288e1b 100644 --- a/riscv/insns/vfwnmsac_vf.h +++ b/riscv/insns/vfwnmsac_vf.h @@ -1,5 +1,8 @@ // vfwnmacc.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_mulAdd(f32(rs1.v ^ F32_SIGN), vs2, vd); +}, +{ vd = f64_mulAdd(f64(rs1.v ^ F64_SIGN), vs2, vd); }) diff --git a/riscv/insns/vfwnmsac_vv.h b/riscv/insns/vfwnmsac_vv.h index d2447e1a..ce97749e 100644 --- a/riscv/insns/vfwnmsac_vv.h +++ b/riscv/insns/vfwnmsac_vv.h @@ -1,5 +1,8 @@ // vfwnmsac.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); +}, +{ vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd); }) diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h index 49c9ebfa..22fb4dfb 100644 --- a/riscv/insns/vfwredosum_vs.h +++ b/riscv/insns/vfwredosum_vs.h @@ -1,8 +1,8 @@ // vfwredosum.vs vd, vs2, vs1 -require_vector; -require(P.VU.vsew * 2 <= P.VU.ELEN); 
-require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_VFP_VV_LOOP_WIDE_REDUCTION ({ + vd_0 = f32_add(vd_0, vs2); +}, +{ vd_0 = f64_add(vd_0, vs2); }) diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h index 3426ef85..277cf417 100644 --- a/riscv/insns/vfwredsum_vs.h +++ b/riscv/insns/vfwredsum_vs.h @@ -1,8 +1,8 @@ // vfwredsum.vs vd, vs2, vs1 -require_vector; -require(P.VU.vsew * 2 <= P.VU.ELEN); -require((insn.rs2() & (P.VU.vlmul - 1)) == 0); VI_VFP_VV_LOOP_WIDE_REDUCTION ({ + vd_0 = f32_add(vd_0, vs2); +}, +{ vd_0 = f64_add(vd_0, vs2); }) diff --git a/riscv/insns/vfwsub_vf.h b/riscv/insns/vfwsub_vf.h index 1d20c389..8c376884 100644 --- a/riscv/insns/vfwsub_vf.h +++ b/riscv/insns/vfwsub_vf.h @@ -1,5 +1,8 @@ // vfwsub.vf vd, vs2, rs1 VI_VFP_VF_LOOP_WIDE ({ + vd = f32_sub(vs2, rs1); +}, +{ vd = f64_sub(vs2, rs1); }) diff --git a/riscv/insns/vfwsub_vv.h b/riscv/insns/vfwsub_vv.h index 0a72feae..ce08e36a 100644 --- a/riscv/insns/vfwsub_vv.h +++ b/riscv/insns/vfwsub_vv.h @@ -1,5 +1,8 @@ // vfwsub.vv vd, vs2, vs1 VI_VFP_VV_LOOP_WIDE ({ + vd = f32_sub(vs2, vs1); +}, +{ vd = f64_sub(vs2, vs1); }) diff --git a/riscv/insns/vfwsub_wf.h b/riscv/insns/vfwsub_wf.h index fa3d7470..f6f47ca5 100644 --- a/riscv/insns/vfwsub_wf.h +++ b/riscv/insns/vfwsub_wf.h @@ -1,5 +1,8 @@ // vfwsub.wf vd, vs2, rs1 VI_VFP_WF_LOOP_WIDE ({ + vd = f32_sub(vs2, rs1); +}, +{ vd = f64_sub(vs2, rs1); }) diff --git a/riscv/insns/vfwsub_wv.h b/riscv/insns/vfwsub_wv.h index 4c6fcf60..eef904dc 100644 --- a/riscv/insns/vfwsub_wv.h +++ b/riscv/insns/vfwsub_wv.h @@ -1,5 +1,8 @@ // vfwsub.wv vd, vs2, vs1 VI_VFP_WV_LOOP_WIDE ({ + vd = f32_sub(vs2, vs1); +}, +{ vd = f64_sub(vs2, vs1); }) diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h index 766f0ab3..040f2b0b 100644 --- a/riscv/insns/vmfeq_vf.h +++ b/riscv/insns/vmfeq_vf.h @@ -1,6 +1,9 @@ // vmfeq.vf vd, vs2, fs1 VI_VFP_LOOP_CMP ({ + res = f16_eq(vs2, rs1); +}, +{ res = f32_eq(vs2, rs1); }, { diff --git a/riscv/insns/vmfeq_vv.h 
b/riscv/insns/vmfeq_vv.h index 19117fc7..fb24d132 100644 --- a/riscv/insns/vmfeq_vv.h +++ b/riscv/insns/vmfeq_vv.h @@ -1,6 +1,9 @@ // vmfeq.vv vd, vs2, vs1 VI_VFP_LOOP_CMP ({ + res = f16_eq(vs2, vs1); +}, +{ res = f32_eq(vs2, vs1); }, { diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h index c5f4c831..9e69855b 100644 --- a/riscv/insns/vmfge_vf.h +++ b/riscv/insns/vmfge_vf.h @@ -1,6 +1,9 @@ // vmfge.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_le(rs1, vs2); +}, +{ res = f32_le(rs1, vs2); }, { diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h index 53873002..bd5d99b7 100644 --- a/riscv/insns/vmfgt_vf.h +++ b/riscv/insns/vmfgt_vf.h @@ -1,6 +1,9 @@ // vmfgt.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_lt(rs1, vs2); +}, +{ res = f32_lt(rs1, vs2); }, { diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h index 1a3a7c4a..3d2852fc 100644 --- a/riscv/insns/vmfle_vf.h +++ b/riscv/insns/vmfle_vf.h @@ -1,6 +1,9 @@ // vmfle.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_le(vs2, rs1); +}, +{ res = f32_le(vs2, rs1); }, { diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h index 067f1a96..203ef210 100644 --- a/riscv/insns/vmfle_vv.h +++ b/riscv/insns/vmfle_vv.h @@ -1,6 +1,9 @@ // vmfle.vv vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_le(vs2, vs1); +}, +{ res = f32_le(vs2, vs1); }, { diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h index 248071d8..4780adc5 100644 --- a/riscv/insns/vmflt_vf.h +++ b/riscv/insns/vmflt_vf.h @@ -1,6 +1,9 @@ // vmflt.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = f16_lt(vs2, rs1); +}, +{ res = f32_lt(vs2, rs1); }, { diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h index 71895df6..cdfc3fae 100644 --- a/riscv/insns/vmflt_vv.h +++ b/riscv/insns/vmflt_vv.h @@ -1,6 +1,9 @@ // vmflt.vv vd, vs2, vs1 VI_VFP_LOOP_CMP ({ + res = f16_lt(vs2, vs1); +}, +{ res = f32_lt(vs2, vs1); }, { diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h index afccbcb3..84016993 100644 --- a/riscv/insns/vmfne_vf.h 
+++ b/riscv/insns/vmfne_vf.h @@ -1,6 +1,9 @@ // vmfne.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = !f16_eq(vs2, rs1); +}, +{ res = !f32_eq(vs2, rs1); }, { diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h index d5df60c8..50dfa9c0 100644 --- a/riscv/insns/vmfne_vv.h +++ b/riscv/insns/vmfne_vv.h @@ -1,6 +1,9 @@ // vmfne.vv vd, vs2, rs1 VI_VFP_LOOP_CMP ({ + res = !f16_eq(vs2, vs1); +}, +{ res = !f32_eq(vs2, vs1); }, { diff --git a/riscv/processor.cc b/riscv/processor.cc index 4240fcd6..bf3c40a1 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -273,6 +273,9 @@ void processor_t::parse_isa_string(const char* str) if (!supports_extension('I')) bad_isa_string(str, "'I' extension is required"); + if (supports_extension(EXT_ZFH) && !supports_extension('F')) + bad_isa_string(str, "'Zfh' extension requires 'F'"); + if (supports_extension('D') && !supports_extension('F')) bad_isa_string(str, "'D' extension requires 'F'"); diff --git a/softfloat/f16_classify.c b/softfloat/f16_classify.c new file mode 100755 index 00000000..9402ff13 --- /dev/null +++ b/softfloat/f16_classify.c @@ -0,0 +1,36 @@ + +#include +#include +#include "platform.h" +#include "internals.h" +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f16_classify( float16_t a ) +{ + union ui16_f16 uA; + uint_fast16_t uiA; + + uA.f = a; + uiA = uA.ui; + + uint_fast16_t infOrNaN = expF16UI( uiA ) == 0x1F; + uint_fast16_t subnormalOrZero = expF16UI( uiA ) == 0; + bool sign = signF16UI( uiA ); + bool fracZero = fracF16UI( uiA ) == 0; + bool isNaN = isNaNF16UI( uiA ); + bool isSNaN = softfloat_isSigNaNF16UI( uiA ); + + return + ( sign && infOrNaN && fracZero ) << 0 | + ( sign && !infOrNaN && !subnormalOrZero ) << 1 | + ( sign && subnormalOrZero && !fracZero ) << 2 | + ( sign && subnormalOrZero && fracZero ) << 3 | + ( !sign && infOrNaN && fracZero ) << 7 | + ( !sign && !infOrNaN && !subnormalOrZero ) << 6 | + ( !sign && subnormalOrZero && !fracZero ) << 5 | + ( !sign && subnormalOrZero && 
fracZero ) << 4 | + ( isNaN && isSNaN ) << 8 | + ( isNaN && !isSNaN ) << 9; +} + diff --git a/softfloat/f16_to_i16.c b/softfloat/f16_to_i16.c new file mode 100644 index 00000000..0ec7ce14 --- /dev/null +++ b/softfloat/f16_to_i16.c @@ -0,0 +1,55 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast16_t f16_to_i16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + int_fast32_t sig32 = f16_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f16_to_ui16.c b/softfloat/f16_to_ui16.c new file mode 100644 index 00000000..818328a1 --- /dev/null +++ b/softfloat/f16_to_ui16.c @@ -0,0 +1,52 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f16_to_ui16( float16_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast32_t sig32 = f16_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_i16.c b/softfloat/f32_to_i16.c new file mode 100644 index 00000000..14ebc6b4 --- /dev/null +++ b/softfloat/f32_to_i16.c @@ -0,0 +1,55 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. 
+ +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +int_fast16_t f32_to_i16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + int_fast32_t sig32 = f32_to_i32(a, roundingMode, exact); + + if (sig32 > INT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromPosOverflow; + } else if (sig32 < INT16_MIN) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return i16_fromNegOverflow; + } else { + return sig32; + } +} + diff --git a/softfloat/f32_to_ui16.c b/softfloat/f32_to_ui16.c new file mode 100644 index 00000000..a8f458da --- /dev/null +++ b/softfloat/f32_to_ui16.c @@ -0,0 +1,51 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include +#include "specialize.h" +#include "softfloat.h" + +uint_fast16_t f32_to_ui16( float32_t a, uint_fast8_t roundingMode, bool exact ) +{ + uint_fast32_t sig32 = f32_to_ui32(a, roundingMode, exact); + + if (sig32 > UINT16_MAX) { + softfloat_exceptionFlags |= softfloat_flag_invalid; + return ui16_fromPosOverflow; + } else { + return sig32; + } +} diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index ddc39e34..5579c68a 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -141,8 +141,10 @@ void i64_to_f128M( int64_t, float128_t * ); /*---------------------------------------------------------------------------- | 16-bit (half-precision) floating-point operations. 
*----------------------------------------------------------------------------*/ +uint_fast16_t f16_to_ui16( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool ); uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool ); +int_fast16_t f16_to_i16( float16_t, uint_fast8_t, bool ); int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool ); int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool ); uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool ); @@ -174,12 +176,15 @@ bool f16_eq_signaling( float16_t, float16_t ); bool f16_le_quiet( float16_t, float16_t ); bool f16_lt_quiet( float16_t, float16_t ); bool f16_isSignalingNaN( float16_t ); +uint_fast16_t f16_classify( float16_t ); /*---------------------------------------------------------------------------- | 32-bit (single-precision) floating-point operations. *----------------------------------------------------------------------------*/ +uint_fast16_t f32_to_ui16( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool ); uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool ); +int_fast16_t f32_to_i16( float32_t, uint_fast8_t, bool ); int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool ); int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool ); uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool ); diff --git a/softfloat/softfloat.mk.in b/softfloat/softfloat.mk.in index 52ee1dd0..56fc5602 100644 --- a/softfloat/softfloat.mk.in +++ b/softfloat/softfloat.mk.in @@ -38,6 +38,7 @@ softfloat_c_srcs = \ f128_to_ui64.c \ f128_to_ui64_r_minMag.c \ f16_add.c \ + f16_classify.c \ f16_div.c \ f16_eq.c \ f16_eq_signaling.c \ @@ -55,10 +56,12 @@ softfloat_c_srcs = \ f16_to_f128.c \ f16_to_f32.c \ f16_to_f64.c \ + f16_to_i16.c \ f16_to_i32.c \ f16_to_i32_r_minMag.c \ f16_to_i64.c \ f16_to_i64_r_minMag.c \ + f16_to_ui16.c \ f16_to_ui32.c \ f16_to_ui32_r_minMag.c \ f16_to_ui64.c \ @@ -82,10 +85,12 @@ softfloat_c_srcs = \ f32_to_f128.c \ f32_to_f16.c \ 
f32_to_f64.c \ + f32_to_i16.c \ f32_to_i32.c \ f32_to_i32_r_minMag.c \ f32_to_i64.c \ f32_to_i64_r_minMag.c \ + f32_to_ui16.c \ f32_to_ui32.c \ f32_to_ui32_r_minMag.c \ f32_to_ui64.c \ diff --git a/softfloat/specialize.h b/softfloat/specialize.h index 629d5185..8bd98570 100644 --- a/softfloat/specialize.h +++ b/softfloat/specialize.h @@ -55,6 +55,13 @@ extern "C" { | The values to return on conversions to 32-bit integer formats that raise an | invalid exception. *----------------------------------------------------------------------------*/ +#define ui16_fromPosOverflow 0xFFFF +#define ui16_fromNegOverflow 0 +#define ui16_fromNaN 0xFFFF +#define i16_fromPosOverflow 0x7FFF +#define i16_fromNegOverflow (-0x7FFF - 1) +#define i16_fromNaN 0x7FFF + #define ui32_fromPosOverflow 0xFFFFFFFF #define ui32_fromNegOverflow 0 #define ui32_fromNaN 0xFFFFFFFF diff --git a/spike_main/disasm.cc b/spike_main/disasm.cc index d8c9eb42..fe6be509 100644 --- a/spike_main/disasm.cc +++ b/spike_main/disasm.cc @@ -990,10 +990,10 @@ disassembler_t::disassembler_t(int xlen) match_##name##cvt_f_x_##suf, mask_##name##cvt_f_x_##suf, \ {&vd, &vs2, &opt, &vm})); \ add_insn(new disasm_insn_t(#name "cvt.rtz.xu.f." #suf, \ - match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ + match_##name##cvt_rtz_xu_f_##suf, mask_##name##cvt_rtz_xu_f_##suf, \ {&vd, &vs2, &opt, &vm})); \ add_insn(new disasm_insn_t(#name "cvt.rtz.x.f." #suf, \ - match_##name##cvt_x_f_##suf, mask_##name##cvt_x_f_##suf, \ + match_##name##cvt_rtz_x_f_##suf, mask_##name##cvt_rtz_x_f_##suf, \ {&vd, &vs2, &opt, &vm})); \ //OPFVV/OPFVF