Browse Source

Merge pull request #1321 from plctlab/plct-bf16-dev

Add support for BF16 extensions
pull/1375/head
Andrew Waterman 3 years ago
committed by GitHub
parent
commit
97fbfec1c2
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 3
      README.md
  2. 22
      disasm/disasm.cc
  3. 2
      riscv/decode_macros.h
  4. 23
      riscv/encoding.h
  5. 5
      riscv/insns/fcvt_bf16_s.h
  6. 5
      riscv/insns/fcvt_s_bf16.h
  7. 2
      riscv/insns/flh.h
  8. 2
      riscv/insns/fmv_h_x.h
  9. 2
      riscv/insns/fmv_x_h.h
  10. 2
      riscv/insns/fsh.h
  11. 5
      riscv/insns/vfncvtbf16_f_f_w.h
  12. 5
      riscv/insns/vfwcvtbf16_f_f_v.h
  13. 5
      riscv/insns/vfwmaccbf16_vf.h
  14. 5
      riscv/insns/vfwmaccbf16_vv.h
  15. 18
      riscv/isa_parser.cc
  16. 4
      riscv/isa_parser.h
  17. 22
      riscv/riscv.mk.in
  18. 76
      riscv/v_ext_macros.h
  19. 80
      softfloat/bf16_to_f32.c
  20. 92
      softfloat/f32_to_bf16.c
  21. 6
      softfloat/internals.h
  22. 113
      softfloat/s_roundPackToBF16.c
  23. 2
      softfloat/softfloat.h
  24. 3
      softfloat/softfloat.mk.in
  25. 1
      softfloat/softfloat_types.h
  26. 19
      softfloat/specialize.h

3
README.md

@ -49,6 +49,9 @@ Spike supports the following RISC-V ISA features:
- Zcd extension, v1.0
- Zcmp extension, v1.0
- Zcmt extension, v1.0
- Zfbfmin extension, v0.6
- Zvfbfmin extension, v0.6
- Zvfbfwma extension, v0.6
As a Spike extension, the remainder of the proposed
[Bit-Manipulation Extensions](https://github.com/riscv/riscv-bitmanip)

22
disasm/disasm.cc

@ -1196,14 +1196,17 @@ void disassembler_t::add_instructions(const isa_parser_t* isa)
}
if (isa->extension_enabled(EXT_ZFHMIN)) {
DEFINE_FLOAD(flh)
DEFINE_FSTORE(fsh)
DEFINE_FR1TYPE(fcvt_h_s);
DEFINE_FR1TYPE(fcvt_h_d);
DEFINE_FR1TYPE(fcvt_h_q);
DEFINE_FR1TYPE(fcvt_s_h);
DEFINE_FR1TYPE(fcvt_d_h);
DEFINE_FR1TYPE(fcvt_q_h);
}
if (isa->extension_enabled(EXT_INTERNAL_ZFH_MOVE)) {
DEFINE_FLOAD(flh)
DEFINE_FSTORE(fsh)
DEFINE_XFTYPE(fmv_h_x);
DEFINE_FXTYPE(fmv_x_h);
}
@ -1251,6 +1254,11 @@ void disassembler_t::add_instructions(const isa_parser_t* isa)
DEFINE_FX2TYPE(fle_q);
}
if (isa->extension_enabled(EXT_ZFBFMIN)) {
DEFINE_FR1TYPE(fcvt_bf16_s);
DEFINE_FR1TYPE(fcvt_s_bf16);
}
// ext-h
if (isa->extension_enabled('H')) {
DEFINE_XLOAD_BASE(hlv_b)
@ -1799,6 +1807,16 @@ void disassembler_t::add_instructions(const isa_parser_t* isa)
}
}
if (isa->extension_enabled(EXT_ZVFBFMIN)) {
DEFINE_VECTOR_V(vfncvtbf16_f_f_w);
DEFINE_VECTOR_V(vfwcvtbf16_f_f_v);
}
if (isa->extension_enabled(EXT_ZVFBFWMA)) {
DEFINE_VECTOR_VV(vfwmaccbf16_vv);
DEFINE_VECTOR_VF(vfwmaccbf16_vf);
}
#define DEFINE_PI3TYPE(code) add_pitype3_insn(this, #code, match_##code, mask_##code);
#define DEFINE_PI4TYPE(code) add_pitype4_insn(this, #code, match_##code, mask_##code);
#define DEFINE_PI5TYPE(code) add_pitype5_insn(this, #code, match_##code, mask_##code);

2
riscv/decode_macros.h

@ -74,6 +74,7 @@ typedef unsigned __int128 uint128_t;
#define FRS2 READ_FREG(insn.rs2())
#define FRS3 READ_FREG(insn.rs3())
#define FRS1_H READ_FREG_H(insn.rs1())
#define FRS1_BF FRS1_H
#define FRS1_F READ_FREG_F(insn.rs1())
#define FRS1_D READ_FREG_D(insn.rs1())
#define FRS2_H READ_FREG_H(insn.rs2())
@ -95,6 +96,7 @@ do { \
WRITE_FRD(value); \
} \
} while (0)
#define WRITE_FRD_BF WRITE_FRD_H
#define WRITE_FRD_F(value) \
do { \
if (p->extension_enabled(EXT_ZFINX)) \

23
riscv/encoding.h

@ -4,7 +4,7 @@
/*
* This file is auto-generated by running 'make' in
* https://github.com/riscv/riscv-opcodes (5adef50)
* https://github.com/riscv/riscv-opcodes (8d70e77)
*/
#ifndef RISCV_CSR_ENCODING_H
@ -751,6 +751,8 @@
#define MASK_FCLASS_Q 0xfff0707f
#define MATCH_FCLASS_S 0xe0001053
#define MASK_FCLASS_S 0xfff0707f
#define MATCH_FCVT_BF16_S 0x44800053
#define MASK_FCVT_BF16_S 0xfff0007f
#define MATCH_FCVT_D_H 0x42200053
#define MASK_FCVT_D_H 0xfff0007f
#define MATCH_FCVT_D_L 0xd2200053
@ -809,6 +811,8 @@
#define MASK_FCVT_Q_W 0xfff0007f
#define MATCH_FCVT_Q_WU 0xd6100053
#define MASK_FCVT_Q_WU 0xfff0007f
#define MATCH_FCVT_S_BF16 0x40600053
#define MASK_FCVT_S_BF16 0xfff0007f
#define MATCH_FCVT_S_D 0x40100053
#define MASK_FCVT_S_D 0xfff0007f
#define MATCH_FCVT_S_H 0x40200053
@ -2165,6 +2169,8 @@
#define MASK_VFNCVT_X_F_W 0xfc0ff07f
#define MATCH_VFNCVT_XU_F_W 0x48081057
#define MASK_VFNCVT_XU_F_W 0xfc0ff07f
#define MATCH_VFNCVTBF16_F_F_W 0x480e9057
#define MASK_VFNCVTBF16_F_F_W 0xfc0ff07f
#define MATCH_VFNMACC_VF 0xb4005057
#define MASK_VFNMACC_VF 0xfc00707f
#define MATCH_VFNMACC_VV 0xb4001057
@ -2241,10 +2247,16 @@
#define MASK_VFWCVT_X_F_V 0xfc0ff07f
#define MATCH_VFWCVT_XU_F_V 0x48041057
#define MASK_VFWCVT_XU_F_V 0xfc0ff07f
#define MATCH_VFWCVTBF16_F_F_V 0x48069057
#define MASK_VFWCVTBF16_F_F_V 0xfc0ff07f
#define MATCH_VFWMACC_VF 0xf0005057
#define MASK_VFWMACC_VF 0xfc00707f
#define MATCH_VFWMACC_VV 0xf0001057
#define MASK_VFWMACC_VV 0xfc00707f
#define MATCH_VFWMACCBF16_VF 0xec005057
#define MASK_VFWMACCBF16_VF 0xfc00707f
#define MATCH_VFWMACCBF16_VV 0xec001057
#define MASK_VFWMACCBF16_VV 0xfc00707f
#define MATCH_VFWMSAC_VF 0xf8005057
#define MASK_VFWMSAC_VF 0xfc00707f
#define MATCH_VFWMSAC_VV 0xf8001057
@ -3392,8 +3404,11 @@
#define INSN_FIELD_AMOOP 0xf8000000
#define INSN_FIELD_NF 0xe0000000
#define INSN_FIELD_SIMM5 0xf8000
#define INSN_FIELD_ZIMM5 0xf8000
#define INSN_FIELD_ZIMM10 0x3ff00000
#define INSN_FIELD_ZIMM11 0x7ff00000
#define INSN_FIELD_ZIMM6HI 0x4000000
#define INSN_FIELD_ZIMM6LO 0xf8000
#define INSN_FIELD_C_NZUIMM10 0x1fe0
#define INSN_FIELD_C_UIMM7LO 0x60
#define INSN_FIELD_C_UIMM7HI 0x1c00
@ -3636,6 +3651,7 @@ DECLARE_INSN(fclass_d, MATCH_FCLASS_D, MASK_FCLASS_D)
DECLARE_INSN(fclass_h, MATCH_FCLASS_H, MASK_FCLASS_H)
DECLARE_INSN(fclass_q, MATCH_FCLASS_Q, MASK_FCLASS_Q)
DECLARE_INSN(fclass_s, MATCH_FCLASS_S, MASK_FCLASS_S)
DECLARE_INSN(fcvt_bf16_s, MATCH_FCVT_BF16_S, MASK_FCVT_BF16_S)
DECLARE_INSN(fcvt_d_h, MATCH_FCVT_D_H, MASK_FCVT_D_H)
DECLARE_INSN(fcvt_d_l, MATCH_FCVT_D_L, MASK_FCVT_D_L)
DECLARE_INSN(fcvt_d_lu, MATCH_FCVT_D_LU, MASK_FCVT_D_LU)
@ -3665,6 +3681,7 @@ DECLARE_INSN(fcvt_q_lu, MATCH_FCVT_Q_LU, MASK_FCVT_Q_LU)
DECLARE_INSN(fcvt_q_s, MATCH_FCVT_Q_S, MASK_FCVT_Q_S)
DECLARE_INSN(fcvt_q_w, MATCH_FCVT_Q_W, MASK_FCVT_Q_W)
DECLARE_INSN(fcvt_q_wu, MATCH_FCVT_Q_WU, MASK_FCVT_Q_WU)
DECLARE_INSN(fcvt_s_bf16, MATCH_FCVT_S_BF16, MASK_FCVT_S_BF16)
DECLARE_INSN(fcvt_s_d, MATCH_FCVT_S_D, MASK_FCVT_S_D)
DECLARE_INSN(fcvt_s_h, MATCH_FCVT_S_H, MASK_FCVT_S_H)
DECLARE_INSN(fcvt_s_l, MATCH_FCVT_S_L, MASK_FCVT_S_L)
@ -4343,6 +4360,7 @@ DECLARE_INSN(vfncvt_rtz_x_f_w, MATCH_VFNCVT_RTZ_X_F_W, MASK_VFNCVT_RTZ_X_F_W)
DECLARE_INSN(vfncvt_rtz_xu_f_w, MATCH_VFNCVT_RTZ_XU_F_W, MASK_VFNCVT_RTZ_XU_F_W)
DECLARE_INSN(vfncvt_x_f_w, MATCH_VFNCVT_X_F_W, MASK_VFNCVT_X_F_W)
DECLARE_INSN(vfncvt_xu_f_w, MATCH_VFNCVT_XU_F_W, MASK_VFNCVT_XU_F_W)
DECLARE_INSN(vfncvtbf16_f_f_w, MATCH_VFNCVTBF16_F_F_W, MASK_VFNCVTBF16_F_F_W)
DECLARE_INSN(vfnmacc_vf, MATCH_VFNMACC_VF, MASK_VFNMACC_VF)
DECLARE_INSN(vfnmacc_vv, MATCH_VFNMACC_VV, MASK_VFNMACC_VV)
DECLARE_INSN(vfnmadd_vf, MATCH_VFNMADD_VF, MASK_VFNMADD_VF)
@ -4381,8 +4399,11 @@ DECLARE_INSN(vfwcvt_rtz_x_f_v, MATCH_VFWCVT_RTZ_X_F_V, MASK_VFWCVT_RTZ_X_F_V)
DECLARE_INSN(vfwcvt_rtz_xu_f_v, MATCH_VFWCVT_RTZ_XU_F_V, MASK_VFWCVT_RTZ_XU_F_V)
DECLARE_INSN(vfwcvt_x_f_v, MATCH_VFWCVT_X_F_V, MASK_VFWCVT_X_F_V)
DECLARE_INSN(vfwcvt_xu_f_v, MATCH_VFWCVT_XU_F_V, MASK_VFWCVT_XU_F_V)
DECLARE_INSN(vfwcvtbf16_f_f_v, MATCH_VFWCVTBF16_F_F_V, MASK_VFWCVTBF16_F_F_V)
DECLARE_INSN(vfwmacc_vf, MATCH_VFWMACC_VF, MASK_VFWMACC_VF)
DECLARE_INSN(vfwmacc_vv, MATCH_VFWMACC_VV, MASK_VFWMACC_VV)
DECLARE_INSN(vfwmaccbf16_vf, MATCH_VFWMACCBF16_VF, MASK_VFWMACCBF16_VF)
DECLARE_INSN(vfwmaccbf16_vv, MATCH_VFWMACCBF16_VV, MASK_VFWMACCBF16_VV)
DECLARE_INSN(vfwmsac_vf, MATCH_VFWMSAC_VF, MASK_VFWMSAC_VF)
DECLARE_INSN(vfwmsac_vv, MATCH_VFWMSAC_VV, MASK_VFWMSAC_VV)
DECLARE_INSN(vfwmul_vf, MATCH_VFWMUL_VF, MASK_VFWMUL_VF)

5
riscv/insns/fcvt_bf16_s.h

@ -0,0 +1,5 @@
// fcvt.bf16.s: narrow the single-precision operand read through FRS1_F to
// BF16 with f32_to_bf16 and write it back through WRITE_FRD_BF.
// Only legal when the Zfbfmin extension is enabled.
require_extension(EXT_ZFBFMIN);
require_fp;
// The conversion rounds, so softfloat's rounding mode is taken from RM first.
softfloat_roundingMode = RM;
WRITE_FRD_BF(f32_to_bf16(FRS1_F));
set_fp_exceptions;

5
riscv/insns/fcvt_s_bf16.h

@ -0,0 +1,5 @@
// fcvt.s.bf16: widen the BF16 operand read through FRS1_BF to single
// precision with bf16_to_f32 and write it back through WRITE_FRD_F.
// Only legal when the Zfbfmin extension is enabled.
require_extension(EXT_ZFBFMIN);
require_fp;
softfloat_roundingMode = RM;
WRITE_FRD_F(bf16_to_f32(FRS1_BF));
set_fp_exceptions;

2
riscv/insns/flh.h

@ -1,3 +1,3 @@
require_extension(EXT_ZFHMIN);
require_extension(EXT_INTERNAL_ZFH_MOVE);
require_fp;
WRITE_FRD(f16(MMU.load<uint16_t>(RS1 + insn.i_imm())));

2
riscv/insns/fmv_h_x.h

@ -1,3 +1,3 @@
require_extension(EXT_ZFHMIN);
require_extension(EXT_INTERNAL_ZFH_MOVE);
require_fp;
WRITE_FRD(f16(RS1));

2
riscv/insns/fmv_x_h.h

@ -1,3 +1,3 @@
require_extension(EXT_ZFHMIN);
require_extension(EXT_INTERNAL_ZFH_MOVE);
require_fp;
WRITE_RD(sext32((int16_t)(FRS1.v[0])));

2
riscv/insns/fsh.h

@ -1,3 +1,3 @@
require_extension(EXT_ZFHMIN);
require_extension(EXT_INTERNAL_ZFH_MOVE);
require_fp;
MMU.store<uint16_t>(RS1 + insn.s_imm(), FRS2.v[0]);

5
riscv/insns/vfncvtbf16_f_f_w.h

@ -0,0 +1,5 @@
// vfncvtbf16.f.f.w vd, vs2, vm
// Narrowing convert: each source element is passed through f32_to_bf16
// (float32 -> BF16). The per-element check requires the Zvfbfmin extension.
VI_VFP_NCVT_BF16_TO_FP(
{ vd = f32_to_bf16(vs2); }, // BODY16
{ require_extension(EXT_ZVFBFMIN); } // CHECK16
)

5
riscv/insns/vfwcvtbf16_f_f_v.h

@ -0,0 +1,5 @@
// vfwcvtbf16.f.f.v vd, vs2, vm
// Widening convert: each source element is passed through bf16_to_f32
// (BF16 -> float32). The per-element check requires the Zvfbfmin extension.
VI_VFP_WCVT_FP_TO_BF16(
{ vd = bf16_to_f32(vs2); }, // BODY16
{ require_extension(EXT_ZVFBFMIN); } // CHECK16
)

5
riscv/insns/vfwmaccbf16_vf.h

@ -0,0 +1,5 @@
// vfwmaccbf16.vf vd, vs2, rs1
// Widening multiply-accumulate with a scalar operand. The loop macro supplies
// rs1 and vs2 already widened from BF16 to float32 and vd as the float32
// accumulator, so the body is a single f32_mulAdd(rs1, vs2, vd).
VI_VFP_BF16_VF_LOOP_WIDE
({
vd = f32_mulAdd(rs1, vs2, vd);
})

5
riscv/insns/vfwmaccbf16_vv.h

@ -0,0 +1,5 @@
// vfwmaccbf16.vv vd, vs2, vs1
// Widening multiply-accumulate on two vector operands. The loop macro supplies
// vs1 and vs2 already widened from BF16 to float32 and vd as the float32
// accumulator, so the body is a single f32_mulAdd(vs1, vs2, vd).
VI_VFP_BF16_VV_LOOP_WIDE
({
vd = f32_mulAdd(vs1, vs2, vd);
})

18
riscv/isa_parser.cc

@ -139,6 +139,8 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
} else if (ext_str == "zdinx") {
extension_table[EXT_ZFINX] = true;
extension_table[EXT_ZDINX] = true;
} else if (ext_str == "zfbfmin") {
extension_table[EXT_ZFBFMIN] = true;
} else if (ext_str == "zfinx") {
extension_table[EXT_ZFINX] = true;
} else if (ext_str == "zhinx") {
@ -232,6 +234,10 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
extension_table[EXT_ZICOND] = true;
} else if (ext_str == "zihpm") {
extension_table[EXT_ZIHPM] = true;
} else if (ext_str == "zvfbfmin") {
extension_table[EXT_ZVFBFMIN] = true;
} else if (ext_str == "zvfbfwma") {
extension_table[EXT_ZVFBFWMA] = true;
} else if (ext_str == "sstc") {
extension_table[EXT_SSTC] = true;
} else if (ext_str[0] == 'x') {
@ -279,6 +285,18 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
bad_isa_string(str, ("can't parse: " + std::string(p)).c_str());
}
if (extension_table[EXT_ZFBFMIN] && !extension_table['F']) {
bad_isa_string(str, "'Zfbfmin' extension requires 'F' extension");
}
if ((extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZVFBFWMA]) && !extension_table['V']) {
bad_isa_string(str, "'Zvfbfmin/Zvfbfwma' extension requires 'V' extension");
}
if (extension_table[EXT_ZFBFMIN] || extension_table[EXT_ZVFBFMIN] || extension_table[EXT_ZFHMIN]) {
extension_table[EXT_INTERNAL_ZFH_MOVE] = true;
}
if (extension_table['C']) {
extension_table[EXT_ZCA] = true;
if (extension_table['F'] && max_xlen == 32)

4
riscv/isa_parser.h

@ -49,6 +49,7 @@ typedef enum {
EXT_SVINVAL,
EXT_ZDINX,
EXT_ZFA,
EXT_ZFBFMIN,
EXT_ZFINX,
EXT_ZHINX,
EXT_ZHINXMIN,
@ -57,6 +58,8 @@ typedef enum {
EXT_ZICNTR,
EXT_ZICOND,
EXT_ZIHPM,
EXT_ZVFBFMIN,
EXT_ZVFBFWMA,
EXT_XZBP,
EXT_XZBS,
EXT_XZBE,
@ -66,6 +69,7 @@ typedef enum {
EXT_XZBR,
EXT_XZBT,
EXT_SSTC,
EXT_INTERNAL_ZFH_MOVE,
NUM_ISA_EXTENSIONS
} isa_extension_t;

22
riscv/riscv.mk.in

@ -1356,8 +1356,25 @@ riscv_insn_ext_cmo = \
cbo_zero \
riscv_insn_ext_zicond = \
czero_eqz \
czero_nez \
czero_eqz \
czero_nez \
riscv_insn_ext_zfbfmin = \
fcvt_bf16_s \
fcvt_s_bf16 \
riscv_insn_ext_zvfbfmin = \
vfncvtbf16_f_f_w \
vfwcvtbf16_f_f_v \
riscv_insn_ext_zvfbfwma = \
vfwmaccbf16_vv \
vfwmaccbf16_vf \
riscv_insn_ext_bf16 = \
$(riscv_insn_ext_zfbfmin) \
$(riscv_insn_ext_zvfbfmin) \
$(riscv_insn_ext_zvfbfwma) \
riscv_insn_list = \
$(riscv_insn_ext_a) \
@ -1383,6 +1400,7 @@ riscv_insn_list = \
$(riscv_insn_smrnmi) \
$(riscv_insn_ext_cmo) \
$(riscv_insn_ext_zicond) \
$(riscv_insn_ext_bf16) \
riscv_gen_srcs = $(addsuffix .cc,$(riscv_insn_list))

76
riscv/v_ext_macros.h

@ -1488,11 +1488,27 @@ reg_t index[P.VU.vlmax]; \
reg_t UNUSED rs2_num = insn.rs2(); \
softfloat_roundingMode = STATE.frm->read();
// Shared prologue for BF16 vector floating-point loops.
// Runs the require_fp / require_vector(true) checks, insists on SEW == e16
// with the Zvfbfwma extension enabled, and rejects frm values of 5 or above.
// It then captures vl and the rd/rs1/rs2 register numbers and loads frm into
// softfloat_roundingMode.
// NOTE: the extension check is hard-wired to EXT_ZVFBFWMA, so this prologue
// only suits Zvfbfwma instructions.
#define VI_VFP_BF16_COMMON \
require_fp; \
require((P.VU.vsew == e16 && p->extension_enabled(EXT_ZVFBFWMA))); \
require_vector(true); \
require(STATE.frm->read() < 0x5); \
reg_t UNUSED vl = P.VU.vl->read(); \
reg_t UNUSED rd_num = insn.rd(); \
reg_t UNUSED rs1_num = insn.rs1(); \
reg_t UNUSED rs2_num = insn.rs2(); \
softfloat_roundingMode = STATE.frm->read();
#define VI_VFP_LOOP_BASE \
VI_VFP_COMMON \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_LOOP_ELEMENT_SKIP();
// Opens the element loop for BF16 vector FP ops: expands the BF16 prologue,
// then iterates i from vstart up to (but excluding) vl, invoking
// VI_LOOP_ELEMENT_SKIP() at the top of every iteration.
// The loop's opening brace is left unbalanced here; callers in this file
// close it with VI_VFP_LOOP_END.
#define VI_VFP_BF16_LOOP_BASE \
VI_VFP_BF16_COMMON \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_LOOP_ELEMENT_SKIP();
#define VI_VFP_LOOP_CMP_BASE \
VI_VFP_COMMON \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
@ -1818,6 +1834,25 @@ reg_t index[P.VU.vlmax]; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_END
// Widening BF16 vector-scalar loop.
// For SEW == e16, each iteration exposes to BODY:
//   vd  - float32_t reference to destination element i of register rd
//   vs2 - element i of register rs2, read as bfloat16_t and widened to float32
//   rs1 - the scalar FRS1_BF operand widened to float32
// BODY runs, then set_fp_exceptions. Any other SEW hits require(0).
// NOTE(review): VI_CHECK_DSS(false) is presumably the register-group overlap
// check for a non-vector second source - confirm against VI_CHECK_DSS.
#define VI_VFP_BF16_VF_LOOP_WIDE(BODY) \
VI_CHECK_DSS(false); \
VI_VFP_BF16_LOOP_BASE \
switch (P.VU.vsew) { \
case e16: { \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = bf16_to_f32(P.VU.elt<bfloat16_t>(rs2_num, i)); \
float32_t rs1 = bf16_to_f32(FRS1_BF); \
BODY; \
set_fp_exceptions; \
break; \
} \
default: \
require(0); \
break; \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_END
#define VI_VFP_VV_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DSS(true); \
VI_VFP_LOOP_BASE \
@ -1845,6 +1880,25 @@ reg_t index[P.VU.vlmax]; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_END
// Widening BF16 vector-vector loop.
// For SEW == e16, each iteration exposes to BODY:
//   vd  - float32_t reference to destination element i of register rd
//   vs2 - element i of register rs2, read as bfloat16_t and widened to float32
//   vs1 - element i of register rs1, read as bfloat16_t and widened to float32
// BODY runs, then set_fp_exceptions. Any other SEW hits require(0).
// NOTE(review): VI_CHECK_DSS(true) is presumably the register-group overlap
// check with a vector second source - confirm against VI_CHECK_DSS.
#define VI_VFP_BF16_VV_LOOP_WIDE(BODY) \
VI_CHECK_DSS(true); \
VI_VFP_BF16_LOOP_BASE \
switch (P.VU.vsew) { \
case e16: { \
float32_t &vd = P.VU.elt<float32_t>(rd_num, i, true); \
float32_t vs2 = bf16_to_f32(P.VU.elt<bfloat16_t>(rs2_num, i)); \
float32_t vs1 = bf16_to_f32(P.VU.elt<bfloat16_t>(rs1_num, i)); \
BODY; \
set_fp_exceptions; \
break; \
} \
default: \
require(0); \
break; \
}; \
DEBUG_RVV_FP_VV; \
VI_VFP_LOOP_END
#define VI_VFP_WF_LOOP_WIDE(BODY16, BODY32) \
VI_CHECK_DDS(false); \
VI_VFP_LOOP_BASE \
@ -1980,6 +2034,17 @@ reg_t index[P.VU.vlmax]; \
break; \
}
// Widening conversion wrapper for BF16 sources: only SEW == e16 is accepted
// (anything else hits require(0)); BODY and CHECK are forwarded to
// VI_VFP_CVT_LOOP with CVT_FP_TO_FP_PARAMS(16, 32).
// NOTE: despite the "FP_TO_BF16" suffix, the user of this macro,
// vfwcvtbf16.f.f.v, supplies a BF16 -> float32 body (bf16_to_f32).
// NOTE(review): the (16, 32) arguments are presumably source and destination
// element widths - confirm against CVT_FP_TO_FP_PARAMS.
#define VI_VFP_WCVT_FP_TO_BF16(BODY, CHECK) \
VI_CHECK_DSS(false); \
switch (P.VU.vsew) { \
case e16: \
{ VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(16, 32), CHECK, BODY); } \
break; \
default: \
require(0); \
break; \
}
#define VI_VFP_WCVT_INT_TO_FP(BODY8, BODY16, BODY32, \
CHECK8, CHECK16, CHECK32, \
sign) \
@ -2030,6 +2095,17 @@ reg_t index[P.VU.vlmax]; \
break; \
}
// Narrowing conversion wrapper producing BF16 results: only SEW == e16 is
// accepted (anything else hits require(0)); BODY and CHECK are forwarded to
// VI_VFP_CVT_LOOP with CVT_FP_TO_FP_PARAMS(32, 16).
// NOTE: despite the "BF16_TO_FP" suffix, the user of this macro,
// vfncvtbf16.f.f.w, supplies a float32 -> BF16 body (f32_to_bf16).
// NOTE(review): the (32, 16) arguments are presumably source and destination
// element widths - confirm against CVT_FP_TO_FP_PARAMS.
#define VI_VFP_NCVT_BF16_TO_FP(BODY, CHECK) \
VI_CHECK_SDS(false); \
switch (P.VU.vsew) { \
case e16: \
{ VI_VFP_CVT_LOOP(CVT_FP_TO_FP_PARAMS(32, 16), CHECK, BODY); } \
break; \
default: \
require(0); \
break; \
}
#define VI_VFP_NCVT_INT_TO_FP(BODY32, BODY64, \
CHECK32, CHECK64, \
sign) \

80
softfloat/bf16_to_f32.c

@ -0,0 +1,80 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Widens the bfloat16 value `a' to a 32-bit single-precision value.
| Finite inputs (including zeros and subnormals) keep their sign and exponent
| fields, with the 7-bit fraction moved to the top of the 23-bit fraction
| field.  Infinities map to infinities of the same sign.  NaN inputs go
| through softfloat_bf16UIToCommonNaN (which raises the invalid flag for a
| signaling NaN) and the result is whatever softfloat_commonNaNToF32UI yields.
*----------------------------------------------------------------------------*/
float32_t bf16_to_f32( bfloat16_t a )
{
    union ui16_f16 src;
    union ui32_f32 dst;
    uint_fast16_t srcBits;
    uint_fast16_t mantissa;
    int_fast16_t biasedExp;
    bool negative;
    struct commonNaN nanScratch;

    /* Split the input into its sign / exponent / fraction fields. */
    src.f = a;
    srcBits = src.ui;
    negative = signBF16UI( srcBits );
    biasedExp = expBF16UI( srcBits );
    mantissa = fracBF16UI( srcBits );

    if ( biasedExp != 0xFF ) {
        /* Finite value: same exponent, fraction shifted into the high bits. */
        dst.ui =
            packToF32UI( negative, biasedExp, (uint_fast32_t) mantissa<<16 );
    } else if ( ! mantissa ) {
        /* Infinity. */
        dst.ui = packToF32UI( negative, 0xFF, 0 );
    } else {
        /* NaN.  The conversion macro expands to an unbraced `if', so it   */
        /* must stay a complete statement inside this brace-enclosed block. */
        softfloat_bf16UIToCommonNaN( srcBits, &nanScratch );
        dst.ui = softfloat_commonNaNToF32UI( &nanScratch );
    }
    return dst.f;
}

92
softfloat/f32_to_bf16.c

@ -0,0 +1,92 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Narrows the 32-bit single-precision value `a' to bfloat16.
| NaNs go through softfloat_f32UIToCommonNaN / softfloat_commonNaNToBF16UI,
| infinities and zeros keep their sign, and every other value is rounded by
| softfloat_roundPackToBF16 (subnormal inputs are normalized first).
*----------------------------------------------------------------------------*/
bfloat16_t f32_to_bf16( float32_t a )
{
    union ui32_f32 src;
    union ui16_f16 dst;
    uint_fast32_t srcBits, mantissa;
    uint_fast16_t sig16;
    int_fast16_t biasedExp;
    bool negative;
    struct commonNaN nanScratch;
    struct exp16_sig32 normalized;

    /* Split the input into its sign / exponent / fraction fields. */
    src.f = a;
    srcBits = src.ui;
    negative = signF32UI( srcBits );
    biasedExp = expF32UI( srcBits );
    mantissa = fracF32UI( srcBits );

    /* NaN or infinity: no rounding involved. */
    if ( biasedExp == 0xFF ) {
        if ( mantissa ) {
            softfloat_f32UIToCommonNaN( srcBits, &nanScratch );
            dst.ui = softfloat_commonNaNToBF16UI( &nanScratch );
        } else {
            dst.ui = packToBF16UI( negative, 0xFF, 0 );
        }
        return dst.f;
    }

    if ( ! biasedExp ) {
        /* Signed zero passes straight through. */
        if ( ! mantissa ) {
            dst.ui = packToBF16UI( negative, 0, 0 );
            return dst.f;
        }
        /* Subnormal input: renormalize before rounding. */
        normalized = softfloat_normSubnormalF32Sig( mantissa );
        biasedExp = normalized.exp;
        mantissa = normalized.sig;
    }

    /* Keep the top 14 fraction bits; OR the 9 discarded bits into the LSB */
    /* as a sticky bit so the rounding step still sees inexactness.        */
    sig16 = (mantissa>>9) | ((mantissa & 0x1FF) != 0);

    /* 0x4000 is the leading significand bit; the exponent is passed one   */
    /* lower because that bit carries into the exponent field on packing.  */
    return softfloat_roundPackToBF16( negative, biasedExp - 1, sig16 | 0x4000 );
}

6
softfloat/internals.h

@ -89,6 +89,11 @@ int_fast64_t softfloat_roundMToI64( bool, uint32_t *, uint_fast8_t, bool );
#define fracF16UI( a ) ((a) & 0x03FF)
#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig))
/*----------------------------------------------------------------------------
| Field accessors for a bfloat16 bit pattern: bit 15 is the sign, bits 14..7
| hold the 8-bit exponent and bits 6..0 hold the 7-bit fraction.
| packToBF16UI combines the three fields by addition, so a carry out of `sig'
| propagates into the exponent field.
*----------------------------------------------------------------------------*/
#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF)
#define fracBF16UI( a ) ((a) & 0x07F)
#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig))
#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))
struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; };
@ -103,6 +108,7 @@ float16_t
softfloat_mulAddF16(
uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t );
bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t );
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))

113
softfloat/s_roundPackToBF16.c

@ -0,0 +1,113 @@
/*============================================================================
This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.
Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
California. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions, and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions, and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the University nor the names of its contributors may
be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
=============================================================================*/
#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"
/*----------------------------------------------------------------------------
| Rounds the significand `sig' according to softfloat_roundingMode and packs
| it with `sign' and `exp' into a bfloat16 value.
| The low 7 bits of `sig' are the rounding bits; the bits above them become
| the result fraction.  The caller in f32_to_bf16 passes `sig' with its
| leading bit at 0x4000 and `exp' one less than the biased exponent, because
| that leading bit carries into the exponent field when the fields are summed
| by packToBF16UI.
| Raises the inexact flag when rounding bits are discarded, overflow plus
| inexact when the result does not fit, and underflow for tiny inexact results.
*----------------------------------------------------------------------------*/
bfloat16_t
softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
{
uint_fast8_t roundingMode;
bool roundNearEven;
uint_fast8_t roundIncrement, roundBits;
bool isTiny;
uint_fast16_t uiZ;
union ui16_f16 uZ;
/*------------------------------------------------------------------------
| Choose the rounding increment: half an ULP (0x40) for the two
| round-to-nearest modes; 0x7F when the mode rounds this sign away from
| zero (round_min for negative, round_max for positive); otherwise 0.
*------------------------------------------------------------------------*/
roundingMode = softfloat_roundingMode;
roundNearEven = (roundingMode == softfloat_round_near_even);
roundIncrement = 0x40;
if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
roundIncrement =
(roundingMode
== (sign ? softfloat_round_min : softfloat_round_max))
? 0x7F
: 0;
}
roundBits = sig & 0x7F;
/*------------------------------------------------------------------------
| The unsigned compare catches both a negative exponent (subnormal
| result) and an exponent of 0xFD or more (possible overflow) in one test.
*------------------------------------------------------------------------*/
if ( 0xFD <= (unsigned int) exp ) {
if ( exp < 0 ) {
/*----------------------------------------------------------------
| Subnormal result: shift the significand right by -exp, jamming
| lost bits into the LSB, and recompute the rounding bits.
| Underflow is raised only when the result is tiny and inexact.
*----------------------------------------------------------------*/
isTiny =
(softfloat_detectTininess == softfloat_tininess_beforeRounding)
|| (exp < -1) || (sig + roundIncrement < 0x8000);
sig = softfloat_shiftRightJam32( sig, -exp );
exp = 0;
roundBits = sig & 0x7F;
if ( isTiny && roundBits ) {
softfloat_raiseFlags( softfloat_flag_underflow );
}
} else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
/*----------------------------------------------------------------
| Overflow: return infinity, or the bit pattern just below it
| (largest finite magnitude) when the rounding increment is zero.
*----------------------------------------------------------------*/
softfloat_raiseFlags(
softfloat_flag_overflow | softfloat_flag_inexact );
uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
goto uiZ;
}
}
/*------------------------------------------------------------------------
| Apply the rounding increment and drop the 7 rounding bits.
*------------------------------------------------------------------------*/
sig = (sig + roundIncrement)>>7;
if ( roundBits ) {
softfloat_exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
if ( roundingMode == softfloat_round_odd ) {
sig |= 1;
goto packReturn;
}
#endif
}
/* Exact tie (rounding bits == 0x40) in near-even mode: clear the LSB so */
/* the result is even. */
sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven);
/* A zero significand is packed with a zero exponent field. */
if ( ! sig ) exp = 0;
/*------------------------------------------------------------------------
| Pack the fields; any carry out of `sig' lands in the exponent field.
*------------------------------------------------------------------------*/
packReturn:
uiZ = packToBF16UI( sign, exp, sig );
uiZ:
uZ.ui = uiZ;
return uZ.f;
}

2
softfloat/softfloat.h

@ -154,6 +154,7 @@ uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool );
int_fast32_t f16_to_i32_r_minMag( float16_t, bool );
int_fast64_t f16_to_i64_r_minMag( float16_t, bool );
float32_t f16_to_f32( float16_t );
float32_t bf16_to_f32( bfloat16_t );
float64_t f16_to_f64( float16_t );
#ifdef SOFTFLOAT_FAST_INT64
extFloat80_t f16_to_extF80( float16_t );
@ -196,6 +197,7 @@ uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
float16_t f32_to_f16( float32_t );
bfloat16_t f32_to_bf16( float32_t );
float64_t f32_to_f64( float32_t );
#ifdef SOFTFLOAT_FAST_INT64
extFloat80_t f32_to_extF80( float32_t );

3
softfloat/softfloat.mk.in

@ -45,6 +45,7 @@ softfloat_c_srcs = \
f16_sqrt.c \
f16_sub.c \
f16_to_f128.c \
bf16_to_f32.c \
f16_to_f32.c \
f16_to_f64.c \
f16_to_i8.c \
@ -76,6 +77,7 @@ softfloat_c_srcs = \
f32_sqrt.c \
f32_sub.c \
f32_to_f128.c \
f32_to_bf16.c \
f32_to_f16.c \
f32_to_f64.c \
f32_to_i16.c \
@ -181,6 +183,7 @@ softfloat_c_srcs = \
s_roundMToUI64.c \
s_roundPackMToI64.c \
s_roundPackMToUI64.c \
s_roundPackToBF16.c \
s_roundPackToF128.c \
s_roundPackToF16.c \
s_roundPackToF32.c \

1
softfloat/softfloat_types.h

@ -48,6 +48,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
| (typically 'float' and 'double', and possibly 'long double').
*----------------------------------------------------------------------------*/
typedef struct { uint16_t v; } float16_t;
typedef float16_t bfloat16_t;
typedef struct { uint32_t v; } float32_t;
typedef struct { uint64_t v; } float64_t;
typedef struct { uint64_t v[2]; } float128_t;

19
softfloat/specialize.h

@ -98,6 +98,11 @@ struct commonNaN { char _unused; };
*----------------------------------------------------------------------------*/
#define defaultNaNF16UI 0x7E00
/*----------------------------------------------------------------------------
| The bit pattern for a default generated bfloat16 (BF16) floating-point NaN:
| sign clear, exponent field all ones, and the top fraction bit (0x0040) set.
*----------------------------------------------------------------------------*/
#define defaultNaNBF16UI 0x7FC0
/*----------------------------------------------------------------------------
| Returns true when 16-bit unsigned integer `uiA' has the bit pattern of a
| 16-bit floating-point signaling NaN.
@ -113,6 +118,20 @@ struct commonNaN { char _unused; };
*----------------------------------------------------------------------------*/
#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid )
/*----------------------------------------------------------------------------
| Assuming `uiA' has the bit pattern of a bfloat16 (BF16) NaN, converts this
| NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by `zPtr'.  If the NaN is a signaling NaN (top fraction
| bit 0x040 clear), the invalid exception is raised.
| This macro expands to an unbraced `if' statement and stores nothing through
| `zPtr'; use it only as a complete statement that is not directly followed
| by an `else'.
*----------------------------------------------------------------------------*/
#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x040) ) (void) (zPtr), softfloat_raiseFlags( softfloat_flag_invalid )
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a bfloat16 (BF16) NaN, and
| returns the bit pattern of this value as an unsigned integer.  `aPtr' is
| ignored: the result is always defaultNaNBF16UI.
*----------------------------------------------------------------------------*/
#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI)
/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 16-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.

Loading…
Cancel
Save