Browse Source

Merge branch 'master' into master

pull/2199/head
Natheir Abu-Dahab 1 month ago
committed by GitHub
parent
commit
8f38d28b43
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
  1. 2
      README.md
  2. 16
      disasm/disasm.cc
  3. 11
      disasm/isa_parser.cc
  4. 28
      fesvr/htif.cc
  5. 6
      riscv/csrs.cc
  6. 6
      riscv/dts.cc
  7. 9
      riscv/insns/vabd_vv.h
  8. 9
      riscv/insns/vabdu_vv.h
  9. 8
      riscv/insns/vabs_v.h
  10. 7
      riscv/insns/vpaire_vv.h
  11. 7
      riscv/insns/vpairo_vv.h
  12. 7
      riscv/insns/vunzipe_v.h
  13. 7
      riscv/insns/vunzipo_v.h
  14. 10
      riscv/insns/vwabda_vv.h
  15. 10
      riscv/insns/vwabdau_vv.h
  16. 7
      riscv/insns/vzip_vv.h
  17. 2
      riscv/isa_parser.h
  18. 16
      riscv/riscv.mk.in
  19. 64
      riscv/v_ext_macros.h
  20. 110
      riscv/zvzip_ext_macros.h

2
README.md

@ -66,6 +66,7 @@ Spike supports the following RISC-V ISA features:
- Zfbfmin extension, v0.6 - Zfbfmin extension, v0.6
- Zvfbfmin extension, v0.6 - Zvfbfmin extension, v0.6
- Zvfbfwma extension, v0.6 - Zvfbfwma extension, v0.6
- Zvabd extension, v0.7
- Zvbb extension, v1.0 - Zvbb extension, v1.0
- Zvbc extension, v1.0 - Zvbc extension, v1.0
- Zvkg extension, v1.0 - Zvkg extension, v1.0
@ -76,6 +77,7 @@ Spike supports the following RISC-V ISA features:
- Zvkt extension, v1.0 - Zvkt extension, v1.0
- Zvkn, Zvknc, Zvkng extension, v1.0 - Zvkn, Zvknc, Zvkng extension, v1.0
- Zvks, Zvksc, Zvksg extension, v1.0 - Zvks, Zvksc, Zvksg extension, v1.0
- Zvzip extension, v0.1
- Zicond extension, v1.0 - Zicond extension, v1.0
- Zilsd extension, v1.0 - Zilsd extension, v1.0
- Zclsd extension, v1.0 - Zclsd extension, v1.0

16
disasm/disasm.cc

@ -2175,6 +2175,22 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict)
DEFINE_R1TYPE(sm3p1); DEFINE_R1TYPE(sm3p1);
} }
if (ext_enabled(EXT_ZVABD)) {
DEFINE_VECTOR_V(vabs_v);
DEFINE_VECTOR_VV(vabd_vv);
DEFINE_VECTOR_VV(vabdu_vv);
DEFINE_VECTOR_MULTIPLYADD_VV(vwabda_vv);
DEFINE_VECTOR_MULTIPLYADD_VV(vwabdau_vv);
}
if (ext_enabled(EXT_ZVZIP)) {
DEFINE_VECTOR_VV(vzip_vv);
DEFINE_VECTOR_V(vunzipe_v);
DEFINE_VECTOR_V(vunzipo_v);
DEFINE_VECTOR_VV(vpaire_vv);
DEFINE_VECTOR_VV(vpairo_vv);
}
if (ext_enabled(EXT_ZVBB)) { if (ext_enabled(EXT_ZVBB)) {
#define DEFINE_VECTOR_VIU_ZIMM6(code) \ #define DEFINE_VECTOR_VIU_ZIMM6(code) \
add_vector_viu_z6_insn(this, #code, match_##code, mask_##code) add_vector_viu_z6_insn(this, #code, match_##code, mask_##code)

11
disasm/isa_parser.cc

@ -244,6 +244,8 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str)
extension_table[EXT_ZCLSD] = true; extension_table[EXT_ZCLSD] = true;
extension_table[EXT_ZCA] = true; extension_table[EXT_ZCA] = true;
extension_table[EXT_ZILSD] = true; extension_table[EXT_ZILSD] = true;
} else if (ext_str == "zvabd") {
extension_table[EXT_ZVABD] = true;
} else if (ext_str == "zvkb") { } else if (ext_str == "zvkb") {
extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVKB] = true;
} else if (ext_str == "zvbb") { } else if (ext_str == "zvbb") {
@ -262,18 +264,15 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str)
extension_table[EXT_ZVKG] = true; extension_table[EXT_ZVKG] = true;
} else if (ext_str == "zvkn") { } else if (ext_str == "zvkn") {
extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKNED] = true; extension_table[EXT_ZVKNED] = true;
extension_table[EXT_ZVKNHB] = true; extension_table[EXT_ZVKNHB] = true;
} else if (ext_str == "zvknc") { } else if (ext_str == "zvknc") {
extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVBC] = true; extension_table[EXT_ZVBC] = true;
extension_table[EXT_ZVKNED] = true; extension_table[EXT_ZVKNED] = true;
extension_table[EXT_ZVKNHB] = true; extension_table[EXT_ZVKNHB] = true;
} else if (ext_str == "zvkng") { } else if (ext_str == "zvkng") {
extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKG] = true; extension_table[EXT_ZVKG] = true;
extension_table[EXT_ZVKNED] = true; extension_table[EXT_ZVKNED] = true;
extension_table[EXT_ZVKNHB] = true; extension_table[EXT_ZVKNHB] = true;
@ -285,18 +284,15 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str)
extension_table[EXT_ZVKNHB] = true; extension_table[EXT_ZVKNHB] = true;
} else if (ext_str == "zvks") { } else if (ext_str == "zvks") {
extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKSED] = true; extension_table[EXT_ZVKSED] = true;
extension_table[EXT_ZVKSH] = true; extension_table[EXT_ZVKSH] = true;
} else if (ext_str == "zvksc") { } else if (ext_str == "zvksc") {
extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVBC] = true; extension_table[EXT_ZVBC] = true;
extension_table[EXT_ZVKSED] = true; extension_table[EXT_ZVKSED] = true;
extension_table[EXT_ZVKSH] = true; extension_table[EXT_ZVKSH] = true;
} else if (ext_str == "zvksg") { } else if (ext_str == "zvksg") {
extension_table[EXT_ZVKB] = true; extension_table[EXT_ZVKB] = true;
extension_table[EXT_ZVBB] = true;
extension_table[EXT_ZVKG] = true; extension_table[EXT_ZVKG] = true;
extension_table[EXT_ZVKSED] = true; extension_table[EXT_ZVKSED] = true;
extension_table[EXT_ZVKSH] = true; extension_table[EXT_ZVKSH] = true;
@ -325,6 +321,8 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str)
} else if (ext_str == "zvfwldot16bf") { } else if (ext_str == "zvfwldot16bf") {
extension_table[EXT_ZVFWLDOT16BF] = true; extension_table[EXT_ZVFWLDOT16BF] = true;
} else if (ext_str == "zvkt") { } else if (ext_str == "zvkt") {
} else if (ext_str == "zvzip") {
extension_table[EXT_ZVZIP] = true;
} else if (ext_str == "sstc") { } else if (ext_str == "sstc") {
extension_table[EXT_SSTC] = true; extension_table[EXT_SSTC] = true;
} else if (ext_str == "smcsrind") { } else if (ext_str == "smcsrind") {
@ -406,6 +404,7 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str)
} else if (ext_str == "ssaia") { } else if (ext_str == "ssaia") {
extension_table[EXT_SSAIA] = true; extension_table[EXT_SSAIA] = true;
extension_table[EXT_SSCSRIND] = true; extension_table[EXT_SSCSRIND] = true;
} else if (ext_str == "svvptc") {
} else if (ext_str[0] == 'x') { } else if (ext_str[0] == 'x') {
extension_table['X'] = true; extension_table['X'] = true;
if (ext_str.size() == 1) { if (ext_str.size() == 1) {

28
fesvr/htif.cc

@ -20,6 +20,11 @@
#include <unistd.h> #include <unistd.h>
#include <signal.h> #include <signal.h>
#include <getopt.h> #include <getopt.h>
#include <libgen.h>
#include <limits.h>
#ifdef __APPLE__
#include <mach-o/dyld.h>
#endif
/* Attempt to determine the execution prefix automatically. autoconf /* Attempt to determine the execution prefix automatically. autoconf
* sets PREFIX, and pconfigure sets __PCONFIGURE__PREFIX. */ * sets PREFIX, and pconfigure sets __PCONFIGURE__PREFIX. */
@ -35,6 +40,10 @@
# define TARGET_DIR "/" TARGET_ARCH "/bin/" # define TARGET_DIR "/" TARGET_ARCH "/bin/"
#endif #endif
#ifndef PROC_SELF_EXE
# define PROC_SELF_EXE "/proc/self/exe"
#endif
static volatile bool signal_exit = false; static volatile bool signal_exit = false;
static void handle_signal(int sig) static void handle_signal(int sig)
{ {
@ -107,6 +116,20 @@ static void bad_address(const std::string& situation, reg_t addr)
exit(-1); exit(-1);
} }
// Derive the installation prefix from the location of the running executable:
// the result is "<directory containing the binary>/..".
// Falls back to the configure-time PREFIX when the executable path cannot be
// determined.
static std::string get_prefix_from_arg0() {
  char exe_path[PATH_MAX];
#ifdef __APPLE__
  // _NSGetExecutablePath() takes the full buffer size and NUL-terminates the
  // path on success. It only updates bufsize when the buffer is too small, so
  // bufsize must not be interpreted as the length of the returned path.
  uint32_t bufsize = sizeof(exe_path);
  if (_NSGetExecutablePath(exe_path, &bufsize) != 0)
    return PREFIX;
#else
  // readlink() does not NUL-terminate, so keep one byte for the terminator.
  const ssize_t len = readlink(PROC_SELF_EXE, exe_path, sizeof(exe_path) - 1);
  if (len == -1)
    return PREFIX;
  exe_path[len] = '\0';
#endif
  // dirname() may modify its argument; exe_path is a private scratch buffer.
  return std::string(dirname(exe_path)) + "/..";
}
std::map<std::string, uint64_t> htif_t::load_payload(const std::string& payload, reg_t* entry, reg_t load_offset) std::map<std::string, uint64_t> htif_t::load_payload(const std::string& payload, reg_t* entry, reg_t load_offset)
{ {
std::string path; std::string path;
@ -114,14 +137,15 @@ std::map<std::string, uint64_t> htif_t::load_payload(const std::string& payload,
path = payload; path = payload;
else if (payload.find('/') == std::string::npos) else if (payload.find('/') == std::string::npos)
{ {
std::string test_path = PREFIX TARGET_DIR + payload; std::string prefix = get_prefix_from_arg0();
std::string test_path = prefix + TARGET_DIR + payload;
if (access(test_path.c_str(), F_OK) == 0) if (access(test_path.c_str(), F_OK) == 0)
path = test_path; path = test_path;
else else
throw std::runtime_error( throw std::runtime_error(
"could not open " + payload + "; searched paths:\n" + "could not open " + payload + "; searched paths:\n" +
"\t. (current directory)\n" + "\t. (current directory)\n" +
"\t" + PREFIX TARGET_DIR + " (based on configured --prefix and --with-target)" "\t" + prefix + TARGET_DIR + " (based on configured --prefix and --with-target)"
); );
} }

6
riscv/csrs.cc

@ -1787,10 +1787,12 @@ reg_t scountovf_csr_t::read() const noexcept {
val |= of << (i + FIRST_HPMCOUNTER); val |= of << (i + FIRST_HPMCOUNTER);
} }
/* In M and S modes, scountovf bit X is readable when mcounteren bit X is set, */ /* In M-mode, scountovf bit X is always readable. */
/* In S/HS-mode, scountovf bit X is readable when mcounteren bit X is set, */
/* and otherwise reads as zero. Similarly, in VS mode, scountovf bit X is readable */ /* and otherwise reads as zero. Similarly, in VS mode, scountovf bit X is readable */
/* when mcounteren bit X and hcounteren bit X are both set, and otherwise reads as zero. */ /* when mcounteren bit X and hcounteren bit X are both set, and otherwise reads as zero. */
val &= state->mcounteren->read(); if (state->prv < PRV_M)
val &= state->mcounteren->read();
if (state->v) if (state->v)
val &= state->hcounteren->read(); val &= state->hcounteren->read();
return val; return val;

6
riscv/dts.cc

@ -122,11 +122,11 @@ static std::string dtc_compile(const std::string& dtc_input, bool compile)
step = write(dtc_input_pipe[1], buf+done, len-done); step = write(dtc_input_pipe[1], buf+done, len-done);
if (step == -1) { if (step == -1) {
std::cerr << "Failed to write dtc_input: " << strerror(errno) << std::endl; std::cerr << "Failed to write dtc_input: " << strerror(errno) << std::endl;
exit(1); _exit(1);
} }
} }
close(dtc_input_pipe[1]); close(dtc_input_pipe[1]);
exit(0); _exit(0);
} }
pid_t dtc_output_pid; pid_t dtc_output_pid;
@ -146,7 +146,7 @@ static std::string dtc_compile(const std::string& dtc_input, bool compile)
close(dtc_output_pipe[1]); close(dtc_output_pipe[1]);
execlp(DTC, DTC, "-O", output_type, "-I", input_type, nullptr); execlp(DTC, DTC, "-O", output_type, "-I", input_type, nullptr);
std::cerr << "Failed to run " DTC ": " << strerror(errno) << std::endl; std::cerr << "Failed to run " DTC ": " << strerror(errno) << std::endl;
exit(1); _exit(1);
} }
close(dtc_input_pipe[1]); close(dtc_input_pipe[1]);

9
riscv/insns/vabd_vv.h

@ -0,0 +1,9 @@
// vabd.vv vd, vs1, vs2, vm
// Element-wise absolute difference: vd[i] = DO_ABD(vs1[i], vs2[i]).
// NOTE(review): VI_VV_LOOP presumably binds vs1/vs2 as signed elements — confirm.
require_zvabd;
// Only element widths up to e16 are accepted.
require(P.VU.vsew <= e16);
VI_VV_LOOP
({
vd = DO_ABD(vs1, vs2);
})

9
riscv/insns/vabdu_vv.h

@ -0,0 +1,9 @@
// vabdu.vv vd, vs1, vs2, vm
// Element-wise absolute difference: vd[i] = DO_ABD(vs1[i], vs2[i]).
// NOTE(review): VI_VV_ULOOP presumably binds vs1/vs2 as unsigned elements — confirm.
require_zvabd;
// Only element widths up to e16 are accepted.
require(P.VU.vsew <= e16);
VI_VV_ULOOP
({
vd = DO_ABD(vs1, vs2);
})

8
riscv/insns/vabs_v.h

@ -0,0 +1,8 @@
// vabs.v vd, vs2, vm
// Element-wise absolute value of vs2, written to vd.
require_zvabd;
VI_V_LOOP
({
  // Negate in unsigned arithmetic so the most-negative element wraps back to
  // itself instead of overflowing a signed type (undefined behavior for the
  // 32- and 64-bit element widths).
  vd = vs2 < 0 ? (decltype(vs2))(0 - (uint64_t)vs2) : vs2;
})

7
riscv/insns/vpaire_vv.h

@ -0,0 +1,7 @@
// vpaire.vv vd, vs1, vs2, vm
// Per ZVZIP_EXTRACT_VV: vd[i] = (i odd) ? vs1[i - 1] : vs2[i].
#include "zvzip_ext_macros.h"
require_zvzip;
VI_VPAIR_VV_LOOP(i - 1, i)

7
riscv/insns/vpairo_vv.h

@ -0,0 +1,7 @@
// vpairo.vv vd, vs1, vs2, vm
// Per ZVZIP_EXTRACT_VV: vd[i] = (i odd) ? vs1[i] : vs2[i + 1], where a source
// index >= VLMAX yields 0.
#include "zvzip_ext_macros.h"
require_zvzip;
VI_VPAIR_VV_LOOP(i, i + 1)

7
riscv/insns/vunzipe_v.h

@ -0,0 +1,7 @@
// vunzipe.v vd, vs2, vm
// Extract the even-indexed source elements: vd[i] = vs2[2 * i].
#include "zvzip_ext_macros.h"
require_zvzip;
VI_VUNZIP_V_LOOP(i << 1)

7
riscv/insns/vunzipo_v.h

@ -0,0 +1,7 @@
// vunzipo.v vd, vs2, vm
// Extract the odd-indexed source elements: vd[i] = vs2[2 * i + 1].
#include "zvzip_ext_macros.h"
require_zvzip;
VI_VUNZIP_V_LOOP((i << 1) + 1)

10
riscv/insns/vwabda_vv.h

@ -0,0 +1,10 @@
// vwabda.vv vd, vs2, vs1, vm
// Widening absolute-difference accumulate: the 2*SEW destination element
// receives DO_ABD(vs2[i], vs1[i]) + vd_w, with both sources converted through
// the signed (int) SEW and 2*SEW types by VI_WIDE_OP_MACRO_AND_ASSIGN.
require_zvabd;
// Only source element widths up to e16 are accepted.
require(P.VU.vsew <= e16);
VI_CHECK_DSS(true);
VI_VV_LOOP_WIDEN
({
VI_WIDE_OP_MACRO_AND_ASSIGN(vs2, vs1, vd_w, DO_ABD, int);
})

10
riscv/insns/vwabdau_vv.h

@ -0,0 +1,10 @@
// vwabdau.vv vd, vs2, vs1, vm
// Widening absolute-difference accumulate: the 2*SEW destination element
// receives DO_ABD(vs2[i], vs1[i]) + vd_w, with both sources converted through
// the unsigned (uint) SEW and 2*SEW types by VI_WIDE_OP_MACRO_AND_ASSIGN.
require_zvabd;
// Only source element widths up to e16 are accepted.
require(P.VU.vsew <= e16);
VI_CHECK_DSS(true);
VI_VV_LOOP_WIDEN
({
VI_WIDE_OP_MACRO_AND_ASSIGN(vs2, vs1, vd_w, DO_ABD, uint);
})

7
riscv/insns/vzip_vv.h

@ -0,0 +1,7 @@
// vzip.vv vd, vs1, vs2, vm
// Interleave two sources: for i in [vstart, 2 * vl),
// vd[i] = (i odd) ? vs1[i / 2] : vs2[i / 2].
#include "zvzip_ext_macros.h"
require_zvzip;
VI_VZIP_VV_LOOP(i >> 1, i >> 1)

2
riscv/isa_parser.h

@ -66,6 +66,7 @@ typedef enum {
EXT_ZICOND, EXT_ZICOND,
EXT_ZIHPM, EXT_ZIHPM,
EXT_ZILSD, EXT_ZILSD,
EXT_ZVABD,
EXT_ZVBB, EXT_ZVBB,
EXT_ZVKB, EXT_ZVKB,
EXT_ZVBC, EXT_ZVBC,
@ -87,6 +88,7 @@ typedef enum {
EXT_ZVQLDOT16I, EXT_ZVQLDOT16I,
EXT_ZVFQLDOT8F, EXT_ZVFQLDOT8F,
EXT_ZVFWLDOT16BF, EXT_ZVFWLDOT16BF,
EXT_ZVZIP,
EXT_SSTC, EXT_SSTC,
EXT_ZAAMO, EXT_ZAAMO,
EXT_ZALRSC, EXT_ZALRSC,

16
riscv/riscv.mk.in

@ -1136,6 +1136,20 @@ riscv_insn_ext_zvk = \
$(riscv_insn_ext_zvksed) \ $(riscv_insn_ext_zvksed) \
$(riscv_insn_ext_zvksh) \ $(riscv_insn_ext_zvksh) \
riscv_insn_ext_zvabd = \
vabs_v \
vabd_vv \
vabdu_vv \
vwabda_vv \
vwabdau_vv \
riscv_insn_ext_zvzip = \
vzip_vv \
vunzipe_v \
vunzipo_v \
vpaire_vv \
vpairo_vv \
riscv_insn_list = \ riscv_insn_list = \
$(riscv_insn_ext_i) \ $(riscv_insn_ext_i) \
$(riscv_insn_ext_c) \ $(riscv_insn_ext_c) \
@ -1166,6 +1180,8 @@ riscv_insn_list = \
$(riscv_insn_ext_zvk) \ $(riscv_insn_ext_zvk) \
$(riscv_insn_ext_zvbdot) \ $(riscv_insn_ext_zvbdot) \
$(riscv_insn_ext_zvldot) \ $(riscv_insn_ext_zvldot) \
$(riscv_insn_ext_zvabd) \
$(riscv_insn_ext_zvzip) \
$(riscv_insn_priv) \ $(riscv_insn_priv) \
$(riscv_insn_smrnmi) \ $(riscv_insn_smrnmi) \
$(riscv_insn_svinval) \ $(riscv_insn_svinval) \

64
riscv/v_ext_macros.h

@ -72,6 +72,12 @@ static inline bool is_overlapped_widen(const int astart, int asize,
#define require_zvfbfa_or_zvfhmin \ #define require_zvfbfa_or_zvfhmin \
require_extension(P.VU.altfmt ? EXT_ZVFBFA : EXT_ZVFHMIN); \ require_extension(P.VU.altfmt ? EXT_ZVFBFA : EXT_ZVFHMIN); \
// Guard used by the Zvabd instructions: performs require_vector(true) and then
// require_extension(EXT_ZVABD). The do/while(0) wrapper makes the macro behave
// as a single statement that needs a trailing semicolon.
#define require_zvabd \
do { \
require_vector(true); \
require_extension(EXT_ZVABD); \
} while (0)
#define VI_NARROW_CHECK_COMMON(factor) \ #define VI_NARROW_CHECK_COMMON(factor) \
require_vector(true); \ require_vector(true); \
require(P.VU.vflmul <= (8 / factor)); \ require(P.VU.vflmul <= (8 / factor)); \
@ -353,6 +359,10 @@ static inline bool is_overlapped_widen(const int astart, int asize,
type_sew_t<x>::type vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \ type_sew_t<x>::type vs1 = P.VU.elt<type_sew_t<x>::type>(rs1_num, i); \
type_sew_t<x>::type UNUSED vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i); type_sew_t<x>::type UNUSED vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
// Operand bindings for a single-source vector op at element width x:
//   vd  - reference to element i of register rd_num (fetched with elt's third
//         argument set to true, like the destination in the other *_PARAMS)
//   vs2 - copy of element i of register rs2_num
// Expects rd_num, rs2_num and i to be in scope at the expansion site.
#define V_PARAMS(x) \
type_sew_t<x>::type &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
type_sew_t<x>::type vs2 = P.VU.elt<type_sew_t<x>::type>(rs2_num, i);
#define VX_PARAMS(x) \ #define VX_PARAMS(x) \
type_sew_t<x>::type UNUSED &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \ type_sew_t<x>::type UNUSED &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
type_sew_t<x>::type rs1 = (type_sew_t<x>::type)RS1; \ type_sew_t<x>::type rs1 = (type_sew_t<x>::type)RS1; \
@ -723,6 +733,24 @@ static inline bool is_overlapped_widen(const int astart, int asize,
} \ } \
VI_LOOP_END VI_LOOP_END
// Loop wrapper for unary vector instructions (vd, vs2). After
// VI_CHECK_SSS(false), BODY is expanded inside the VI_LOOP_BASE / VI_LOOP_END
// loop with vd and vs2 bound by V_PARAMS for the current sew (e8, e16, e32 or
// e64). No BODY is run for any other sew value.
#define VI_V_LOOP(BODY) \
VI_CHECK_SSS(false) \
VI_LOOP_BASE \
if (sew == e8) { \
V_PARAMS(e8); \
BODY; \
} else if (sew == e16) { \
V_PARAMS(e16); \
BODY; \
} else if (sew == e32) { \
V_PARAMS(e32); \
BODY; \
} else if (sew == e64) { \
V_PARAMS(e64); \
BODY; \
} \
VI_LOOP_END
#define VI_VX_ULOOP(BODY) \ #define VI_VX_ULOOP(BODY) \
VI_CHECK_SSS(false) \ VI_CHECK_SSS(false) \
VI_LOOP_BASE \ VI_LOOP_BASE \
@ -974,6 +1002,28 @@ static inline bool is_overlapped_widen(const int astart, int asize,
break; \ break; \
} }
// Widening "op then accumulate" helper.
//   var0, var1 - source operands; each is cast to the SEW-wide `sign` type and
//                then to the 2*SEW-wide `sign` type before being passed to op
//   var2       - value added to op's result (callers pass vd_w)
//   op         - two-argument macro or function, e.g. DO_ABD
//   sign       - `int` or `uint`; selects the signed or unsigned fixed-width types
// Each case declares a local vd_w holding the current 2*SEW-wide element i of
// rd_num, then writes op(var0, var1) + var2 to that same destination element.
// vsew e8 widens 8->16, e16 widens 16->32, and every other vsew takes the
// 32->64 default branch.
#define VI_WIDE_OP_MACRO_AND_ASSIGN(var0, var1, var2, op, sign) \
switch (P.VU.vsew) { \
case e8: { \
sign##16_t UNUSED vd_w = P.VU.elt<sign##16_t>(rd_num, i); \
P.VU.elt<uint16_t>(rd_num, i, true) = \
op((sign##16_t)(sign##8_t)var0, (sign##16_t)(sign##8_t)var1) + var2; \
} \
break; \
case e16: { \
sign##32_t UNUSED vd_w = P.VU.elt<sign##32_t>(rd_num, i); \
P.VU.elt<uint32_t>(rd_num, i, true) = \
op((sign##32_t)(sign##16_t)var0, (sign##32_t)(sign##16_t)var1) + var2; \
} \
break; \
default: { \
sign##64_t UNUSED vd_w = P.VU.elt<sign##64_t>(rd_num, i); \
P.VU.elt<uint64_t>(rd_num, i, true) = \
op((sign##64_t)(sign##32_t)var0, (sign##64_t)(sign##32_t)var1) + var2; \
} \
break; \
}
#define VI_WIDE_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \ #define VI_WIDE_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \
switch (P.VU.vsew) { \ switch (P.VU.vsew) { \
case e8: { \ case e8: { \
@ -2195,9 +2245,9 @@ c_t generic_dot_product(const std::vector<a_t>& a, const std::vector<b_t>& b, c_
} }
#define ZVLDOT_LOOP(a_t, b_t, c_t, dot) \ #define ZVLDOT_LOOP(a_t, b_t, c_t, dot) \
std::vector<a_t> a(P.VU.vl->read(), a_t()); \ std::vector<a_t> a(P.VU.vlmax, a_t()); \
std::vector<b_t> b(P.VU.vl->read(), b_t()); \ std::vector<b_t> b(P.VU.vlmax, b_t()); \
for (reg_t i = 0; i < a.size(); i++) { \ for (reg_t i = 0, vl = P.VU.vl->read(); i < vl; i++) { \
VI_LOOP_ELEMENT_SKIP(); \ VI_LOOP_ELEMENT_SKIP(); \
a[i] = P.VU.elt<a_t>(insn.rs1(), i); \ a[i] = P.VU.elt<a_t>(insn.rs1(), i); \
b[i] = P.VU.elt<b_t>(insn.rs2(), i); \ b[i] = P.VU.elt<b_t>(insn.rs2(), i); \
@ -2217,9 +2267,9 @@ c_t generic_dot_product(const std::vector<a_t>& a, const std::vector<b_t>& b, c_
for (reg_t idx = 0; idx < 8; idx++) { \ for (reg_t idx = 0; idx < 8; idx++) { \
reg_t i = ci + idx; \ reg_t i = ci + idx; \
VI_LOOP_ELEMENT_SKIP(); \ VI_LOOP_ELEMENT_SKIP(); \
std::vector<a_t> a(P.VU.vl->read(), a_t()); \ std::vector<a_t> a(P.VU.vlmax, a_t()); \
std::vector<b_t> b(P.VU.vl->read(), b_t()); \ std::vector<b_t> b(P.VU.vlmax, b_t()); \
for (reg_t k = 0; k < a.size(); k++) { \ for (reg_t k = 0, vl = P.VU.vl->read(); k < vl; k++) { \
a[k] = P.VU.elt<a_t>(insn.rs1(), k); \ a[k] = P.VU.elt<a_t>(insn.rs1(), k); \
b[k] = P.VU.elt<b_t>(vs2 + idx, k); \ b[k] = P.VU.elt<b_t>(vs2 + idx, k); \
} \ } \
@ -2238,4 +2288,6 @@ c_t generic_dot_product(const std::vector<a_t>& a, const std::vector<b_t>& b, c_
#define P_SET_OV(ov) \ #define P_SET_OV(ov) \
if (ov) P.VU.vxsat->write(1); if (ov) P.VU.vxsat->write(1);
// Absolute difference of N and M: the larger minus the smaller. Each argument
// is evaluated more than once, so do not pass expressions with side effects.
#define DO_ABD(N, M) ((N) > (M) ? (N) - (M) : (M) - (N))
#endif #endif

110
riscv/zvzip_ext_macros.h

@ -0,0 +1,110 @@
// Helper macros shared by the Zvzip instruction bodies (vzip.vv, vunzipe.v,
// vunzipo.v, vpaire.vv, vpairo.vv).
#ifndef RISCV_ZVZIP_MACROS_H_
#define RISCV_ZVZIP_MACROS_H_
// Guard used by every Zvzip instruction: performs require_vector(true) and then
// require_extension(EXT_ZVZIP). The do/while(0) wrapper makes it one statement.
#define require_zvzip \
do { \
require_vector(true); \
require_extension(EXT_ZVZIP); \
} while (0)
// Single-source element move at element width x:
// vd (element i of rd_num) = element VS2_IDX of rs2_num.
// Expects rd_num, rs2_num and i to be in scope.
#define ZVZIP_EXTRACT_V(x, VS2_IDX) \
type_sew_t<x>::type UNUSED &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
vd = P.VU.elt<type_sew_t<x>::type>(rs2_num, VS2_IDX);
// Two-source element select at element width x: odd destination indices take
// element VS1_IDX of rs1_num, even ones take element VS2_IDX of rs2_num. A
// source index >= P.VU.vlmax produces 0 instead of a register read.
// Expects rd_num, rs1_num, rs2_num and i to be in scope.
#define ZVZIP_EXTRACT_VV(x, VS1_IDX, VS2_IDX) \
type_sew_t<x>::type UNUSED &vd = P.VU.elt<type_sew_t<x>::type>(rd_num, i, true); \
vd = (i & 1) \
? ((VS1_IDX) >= P.VU.vlmax ? 0 : P.VU.elt<type_sew_t<x>::type>(rs1_num, VS1_IDX)) \
: ((VS2_IDX) >= P.VU.vlmax ? 0 : P.VU.elt<type_sew_t<x>::type>(rs2_num, VS2_IDX));
// Legality checks for vzip.vv, whose destination group is twice the source
// LMUL: vflmul must be <= 4, rd is aligned to 2*vflmul, rs1/rs2 to vflmul, and
// rd must not overlap either source (require_noover for fractional LMUL,
// require_noover_widen otherwise).
#define VI_VZIP_VV_CHECK \
require_vector(true); \
require(P.VU.vflmul <= 4); \
require_align(insn.rd(), P.VU.vflmul * 2); \
require_align(insn.rs2(), P.VU.vflmul); \
require_align(insn.rs1(), P.VU.vflmul); \
require_vm; \
if (P.VU.vflmul < 1) { \
require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs2(), P.VU.vflmul); \
require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \
} else { \
require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs2(), P.VU.vflmul); \
require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \
}
// Loop prologue for vzip.vv. The local `vl` is twice the vl CSR value because
// the destination holds two elements per source element; iteration starts at
// vstart and each iteration begins with VI_LOOP_ELEMENT_SKIP(). Leaves the for
// loop open; close it with VI_VZIP_VV_LOOP_END.
#define VI_VZIP_VV_LOOP_BASE \
require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \
reg_t vl = P.VU.vl->read() * 2; \
reg_t UNUSED sew = P.VU.vsew; \
reg_t UNUSED rd_num = insn.rd(); \
reg_t UNUSED rs1_num = insn.rs1(); \
reg_t rs2_num = insn.rs2(); \
for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \
VI_LOOP_ELEMENT_SKIP();
// Closes the loop opened by VI_VZIP_VV_LOOP_BASE and resets vstart to 0.
#define VI_VZIP_VV_LOOP_END \
} \
P.VU.vstart->write(0);
// Full body of vzip.vv: checks, then ZVZIP_EXTRACT_VV at the current sew for
// each destination index. VS1_IDX / VS2_IDX are expressions in terms of i.
#define VI_VZIP_VV_LOOP(VS1_IDX, VS2_IDX) \
VI_VZIP_VV_CHECK \
VI_VZIP_VV_LOOP_BASE \
if (sew == e8) { \
ZVZIP_EXTRACT_VV(e8, VS1_IDX, VS2_IDX); \
} else if (sew == e16) { \
ZVZIP_EXTRACT_VV(e16, VS1_IDX, VS2_IDX); \
} else if (sew == e32) { \
ZVZIP_EXTRACT_VV(e32, VS1_IDX, VS2_IDX); \
} else if (sew == e64) { \
ZVZIP_EXTRACT_VV(e64, VS1_IDX, VS2_IDX); \
} \
VI_VZIP_VV_LOOP_END
// Legality checks for vunzipe.v / vunzipo.v, whose source group is twice the
// destination LMUL: vflmul must be <= 4, rs2 is aligned to 2*vflmul, rd to
// vflmul. rd may equal rs2; any other overlap between the two is rejected.
#define VI_VUNZIP_V_CHECK \
require_vector(true); \
require(P.VU.vflmul <= 4); \
require_align(insn.rs2(), P.VU.vflmul * 2); \
require_align(insn.rd(), P.VU.vflmul); \
require_vm; \
if (insn.rd() != insn.rs2()) { \
require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul * 2); \
}
// Full body of vunzipe.v / vunzipo.v: checks, then ZVZIP_EXTRACT_V at the
// current sew inside the VI_LOOP_BASE / VI_LOOP_END loop. VS2_IDX is an
// expression in terms of i.
#define VI_VUNZIP_V_LOOP(VS2_IDX) \
VI_VUNZIP_V_CHECK \
VI_LOOP_BASE \
if (sew == e8) { \
ZVZIP_EXTRACT_V(e8, VS2_IDX); \
} else if (sew == e16) { \
ZVZIP_EXTRACT_V(e16, VS2_IDX); \
} else if (sew == e32) { \
ZVZIP_EXTRACT_V(e32, VS2_IDX); \
} else if (sew == e64) { \
ZVZIP_EXTRACT_V(e64, VS2_IDX); \
} \
VI_LOOP_END
// Legality checks for vpaire.vv / vpairo.vv: rd, rs1 and rs2 are all aligned
// to vflmul, and rd must not overlap either source register group.
#define VI_VPAIR_VV_CHECK \
require_vector(true); \
require_align(insn.rd(), P.VU.vflmul); \
require_align(insn.rs2(), P.VU.vflmul); \
require_align(insn.rs1(), P.VU.vflmul); \
require_vm; \
require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul); \
require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), P.VU.vflmul);
// Full body of vpaire.vv / vpairo.vv: checks, then ZVZIP_EXTRACT_VV at the
// current sew inside the VI_LOOP_BASE / VI_LOOP_END loop. VS1_IDX / VS2_IDX
// are expressions in terms of i.
#define VI_VPAIR_VV_LOOP(VS1_IDX, VS2_IDX) \
VI_VPAIR_VV_CHECK \
VI_LOOP_BASE \
if (sew == e8) { \
ZVZIP_EXTRACT_VV(e8, VS1_IDX, VS2_IDX); \
} else if (sew == e16) { \
ZVZIP_EXTRACT_VV(e16, VS1_IDX, VS2_IDX); \
} else if (sew == e32) { \
ZVZIP_EXTRACT_VV(e32, VS1_IDX, VS2_IDX); \
} else if (sew == e64) { \
ZVZIP_EXTRACT_VV(e64, VS1_IDX, VS2_IDX); \
} \
VI_LOOP_END
#endif
Loading…
Cancel
Save