diff --git a/README.md b/README.md index 0639d727..42ee21e5 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,7 @@ Spike supports the following RISC-V ISA features: - Zfbfmin extension, v0.6 - Zvfbfmin extension, v0.6 - Zvfbfwma extension, v0.6 + - Zvabd extension, v0.7 - Zvbb extension, v1.0 - Zvbc extension, v1.0 - Zvkg extension, v1.0 @@ -76,6 +77,7 @@ Spike supports the following RISC-V ISA features: - Zvkt extension, v1.0 - Zvkn, Zvknc, Zvkng extension, v1.0 - Zvks, Zvksc, Zvksg extension, v1.0 + - Zvzip extension, v0.1 - Zicond extension, v1.0 - Zilsd extension, v1.0 - Zclsd extension, v1.0 diff --git a/disasm/disasm.cc b/disasm/disasm.cc index 63468ce9..df56ff50 100644 --- a/disasm/disasm.cc +++ b/disasm/disasm.cc @@ -2175,6 +2175,22 @@ void disassembler_t::add_instructions(const isa_parser_t* isa, bool strict) DEFINE_R1TYPE(sm3p1); } + if (ext_enabled(EXT_ZVABD)) { + DEFINE_VECTOR_V(vabs_v); + DEFINE_VECTOR_VV(vabd_vv); + DEFINE_VECTOR_VV(vabdu_vv); + DEFINE_VECTOR_MULTIPLYADD_VV(vwabda_vv); + DEFINE_VECTOR_MULTIPLYADD_VV(vwabdau_vv); + } + + if (ext_enabled(EXT_ZVZIP)) { + DEFINE_VECTOR_VV(vzip_vv); + DEFINE_VECTOR_V(vunzipe_v); + DEFINE_VECTOR_V(vunzipo_v); + DEFINE_VECTOR_VV(vpaire_vv); + DEFINE_VECTOR_VV(vpairo_vv); + } + if (ext_enabled(EXT_ZVBB)) { #define DEFINE_VECTOR_VIU_ZIMM6(code) \ add_vector_viu_z6_insn(this, #code, match_##code, mask_##code) diff --git a/disasm/isa_parser.cc b/disasm/isa_parser.cc index 448b4684..4f48e5f9 100644 --- a/disasm/isa_parser.cc +++ b/disasm/isa_parser.cc @@ -244,6 +244,8 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str) extension_table[EXT_ZCLSD] = true; extension_table[EXT_ZCA] = true; extension_table[EXT_ZILSD] = true; + } else if (ext_str == "zvabd") { + extension_table[EXT_ZVABD] = true; } else if (ext_str == "zvkb") { extension_table[EXT_ZVKB] = true; } else if (ext_str == "zvbb") { @@ -262,18 +264,15 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* 
str) extension_table[EXT_ZVKG] = true; } else if (ext_str == "zvkn") { extension_table[EXT_ZVKB] = true; - extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKNED] = true; extension_table[EXT_ZVKNHB] = true; } else if (ext_str == "zvknc") { extension_table[EXT_ZVKB] = true; - extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVBC] = true; extension_table[EXT_ZVKNED] = true; extension_table[EXT_ZVKNHB] = true; } else if (ext_str == "zvkng") { extension_table[EXT_ZVKB] = true; - extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKG] = true; extension_table[EXT_ZVKNED] = true; extension_table[EXT_ZVKNHB] = true; @@ -285,18 +284,15 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str) extension_table[EXT_ZVKNHB] = true; } else if (ext_str == "zvks") { extension_table[EXT_ZVKB] = true; - extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKSED] = true; extension_table[EXT_ZVKSH] = true; } else if (ext_str == "zvksc") { extension_table[EXT_ZVKB] = true; - extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVBC] = true; extension_table[EXT_ZVKSED] = true; extension_table[EXT_ZVKSH] = true; } else if (ext_str == "zvksg") { extension_table[EXT_ZVKB] = true; - extension_table[EXT_ZVBB] = true; extension_table[EXT_ZVKG] = true; extension_table[EXT_ZVKSED] = true; extension_table[EXT_ZVKSH] = true; @@ -325,6 +321,8 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str) } else if (ext_str == "zvfwldot16bf") { extension_table[EXT_ZVFWLDOT16BF] = true; } else if (ext_str == "zvkt") { + } else if (ext_str == "zvzip") { + extension_table[EXT_ZVZIP] = true; } else if (ext_str == "sstc") { extension_table[EXT_SSTC] = true; } else if (ext_str == "smcsrind") { @@ -406,6 +404,7 @@ void isa_parser_t::add_extension(const std::string& ext_str, const char* str) } else if (ext_str == "ssaia") { extension_table[EXT_SSAIA] = true; extension_table[EXT_SSCSRIND] = true; + } else if (ext_str == "svvptc") { } else if 
(ext_str[0] == 'x') { extension_table['X'] = true; if (ext_str.size() == 1) { diff --git a/fesvr/htif.cc b/fesvr/htif.cc index 15f79bf5..267bb498 100644 --- a/fesvr/htif.cc +++ b/fesvr/htif.cc @@ -20,6 +20,11 @@ #include #include #include +#include +#include +#ifdef __APPLE__ +#include +#endif /* Attempt to determine the execution prefix automatically. autoconf * sets PREFIX, and pconfigure sets __PCONFIGURE__PREFIX. */ @@ -35,6 +40,10 @@ # define TARGET_DIR "/" TARGET_ARCH "/bin/" #endif +#ifndef PROC_SELF_EXE +# define PROC_SELF_EXE "/proc/self/exe" +#endif + static volatile bool signal_exit = false; static void handle_signal(int sig) { @@ -107,6 +116,20 @@ static void bad_address(const std::string& situation, reg_t addr) exit(-1); } +static std::string get_prefix_from_arg0() { + char exe_path[PATH_MAX]; +#ifdef __APPLE__ + uint32_t bufsize = PATH_MAX - 1; + ssize_t len = _NSGetExecutablePath(exe_path, &bufsize) == 0 ? bufsize : -1; +#else + ssize_t len = readlink(PROC_SELF_EXE, exe_path, PATH_MAX - 1); +#endif + if (len == -1) + return PREFIX; + exe_path[len] = '\0'; + return std::string(dirname(exe_path)) + "/.."; +} + std::map htif_t::load_payload(const std::string& payload, reg_t* entry, reg_t load_offset) { std::string path; @@ -114,14 +137,15 @@ std::map htif_t::load_payload(const std::string& payload, path = payload; else if (payload.find('/') == std::string::npos) { - std::string test_path = PREFIX TARGET_DIR + payload; + std::string prefix = get_prefix_from_arg0(); + std::string test_path = prefix + TARGET_DIR + payload; if (access(test_path.c_str(), F_OK) == 0) path = test_path; else throw std::runtime_error( "could not open " + payload + "; searched paths:\n" + "\t. 
(current directory)\n" + - "\t" + PREFIX TARGET_DIR + " (based on configured --prefix and --with-target)" + "\t" + prefix + TARGET_DIR + " (based on configured --prefix and --with-target)" ); } diff --git a/riscv/csrs.cc b/riscv/csrs.cc index 27c26d3a..476fba17 100644 --- a/riscv/csrs.cc +++ b/riscv/csrs.cc @@ -1787,10 +1787,12 @@ reg_t scountovf_csr_t::read() const noexcept { val |= of << (i + FIRST_HPMCOUNTER); } - /* In M and S modes, scountovf bit X is readable when mcounteren bit X is set, */ + /* In M-mode, scountovf bit X is always readable. */ + /* In S/HS-mode, scountovf bit X is readable when mcounteren bit X is set, */ /* and otherwise reads as zero. Similarly, in VS mode, scountovf bit X is readable */ /* when mcounteren bit X and hcounteren bit X are both set, and otherwise reads as zero. */ - val &= state->mcounteren->read(); + if (state->prv < PRV_M) + val &= state->mcounteren->read(); if (state->v) val &= state->hcounteren->read(); return val; diff --git a/riscv/dts.cc b/riscv/dts.cc index 5be9d57f..aee7903e 100644 --- a/riscv/dts.cc +++ b/riscv/dts.cc @@ -122,11 +122,11 @@ static std::string dtc_compile(const std::string& dtc_input, bool compile) step = write(dtc_input_pipe[1], buf+done, len-done); if (step == -1) { std::cerr << "Failed to write dtc_input: " << strerror(errno) << std::endl; - exit(1); + _exit(1); } } close(dtc_input_pipe[1]); - exit(0); + _exit(0); } pid_t dtc_output_pid; @@ -146,7 +146,7 @@ static std::string dtc_compile(const std::string& dtc_input, bool compile) close(dtc_output_pipe[1]); execlp(DTC, DTC, "-O", output_type, "-I", input_type, nullptr); std::cerr << "Failed to run " DTC ": " << strerror(errno) << std::endl; - exit(1); + _exit(1); } close(dtc_input_pipe[1]); diff --git a/riscv/insns/vabd_vv.h b/riscv/insns/vabd_vv.h new file mode 100644 index 00000000..ea4797f8 --- /dev/null +++ b/riscv/insns/vabd_vv.h @@ -0,0 +1,9 @@ +// vabd.vv vd, vs1, vs2, vm + +require_zvabd; +require(P.VU.vsew <= e16); + +VI_VV_LOOP +({ + vd 
= DO_ABD(vs1, vs2); +}) diff --git a/riscv/insns/vabdu_vv.h b/riscv/insns/vabdu_vv.h new file mode 100644 index 00000000..183ba334 --- /dev/null +++ b/riscv/insns/vabdu_vv.h @@ -0,0 +1,9 @@ +// vabdu.vv vd, vs1, vs2, vm + +require_zvabd; +require(P.VU.vsew <= e16); + +VI_VV_ULOOP +({ + vd = DO_ABD(vs1, vs2); +}) diff --git a/riscv/insns/vabs_v.h b/riscv/insns/vabs_v.h new file mode 100644 index 00000000..f175eb04 --- /dev/null +++ b/riscv/insns/vabs_v.h @@ -0,0 +1,8 @@ +// vabs.v vd, vs2, vm + +require_zvabd; + +VI_V_LOOP +({ + vd = vs2 > 0 ? vs2 : -vs2; +}) diff --git a/riscv/insns/vpaire_vv.h b/riscv/insns/vpaire_vv.h new file mode 100644 index 00000000..960b134a --- /dev/null +++ b/riscv/insns/vpaire_vv.h @@ -0,0 +1,7 @@ +// vpaire.vv vd, vs1, vs2, vm + +#include "zvzip_ext_macros.h" + +require_zvzip; + +VI_VPAIR_VV_LOOP(i - 1, i) diff --git a/riscv/insns/vpairo_vv.h b/riscv/insns/vpairo_vv.h new file mode 100644 index 00000000..449a493f --- /dev/null +++ b/riscv/insns/vpairo_vv.h @@ -0,0 +1,7 @@ +// vpairo.vv vd, vs1, vs2, vm + +#include "zvzip_ext_macros.h" + +require_zvzip; + +VI_VPAIR_VV_LOOP(i, i + 1) diff --git a/riscv/insns/vunzipe_v.h b/riscv/insns/vunzipe_v.h new file mode 100644 index 00000000..7415c363 --- /dev/null +++ b/riscv/insns/vunzipe_v.h @@ -0,0 +1,7 @@ +// vunzipe.v vd, vs2, vm + +#include "zvzip_ext_macros.h" + +require_zvzip; + +VI_VUNZIP_V_LOOP(i << 1) diff --git a/riscv/insns/vunzipo_v.h b/riscv/insns/vunzipo_v.h new file mode 100644 index 00000000..e2950d92 --- /dev/null +++ b/riscv/insns/vunzipo_v.h @@ -0,0 +1,7 @@ +// vunzipo.v vd, vs2, vm + +#include "zvzip_ext_macros.h" + +require_zvzip; + +VI_VUNZIP_V_LOOP((i << 1) + 1) diff --git a/riscv/insns/vwabda_vv.h b/riscv/insns/vwabda_vv.h new file mode 100644 index 00000000..ef8face0 --- /dev/null +++ b/riscv/insns/vwabda_vv.h @@ -0,0 +1,10 @@ +// vwabda.vv vd, vs2, vs1, vm + +require_zvabd; +require(P.VU.vsew <= e16); +VI_CHECK_DSS(true); + +VI_VV_LOOP_WIDEN +({ + 
VI_WIDE_OP_MACRO_AND_ASSIGN(vs2, vs1, vd_w, DO_ABD, int); +}) diff --git a/riscv/insns/vwabdau_vv.h b/riscv/insns/vwabdau_vv.h new file mode 100644 index 00000000..fd662543 --- /dev/null +++ b/riscv/insns/vwabdau_vv.h @@ -0,0 +1,10 @@ +// vwabdau.vv vd, vs2, vs1, vm + +require_zvabd; +require(P.VU.vsew <= e16); +VI_CHECK_DSS(true); + +VI_VV_LOOP_WIDEN +({ + VI_WIDE_OP_MACRO_AND_ASSIGN(vs2, vs1, vd_w, DO_ABD, uint); +}) diff --git a/riscv/insns/vzip_vv.h b/riscv/insns/vzip_vv.h new file mode 100644 index 00000000..7d2b7855 --- /dev/null +++ b/riscv/insns/vzip_vv.h @@ -0,0 +1,7 @@ +// vzip.vv vd, vs1, vs2, vm + +#include "zvzip_ext_macros.h" + +require_zvzip; + +VI_VZIP_VV_LOOP(i >> 1, i >> 1) diff --git a/riscv/isa_parser.h b/riscv/isa_parser.h index 2fc0f3aa..ada74c76 100644 --- a/riscv/isa_parser.h +++ b/riscv/isa_parser.h @@ -66,6 +66,7 @@ typedef enum { EXT_ZICOND, EXT_ZIHPM, EXT_ZILSD, + EXT_ZVABD, EXT_ZVBB, EXT_ZVKB, EXT_ZVBC, @@ -87,6 +88,7 @@ typedef enum { EXT_ZVQLDOT16I, EXT_ZVFQLDOT8F, EXT_ZVFWLDOT16BF, + EXT_ZVZIP, EXT_SSTC, EXT_ZAAMO, EXT_ZALRSC, diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index ae87a4d7..a996b889 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -1136,6 +1136,20 @@ riscv_insn_ext_zvk = \ $(riscv_insn_ext_zvksed) \ $(riscv_insn_ext_zvksh) \ +riscv_insn_ext_zvabd = \ + vabs_v \ + vabd_vv \ + vabdu_vv \ + vwabda_vv \ + vwabdau_vv \ + +riscv_insn_ext_zvzip = \ + vzip_vv \ + vunzipe_v \ + vunzipo_v \ + vpaire_vv \ + vpairo_vv \ + riscv_insn_list = \ $(riscv_insn_ext_i) \ $(riscv_insn_ext_c) \ @@ -1166,6 +1180,8 @@ riscv_insn_list = \ $(riscv_insn_ext_zvk) \ $(riscv_insn_ext_zvbdot) \ $(riscv_insn_ext_zvldot) \ + $(riscv_insn_ext_zvabd) \ + $(riscv_insn_ext_zvzip) \ $(riscv_insn_priv) \ $(riscv_insn_smrnmi) \ $(riscv_insn_svinval) \ diff --git a/riscv/v_ext_macros.h b/riscv/v_ext_macros.h index e96fc122..2418b336 100644 --- a/riscv/v_ext_macros.h +++ b/riscv/v_ext_macros.h @@ -72,6 +72,12 @@ static inline bool 
is_overlapped_widen(const int astart, int asize, #define require_zvfbfa_or_zvfhmin \ require_extension(P.VU.altfmt ? EXT_ZVFBFA : EXT_ZVFHMIN); \ +#define require_zvabd \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVABD); \ + } while (0) + #define VI_NARROW_CHECK_COMMON(factor) \ require_vector(true); \ require(P.VU.vflmul <= (8 / factor)); \ @@ -353,6 +359,10 @@ static inline bool is_overlapped_widen(const int astart, int asize, type_sew_t::type vs1 = P.VU.elt::type>(rs1_num, i); \ type_sew_t::type UNUSED vs2 = P.VU.elt::type>(rs2_num, i); +#define V_PARAMS(x) \ + type_sew_t::type &vd = P.VU.elt::type>(rd_num, i, true); \ + type_sew_t::type vs2 = P.VU.elt::type>(rs2_num, i); + #define VX_PARAMS(x) \ type_sew_t::type UNUSED &vd = P.VU.elt::type>(rd_num, i, true); \ type_sew_t::type rs1 = (type_sew_t::type)RS1; \ @@ -723,6 +733,24 @@ static inline bool is_overlapped_widen(const int astart, int asize, } \ VI_LOOP_END +#define VI_V_LOOP(BODY) \ + VI_CHECK_SSS(false) \ + VI_LOOP_BASE \ + if (sew == e8) { \ + V_PARAMS(e8); \ + BODY; \ + } else if (sew == e16) { \ + V_PARAMS(e16); \ + BODY; \ + } else if (sew == e32) { \ + V_PARAMS(e32); \ + BODY; \ + } else if (sew == e64) { \ + V_PARAMS(e64); \ + BODY; \ + } \ + VI_LOOP_END + #define VI_VX_ULOOP(BODY) \ VI_CHECK_SSS(false) \ VI_LOOP_BASE \ @@ -974,6 +1002,28 @@ static inline bool is_overlapped_widen(const int astart, int asize, break; \ } +#define VI_WIDE_OP_MACRO_AND_ASSIGN(var0, var1, var2, op, sign) \ + switch (P.VU.vsew) { \ + case e8: { \ + sign##16_t UNUSED vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op((sign##16_t)(sign##8_t)var0, (sign##16_t)(sign##8_t)var1) + var2; \ + } \ + break; \ + case e16: { \ + sign##32_t UNUSED vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + op((sign##32_t)(sign##16_t)var0, (sign##32_t)(sign##16_t)var1) + var2; \ + } \ + break; \ + default: { \ + sign##64_t UNUSED vd_w = P.VU.elt(rd_num, i); \ + P.VU.elt(rd_num, i, true) = \ + 
op((sign##64_t)(sign##32_t)var0, (sign##64_t)(sign##32_t)var1) + var2; \ + } \ + break; \ + } + #define VI_WIDE_OP_AND_ASSIGN_MIX(var0, var1, var2, op0, op1, sign_d, sign_1, sign_2) \ switch (P.VU.vsew) { \ case e8: { \ @@ -2195,9 +2245,9 @@ c_t generic_dot_product(const std::vector& a, const std::vector& b, c_ } #define ZVLDOT_LOOP(a_t, b_t, c_t, dot) \ - std::vector a(P.VU.vl->read(), a_t()); \ - std::vector b(P.VU.vl->read(), b_t()); \ - for (reg_t i = 0; i < a.size(); i++) { \ + std::vector a(P.VU.vlmax, a_t()); \ + std::vector b(P.VU.vlmax, b_t()); \ + for (reg_t i = 0, vl = P.VU.vl->read(); i < vl; i++) { \ VI_LOOP_ELEMENT_SKIP(); \ a[i] = P.VU.elt(insn.rs1(), i); \ b[i] = P.VU.elt(insn.rs2(), i); \ @@ -2217,9 +2267,9 @@ c_t generic_dot_product(const std::vector& a, const std::vector& b, c_ for (reg_t idx = 0; idx < 8; idx++) { \ reg_t i = ci + idx; \ VI_LOOP_ELEMENT_SKIP(); \ - std::vector a(P.VU.vl->read(), a_t()); \ - std::vector b(P.VU.vl->read(), b_t()); \ - for (reg_t k = 0; k < a.size(); k++) { \ + std::vector a(P.VU.vlmax, a_t()); \ + std::vector b(P.VU.vlmax, b_t()); \ + for (reg_t k = 0, vl = P.VU.vl->read(); k < vl; k++) { \ a[k] = P.VU.elt(insn.rs1(), k); \ b[k] = P.VU.elt(vs2 + idx, k); \ } \ @@ -2238,4 +2288,6 @@ c_t generic_dot_product(const std::vector& a, const std::vector& b, c_ #define P_SET_OV(ov) \ if (ov) P.VU.vxsat->write(1); +#define DO_ABD(N, M) ((N) > (M) ? 
(N) - (M) : (M) - (N)) + #endif diff --git a/riscv/zvzip_ext_macros.h b/riscv/zvzip_ext_macros.h new file mode 100644 index 00000000..745b3b8e --- /dev/null +++ b/riscv/zvzip_ext_macros.h @@ -0,0 +1,110 @@ +#ifndef RISCV_ZVZIP_MACROS_H_ +#define RISCV_ZVZIP_MACROS_H_ + +#define require_zvzip \ + do { \ + require_vector(true); \ + require_extension(EXT_ZVZIP); \ + } while (0) + +#define ZVZIP_EXTRACT_V(x, VS2_IDX) \ + type_sew_t::type UNUSED &vd = P.VU.elt::type>(rd_num, i, true); \ + vd = P.VU.elt::type>(rs2_num, VS2_IDX); + +#define ZVZIP_EXTRACT_VV(x, VS1_IDX, VS2_IDX) \ + type_sew_t::type UNUSED &vd = P.VU.elt::type>(rd_num, i, true); \ + vd = (i & 1) \ + ? ((VS1_IDX) >= P.VU.vlmax ? 0 : P.VU.elt::type>(rs1_num, VS1_IDX)) \ + : ((VS2_IDX) >= P.VU.vlmax ? 0 : P.VU.elt::type>(rs2_num, VS2_IDX)); + +#define VI_VZIP_VV_CHECK \ + require_vector(true); \ + require(P.VU.vflmul <= 4); \ + require_align(insn.rd(), P.VU.vflmul * 2); \ + require_align(insn.rs2(), P.VU.vflmul); \ + require_align(insn.rs1(), P.VU.vflmul); \ + require_vm; \ + if (P.VU.vflmul < 1) { \ + require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs2(), P.VU.vflmul); \ + require_noover(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } else { \ + require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs2(), P.VU.vflmul); \ + require_noover_widen(insn.rd(), P.VU.vflmul * 2, insn.rs1(), P.VU.vflmul); \ + } + +#define VI_VZIP_VV_LOOP_BASE \ + require(P.VU.vsew >= e8 && P.VU.vsew <= e64); \ + reg_t vl = P.VU.vl->read() * 2; \ + reg_t UNUSED sew = P.VU.vsew; \ + reg_t UNUSED rd_num = insn.rd(); \ + reg_t UNUSED rs1_num = insn.rs1(); \ + reg_t rs2_num = insn.rs2(); \ + for (reg_t i = P.VU.vstart->read(); i < vl; ++i) { \ + VI_LOOP_ELEMENT_SKIP(); + +#define VI_VZIP_VV_LOOP_END \ + } \ + P.VU.vstart->write(0); + +#define VI_VZIP_VV_LOOP(VS1_IDX, VS2_IDX) \ + VI_VZIP_VV_CHECK \ + VI_VZIP_VV_LOOP_BASE \ + if (sew == e8) { \ + ZVZIP_EXTRACT_VV(e8, VS1_IDX, VS2_IDX); \ + } else if (sew == e16) { \ + 
ZVZIP_EXTRACT_VV(e16, VS1_IDX, VS2_IDX); \ + } else if (sew == e32) { \ + ZVZIP_EXTRACT_VV(e32, VS1_IDX, VS2_IDX); \ + } else if (sew == e64) { \ + ZVZIP_EXTRACT_VV(e64, VS1_IDX, VS2_IDX); \ + } \ + VI_VZIP_VV_LOOP_END + +#define VI_VUNZIP_V_CHECK \ + require_vector(true); \ + require(P.VU.vflmul <= 4); \ + require_align(insn.rs2(), P.VU.vflmul * 2); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_vm; \ + if (insn.rd() != insn.rs2()) { \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul * 2); \ + } + +#define VI_VUNZIP_V_LOOP(VS2_IDX) \ + VI_VUNZIP_V_CHECK \ + VI_LOOP_BASE \ + if (sew == e8) { \ + ZVZIP_EXTRACT_V(e8, VS2_IDX); \ + } else if (sew == e16) { \ + ZVZIP_EXTRACT_V(e16, VS2_IDX); \ + } else if (sew == e32) { \ + ZVZIP_EXTRACT_V(e32, VS2_IDX); \ + } else if (sew == e64) { \ + ZVZIP_EXTRACT_V(e64, VS2_IDX); \ + } \ + VI_LOOP_END + +#define VI_VPAIR_VV_CHECK \ + require_vector(true); \ + require_align(insn.rd(), P.VU.vflmul); \ + require_align(insn.rs2(), P.VU.vflmul); \ + require_align(insn.rs1(), P.VU.vflmul); \ + require_vm; \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs2(), P.VU.vflmul); \ + require_noover(insn.rd(), P.VU.vflmul, insn.rs1(), P.VU.vflmul); + +#define VI_VPAIR_VV_LOOP(VS1_IDX, VS2_IDX) \ + VI_VPAIR_VV_CHECK \ + VI_LOOP_BASE \ + if (sew == e8) { \ + ZVZIP_EXTRACT_VV(e8, VS1_IDX, VS2_IDX); \ + } else if (sew == e16) { \ + ZVZIP_EXTRACT_VV(e16, VS1_IDX, VS2_IDX); \ + } else if (sew == e32) { \ + ZVZIP_EXTRACT_VV(e32, VS1_IDX, VS2_IDX); \ + } else if (sew == e64) { \ + ZVZIP_EXTRACT_VV(e64, VS1_IDX, VS2_IDX); \ + } \ + VI_LOOP_END + +#endif