Browse Source

Zvk: Infrastructure for Zvk extensions, element group handling

Introduce types and macros useful across multiple Zvk sub-extensions,
including Zvbb and Zvbc. Those will be used by upcoming
per-sub-extension commits.

In particular we introduce "Element Group" types and loop macros handling
those element groups. The concept of element group is described in
<https://github.com/riscv/riscv-crypto/blob/master/doc/vector/riscv-crypto-vector-element-groups.adoc>.

Note that the element group access method is not implemented
for WORDS_BIGENDIAN setups. As such, isa_parser.cc is modified to emit
an error when WORDS_BIGENDIAN is defined and extensions using element
groups are enabled.

Signed-off-by: Eric Gouriou <ego@rivosinc.com>
pull/1303/head
Eric Gouriou 3 years ago
parent
commit
d5c0339484
No known key found for this signature in database GPG Key ID: 9654303189395784
  1. 21
      riscv/arith.h
  2. 10
      riscv/isa_parser.cc
  3. 22
      riscv/v_ext_macros.h
  4. 55
      riscv/vector_unit.cc
  5. 19
      riscv/vector_unit.h
  6. 1023
      riscv/zvk_ext_macros.h

21
riscv/arith.h

@ -7,6 +7,7 @@
#include <cstdint>
#include <climits>
#include <cstddef>
#include <type_traits>
inline uint64_t mulhu(uint64_t a, uint64_t b)
{
@ -221,4 +222,24 @@ static inline uint64_t xperm(uint64_t rs1, uint64_t rs2, size_t sz_log2, size_t
return r;
}
// Circularly shifts the bits of the unsigned value 'x' towards the least
// significant end. Only the low log2(bit-width of T) bits of 'shiftamt'
// are used, so rotating by a multiple of the width returns 'x' unchanged.
template <typename T>
static inline T rotate_right(T x, std::size_t shiftamt) {
  static_assert(std::is_unsigned<T>::value);
  constexpr std::size_t width_mask = (8 * sizeof(T)) - 1;
  const std::size_t down = shiftamt & width_mask;
  // Complementary shift; evaluates to 0 when 'down' is 0, which avoids
  // an out-of-range shift by the full width of T.
  const std::size_t up = (0 - down) & width_mask;
  // For T narrower than int the operands are promoted; the cast truncates
  // the result back to T.
  return static_cast<T>((x >> down) | (x << up));
}
// Rotates left an unsigned integer by the given number of bits.
//
// Only the low log2(bit-width of T) bits of 'shiftamt' are used, so a
// rotation by a multiple of the width returns 'x' unchanged.
template <typename T>
static inline T rotate_left(T x, std::size_t shiftamt) {
  static_assert(std::is_unsigned<T>::value);
  constexpr std::size_t width_mask = (8 * sizeof(T)) - 1;
  const std::size_t up = shiftamt & width_mask;
  // Complementary shift; evaluates to 0 when 'up' is 0, which avoids
  // an out-of-range shift by the full width of T.
  const std::size_t down = (0 - up) & width_mask;
  // For T narrower than int the operands are promoted; the cast truncates
  // the result back to T.
  return static_cast<T>((x << up) | (x >> down));
}
#endif

10
riscv/isa_parser.cc

@ -361,7 +361,15 @@ isa_parser_t::isa_parser_t(const char* str, const char *priv)
(extension_table[EXT_ZVKG] || extension_table[EXT_ZVKNED] || extension_table[EXT_ZVKSH])) {
bad_isa_string(str, "'Zvkg', 'Zvkned', and 'Zvksh' extensions are incompatible with 'Zpn' extension in rv64");
}
#ifdef WORDS_BIGENDIAN
  // Access to the vector registers as element groups is unimplemented on big-endian setups.
  // The set of extensions tested here must match the list named in the
  // error message below.
  if (extension_table[EXT_ZVKG] || extension_table[EXT_ZVKNED] ||
      extension_table[EXT_ZVKNHA] || extension_table[EXT_ZVKNHB] ||
      extension_table[EXT_ZVKSED] || extension_table[EXT_ZVKSH]) {
    bad_isa_string(str,
                   "'Zvkg', 'Zvkned', 'Zvknha', 'Zvknhb', 'Zvksed', and 'Zvksh' "
                   "extensions are incompatible with WORDS_BIGENDIAN setups.");
  }
#endif
std::string lowercase = strtolower(priv);
bool user = false, supervisor = false;

22
riscv/v_ext_macros.h

@ -325,6 +325,10 @@ static inline bool is_overlapped_widen(const int astart, int asize,
type_usew_t<x>::type vs1 = P.VU.elt<type_usew_t<x>::type>(rs1_num, i); \
type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
// Declares the operands of a single-source vector operation for element
// width 'x', using type_usew_t<x>::type as the element type:
//   vd  - reference to element 'i' of register 'rd_num', obtained with the
//         third elt() argument (is_write) set to true;
//   vs2 - copy of element 'i' of register 'rs2_num'.
// The expansion site must provide 'P', 'rd_num', 'rs2_num' and 'i'.
#define V_U_PARAMS(x) \
type_usew_t<x>::type &vd = P.VU.elt<type_usew_t<x>::type>(rd_num, i, true); \
type_usew_t<x>::type vs2 = P.VU.elt<type_usew_t<x>::type>(rs2_num, i);
#define VX_U_PARAMS(x) \
type_usew_t<x>::type &vd = P.VU.elt<type_usew_t<x>::type>(rd_num, i, true); \
type_usew_t<x>::type rs1 = (type_usew_t<x>::type)RS1; \
@ -693,6 +697,24 @@ static inline bool is_overlapped_widen(const int astart, int asize,
} \
VI_LOOP_END
// Element loop for single-source vector operations: for each element,
// selects on 'sew' (e8, e16, e32 or e64), declares 'vd' and 'vs2' through
// V_U_PARAMS at that width, then runs BODY. No branch is taken when 'sew'
// matches none of the four widths.
// The loop is wrapped by VI_CHECK_SSS(false), VI_LOOP_BASE and VI_LOOP_END,
// which are defined elsewhere in this header.
#define VI_V_ULOOP(BODY) \
VI_CHECK_SSS(false) \
VI_LOOP_BASE \
if (sew == e8) { \
V_U_PARAMS(e8); \
BODY; \
} else if (sew == e16) { \
V_U_PARAMS(e16); \
BODY; \
} else if (sew == e32) { \
V_U_PARAMS(e32); \
BODY; \
} else if (sew == e64) { \
V_U_PARAMS(e64); \
BODY; \
} \
VI_LOOP_END
#define VI_VX_ULOOP(BODY) \
VI_CHECK_SSS(false) \
VI_LOOP_BASE \

55
riscv/vector_unit.cc

@ -86,6 +86,56 @@ template<class T> T& vectorUnit_t::elt(reg_t vReg, reg_t n, bool UNUSED is_write
return regStart[n];
}
// Returns a reference to the 'n'-th element group of type EG (a
// std::array<T, N>) within the register group that starts at 'vReg'.
//
// The logic differences between 'elt()' and 'elt_group()' come from
// the fact that, while 'elt()' requires that the element is fully
// contained in a single vector register, the element group may span
// multiple registers in a single register group (LMUL>1).
//
// Side effects: every vector register overlapped by the element group is
// flagged in 'reg_referenced'; when commit logging is enabled and
// 'is_write' is true, each of those registers also gets an entry in the
// processor state's 'log_reg_write'.
//
// Notes:
// - We do NOT check that a single element - i.e., the T in the element
//   group type std::array<T, N> - fits within a single register, or that
//   T is smaller or equal to VSEW. Implementations of the instructions
//   sometimes use a different T than what the specification suggests.
//   Instruction implementations should 'require()' what the specification
//   dictates.
// - We do NOT check that 'vReg' is a valid register group. The only bound
//   enforced here is an assert() that element groups 0..n fit within
//   (VLEN/8)*vflmul bytes; that assert disappears when NDEBUG is defined,
//   so it remains the responsibility of the caller to validate those
//   preconditions.
template<typename EG> EG&
vectorUnit_t::elt_group(reg_t vReg, reg_t n, bool UNUSED is_write) {
#ifdef WORDS_BIGENDIAN
  fputs("vectorUnit_t::elt_group is not compatible with WORDS_BIGENDIAN setup.\n",
        stderr);
  abort();
#endif
  using T = typename EG::value_type;
  constexpr std::size_t N = std::tuple_size<EG>::value;
  // N is a compile-time constant, so reject empty element groups at
  // compile time rather than at run time.
  static_assert(N > 0, "element groups must contain at least one element");
  assert(vsew != 0);

  constexpr reg_t elt_group_size = N * sizeof(T);
  const reg_t bytes_per_reg = VLEN >> 3;
  const reg_t reg_group_size = bytes_per_reg * vflmul;
  assert(((n + 1) * elt_group_size) <= reg_group_size);

  const reg_t start_byte = n * elt_group_size;

  // Inclusive first/last register indices.
  const reg_t reg_first = vReg + start_byte / bytes_per_reg;
  const reg_t reg_last = vReg + (start_byte + elt_group_size - 1) / bytes_per_reg;

  // Flag every register overlapped by the element group as referenced and,
  // for logged writes, as written.
  for (reg_t vidx = reg_first; vidx <= reg_last; ++vidx) {
    reg_referenced[vidx] = 1;

    if (unlikely(p->get_log_commits_enabled() && is_write)) {
      p->get_state()->log_reg_write[(vidx << 4) | 2] = {0, 0};
    }
  }

  // 'start_byte' is relative to the start of the register group, so the
  // resulting address may fall in a register past 'vReg'.
  return *(EG*)((char*)reg_file + vReg * bytes_per_reg + start_byte);
}
template signed char& vectorUnit_t::elt<signed char>(reg_t, reg_t, bool);
template short& vectorUnit_t::elt<short>(reg_t, reg_t, bool);
template int& vectorUnit_t::elt<int>(reg_t, reg_t, bool);
@ -98,3 +148,8 @@ template uint64_t& vectorUnit_t::elt<uint64_t>(reg_t, reg_t, bool);
template float16_t& vectorUnit_t::elt<float16_t>(reg_t, reg_t, bool);
template float32_t& vectorUnit_t::elt<float32_t>(reg_t, reg_t, bool);
template float64_t& vectorUnit_t::elt<float64_t>(reg_t, reg_t, bool);
// Explicit instantiations of elt_group() for the element group types.
// The template body is defined in this file, so an element group type used
// from another translation unit needs an instantiation listed here.
template EGU32x4_t& vectorUnit_t::elt_group<EGU32x4_t>(reg_t, reg_t, bool);
template EGU32x8_t& vectorUnit_t::elt_group<EGU32x8_t>(reg_t, reg_t, bool);
template EGU64x4_t& vectorUnit_t::elt_group<EGU64x4_t>(reg_t, reg_t, bool);
template EGU8x16_t& vectorUnit_t::elt_group<EGU8x16_t>(reg_t, reg_t, bool);

19
riscv/vector_unit.h

@ -2,6 +2,9 @@
#ifndef _RISCV_VECTOR_UNIT_H
#define _RISCV_VECTOR_UNIT_H
#include <array>
#include <cstdint>
#include "decode.h"
#include "csrs.h"
@ -69,6 +72,17 @@ struct type_sew_t<64>
using type=int64_t;
};
// Element group of four 32-bit unsigned elements (128 bits total).
using EGU32x4_t = std::array<uint32_t, 4>;
// Element group of eight 32-bit unsigned elements (256 bits total).
using EGU32x8_t = std::array<uint32_t, 8>;
// Element group of four 64-bit unsigned elements (256 bits total).
using EGU64x4_t = std::array<uint64_t, 4>;
// Element group of sixteen 8-bit unsigned elements (128 bits total).
using EGU8x16_t = std::array<uint8_t, 16>;
class vectorUnit_t
{
@ -88,8 +102,11 @@ public:
bool vill;
bool vstart_alu;
// vector element for varies SEW
// vector element for various SEW
template<class T> T& elt(reg_t vReg, reg_t n, bool is_write = false);
// Vector element group access, where EG is a std::array<T, N>.
// Returns a reference to the 'n'-th element group of the register group
// starting at 'vReg'. Pass is_write = true when the group will be modified.
template<typename EG> EG&
elt_group(reg_t vReg, reg_t n, bool is_write = false);
public:

1023
riscv/zvk_ext_macros.h

File diff suppressed because it is too large
Loading…
Cancel
Save