Browse Source
Implement the Zvksh sub-extension, "ShangMi Suite: SM3 Hash Function Instructions": - vsm3me.vv, message expansion, - vsm3c.vi, compression rounds. This also introduces a SM3 specific header for common logic. Co-authored-by: Raghav Gupta <rgupta@rivosinc.com> Co-authored-by: Albert Jakieła <aja@semihalf.com> Co-authored-by: Kornel Dulęba <mindal@semihalf.com> Signed-off-by: Eric Gouriou <ego@rivosinc.com>pull/1303/head
4 changed files with 151 additions and 0 deletions
@ -0,0 +1,60 @@ |
|||
// vsm3c.vi vd, vs2, rnd
|
|||
|
|||
#include "zvksh_ext_macros.h" |
|||
|
|||
require_vsm3_constraints; |
|||
|
|||
VI_ZVK_VD_VS2_ZIMM5_EGU32x8_NOVM_LOOP( |
|||
{}, |
|||
// No need to validate or normalize 'zimm5' here as this is a 5 bits value
|
|||
// and all values in 0-31 are valid.
|
|||
const reg_t round = zimm5;, |
|||
{ |
|||
// {H, G, F, E, D, C, B, A} <- vd
|
|||
EXTRACT_EGU32x8_WORDS_BE_BSWAP(vd, H, G, F, E, D, C, B, A); |
|||
// {_, _, w5, w4, _, _, w1, w0} <- vs2
|
|||
EXTRACT_EGU32x8_WORDS_BE_BSWAP(vs2, |
|||
UNUSED _unused_w7, UNUSED _unused_w6, w5, w4, |
|||
UNUSED _unused_w3, UNUSED _unused_w2, w1, w0); |
|||
const uint32_t x0 = w0 ^ w4; // W'[0] in spec documentation.
|
|||
const uint32_t x1 = w1 ^ w5; // W'[1]
|
|||
|
|||
// Two rounds of compression.
|
|||
uint32_t ss1; |
|||
uint32_t ss2; |
|||
uint32_t tt1; |
|||
uint32_t tt2; |
|||
uint32_t j; |
|||
|
|||
j = 2 * round; |
|||
ss1 = ZVK_ROL32(ZVK_ROL32(A, 12) + E + ZVK_ROL32(ZVKSH_T(j), j % 32), 7); |
|||
ss2 = ss1 ^ ZVK_ROL32(A, 12); |
|||
tt1 = ZVKSH_FF(A, B, C, j) + D + ss2 + x0; |
|||
tt2 = ZVKSH_GG(E, F, G, j) + H + ss1 + w0; |
|||
D = C; |
|||
const uint32_t C1 = ZVK_ROL32(B, 9); |
|||
B = A; |
|||
const uint32_t A1 = tt1; |
|||
H = G; |
|||
const uint32_t G1 = ZVK_ROL32(F, 19); |
|||
F = E; |
|||
const uint32_t E1 = ZVKSH_P0(tt2); |
|||
|
|||
j = 2 * round + 1; |
|||
ss1 = ZVK_ROL32(ZVK_ROL32(A1, 12) + E1 + ZVK_ROL32(ZVKSH_T(j), j % 32), 7); |
|||
ss2 = ss1 ^ ZVK_ROL32(A1, 12); |
|||
tt1 = ZVKSH_FF(A1, B, C1, j) + D + ss2 + x1; |
|||
tt2 = ZVKSH_GG(E1, F, G1, j) + H + ss1 + w1; |
|||
D = C1; |
|||
const uint32_t C2 = ZVK_ROL32(B, 9); |
|||
B = A1; |
|||
const uint32_t A2 = tt1; |
|||
H = G1; |
|||
const uint32_t G2 = ZVK_ROL32(F, 19); |
|||
F = E1; |
|||
const uint32_t E2 = ZVKSH_P0(tt2); |
|||
|
|||
// Update the destination register.
|
|||
SET_EGU32x8_WORDS_BE_BSWAP(vd, G1, G2, E1, E2, C1, C2, A1, A2); |
|||
} |
|||
); |
|||
@ -0,0 +1,39 @@ |
|||
// vsm3me.vv vd, vs2, vs1
|
|||
|
|||
#include "zvk_ext_macros.h" |
|||
#include "zvksh_ext_macros.h" |
|||
|
|||
// Per the SM3 spec, the message expansion computes new words W[i] as:
//   W[i] = P_1( W[i-16] xor W[i-9] xor ( W[i-3] <<< 15 ) )
//          xor ( W[i-13] <<< 7 )
//          xor W[i-6]
// Using arguments M16 = W[i-16], M9 = W[i-9], etc.,
// where Mk stands for "W[i Minus k]", we define the "W function".
//
// Argument order is (W[i-16], W[i-9], W[i-3], W[i-13], W[i-6]).
// The operand handed to ZVKSH_P1 may be expanded more than once, so the
// arguments must be free of side effects.
#define ZVKSH_W(M16, M9, M3, M13, M6) \
  (ZVKSH_P1((M16) ^ (M9) ^ ZVK_ROL32((M3), 15)) ^ ZVK_ROL32((M13), 7) ^ (M6))
|||
|
|||
require_vsm3_constraints; |
|||
|
|||
VI_ZVK_VD_VS1_VS2_EGU32x8_NOVM_LOOP( |
|||
{}, |
|||
{ |
|||
// {w7, w6, w5, w4, w3, w2, w1, w0} <- vs1
|
|||
EXTRACT_EGU32x8_WORDS_BE_BSWAP(vs1, w7, w6, w5, w4, w3, w2, w1, w0); |
|||
// {w15, w14, w13, w12, w11, w10, w9, w8} <- vs2
|
|||
EXTRACT_EGU32x8_WORDS_BE_BSWAP(vs2, w15, w14, w13, w12, w11, w10, w9, w8); |
|||
|
|||
// Arguments are W[i-16], W[i-9], W[i-13], W[i-6].
|
|||
// Note that some of the newly computed words are used in later invocations.
|
|||
const uint32_t w16 = ZVKSH_W(w0, w7, w13, w3, w10); |
|||
const uint32_t w17 = ZVKSH_W(w1, w8, w14, w4, w11); |
|||
const uint32_t w18 = ZVKSH_W(w2, w9, w15, w5, w12); |
|||
const uint32_t w19 = ZVKSH_W(w3, w10, w16, w6, w13); |
|||
const uint32_t w20 = ZVKSH_W(w4, w11, w17, w7, w14); |
|||
const uint32_t w21 = ZVKSH_W(w5, w12, w18, w8, w15); |
|||
const uint32_t w22 = ZVKSH_W(w6, w13, w19, w9, w16); |
|||
const uint32_t w23 = ZVKSH_W(w7, w14, w20, w10, w17); |
|||
|
|||
// Update the destination register.
|
|||
SET_EGU32x8_WORDS_BE_BSWAP(vd, w23, w22, w21, w20, w19, w18, w17, w16); |
|||
} |
|||
); |
|||
@ -0,0 +1,47 @@ |
|||
// Helper macros and functions to help implement instructions defined as part of
|
|||
// the RISC-V Zvksh extension (vectorized SM3).
|
|||
|
|||
#include "zvk_ext_macros.h" |
|||
|
|||
#ifndef RISCV_INSNS_ZVKSH_COMMON_H_ |
|||
#define RISCV_INSNS_ZVKSH_COMMON_H_ |
|||
|
|||
// Constraints common to all vsm3* instructions:
//  - Zvksh is enabled
//  - VSEW == 32
//  - EGW (256) <= LMUL * VLEN
//  - No overlap of vd and vs2.
//
// The constraint that vstart and vl are both EGS (8) aligned
// is checked in the VI_ZVK_..._EGU32x8_..._LOOP macros.
//
// The body is wrapped in do { ... } while (false) so that the macro behaves
// as a single statement and must be followed by a semicolon at the use site.
//
// NOTE(review): the overlap check compares the vd and vs2 register numbers
// only. Whether that is sufficient for register groups (LMUL > 1) depends on
// alignment checks made outside this file - confirm.
#define require_vsm3_constraints \
  do { \
    require_zvksh; \
    require(P.VU.vsew == 32); \
    require_egw_fits(256); \
    require(insn.rd() != insn.rs2()); \
  } while (false)
|||
|
|||
// The two forms of the boolean function FF_j, applied bitwise:
//  - FF1 is the three-way XOR of X, Y and Z,
//  - FF2 is the majority of X, Y and Z (a result bit is set when at least
//    two of the corresponding input bits are set).
#define FF1(X, Y, Z) ((X) ^ (Y) ^ (Z))
#define FF2(X, Y, Z) (((X) & (Y)) | ((X) & (Z)) | ((Y) & (Z)))

// Boolean function FF_j - section 4.3. of the IETF draft.
// Selects FF1 when J <= 15 and FF2 otherwise.
// X, Y and Z can appear more than once in the expansion, so the arguments
// must be free of side effects.
#define ZVKSH_FF(X, Y, Z, J) (((J) <= 15) ? FF1(X, Y, Z) : FF2(X, Y, Z))
|||
|
|||
// The two forms of the boolean function GG_j, applied bitwise:
//  - GG1 is the three-way XOR of X, Y and Z,
//  - GG2 is a bit select: where a bit of X is set the result takes the bit
//    of Y, otherwise it takes the bit of Z.
#define GG1(X, Y, Z) ((X) ^ (Y) ^ (Z))
#define GG2(X, Y, Z) (((X) & (Y)) | ((~(X)) & (Z)))

// Boolean function GG_j - section 4.3. of the IETF draft.
// Selects GG1 when J <= 15 and GG2 otherwise.
// X can appear more than once in the expansion, so the arguments must be
// free of side effects.
#define ZVKSH_GG(X, Y, Z, J) (((J) <= 15) ? GG1(X, Y, Z) : GG2(X, Y, Z))
|||
|
|||
// The two values taken by the round constant T_j.
// The 'U' suffix makes them unsigned, so shifting or rotating them cannot
// overflow a signed type.
#define T1 0x79CC4519U
#define T2 0x7A879D8AU

// T_j constant - section 4.2. of the IETF draft.
// Yields T1 when J <= 15 and T2 otherwise.
#define ZVKSH_T(J) (((J) <= 15) ? (T1) : (T2))
|||
|
|||
// Permutation functions P_0 and P_1 - section 4.4 of the IETF draft.
//   P_0(X) = X xor (X <<< 9)  xor (X <<< 17)
//   P_1(X) = X xor (X <<< 15) xor (X <<< 23)
// The rotations are delegated to ZVK_ROL32, which is defined outside this
// file. X appears three times in each expansion, so the argument must be
// free of side effects.
#define ZVKSH_P0(X) ((X) ^ ZVK_ROL32((X), 9) ^ ZVK_ROL32((X), 17))
#define ZVKSH_P1(X) ((X) ^ ZVK_ROL32((X), 15) ^ ZVK_ROL32((X), 23))
|||
|
|||
#endif  // RISCV_INSNS_ZVKSH_COMMON_H_
|
|||
Loading…
Reference in new issue