scalar-crypto: Initial spike support for v0.8.1 (#635)

Brief: - This commit adds spike support for the scalar cryptography extension. See the riscv-crypto repository (https://github.com/riscv/riscv-crypto) for more information on this extension. - It is based on the experimental patch which has so far been kept in the riscv-crypto repository. Now that scalar crypto is nearly at the "freeze" stage and entering opcode consistency review, it makes sense to start upstreaming our experimental version. - In terms of compiler support - we are using an experimental patch in the riscv-crypto repository at the moment, others are working on an upstream appropriate version. Details: - Add support for dedicated scalar crypto instructions. - Add very basic support for the entropy source (entropy_source.h). Much of the behaviour of this is implementation specific. This model aims to provide the bare minimum of functionality which can be used to quickly develop software. It uses /dev/urandom as its entropy source for now. - Scalar crypto is unique in that it _borrows_ instructions from the Bitmanipulation extension. This is currently _not_ encoded in the patch, as I didn't want to damage anything in Bitmanip which is currently under review. However, I've added a macro in riscv/decode.h called "require_either_extension(A,B)", which allows instructions to be valid opcodes iff they are in one or both extensions. 
On branch scalar-crypto Changes to be committed: modified: README.md modified: riscv/decode.h modified: riscv/encoding.h new file: riscv/entropy_source.h new file: riscv/insns/aes64im.h new file: riscv/insns/aes64ks1i.h new file: riscv/insns/aes64ks2.h new file: riscv/insns/aes_common.h new file: riscv/insns/aesds.h new file: riscv/insns/aesdsm.h new file: riscv/insns/aeses.h new file: riscv/insns/aesesm.h new file: riscv/insns/sha256sig0.h new file: riscv/insns/sha256sig1.h new file: riscv/insns/sha256sum0.h new file: riscv/insns/sha256sum1.h new file: riscv/insns/sha512sig0.h new file: riscv/insns/sha512sig0h.h new file: riscv/insns/sha512sig0l.h new file: riscv/insns/sha512sig1.h new file: riscv/insns/sha512sig1h.h new file: riscv/insns/sha512sig1l.h new file: riscv/insns/sha512sum0.h new file: riscv/insns/sha512sum0r.h new file: riscv/insns/sha512sum1.h new file: riscv/insns/sha512sum1r.h new file: riscv/insns/sm3p0.h new file: riscv/insns/sm3p1.h new file: riscv/insns/sm4_common.h new file: riscv/insns/sm4ed.h new file: riscv/insns/sm4ks.h modified: riscv/processor.cc modified: riscv/processor.h modified: riscv/riscv.mk.in
5 years ago · d6238d9945
34 changed files with 984 additions and 1 deletions
--- a/README.md
+++ b/README.md
@ -19,6 +19,7 @@ Spike supports the following RISC-V ISA features:
  - Q extension, v2.2
  - C extension, v2.0
  - B extension, v0.92
+  - K extension, v0.8.1 ([Scalar Cryptography](https://github.com/riscv/riscv-crypto))
  - V extension, v0.9, w/ Zvlsseg/Zvamo/Zvqmac, w/o Zvediv, (_requires a 64-bit host_)
  - Bi-endianness
  - Conformance to both RVWMO and RVTSO (Spike is sequentially consistent)
--- a/riscv/decode.h
+++ b/riscv/decode.h
@ -92,6 +92,8 @@ public:
  uint64_t rm() { return x(12, 3); }
  uint64_t csr() { return x(20, 12); }
  uint64_t iorw() { return x(20, 8); }
+  uint64_t bs  () {return x(30,2);} // Crypto ISE - SM4/AES32 byte select.
+  uint64_t rcon() {return x(20,4);} // Crypto ISE - AES64 round const.

  int64_t rvc_imm() { return x(2, 5) + (xs(12, 1) << 5); }
  int64_t rvc_zimm() { return x(2, 5) + (x(12, 1) << 5); }
@ -237,6 +239,7 @@ private:
 #define require_rv64 require(xlen == 64)
 #define require_rv32 require(xlen == 32)
 #define require_extension(s) require(p->supports_extension(s))
+#define require_either_extension(A,B) require(p->supports_extension(A) || p->supports_extension(B))
 #define require_impl(s) require(p->supports_impl(s))
 #define require_fp require((STATE.mstatus & MSTATUS_FS) != 0)
 #define require_accelerator require((STATE.mstatus & MSTATUS_XS) != 0)
--- a/riscv/encoding.h
+++ b/riscv/encoding.h
@ -928,6 +928,62 @@
 #define MASK_BFPW  0xfe00707f
 #define MATCH_XPERM_W 0x28000033
 #define MASK_XPERM_W  0xfe00707f
+// Crypto ISE Begin
+#define MASK_POLLENTROPY  0xfffff07f
+#define MATCH_POLLENTROPY 0xf1500073
+#define MASK_GETNOISE  0xfffff07f
+#define MATCH_GETNOISE 0x7a900073
+#define MASK_SM4ED  0x3e007fff
+#define MATCH_SM4ED 0x30000033
+#define MASK_SM4KS  0x3e007fff
+#define MATCH_SM4KS 0x34000033
+#define MASK_SM3P0  0xfff0707f
+#define MATCH_SM3P0 0x10801013
+#define MASK_SM3P1  0xfff0707f
+#define MATCH_SM3P1 0x10901013
+#define MASK_SHA256SUM0  0xfff0707f
+#define MATCH_SHA256SUM0 0x10001013
+#define MASK_SHA256SUM1  0xfff0707f
+#define MATCH_SHA256SUM1 0x10101013
+#define MASK_SHA256SIG0  0xfff0707f
+#define MATCH_SHA256SIG0 0x10201013
+#define MASK_SHA256SIG1  0xfff0707f
+#define MATCH_SHA256SIG1 0x10301013
+#define MASK_SHA512SUM0R  0xfe00707f
+#define MATCH_SHA512SUM0R 0x50000033
+#define MASK_SHA512SUM1R  0xfe00707f
+#define MATCH_SHA512SUM1R 0x52000033
+#define MASK_SHA512SIG0L  0xfe00707f
+#define MATCH_SHA512SIG0L 0x54000033
+#define MASK_SHA512SIG0H  0xfe00707f
+#define MATCH_SHA512SIG0H 0x5c000033
+#define MASK_SHA512SIG1L  0xfe00707f
+#define MATCH_SHA512SIG1L 0x56000033
+#define MASK_SHA512SIG1H  0xfe00707f
+#define MATCH_SHA512SIG1H 0x5e000033
+#define MASK_AES64KS1I  0xff00707f
+#define MATCH_AES64KS1I 0x31001013
+#define MASK_AES64IM  0xfff0707f
+#define MATCH_AES64IM 0x30001013
+#define MASK_AES64KS2  0xfe00707f
+#define MATCH_AES64KS2 0x7e000033
+#define MASK_AESESM  0x3e00707f
+#define MATCH_AESESM 0x36000033
+#define MASK_AESES  0x3e00707f
+#define MATCH_AESES 0x32000033
+#define MASK_AESDSM  0x3e00707f
+#define MATCH_AESDSM 0x3e000033
+#define MASK_AESDS  0x3e00707f
+#define MATCH_AESDS 0x3a000033
+#define MASK_SHA512SUM0  0xfff0707f
+#define MATCH_SHA512SUM0 0x10401013
+#define MASK_SHA512SUM1  0xfff0707f
+#define MATCH_SHA512SUM1 0x10501013
+#define MASK_SHA512SIG0  0xfff0707f
+#define MATCH_SHA512SIG0 0x10601013
+#define MASK_SHA512SIG1  0xfff0707f
+#define MATCH_SHA512SIG1 0x10701013
+// Crypto ISE End
 #define MATCH_ECALL 0x73
 #define MASK_ECALL  0xffffffff
 #define MATCH_EBREAK 0x100073
@ -2276,6 +2332,8 @@
 #define CSR_MARCHID 0xf12
 #define CSR_MIMPID 0xf13
 #define CSR_MHARTID 0xf14
+#define CSR_MENTROPY 0xf15
+#define CSR_MNOISE 0x7a9
 #define CSR_HTIMEDELTAH 0x615
 #define CSR_CYCLEH 0xc80
 #define CSR_TIMEH 0xc81
@ -2683,6 +2741,33 @@ DECLARE_INSN(packw, MATCH_PACKW, MASK_PACKW)
 DECLARE_INSN(packuw, MATCH_PACKUW, MASK_PACKUW)
 DECLARE_INSN(bfpw, MATCH_BFPW, MASK_BFPW)
 DECLARE_INSN(xperm_w, MATCH_XPERM_W, MASK_XPERM_W)
+DECLARE_INSN(sm4ed, MATCH_SM4ED, MASK_SM4ED)
+DECLARE_INSN(sm4ks, MATCH_SM4KS, MASK_SM4KS)
+DECLARE_INSN(aes64ks1i, MATCH_AES64KS1I, MASK_AES64KS1I)
+DECLARE_INSN(aes64ks2, MATCH_AES64KS2, MASK_AES64KS2)
+DECLARE_INSN(aes64im, MATCH_AES64IM, MASK_AES64IM)
+DECLARE_INSN(aesesm, MATCH_AESESM, MASK_AESESM)
+DECLARE_INSN(aeses, MATCH_AESES, MASK_AESES)
+DECLARE_INSN(aesdsm, MATCH_AESDSM, MASK_AESDSM)
+DECLARE_INSN(aesds, MATCH_AESDS, MASK_AESDS)
+DECLARE_INSN(sha256sig0, MATCH_SHA256SIG0, MASK_SHA256SIG0)
+DECLARE_INSN(sha256sig1, MATCH_SHA256SIG1, MASK_SHA256SIG1)
+DECLARE_INSN(sha256sum0, MATCH_SHA256SUM0, MASK_SHA256SUM0)
+DECLARE_INSN(sha256sum1, MATCH_SHA256SUM1, MASK_SHA256SUM1)
+DECLARE_INSN(sm3p0, MATCH_SM3P0, MASK_SM3P0)
+DECLARE_INSN(sm3p1, MATCH_SM3P1, MASK_SM3P1)
+DECLARE_INSN(sha512sig0l, MATCH_SHA512SIG0L, MASK_SHA512SIG0L)
+DECLARE_INSN(sha512sig0h, MATCH_SHA512SIG0H, MASK_SHA512SIG0H)
+DECLARE_INSN(sha512sig1l, MATCH_SHA512SIG1L, MASK_SHA512SIG1L)
+DECLARE_INSN(sha512sig1h, MATCH_SHA512SIG1H, MASK_SHA512SIG1H)
+DECLARE_INSN(sha512sum0r, MATCH_SHA512SUM0R, MASK_SHA512SUM0R)
+DECLARE_INSN(sha512sum1r, MATCH_SHA512SUM1R, MASK_SHA512SUM1R)
+DECLARE_INSN(sha512sig0, MATCH_SHA512SIG0, MASK_SHA512SIG0)
+DECLARE_INSN(sha512sig1, MATCH_SHA512SIG1, MASK_SHA512SIG1)
+DECLARE_INSN(sha512sum0, MATCH_SHA512SUM0, MASK_SHA512SUM0)
+DECLARE_INSN(sha512sum1, MATCH_SHA512SUM1, MASK_SHA512SUM1)
+DECLARE_INSN(pollentropy, MATCH_POLLENTROPY, MASK_POLLENTROPY)
+DECLARE_INSN(getnoise, MATCH_GETNOISE, MASK_GETNOISE)
 DECLARE_INSN(ecall, MATCH_ECALL, MASK_ECALL)
 DECLARE_INSN(ebreak, MATCH_EBREAK, MASK_EBREAK)
 DECLARE_INSN(uret, MATCH_URET, MASK_URET)
@ -3464,6 +3549,8 @@ DECLARE_CSR(mvendorid, CSR_MVENDORID)
 DECLARE_CSR(marchid, CSR_MARCHID)
 DECLARE_CSR(mimpid, CSR_MIMPID)
 DECLARE_CSR(mhartid, CSR_MHARTID)
+DECLARE_CSR(mentropy, CSR_MENTROPY)
+DECLARE_CSR(mnoise, CSR_MNOISE)
 DECLARE_CSR(htimedeltah, CSR_HTIMEDELTAH)
 DECLARE_CSR(cycleh, CSR_CYCLEH)
 DECLARE_CSR(timeh, CSR_TIMEH)
--- a/riscv/entropy_source.h
+++ b/riscv/entropy_source.h
@ -0,0 +1,146 @@
+
+#include <fstream>
+#include <iostream>
+
+#include "internals.h"
+
+//
+// Used to model the cryptography extension entropy source.
+// See Section 4 of the Scalar Cryptography Extension specification.
+class entropy_source {
+
+public:
+
+  // Valid return codes for OPST bits [31:30] when reading mentropy.
+  // Built from unsigned literals so that placing a value in bit 31 never
+  // goes through a signed int.
+  static const uint32_t OPST_BIST = 0x0u << 30;
+  static const uint32_t OPST_ES16 = 0x1u << 30;
+  static const uint32_t OPST_WAIT = 0x2u << 30;
+  static const uint32_t OPST_DEAD = 0x3u << 30;
+
+  //
+  // Other system events
+  // ------------------------------------------------------------
+
+  // Reset hook for the entropy source.
+  void reset() {
+    // Does nothing for now. In the future, can be used to model things
+    // like initial BIST states.
+  }
+
+  //
+  // mentropy register
+  // ------------------------------------------------------------
+
+  // Write handler for mentropy. The written value is discarded.
+  void set_mentropy(reg_t val) {
+    // Always ignore writes to mentropy.
+    // This CSR *must never* accept write values, it is strictly read only.
+  }
+
+
+  //
+  // Read handler for mentropy.
+  // The format of mentropy is described in Table 3 / Section 4.1 of
+  // the scalar cryptography specification.
+  //
+  // Returns the OPST status in bits [31:30]. When the status is OPST_ES16
+  // the low 16 bits hold two freshly sampled random bytes; for every other
+  // status the low 16 bits are zero.
+  reg_t get_mentropy() {
+
+    // Currently, always return ES16 (i.e. good randomness) unless in
+    // noise capture mode.  In the future, we can more realistically model
+    // things like WAIT states, BIST warm up and maybe scriptable entry
+    // into the DEAD state, but until then, this is the bare minimum.
+    uint32_t return_status = OPST_ES16;
+
+    if(noise_capture_mode) {
+        return_status = OPST_BIST;
+    }
+
+    uint32_t result = return_status;
+
+    if(return_status == OPST_ES16) {
+        // Add some sampled entropy into the low 16 bits.
+        result |= this -> get_two_random_bytes();
+    }
+
+    // Result is zero-extended on RV64.
+    return (reg_t)result;
+  }
+
+  //
+  // mnoise register
+  // ------------------------------------------------------------
+
+
+  // Write handler for mnoise. Only bit 31 is interpreted: it switches
+  // noise capture mode on (1) or off (0).
+  void set_mnoise(reg_t val) {
+      // Almost all of the behaviour for mnoise is vendor specific,
+      // except for bit 31.
+      noise_capture_mode = ((val >> 31) & 0x1) == 1;
+  }
+
+
+  // Read handler for mnoise. Returns bit 31 set while in noise capture
+  // mode and zero otherwise.
+  reg_t get_mnoise() {
+      reg_t to_return = 0;
+
+      if(this -> noise_capture_mode) {
+          // Set bit 31 indicating we are in noise capture mode.
+          // The shift is done in reg_t: shifting a plain int constant
+          // into bit 31 yields a negative int, which would also set
+          // every bit above 31 if reg_t is wider than 32 bits.
+          to_return |= (reg_t)1 << 31;
+      }
+
+      return to_return;
+  }
+
+  //
+  // Utility / support variables and functions.
+  // ------------------------------------------------------------
+
+  // The ES is in noise capture mode?
+  // If so, then get_mentropy must always return OPST_BIST.
+  bool noise_capture_mode = false;
+
+  // The file to read entropy from.
+  std::string randomness_source = "/dev/urandom";
+
+  // Read two random bytes from the entropy source file.
+  // Aborts the process if the file cannot be opened or does not supply
+  // two bytes, so a failed read is never handed out as entropy.
+  uint16_t get_two_random_bytes() {
+
+      std::ifstream fh(this -> randomness_source, std::ios::binary);
+
+      if(!fh.is_open()) {
+
+          fprintf(stderr, "Could not open randomness source file:\n\t");
+          fprintf(stderr, "%s", randomness_source.c_str());
+          abort();
+
+      }
+
+      uint16_t random_bytes = 0;
+
+      if(!fh.read((char*)(&random_bytes), sizeof(random_bytes))) {
+
+          fprintf(stderr, "Could not read randomness source file:\n\t");
+          fprintf(stderr, "%s", randomness_source.c_str());
+          abort();
+
+      }
+
+      // The stream is closed by its destructor.
+      return random_bytes;
+
+  }
+
+};
+
--- a/riscv/insns/aes64im.h
+++ b/riscv/insns/aes64im.h
@ -0,0 +1,16 @@
+
+#include "aes_common.h"
+
+require_rv64;
+require_extension('K');
+
+// Run AES_INVMIXCOLUMN over each 32-bit half of rs1 independently and
+// write the two transformed halves back in their original positions.
+const uint64_t rs1_val  = RS1;
+const uint32_t in_low   = rs1_val & 0xFFFFFFFF;
+const uint32_t in_high  = rs1_val >> 32;
+
+const uint32_t out_low  = AES_INVMIXCOLUMN(in_low);
+const uint32_t out_high = AES_INVMIXCOLUMN(in_high);
+
+WRITE_RD(((uint64_t)out_high << 32) | out_low);
+
--- a/riscv/insns/aes64ks1i.h
+++ b/riscv/insns/aes64ks1i.h
@ -0,0 +1,38 @@
+
+#include "aes_common.h"
+
+require_rv64;
+require_extension('K');
+
+// Round constants selected by the rcon immediate (values 0..9).
+// static const: the table is read-only, so it need not be rebuilt each
+// time this instruction body runs.
+static const uint8_t round_consts [10] = {
+    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
+};
+
+uint8_t     enc_rcon          = insn.rcon() ;
+
+if(enc_rcon > 0xA) {
+    // Invalid opcode.
+    throw trap_illegal_instruction(0);
+}
+
+// The operation works on the upper 32 bits of rs1.
+uint32_t    temp              = (RS1 >> 32) & 0xFFFFFFFF  ;
+uint8_t     rcon              = 0            ;
+uint64_t    result                           ;
+
+// rcon == 0xA selects the variant with no rotation and no round constant.
+if(enc_rcon != 0xA) {
+    temp    = (temp >> 8) | (temp << 24); // Rotate right by 8
+    rcon    = round_consts[enc_rcon];
+}
+
+// Substitute each of the four bytes through the AES encryption S-box.
+temp        =
+    ((uint32_t)AES_ENC_SBOX[(temp >> 24) & 0xFF] << 24) |
+    ((uint32_t)AES_ENC_SBOX[(temp >> 16) & 0xFF] << 16) |
+    ((uint32_t)AES_ENC_SBOX[(temp >>  8) & 0xFF] <<  8) |
+    ((uint32_t)AES_ENC_SBOX[(temp >>  0) & 0xFF] <<  0) ;
+
+temp       ^= rcon;
+
+// The 32-bit result is replicated into both halves of rd.
+result      = ((uint64_t)temp << 32) | temp;
+
+WRITE_RD(result);
+
--- a/riscv/insns/aes64ks2.h
+++ b/riscv/insns/aes64ks2.h
@ -0,0 +1,16 @@
+
+#include "aes_common.h"
+
+require_rv64;
+require_extension('K');
+
+// Low result word  = rs1[63:32] ^ rs2[31:0]
+// High result word = rs1[63:32] ^ rs2[31:0] ^ rs2[63:32]
+const uint64_t rs1_val   = RS1;
+const uint64_t rs2_val   = RS2;
+
+const uint32_t low_word  = (uint32_t)(rs1_val >> 32) ^ (uint32_t)rs2_val;
+const uint32_t high_word = low_word ^ (uint32_t)(rs2_val >> 32);
+
+WRITE_RD(((uint64_t)high_word << 32) | low_word);
+
--- a/riscv/insns/aes_common.h
+++ b/riscv/insns/aes_common.h
@ -0,0 +1,156 @@
+
+// AES encryption S-box: a 256-entry byte substitution table indexed by the
+// input byte. Declared static const because it is never written, so it
+// does not have to be re-created wherever this header is included.
+static const uint8_t AES_ENC_SBOX[]= {
+  0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
+  0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+  0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+  0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+  0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
+  0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+  0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
+  0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+  0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+  0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+  0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
+  0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+  0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
+  0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+  0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+  0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+  0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
+  0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+  0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
+  0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+  0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+  0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+  0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
+  0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+  0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
+  0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+  0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+  0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+  0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
+  0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+  0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
+  0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
+};
+
+// AES decryption S-box: a 256-entry byte substitution table indexed by the
+// input byte. Declared static const because it is never written, so it
+// does not have to be re-created wherever this header is included.
+static const uint8_t AES_DEC_SBOX[] = {
+  0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
+  0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
+  0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+  0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
+  0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
+  0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+  0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
+  0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
+  0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+  0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
+  0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
+  0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+  0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
+  0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
+  0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+  0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
+  0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
+  0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+  0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
+  0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
+  0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+  0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
+  0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
+  0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+  0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
+  0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
+  0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+  0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
+  0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
+  0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+  0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
+  0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D
+};
+
+#define AES_UNPACK_BYTES(b0,b1,b2,b3) \
+    uint8_t  b0 = (RS1 >>  0) & 0xFF; \
+    uint8_t  b1 = (RS2 >>  8) & 0xFF; \
+    uint8_t  b2 = (RS1 >> 16) & 0xFF; \
+    uint8_t  b3 = (RS2 >> 24) & 0xFF; \
+
+#define AES_PACK_BYTES(b0,b1,b2,b3) ( \
+    (uint32_t)b0 <<  0  | \
+    (uint32_t)b1 <<  8  | \
+    (uint32_t)b2 << 16  | \
+    (uint32_t)b3 << 24  )
+
+#define AES_SBOX(b0, b1, b2, b3) \
+    b0 = AES_ENC_SBOX[b0]; \
+    b1 = AES_ENC_SBOX[b1]; \
+    b2 = AES_ENC_SBOX[b2]; \
+    b3 = AES_ENC_SBOX[b3]; \
+
+#define AES_RSBOX(b0, b1, b2, b3) \
+    b0 = AES_DEC_SBOX[b0]; \
+    b1 = AES_DEC_SBOX[b1]; \
+    b2 = AES_DEC_SBOX[b2]; \
+    b3 = AES_DEC_SBOX[b3]; \
+
+// One doubling step: shift left by one and XOR in 0x1b when bit 7 of the
+// input is set. The result is not truncated here; AES_GFMUL masks to 8 bits.
+// The parameter is parenthesised so any expression can be passed safely.
+#define AES_XTIME(a) \
+    (((a) << 1) ^ (((a)&0x80) ? 0x1b : 0))
+
+// Multiply a by the 4-bit constant b: XOR together a doubled 0..3 times
+// for each set bit of b, then keep the low 8 bits.
+#define AES_GFMUL(a,b) (( \
+    ( ( (b) & 0x1 ) ?                              (a)   : 0 ) ^ \
+    ( ( (b) & 0x2 ) ?                     AES_XTIME(a)   : 0 ) ^ \
+    ( ( (b) & 0x4 ) ?           AES_XTIME(AES_XTIME(a))  : 0 ) ^ \
+    ( ( (b) & 0x8 ) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0 ) )&0xFF)
+
+// Extract byte I (0 = least significant) of X.
+#define BY(X,I) (((X) >> (8*(I))) & 0xFF)
+
+#define AES_SHIFROWS_LO(RS1,RS2) ( \
+    (((RS1 >> 24) & 0xFF) << 56) | \
+    (((RS2 >> 48) & 0xFF) << 48) | \
+    (((RS2 >>  8) & 0xFF) << 40) | \
+    (((RS1 >> 32) & 0xFF) << 32) | \
+    (((RS2 >> 56) & 0xFF) << 24) | \
+    (((RS2 >> 16) & 0xFF) << 16) | \
+    (((RS1 >> 40) & 0xFF) <<  8) | \
+    (((RS1 >>  0) & 0xFF) <<  0) ) 
+
+#define AES_INVSHIFROWS_LO(RS1,RS2) ( \
+    (((RS2 >> 24) & 0xFF) << 56) | \
+    (((RS2 >> 48) & 0xFF) << 48) | \
+    (((RS1 >>  8) & 0xFF) << 40) | \
+    (((RS1 >> 32) & 0xFF) << 32) | \
+    (((RS1 >> 56) & 0xFF) << 24) | \
+    (((RS2 >> 16) & 0xFF) << 16) | \
+    (((RS2 >> 40) & 0xFF) <<  8) | \
+    (((RS1 >>  0) & 0xFF) <<  0) ) 
+
+
+#define AES_MIXBYTE(COL,B0,B1,B2,B3) ( \
+              BY(COL,B3)     ^ \
+              BY(COL,B2)     ^ \
+    AES_GFMUL(BY(COL,B1), 3) ^ \
+    AES_GFMUL(BY(COL,B0), 2)   \
+)
+
+#define AES_MIXCOLUMN(COL) ( \
+    AES_MIXBYTE(COL,3,0,1,2) << 24 | \
+    AES_MIXBYTE(COL,2,3,0,1) << 16 | \
+    AES_MIXBYTE(COL,1,2,3,0) <<  8 | \
+    AES_MIXBYTE(COL,0,1,2,3) <<  0   \
+)
+
+
+#define AES_INVMIXBYTE(COL,B0,B1,B2,B3) ( \
+    AES_GFMUL(BY(COL,B3),0x9) ^ \
+    AES_GFMUL(BY(COL,B2),0xd) ^ \
+    AES_GFMUL(BY(COL,B1),0xb) ^ \
+    AES_GFMUL(BY(COL,B0),0xe)   \
+)
+
+#define AES_INVMIXCOLUMN(COL) ( \
+    AES_INVMIXBYTE(COL,3,0,1,2) << 24 | \
+    AES_INVMIXBYTE(COL,2,3,0,1) << 16 | \
+    AES_INVMIXBYTE(COL,1,2,3,0) <<  8 | \
+    AES_INVMIXBYTE(COL,0,1,2,3) <<  0   \
+)
+
--- a/riscv/insns/aesds.h
+++ b/riscv/insns/aesds.h
@ -0,0 +1,47 @@
+
+#include "aes_common.h"
+
+// The encodings for the RV32 and RV64 AES instructions overlap, as they
+// are mutually exclusive. They have rather different functionality.
+
+if(xlen == 32) {
+    // Execute the RV32 aes32dsi instruction
+
+    require_rv32;
+    require_extension('K');
+    // NOTE(review): RD is defined outside this file; confirm it denotes the
+    // rd field of the encoding rather than the value held in that register.
+    require(RD == 0); // Additional decoding required for RV32
+
+    uint8_t     bs = insn.bs();
+
+    // Select byte bs of rs2 and substitute it through the decryption S-box.
+    uint8_t     t0 = RS2 >> (8*bs);
+    uint8_t      x = AES_DEC_SBOX[t0];
+    uint32_t     u = x;
+
+    // Rotate left by 8*bs bits. The right-shift count is reduced modulo 32
+    // so that bs == 0 shifts by 0 instead of by the full width of u, which
+    // would be undefined behaviour for a 32-bit operand.
+    u = (u << (8*bs)) | (u >> ((32-8*bs) & 31));
+
+    uint64_t    rd = insn.rs1(); // RD sourced from RS1 field.
+    WRITE_REG(rd, u ^ RS1);
+
+} else {
+    // Execute the RV64 aes64ds instruction
+
+    require(insn.bs() == 0);
+    require_rv64;
+    require_extension('K');
+
+    // Gather the eight input bytes from rs1/rs2, then substitute each one
+    // through the decryption S-box.
+    uint64_t temp = AES_INVSHIFROWS_LO(RS1,RS2);
+
+             temp = (
+        ((uint64_t)AES_DEC_SBOX[(temp >>  0) & 0xFF] <<  0) |
+        ((uint64_t)AES_DEC_SBOX[(temp >>  8) & 0xFF] <<  8) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 16) & 0xFF] << 16) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 24) & 0xFF] << 24) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 32) & 0xFF] << 32) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 40) & 0xFF] << 40) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 48) & 0xFF] << 48) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 56) & 0xFF] << 56)
+    );
+
+    WRITE_RD(temp);
+
+}
--- a/riscv/insns/aesdsm.h
+++ b/riscv/insns/aesdsm.h
@ -0,0 +1,60 @@
+
+#include "aes_common.h"
+
+// The encodings for the RV32 and RV64 AES instructions overlap, as they
+// are mutually exclusive. They have rather different functionality.
+
+if(xlen == 32) {
+    // Execute the RV32 aes32dsmi instruction
+
+    require_rv32;
+    require_extension('K');
+    // NOTE(review): RD is defined outside this file; confirm it denotes the
+    // rd field of the encoding rather than the value held in that register.
+    require(RD == 0); // Additional decoding required for RV32
+
+    uint8_t     bs = insn.bs();
+
+    // Select byte bs of rs2 and substitute it through the decryption S-box.
+    uint8_t     t0 = RS2 >> (8*bs);
+    uint8_t      x = AES_DEC_SBOX[t0];
+    uint32_t     u ;
+
+    // Expand the substituted byte into a 32-bit word of its multiples by
+    // 0xb, 0xd, 0x9 and 0xe. Each product is widened to uint32_t before
+    // the shift so bit 31 is never reached through a signed int.
+    u = ((uint32_t)AES_GFMUL(x,0xb) << 24) |
+        ((uint32_t)AES_GFMUL(x,0xd) << 16) |
+        ((uint32_t)AES_GFMUL(x,0x9) <<  8) |
+        ((uint32_t)AES_GFMUL(x,0xe) <<  0) ;
+
+    // Rotate left by 8*bs bits. The right-shift count is reduced modulo 32
+    // so that bs == 0 shifts by 0 instead of by the full width of u, which
+    // would be undefined behaviour for a 32-bit operand.
+    u = (u << (8*bs)) | (u >> ((32-8*bs) & 31));
+
+    uint64_t    rd = insn.rs1(); // RD sourced from RS1 field.
+    WRITE_REG(rd, u ^ RS1);
+
+} else {
+    // Execute the RV64 aes64dsm instruction
+
+    require(insn.bs() == 0);
+    require_rv64;
+    require_extension('K');
+
+    // Gather the eight input bytes from rs1/rs2, then substitute each one
+    // through the decryption S-box.
+    uint64_t temp = AES_INVSHIFROWS_LO(RS1,RS2);
+
+             temp = (
+        ((uint64_t)AES_DEC_SBOX[(temp >>  0) & 0xFF] <<  0) |
+        ((uint64_t)AES_DEC_SBOX[(temp >>  8) & 0xFF] <<  8) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 16) & 0xFF] << 16) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 24) & 0xFF] << 24) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 32) & 0xFF] << 32) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 40) & 0xFF] << 40) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 48) & 0xFF] << 48) |
+        ((uint64_t)AES_DEC_SBOX[(temp >> 56) & 0xFF] << 56)
+    );
+
+    // Apply AES_INVMIXCOLUMN to each 32-bit half independently.
+    uint32_t col_0 = temp & 0xFFFFFFFF;
+    uint32_t col_1 = temp >> 32       ;
+
+             col_0 = AES_INVMIXCOLUMN(col_0);
+             col_1 = AES_INVMIXCOLUMN(col_1);
+
+    uint64_t result= ((uint64_t)col_1 << 32) | col_0;
+
+    WRITE_RD(result);
+
+}
--- a/riscv/insns/aeses.h
+++ b/riscv/insns/aeses.h
@ -0,0 +1,47 @@
+
+#include "aes_common.h"
+
+// The encodings for the RV32 and RV64 AES instructions overlap, as they
+// are mutually exclusive. They have rather different functionality.
+
+if(xlen == 32) {
+    // Execute the RV32 aes32esi instruction
+
+    require_rv32;
+    require_extension('K');
+    // NOTE(review): RD is defined outside this file; confirm it denotes the
+    // rd field of the encoding rather than the value held in that register.
+    require(RD == 0); // Additional decoding required for RV32
+
+    uint8_t     bs = insn.bs();
+
+    // Select byte bs of rs2 and substitute it through the encryption S-box.
+    uint8_t     t0 = RS2 >> (8*bs);
+    uint8_t      x = AES_ENC_SBOX[t0];
+    uint32_t     u = x;
+
+    // Rotate left by 8*bs bits. The right-shift count is reduced modulo 32
+    // so that bs == 0 shifts by 0 instead of by the full width of u, which
+    // would be undefined behaviour for a 32-bit operand.
+    u = (u << (8*bs)) | (u >> ((32-8*bs) & 31));
+
+    uint64_t    rd = insn.rs1(); // RD sourced from RS1 field.
+    WRITE_REG(rd, u ^ RS1);
+
+} else {
+    // Execute the RV64 aes64es instruction
+
+    require(insn.bs() == 0);
+    require_rv64;
+    require_extension('K');
+
+    // Gather the eight input bytes from rs1/rs2, then substitute each one
+    // through the encryption S-box.
+    uint64_t temp = AES_SHIFROWS_LO(RS1,RS2);
+
+             temp = (
+        ((uint64_t)AES_ENC_SBOX[(temp >>  0) & 0xFF] <<  0) |
+        ((uint64_t)AES_ENC_SBOX[(temp >>  8) & 0xFF] <<  8) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 16) & 0xFF] << 16) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 24) & 0xFF] << 24) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 32) & 0xFF] << 32) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 40) & 0xFF] << 40) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 48) & 0xFF] << 48) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 56) & 0xFF] << 56)
+    );
+
+    WRITE_RD(temp);
+
+}
--- a/riscv/insns/aesesm.h
+++ b/riscv/insns/aesesm.h
@ -0,0 +1,61 @@
+
+#include "aes_common.h"
+
+// The encodings for the RV32 and RV64 AES instructions overlap, as they
+// are mutually exclusive. They have rather different functionality.
+
+if(xlen == 32) {
+    // Execute the RV32 aes32esmi instruction
+
+    require_rv32;
+    require_extension('K');
+    // NOTE(review): RD is defined outside this file; confirm it denotes the
+    // rd field of the encoding rather than the value held in that register.
+    require(RD == 0); // Additional decoding required for RV32
+
+    uint8_t     bs = insn.bs();
+
+    // Select byte bs of rs2 and substitute it through the encryption S-box.
+    uint8_t     t0 = RS2 >> (8*bs);
+    uint8_t      x = AES_ENC_SBOX[t0];
+    uint32_t     u ;
+
+    // Expand the substituted byte into a 32-bit word of its multiples by
+    // 3, 1, 1 and 2. Each term is widened to uint32_t before the shift so
+    // bit 31 is never reached through a signed int.
+    u = ((uint32_t)AES_GFMUL(x,3) << 24) |
+        ((uint32_t)          x    << 16) |
+        ((uint32_t)          x    <<  8) |
+        ((uint32_t)AES_GFMUL(x,2) <<  0) ;
+
+    // Rotate left by 8*bs bits. The right-shift count is reduced modulo 32
+    // so that bs == 0 shifts by 0 instead of by the full width of u, which
+    // would be undefined behaviour for a 32-bit operand.
+    u = (u << (8*bs)) | (u >> ((32-8*bs) & 31));
+
+    uint64_t    rd = insn.rs1(); // RD sourced from RS1 field.
+    WRITE_REG(rd, u ^ RS1);
+
+} else {
+    // Execute the RV64 aes64esm instruction
+
+    require(insn.bs() == 0);
+    require_rv64;
+    require_extension('K');
+
+    // Gather the eight input bytes from rs1/rs2, then substitute each one
+    // through the encryption S-box.
+    uint64_t temp = AES_SHIFROWS_LO(RS1,RS2);
+
+             temp = (
+        ((uint64_t)AES_ENC_SBOX[(temp >>  0) & 0xFF] <<  0) |
+        ((uint64_t)AES_ENC_SBOX[(temp >>  8) & 0xFF] <<  8) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 16) & 0xFF] << 16) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 24) & 0xFF] << 24) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 32) & 0xFF] << 32) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 40) & 0xFF] << 40) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 48) & 0xFF] << 48) |
+        ((uint64_t)AES_ENC_SBOX[(temp >> 56) & 0xFF] << 56)
+    );
+
+    // Apply AES_MIXCOLUMN to each 32-bit half independently.
+    uint32_t col_0 = temp & 0xFFFFFFFF;
+    uint32_t col_1 = temp >> 32       ;
+
+             col_0 = AES_MIXCOLUMN(col_0);
+             col_1 = AES_MIXCOLUMN(col_1);
+
+    uint64_t result= ((uint64_t)col_1 << 32) | col_0;
+
+    WRITE_RD(result);
+
+}
+
--- a/riscv/insns/sha256sig0.h
+++ b/riscv/insns/sha256sig0.h
@ -0,0 +1,13 @@
+
+require_extension('K');
+
+// rd = ROR(x,7) ^ ROR(x,18) ^ (x >> 3), with x the low 32 bits of rs1.
+const uint32_t word = RS1;
+
+// 32-bit rotate right; only called with 0 < n < 32.
+const auto rotr32 = [](uint32_t v, unsigned n) -> uint32_t {
+    return (v >> n) | (v << (32 - n));
+};
+
+const uint32_t mixed = rotr32(word, 7) ^ rotr32(word, 18) ^ (word >> 3);
+
+WRITE_RD(mixed);
+
--- a/riscv/insns/sha256sig1.h
+++ b/riscv/insns/sha256sig1.h
@ -0,0 +1,13 @@
+
+require_extension('K');
+
+// rd = ROR(x,17) ^ ROR(x,19) ^ (x >> 10), with x the low 32 bits of rs1.
+const uint32_t word = RS1;
+
+// 32-bit rotate right; only called with 0 < n < 32.
+const auto rotr32 = [](uint32_t v, unsigned n) -> uint32_t {
+    return (v >> n) | (v << (32 - n));
+};
+
+const uint32_t mixed = rotr32(word, 17) ^ rotr32(word, 19) ^ (word >> 10);
+
+WRITE_RD(mixed);
+
--- a/riscv/insns/sha256sum0.h
+++ b/riscv/insns/sha256sum0.h
@ -0,0 +1,13 @@
+
+require_extension('K');
+
+// rd = ROR(x,2) ^ ROR(x,13) ^ ROR(x,22), with x the low 32 bits of rs1.
+const uint32_t word = RS1;
+
+// 32-bit rotate right; only called with 0 < n < 32.
+const auto rotr32 = [](uint32_t v, unsigned n) -> uint32_t {
+    return (v >> n) | (v << (32 - n));
+};
+
+const uint32_t mixed = rotr32(word, 2) ^ rotr32(word, 13) ^ rotr32(word, 22);
+
+WRITE_RD(mixed);
+
--- a/riscv/insns/sha256sum1.h
+++ b/riscv/insns/sha256sum1.h
@ -0,0 +1,13 @@
+
+require_extension('K');
+
+// rd = ROR(x,6) ^ ROR(x,11) ^ ROR(x,25), with x the low 32 bits of rs1.
+const uint32_t word = RS1;
+
+// 32-bit rotate right; only called with 0 < n < 32.
+const auto rotr32 = [](uint32_t v, unsigned n) -> uint32_t {
+    return (v >> n) | (v << (32 - n));
+};
+
+const uint32_t mixed = rotr32(word, 6) ^ rotr32(word, 11) ^ rotr32(word, 25);
+
+WRITE_RD(mixed);
+
--- a/riscv/insns/sha512sig0.h
+++ b/riscv/insns/sha512sig0.h
@ -0,0 +1,13 @@
+require_rv64;
+require_extension('K');
+
+// rd = ROR(x,1) ^ ROR(x,8) ^ (x >> 7), with x the 64-bit value of rs1.
+const uint64_t dword = RS1;
+
+// 64-bit rotate right; only called with 0 < n < 64.
+const auto rotr64 = [](uint64_t v, unsigned n) -> uint64_t {
+    return (v >> n) | (v << (64 - n));
+};
+
+const uint64_t mixed = rotr64(dword, 1) ^ rotr64(dword, 8) ^ (dword >> 7);
+
+WRITE_RD(mixed);
+
--- a/riscv/insns/sha512sig0h.h
+++ b/riscv/insns/sha512sig0h.h
@ -0,0 +1,9 @@
+
+require_rv32;
+require_extension('K');
+
+// XOR together shifted copies of the low 32 bits of rs1 and rs2.
+const uint32_t src1 = (uint32_t)RS1;
+const uint32_t src2 = (uint32_t)RS2;
+
+uint32_t mixed  = (src1 >>  1) ^ (src1 >>  7) ^ (src1 >>  8);
+         mixed ^= (src2 << 31) ^ (src2 << 24);
+
+WRITE_RD(zext_xlen(mixed));
--- a/riscv/insns/sha512sig0l.h
+++ b/riscv/insns/sha512sig0l.h
@ -0,0 +1,9 @@
+
+require_rv32;
+require_extension('K');
+
+// XOR together shifted copies of the low 32 bits of rs1 and rs2.
+const uint32_t src1 = (uint32_t)RS1;
+const uint32_t src2 = (uint32_t)RS2;
+
+uint32_t mixed  = (src1 >>  1) ^ (src1 >>  7) ^ (src1 >>  8);
+         mixed ^= (src2 << 31) ^ (src2 << 25) ^ (src2 << 24);
+
+WRITE_RD(zext_xlen(mixed));
--- a/riscv/insns/sha512sig1.h
+++ b/riscv/insns/sha512sig1.h
@ -0,0 +1,13 @@
+require_rv64;
+require_extension('K');
+
+// rd = ROR(x,19) ^ ROR(x,61) ^ (x >> 6), with x the 64-bit value of rs1.
+const uint64_t dword = RS1;
+
+// 64-bit rotate right; only called with 0 < n < 64.
+const auto rotr64 = [](uint64_t v, unsigned n) -> uint64_t {
+    return (v >> n) | (v << (64 - n));
+};
+
+const uint64_t mixed = rotr64(dword, 19) ^ rotr64(dword, 61) ^ (dword >> 6);
+
+WRITE_RD(mixed);
+
--- a/riscv/insns/sha512sig1h.h
+++ b/riscv/insns/sha512sig1h.h
@ -0,0 +1,9 @@
+
+require_rv32;
+require_extension('K');
+
+// XOR together shifted copies of the low 32 bits of rs1 and rs2.
+const uint32_t src1 = (uint32_t)RS1;
+const uint32_t src2 = (uint32_t)RS2;
+
+uint32_t mixed  = (src1 <<  3) ^ (src1 >>  6) ^ (src1 >> 19);
+         mixed ^= (src2 >> 29) ^ (src2 << 13);
+
+WRITE_RD(zext_xlen(mixed));
--- a/riscv/insns/sha512sig1l.h
+++ b/riscv/insns/sha512sig1l.h
@ -0,0 +1,9 @@
+
+require_rv32;
+require_extension('K');
+
+// XOR together shifted copies of the low 32 bits of rs1 and rs2.
+const uint32_t src1 = (uint32_t)RS1;
+const uint32_t src2 = (uint32_t)RS2;
+
+uint32_t mixed  = (src1 <<  3) ^ (src1 >>  6) ^ (src1 >> 19);
+         mixed ^= (src2 >> 29) ^ (src2 << 26) ^ (src2 << 13);
+
+WRITE_RD(zext_xlen(mixed));
--- a/riscv/insns/sha512sum0.h
+++ b/riscv/insns/sha512sum0.h
@ -0,0 +1,13 @@
+require_rv64;
+require_extension('K');
+
+// rd = ROR(x,28) ^ ROR(x,34) ^ ROR(x,39), with x the 64-bit value of rs1.
+const uint64_t dword = RS1;
+
+// 64-bit rotate right; only called with 0 < n < 64.
+const auto rotr64 = [](uint64_t v, unsigned n) -> uint64_t {
+    return (v >> n) | (v << (64 - n));
+};
+
+const uint64_t mixed = rotr64(dword, 28) ^ rotr64(dword, 34) ^ rotr64(dword, 39);
+
+WRITE_RD(mixed);
+
--- a/riscv/insns/sha512sum0r.h
+++ b/riscv/insns/sha512sum0r.h
@ -0,0 +1,9 @@
+
+require_rv32;
+require_extension('K');
+
+// XOR together shifted copies of the low 32 bits of rs1 and rs2.
+const uint32_t src1 = (uint32_t)RS1;
+const uint32_t src2 = (uint32_t)RS2;
+
+uint32_t mixed  = (src1 << 25) ^ (src1 << 30) ^ (src1 >> 28);
+         mixed ^= (src2 >>  7) ^ (src2 >>  2) ^ (src2 <<  4);
+
+WRITE_RD(zext_xlen(mixed));
--- a/riscv/insns/sha512sum1.h
+++ b/riscv/insns/sha512sum1.h
@ -0,0 +1,13 @@
+require_rv64;
+require_extension('K');
+
+// rd = ROR(x,14) ^ ROR(x,18) ^ ROR(x,41), with x the 64-bit value of rs1.
+const uint64_t dword = RS1;
+
+// 64-bit rotate right; only called with 0 < n < 64.
+const auto rotr64 = [](uint64_t v, unsigned n) -> uint64_t {
+    return (v >> n) | (v << (64 - n));
+};
+
+const uint64_t mixed = rotr64(dword, 14) ^ rotr64(dword, 18) ^ rotr64(dword, 41);
+
+WRITE_RD(mixed);
+
--- a/riscv/insns/sha512sum1r.h
+++ b/riscv/insns/sha512sum1r.h
@ -0,0 +1,9 @@
+
+require_rv32;
+require_extension('K');
+
+// XOR together shifted copies of the low 32 bits of rs1 and rs2.
+const uint32_t src1 = (uint32_t)RS1;
+const uint32_t src2 = (uint32_t)RS2;
+
+uint32_t mixed  = (src1 << 23) ^ (src1 >> 14) ^ (src1 >> 18);
+         mixed ^= (src2 >>  9) ^ (src2 << 18) ^ (src2 << 14);
+
+WRITE_RD(zext_xlen(mixed));
--- a/riscv/insns/sm3p0.h
+++ b/riscv/insns/sm3p0.h
@ -0,0 +1,14 @@
+
+require_extension('K');
+
+// rd = x ^ ROL(x,9) ^ ROL(x,17), with x the low 32 bits of rs1.
+const uint32_t word = RS1;
+
+// 32-bit rotate left; only called with 0 < n < 32.
+const auto rotl32 = [](uint32_t v, unsigned n) -> uint32_t {
+    return (v << n) | (v >> (32 - n));
+};
+
+const uint32_t mixed = word ^ rotl32(word, 9) ^ rotl32(word, 17);
+
+WRITE_RD(zext_xlen(mixed));
+
--- a/riscv/insns/sm3p1.h
+++ b/riscv/insns/sm3p1.h
@ -0,0 +1,14 @@
+
+require_extension('K');
+
+// rd = x ^ ROL(x,15) ^ ROL(x,23), with x the low 32 bits of rs1.
+const uint32_t word = RS1;
+
+// 32-bit rotate left; only called with 0 < n < 32.
+const auto rotl32 = [](uint32_t v, unsigned n) -> uint32_t {
+    return (v << n) | (v >> (32 - n));
+};
+
+const uint32_t mixed = word ^ rotl32(word, 15) ^ rotl32(word, 23);
+
+WRITE_RD(zext_xlen(mixed));
+
--- a/riscv/insns/sm4_common.h
+++ b/riscv/insns/sm4_common.h
@ -0,0 +1,27 @@
+// SM4 forward S-box: a 256-entry byte substitution table, indexed by the
+// input byte (0x00..0xFF). SM4 has no inverse S-box.
+static const uint8_t sm4_sbox[256] = {
+	0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2,
+	0x28, 0xFB, 0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3,
+	0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4,
+	0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
+	0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA,
+	0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA,
+	0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x68, 0x6B, 0x81, 0xB2,
+	0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
+	0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B,
+	0x01, 0x21, 0x78, 0x87, 0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52,
+	0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, 0xEA, 0xBF, 0x8A, 0xD2,
+	0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
+	0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30,
+	0xF5, 0x8C, 0xB1, 0xE3, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
+	0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45,
+	0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
+	0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41,
+	0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD,
+	0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, 0x89, 0x69, 0x97, 0x4A,
+	0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
+	0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
+	0xD7, 0xCB, 0x39, 0x48
+};
+
--- a/riscv/insns/sm4ed.h
+++ b/riscv/insns/sm4ed.h
@ -0,0 +1,23 @@
+// sm4ed: S-box one byte of RS2, mix it, rotate it into place, XOR with RS1.
+require_extension('K');
+
+#include "sm4_common.h"
+
+uint8_t  bs     = insn.bs(); // selects the RS2 byte; assumed 0..3 (TODO confirm)
+
+uint32_t sb_in  = (RS2 >> (8*bs)) & 0xFF;
+uint32_t sb_out = (uint32_t)sm4_sbox[sb_in];
+
+uint32_t linear = sb_out ^  (sb_out         <<  8) ^
+                            (sb_out         <<  2) ^
+                            (sb_out         << 18) ^
+                           ((sb_out & 0x3f) << 26) ^
+                           ((sb_out & 0xC0) << 10) ;
+// Rotate left by 8*bs. The right-shift count is masked so that bs == 0 gives
+// a shift of 0 rather than 32, which is undefined for a 32-bit operand.
+uint32_t rotl   = (linear << (8*bs)) | (linear >> ((32-8*bs) & 31));
+
+uint32_t result = rotl ^ RS1;
+uint64_t rd     = insn.rs1(); // RD sourced from RS1 field.
+
+WRITE_REG(rd, zext_xlen(result));
--- a/riscv/insns/sm4ks.h
+++ b/riscv/insns/sm4ks.h
@ -0,0 +1,21 @@
+// sm4ks: S-box one byte of RS2, mix it, rotate it into place, XOR with RS1.
+require_extension('K');
+
+#include "sm4_common.h"
+
+uint8_t  bs     = insn.bs(); // selects the RS2 byte; assumed 0..3 (TODO confirm)
+
+uint32_t sb_in  = (RS2 >> (8*bs)) & 0xFF;
+uint32_t sb_out = sm4_sbox[sb_in];
+
+uint32_t x      = sb_out ^
+    ((sb_out & 0x07) << 29) ^ ((sb_out & 0xFE) <<  7) ^
+    ((sb_out & 0x01) << 23) ^ ((sb_out & 0xF8) << 13) ;
+// Rotate left by 8*bs. The right-shift count is masked so that bs == 0 gives
+// a shift of 0 rather than 32, which is undefined for a 32-bit operand.
+uint32_t rotl   = (x << (8*bs)) | (x >> ((32-8*bs) & 31));
+
+uint32_t result = rotl ^ RS1;
+uint64_t rd     = insn.rs1(); // RD sourced from RS1 field.
+
+WRITE_REG(rd, zext_xlen(result));
--- a/riscv/processor.cc
+++ b/riscv/processor.cc
@ -210,7 +210,7 @@ void processor_t::parse_isa_string(const char* str)

  char error_msg[256];
  const char* p = lowercase.c_str();
-  const char* all_subsets = "imafdqcbh"
+  const char* all_subsets = "imafdqcbkh"
 #ifdef __SIZEOF_INT128__
    "v"
 #endif
@ -882,6 +882,12 @@ void processor_t::set_csr(int which, reg_t val)

  switch (which)
  {
+    case CSR_MENTROPY:
+      es.set_mentropy(val);
+      break;
+    case CSR_MNOISE:
+      es.set_mnoise(val);
+      break;
    case CSR_FFLAGS:
      dirty_fp_state;
      state.fflags = val & (FSR_AEXC >> FSR_AEXC_SHIFT);
@ -1351,6 +1357,8 @@ void processor_t::set_csr(int which, reg_t val)
    case CSR_DPC:
    case CSR_DSCRATCH0:
    case CSR_DSCRATCH1:
+    case CSR_MENTROPY:
+    case CSR_MNOISE:
      LOG_CSR(which);
      break;
  }
@ -1415,6 +1423,14 @@ reg_t processor_t::get_csr(int which, insn_t insn, bool write, bool peek)

  switch (which)
  {
+    case CSR_MENTROPY:
+      if(!supports_extension('K'))
+          break;
+      return es.get_mentropy();
+    case CSR_MNOISE:
+      if(!supports_extension('K'))
+          break;
+      return es.get_mnoise();
    case CSR_FFLAGS:
      require_fp;
      if (!supports_extension('F'))
--- a/riscv/processor.h
+++ b/riscv/processor.h
@ -12,6 +12,8 @@
 #include <map>
 #include <cassert>
 #include "debug_rom_defines.h"
+#include "entropy_source.h"
+

 class processor_t;
 class mmu_t;
@ -443,6 +445,7 @@ private:
  std::vector<bool> extension_table;
  std::vector<bool> impl_table;
  
+  entropy_source es; // Crypto ISE Entropy source.

  std::vector<insn_desc_t> instructions;
  std::map<reg_t,uint64_t> pc_histogram;
--- a/riscv/riscv.mk.in
+++ b/riscv/riscv.mk.in
@ -431,6 +431,34 @@ riscv_insn_ext_b = \
 	xperm_h \
 	xperm_w \

+# Scalar cryptography extension (K) instructions
+riscv_insn_ext_k = \
+	aesds \
+	aesdsm \
+	aeses \
+	aesesm \
+	aes64ks1i \
+	aes64ks2 \
+	aes64im \
+	sha256sig0 \
+	sha256sig1 \
+	sha256sum0 \
+	sha256sum1 \
+	sha512sig0 \
+	sha512sig0h \
+	sha512sig0l \
+	sha512sig1 \
+	sha512sig1h \
+	sha512sig1l \
+	sha512sum0 \
+	sha512sum0r \
+	sha512sum1 \
+	sha512sum1r \
+	sm3p0 \
+	sm3p1 \
+	sm4ed \
+	sm4ks
+
 riscv_insn_ext_v_alu_int = \
 	vaadd_vv \
 	vaaddu_vv \
@ -897,6 +925,7 @@ riscv_insn_list = \
 	$(riscv_insn_ext_zfh) \
 	$(riscv_insn_ext_q) \
 	$(riscv_insn_ext_b) \
+    $(riscv_insn_ext_k) \
 	$(if $(HAVE_INT128),$(riscv_insn_ext_v),) \
 	$(riscv_insn_ext_h) \
 	$(riscv_insn_priv) \