Browse Source

Begin refactoring emulation code

pull/17/head
Andrew Waterman 10 years ago
parent
commit
bbc9a65fed
  1. 3
      pk/bits.h
  2. 596
      pk/emulation.c
  3. 28
      pk/emulation.h
  4. 449
      pk/fp_emulation.c
  5. 81
      pk/fp_emulation.h
  6. 10
      pk/mentry.S
  7. 1
      pk/minit.c
  8. 90
      pk/mtrap.c
  9. 136
      pk/mtrap.h
  10. 3
      pk/pk.h
  11. 15
      pk/pk.mk.in
  12. 79
      pk/unprivileged_memory.h
  13. 2
      softfloat/specialize.h

3
pk/bits.h

@ -1,6 +1,9 @@
#ifndef PK_BITS_H
#define PK_BITS_H
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
#define CONST_POPCOUNT2(x) ((((x) >> 0) & 1) + (((x) >> 1) & 1))
#define CONST_POPCOUNT4(x) (CONST_POPCOUNT2(x) + CONST_POPCOUNT2((x)>>2))
#define CONST_POPCOUNT8(x) (CONST_POPCOUNT4(x) + CONST_POPCOUNT4((x)>>4))

596
pk/emulation.c

@ -1,24 +1,76 @@
#include "emulation.h"
#include "fp_emulation.h"
#include "config.h"
#include "unprivileged_memory.h"
#include "mtrap.h"
#include "softfloat.h"
#include <limits.h>
void redirect_trap(uintptr_t epc, uintptr_t mstatus)
void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
{
write_csr(sepc, epc);
write_csr(scause, read_csr(mcause));
write_csr(mepc, read_csr(stvec));
asm (".pushsection .rodata\n"
"illegal_insn_trap_table:\n"
" .word truly_illegal_insn\n"
#ifdef PK_ENABLE_FP_EMULATION
" .word emulate_float_load\n"
#else
" .word truly_illegal_insn\n"
#endif
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
#ifdef PK_ENABLE_FP_EMULATION
" .word emulate_float_store\n"
#else
" .word truly_illegal_insn\n"
#endif
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_mul_div\n"
" .word truly_illegal_insn\n"
" .word emulate_mul_div32\n"
" .word truly_illegal_insn\n"
#ifdef PK_ENABLE_FP_EMULATION
" .word emulate_fmadd\n"
" .word emulate_fmadd\n"
" .word emulate_fmadd\n"
" .word emulate_fmadd\n"
" .word emulate_fp\n"
#else
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
#endif
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_system\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .popsection");
uintptr_t prev_priv = EXTRACT_FIELD(mstatus, MSTATUS_MPP);
uintptr_t prev_ie = EXTRACT_FIELD(mstatus, MSTATUS_MPIE);
kassert(prev_priv <= PRV_S);
mstatus = INSERT_FIELD(mstatus, MSTATUS_SPP, prev_priv);
mstatus = INSERT_FIELD(mstatus, MSTATUS_SPIE, prev_ie);
mstatus = INSERT_FIELD(mstatus, MSTATUS_MPP, PRV_S);
mstatus = INSERT_FIELD(mstatus, MSTATUS_MPIE, 0);
write_csr(mstatus, mstatus);
uintptr_t mstatus;
insn_t insn = get_insn(mepc, &mstatus);
extern void __redirect_trap();
return __redirect_trap();
if (unlikely((insn & 3) != 3))
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
write_csr(mepc, mepc + 4);
extern int32_t illegal_insn_trap_table[];
int32_t* pf = (void*)illegal_insn_trap_table + (insn & 0x7c);
emulation_func f = (emulation_func)(uintptr_t)*pf;
f(regs, mcause, mepc, mstatus, insn);
}
void __attribute__((noinline)) truly_illegal_insn(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn)
@ -28,8 +80,14 @@ void __attribute__((noinline)) truly_illegal_insn(uintptr_t* regs, uintptr_t mca
void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
{
union {
uint8_t bytes[8];
uintptr_t intx;
uint64_t int64;
} val;
uintptr_t mstatus;
insn_t insn = get_insn(mepc, &mstatus);
uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn);
int shift = 0, fp = 0, len;
if ((insn & MASK_LW) == MATCH_LW)
@ -51,139 +109,54 @@ void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
else
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn);
uintptr_t val = 0, tmp, tmp2;
unpriv_mem_access("add %[tmp2], %[addr], %[len];"
"1: slli %[val], %[val], 8;"
"lbu %[tmp], -1(%[tmp2]);"
"addi %[tmp2], %[tmp2], -1;"
"or %[val], %[val], %[tmp];"
"bne %[addr], %[tmp2], 1b;",
val, tmp, tmp2, addr, len);
if (shift)
val = (intptr_t)val << shift >> shift;
val.int64 = 0;
for (intptr_t i = len-1; i >= 0; i--)
val.bytes[i] = load_uint8_t((void *)(addr + i), mepc);
if (!fp)
SET_RD(insn, regs, val);
SET_RD(insn, regs, (intptr_t)val.intx << shift >> shift);
else if (len == 8)
SET_F64_RD(insn, regs, val);
SET_F64_RD(insn, regs, val.int64);
else
SET_F32_RD(insn, regs, val);
SET_F32_RD(insn, regs, val.intx);
write_csr(mepc, mepc + 4);
}
void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
{
union {
uint8_t bytes[8];
uintptr_t intx;
uint64_t int64;
} val;
uintptr_t mstatus;
insn_t insn = get_insn(mepc, &mstatus);
uintptr_t val = GET_RS2(insn, regs), error;
int len;
val.intx = GET_RS2(insn, regs);
if ((insn & MASK_SW) == MATCH_SW)
len = 4;
#ifdef __riscv64
else if ((insn & MASK_SD) == MATCH_SD)
len = 8;
#endif
else if ((insn & MASK_FSD) == MATCH_FSD)
len = 8, val = GET_F64_RS2(insn, regs);
len = 8, val.int64 = GET_F64_RS2(insn, regs);
else if ((insn & MASK_FSW) == MATCH_FSW)
len = 4, val = GET_F32_RS2(insn, regs);
len = 4, val.intx = GET_F32_RS2(insn, regs);
else if ((insn & MASK_SH) == MATCH_SH)
len = 2;
else
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn);
uintptr_t tmp, tmp2, addr_end = addr + len;
unpriv_mem_access("mv %[tmp], %[val];"
"mv %[tmp2], %[addr];"
"1: sb %[tmp], 0(%[tmp2]);"
"srli %[tmp], %[tmp], 8;"
"addi %[tmp2], %[tmp2], 1;"
"bne %[tmp2], %[addr_end], 1b",
tmp, tmp2, unused1, val, addr, addr_end);
for (size_t i = 0; i < len; i++)
store_uint8_t((void *)(addr + i), val.bytes[i], mepc);
write_csr(mepc, mepc + 4);
}
DECLARE_EMULATION_FUNC(emulate_float_load)
{
uintptr_t val_lo, val_hi;
uint64_t val;
uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn);
switch (insn & MASK_FUNCT3)
{
case MATCH_FLW & MASK_FUNCT3:
if (addr % 4 != 0)
return misaligned_load_trap(regs, mcause, mepc);
unpriv_mem_access("lw %[val_lo], (%[addr])",
val_lo, unused1, unused2, addr, mepc/*X*/);
SET_F32_RD(insn, regs, val_lo);
break;
case MATCH_FLD & MASK_FUNCT3:
if (addr % sizeof(uintptr_t) != 0)
return misaligned_load_trap(regs, mcause, mepc);
#ifdef __riscv64
unpriv_mem_access("ld %[val], (%[addr])",
val, val_hi/*X*/, unused1, addr, mepc/*X*/);
#else
unpriv_mem_access("lw %[val_lo], (%[addr]);"
"lw %[val_hi], 4(%[addr])",
val_lo, val_hi, unused1, addr, mepc/*X*/);
val = val_lo | ((uint64_t)val_hi << 32);
#endif
SET_F64_RD(insn, regs, val);
break;
default:
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
DECLARE_EMULATION_FUNC(emulate_float_store)
{
uintptr_t val_lo, val_hi;
uint64_t val;
uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn);
switch (insn & MASK_FUNCT3)
{
case MATCH_FSW & MASK_FUNCT3:
if (addr % 4 != 0)
return misaligned_store_trap(regs, mcause, mepc);
val_lo = GET_F32_RS2(insn, regs);
unpriv_mem_access("sw %[val_lo], (%[addr])",
unused1, unused2, unused3, val_lo, addr);
break;
case MATCH_FSD & MASK_FUNCT3:
if (addr % sizeof(uintptr_t) != 0)
return misaligned_store_trap(regs, mcause, mepc);
val = GET_F64_RS2(insn, regs);
#ifdef __riscv64
unpriv_mem_access("sd %[val], (%[addr])",
unused1, unused2, unused3, val, addr);
#else
val_lo = val;
val_hi = val >> 32;
unpriv_mem_access("sw %[val_lo], (%[addr]);"
"sw %[val_hi], 4(%[addr])",
unused1, unused2, unused3, val_lo, val_hi, addr);
#endif
break;
default:
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
#ifdef __riscv64
typedef __int128 double_int;
#else
@ -279,7 +252,7 @@ static inline int emulate_read_csr(int num, uintptr_t mstatus, uintptr_t* result
+ HLS()->sinstret_delta) >> 32;
return 0;
#endif
#ifndef __riscv_hard_float
#ifdef PK_ENABLE_FP_EMULATION
case CSR_FRM:
if ((mstatus & MSTATUS_FS) == 0) break;
*result = GET_FRM();
@ -301,7 +274,7 @@ static inline int emulate_write_csr(int num, uintptr_t value, uintptr_t mstatus)
{
switch (num)
{
#ifndef __riscv_hard_float
#ifndef PK_ENABLE_FP_EMULATION
case CSR_FRM: SET_FRM(value); return 0;
case CSR_FFLAGS: SET_FFLAGS(value); return 0;
case CSR_FCSR: SET_FCSR(value); return 0;
@ -338,384 +311,3 @@ DECLARE_EMULATION_FUNC(emulate_system)
SET_RD(insn, regs, csr_val);
}
DECLARE_EMULATION_FUNC(emulate_fp)
{
asm (".pushsection .rodata\n"
"fp_emulation_table:\n"
" .word emulate_fadd\n"
" .word emulate_fsub\n"
" .word emulate_fmul\n"
" .word emulate_fdiv\n"
" .word emulate_fsgnj\n"
" .word emulate_fmin\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_fcvt_ff\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_fsqrt\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_fcmp\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_fcvt_if\n"
" .word truly_illegal_insn\n"
" .word emulate_fcvt_fi\n"
" .word truly_illegal_insn\n"
" .word emulate_fmv_if\n"
" .word truly_illegal_insn\n"
" .word emulate_fmv_fi\n"
" .word truly_illegal_insn\n"
" .popsection");
// if FPU is disabled, punt back to the OS
if (unlikely((mstatus & MSTATUS_FS) == 0))
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
extern int32_t fp_emulation_table[];
int32_t* pf = (void*)fp_emulation_table + ((insn >> 25) & 0x7c);
emulation_func f = (emulation_func)(uintptr_t)*pf;
SETUP_STATIC_ROUNDING(insn);
return f(regs, mcause, mepc, mstatus, insn);
}
void emulate_any_fadd(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn, int32_t neg_b)
{
if (GET_PRECISION(insn) == PRECISION_S) {
uint32_t rs1 = GET_F32_RS1(insn, regs);
uint32_t rs2 = GET_F32_RS2(insn, regs) ^ neg_b;
SET_F32_RD(insn, regs, f32_add(rs1, rs2));
} else if (GET_PRECISION(insn) == PRECISION_D) {
uint64_t rs1 = GET_F64_RS1(insn, regs);
uint64_t rs2 = GET_F64_RS2(insn, regs) ^ ((uint64_t)neg_b << 32);
SET_F64_RD(insn, regs, f64_add(rs1, rs2));
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
DECLARE_EMULATION_FUNC(emulate_fadd)
{
return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, 0);
}
DECLARE_EMULATION_FUNC(emulate_fsub)
{
return emulate_any_fadd(regs, mcause, mepc, mstatus, insn, INT32_MIN);
}
DECLARE_EMULATION_FUNC(emulate_fmul)
{
if (GET_PRECISION(insn) == PRECISION_S) {
uint32_t rs1 = GET_F32_RS1(insn, regs);
uint32_t rs2 = GET_F32_RS2(insn, regs);
SET_F32_RD(insn, regs, f32_mul(rs1, rs2));
} else if (GET_PRECISION(insn) == PRECISION_D) {
uint64_t rs1 = GET_F64_RS1(insn, regs);
uint64_t rs2 = GET_F64_RS2(insn, regs);
SET_F64_RD(insn, regs, f64_mul(rs1, rs2));
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
DECLARE_EMULATION_FUNC(emulate_fdiv)
{
if (GET_PRECISION(insn) == PRECISION_S) {
uint32_t rs1 = GET_F32_RS1(insn, regs);
uint32_t rs2 = GET_F32_RS2(insn, regs);
SET_F32_RD(insn, regs, f32_div(rs1, rs2));
} else if (GET_PRECISION(insn) == PRECISION_D) {
uint64_t rs1 = GET_F64_RS1(insn, regs);
uint64_t rs2 = GET_F64_RS2(insn, regs);
SET_F64_RD(insn, regs, f64_div(rs1, rs2));
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
DECLARE_EMULATION_FUNC(emulate_fsqrt)
{
if ((insn >> 20) & 0x1f)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
if (GET_PRECISION(insn) == PRECISION_S) {
SET_F32_RD(insn, regs, f32_sqrt(GET_F32_RS1(insn, regs)));
} else if (GET_PRECISION(insn) == PRECISION_D) {
SET_F64_RD(insn, regs, f64_sqrt(GET_F64_RS1(insn, regs)));
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
DECLARE_EMULATION_FUNC(emulate_fsgnj)
{
int rm = GET_RM(insn);
if (rm >= 3)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
#define DO_FSGNJ(rs1, rs2, rm) ({ \
typeof(rs1) rs1_sign = (rs1) >> (8*sizeof(rs1)-1); \
typeof(rs1) rs2_sign = (rs2) >> (8*sizeof(rs1)-1); \
rs1_sign &= (rm) >> 1; \
rs1_sign ^= (rm) ^ rs2_sign; \
((rs1) << 1 >> 1) | (rs1_sign << (8*sizeof(rs1)-1)); })
if (GET_PRECISION(insn) == PRECISION_S) {
uint32_t rs1 = GET_F32_RS1(insn, regs);
uint32_t rs2 = GET_F32_RS2(insn, regs);
SET_F32_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm));
} else if (GET_PRECISION(insn) == PRECISION_D) {
uint64_t rs1 = GET_F64_RS1(insn, regs);
uint64_t rs2 = GET_F64_RS2(insn, regs);
SET_F64_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm));
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
DECLARE_EMULATION_FUNC(emulate_fmin)
{
int rm = GET_RM(insn);
if (rm >= 2)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
if (GET_PRECISION(insn) == PRECISION_S) {
uint32_t rs1 = GET_F32_RS1(insn, regs);
uint32_t rs2 = GET_F32_RS2(insn, regs);
uint32_t arg1 = rm ? rs2 : rs1;
uint32_t arg2 = rm ? rs1 : rs2;
int use_rs1 = f32_lt_quiet(arg1, arg2) || isNaNF32UI(rs2);
SET_F32_RD(insn, regs, use_rs1 ? rs1 : rs2);
} else if (GET_PRECISION(insn) == PRECISION_D) {
uint64_t rs1 = GET_F64_RS1(insn, regs);
uint64_t rs2 = GET_F64_RS2(insn, regs);
uint64_t arg1 = rm ? rs2 : rs1;
uint64_t arg2 = rm ? rs1 : rs2;
int use_rs1 = f64_lt_quiet(arg1, arg2) || isNaNF64UI(rs2);
SET_F64_RD(insn, regs, use_rs1 ? rs1 : rs2);
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
DECLARE_EMULATION_FUNC(emulate_fcvt_ff)
{
int rs2_num = (insn >> 20) & 0x1f;
if (GET_PRECISION(insn) == PRECISION_S) {
if (rs2_num != 1)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
SET_F32_RD(insn, regs, f64_to_f32(GET_F64_RS1(insn, regs)));
} else if (GET_PRECISION(insn) == PRECISION_D) {
if (rs2_num != 0)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
SET_F64_RD(insn, regs, f32_to_f64(GET_F32_RS1(insn, regs)));
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
DECLARE_EMULATION_FUNC(emulate_fcvt_fi)
{
if (GET_PRECISION(insn) != PRECISION_S && GET_PRECISION(insn) != PRECISION_D)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
int negative = 0;
uint64_t uint_val = GET_RS1(insn, regs);
switch ((insn >> 20) & 0x1f)
{
case 0: // int32
negative = (int32_t)uint_val < 0;
uint_val = negative ? -(int32_t)uint_val : (int32_t)uint_val;
break;
case 1: // uint32
uint_val = (uint32_t)uint_val;
break;
#ifdef __riscv64
case 2: // int64
negative = (int64_t)uint_val < 0;
uint_val = negative ? -uint_val : uint_val;
case 3: // uint64
break;
#endif
default:
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
uint64_t float64 = ui64_to_f64(uint_val);
if (negative)
float64 ^= INT64_MIN;
if (GET_PRECISION(insn) == PRECISION_S)
SET_F32_RD(insn, regs, f64_to_f32(float64));
else
SET_F64_RD(insn, regs, float64);
}
DECLARE_EMULATION_FUNC(emulate_fcvt_if)
{
int rs2_num = (insn >> 20) & 0x1f;
#ifdef __riscv64
if (rs2_num >= 4)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
#else
if (rs2_num >= 2)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
#endif
int64_t float64;
if (GET_PRECISION(insn) == PRECISION_S)
float64 = f32_to_f64(GET_F32_RS1(insn, regs));
else if (GET_PRECISION(insn) == PRECISION_D)
float64 = GET_F64_RS1(insn, regs);
else
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
int negative = 0;
if (float64 < 0) {
negative = 1;
float64 ^= INT64_MIN;
}
uint64_t uint_val = f64_to_ui64(float64, softfloat_roundingMode, true);
uint64_t result, limit, limit_result;
switch (rs2_num)
{
case 0: // int32
if (negative) {
result = (int32_t)-uint_val;
limit_result = limit = (uint32_t)INT32_MIN;
} else {
result = (int32_t)uint_val;
limit_result = limit = INT32_MAX;
}
break;
case 1: // uint32
limit = limit_result = UINT32_MAX;
if (negative)
result = limit = 0;
else
result = (uint32_t)uint_val;
break;
case 2: // int32
if (negative) {
result = (int64_t)-uint_val;
limit_result = limit = (uint64_t)INT64_MIN;
} else {
result = (int64_t)uint_val;
limit_result = limit = INT64_MAX;
}
break;
case 3: // uint64
limit = limit_result = UINT64_MAX;
if (negative)
result = limit = 0;
else
result = (uint64_t)uint_val;
break;
default:
__builtin_unreachable();
}
if (uint_val > limit) {
result = limit_result;
softfloat_raiseFlags(softfloat_flag_invalid);
}
SET_FS_DIRTY();
SET_RD(insn, regs, result);
}
DECLARE_EMULATION_FUNC(emulate_fcmp)
{
int rm = GET_RM(insn);
if (rm >= 3)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
uintptr_t result;
if (GET_PRECISION(insn) == PRECISION_S) {
uint32_t rs1 = GET_F32_RS1(insn, regs);
uint32_t rs2 = GET_F32_RS2(insn, regs);
if (rm != 1)
result = f32_eq(rs1, rs2);
if (rm == 1 || (rm == 0 && !result))
result = f32_lt(rs1, rs2);
goto success;
} else if (GET_PRECISION(insn) == PRECISION_D) {
uint64_t rs1 = GET_F64_RS1(insn, regs);
uint64_t rs2 = GET_F64_RS2(insn, regs);
if (rm != 1)
result = f64_eq(rs1, rs2);
if (rm == 1 || (rm == 0 && !result))
result = f64_lt(rs1, rs2);
goto success;
}
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
success:
SET_RD(insn, regs, result);
}
DECLARE_EMULATION_FUNC(emulate_fmv_if)
{
uintptr_t result;
if ((insn & MASK_FMV_X_S) == MATCH_FMV_X_S)
result = GET_F32_RS1(insn, regs);
#ifdef __riscv64
else if ((insn & MASK_FMV_X_D) == MATCH_FMV_X_D)
result = GET_F64_RS1(insn, regs);
#endif
else
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
SET_RD(insn, regs, result);
}
DECLARE_EMULATION_FUNC(emulate_fmv_fi)
{
uintptr_t rs1 = GET_RS1(insn, regs);
if ((insn & MASK_FMV_S_X) == MATCH_FMV_S_X)
SET_F32_RD(insn, regs, rs1);
else if ((insn & MASK_FMV_D_X) == MATCH_FMV_D_X)
SET_F64_RD(insn, regs, rs1);
else
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
DECLARE_EMULATION_FUNC(emulate_fmadd)
{
// if FPU is disabled, punt back to the OS
if (unlikely((mstatus & MSTATUS_FS) == 0))
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
int op = (insn >> 2) & 3;
SETUP_STATIC_ROUNDING(insn);
if (GET_PRECISION(insn) == PRECISION_S) {
uint32_t rs1 = GET_F32_RS1(insn, regs);
uint32_t rs2 = GET_F32_RS2(insn, regs);
uint32_t rs3 = GET_F32_RS3(insn, regs);
SET_F32_RD(insn, regs, softfloat_mulAddF32(op, rs1, rs2, rs3));
} else if (GET_PRECISION(insn) == PRECISION_D) {
uint64_t rs1 = GET_F64_RS1(insn, regs);
uint64_t rs2 = GET_F64_RS2(insn, regs);
uint64_t rs3 = GET_F64_RS3(insn, regs);
SET_F64_RD(insn, regs, softfloat_mulAddF64(op, rs1, rs2, rs3));
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}

28
pk/emulation.h

@ -0,0 +1,28 @@
#ifndef _RISCV_EMULATION_H
#define _RISCV_EMULATION_H

#include "encoding.h"
#include "bits.h"
#include <stdint.h>

/* A 32-bit instruction word. */
typedef uint32_t insn_t;

/* Signature shared by every emulation handler: (regs, mcause, mepc, mstatus, insn). */
typedef void (*emulation_func)(uintptr_t*, uintptr_t, uintptr_t, uintptr_t, insn_t);

/* Declares (or opens the definition of) a handler with the emulation_func signature. */
#define DECLARE_EMULATION_FUNC(name) void name(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn)

void misaligned_load_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc);
void misaligned_store_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc);
void redirect_trap(uintptr_t epc, uintptr_t mstatus);
DECLARE_EMULATION_FUNC(truly_illegal_insn);

/*
 * Yields a pointer to the saved integer register named by the 5-bit field
 * of `insn` that starts at bit `pos`.  The field is shifted so that it is
 * already multiplied by (1 << LOG_REGBYTES), i.e. it becomes a byte offset
 * into `regs`.  `regs` is parenthesised so that an expression argument is
 * cast as a whole.
 */
#define GET_REG(insn, pos, regs) ({ \
  int mask = (1 << (5+LOG_REGBYTES)) - (1 << LOG_REGBYTES); \
  (uintptr_t*)((uintptr_t)(regs) + (((insn) >> ((pos) - LOG_REGBYTES)) & mask)); \
})

/* Register accessors: the fields start at bit 15 (rs1), bit 20 (rs2) and bit 7 (rd). */
#define GET_RS1(insn, regs) (*GET_REG(insn, 15, regs))
#define GET_RS2(insn, regs) (*GET_REG(insn, 20, regs))
#define SET_RD(insn, regs, val) (*GET_REG(insn, 7, regs) = (val))

/* I-type immediate: instruction bits 31:20, sign-extended. */
#define IMM_I(insn) ((int32_t)(insn) >> 20)
/* S-type immediate: sign-extended bits 31:25 as the high part, bits 11:7 as the low five bits. */
#define IMM_S(insn) (((int32_t)(insn) >> 25 << 5) | (int32_t)(((insn) >> 7) & 0x1f))

/* Instruction bits 14:12. */
#define MASK_FUNCT3 0x7000

#endif

449
pk/fp_emulation.c

@ -0,0 +1,449 @@
#include "fp_emulation.h"
#include "unprivileged_memory.h"
#include "softfloat.h"
#include "config.h"
/*
 * Emulates a floating-point load: reads from the address rs1 + I-immediate
 * and writes the value to FP register rd.
 *
 * funct3 selects the width (MATCH_FLW or MATCH_FLD).  A misaligned address
 * is handed to misaligned_load_trap(); any other funct3 value is reported
 * through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_float_load)
{
  uint64_t val;
  uintptr_t addr = GET_RS1(insn, regs) + IMM_I(insn);

  switch (insn & MASK_FUNCT3)
  {
    case MATCH_FLW & MASK_FUNCT3:
      if (addr % 4 != 0)
        return misaligned_load_trap(regs, mcause, mepc);

      SET_F32_RD(insn, regs, load_int32_t((void *)addr, mepc));
      break;

    case MATCH_FLD & MASK_FUNCT3:
      if (addr % sizeof(uintptr_t) != 0)
        return misaligned_load_trap(regs, mcause, mepc);

#ifdef __riscv64
      val = load_uint64_t((void *)addr, mepc);
#else
      /* Without __riscv64 the value is assembled from two 32-bit loads:
       * low word first, then the word at addr + 4 as the high half.
       * addr is an integer, so it is cast to a pointer exactly like every
       * other load_* call in this function. */
      val = load_uint32_t((void *)addr, mepc);
      val += (uint64_t)load_uint32_t((void *)(addr + 4), mepc) << 32;
#endif
      SET_F64_RD(insn, regs, val);
      break;

    default:
      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
  }
}
DECLARE_EMULATION_FUNC(emulate_float_store)
{
uint64_t val;
uintptr_t addr = GET_RS1(insn, regs) + IMM_S(insn);
switch (insn & MASK_FUNCT3)
{
case MATCH_FSW & MASK_FUNCT3:
if (addr % 4 != 0)
return misaligned_store_trap(regs, mcause, mepc);
store_uint32_t((void *)addr, GET_F32_RS2(insn, regs), mepc);
break;
case MATCH_FSD & MASK_FUNCT3:
if (addr % sizeof(uintptr_t) != 0)
return misaligned_store_trap(regs, mcause, mepc);
val = GET_F64_RS2(insn, regs);
#ifdef __riscv64
store_uint64_t((void *)addr, val, mepc);
#else
store_uint32_t((void *)addr, val, mepc);
store_uint32_t((void *)(addr + 4), val >> 32, mepc);
#endif
break;
default:
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
/*
 * Second-level dispatcher for floating-point instructions.
 *
 * The inline asm below emits a 32-entry table of 32-bit `.word` handler
 * addresses into .rodata under the label fp_emulation_table.  The table is
 * indexed by instruction bits 31:27: `(insn >> 25) & 0x7c` is that 5-bit
 * field already multiplied by 4, i.e. a byte offset into the table.
 * Slots with no handler point at truly_illegal_insn.
 */
DECLARE_EMULATION_FUNC(emulate_fp)
{
asm (".pushsection .rodata\n"
"fp_emulation_table:\n"
" .word emulate_fadd\n"
" .word emulate_fsub\n"
" .word emulate_fmul\n"
" .word emulate_fdiv\n"
" .word emulate_fsgnj\n"
" .word emulate_fmin\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_fcvt_ff\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_fsqrt\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_fcmp\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_fcvt_if\n"
" .word truly_illegal_insn\n"
" .word emulate_fcvt_fi\n"
" .word truly_illegal_insn\n"
" .word emulate_fmv_if\n"
" .word truly_illegal_insn\n"
" .word emulate_fmv_fi\n"
" .word truly_illegal_insn\n"
" .popsection");
// if FPU is disabled, punt back to the OS
if (unlikely((mstatus & MSTATUS_FS) == 0))
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
extern int32_t fp_emulation_table[];
// byte-offset arithmetic on void* (GNU extension); the offset is always a multiple of 4
int32_t* pf = (void*)fp_emulation_table + ((insn >> 25) & 0x7c);
// each entry is read as int32_t and widened through uintptr_t to a function pointer
emulation_func f = (emulation_func)(uintptr_t)*pf;
// NOTE(review): in the hard-float definition visible in fp_emulation.h this macro
// can itself return via truly_illegal_insn, so it must stay ahead of the dispatch
SETUP_STATIC_ROUNDING(insn);
return f(regs, mcause, mepc, mstatus, insn);
}
/*
 * Shared body of the add and subtract handlers: computes rs1 + rs2 with
 * softfloat and stores the sum in rd.
 *
 * neg_b is XORed into the 32-bit rs2 operand, or into the upper 32 bits of
 * the 64-bit rs2 operand, before the addition; the callers pass 0 or
 * INT32_MIN.  Any precision other than S or D is reported through
 * truly_illegal_insn().
 */
void emulate_any_fadd(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn, int32_t neg_b)
{
  switch (GET_PRECISION(insn)) {
    case PRECISION_S: {
      uint32_t src1 = GET_F32_RS1(insn, regs);
      uint32_t src2 = GET_F32_RS2(insn, regs) ^ neg_b;
      SET_F32_RD(insn, regs, f32_add(src1, src2));
      break;
    }
    case PRECISION_D: {
      uint64_t src1 = GET_F64_RS1(insn, regs);
      uint64_t src2 = GET_F64_RS2(insn, regs) ^ ((uint64_t)neg_b << 32);
      SET_F64_RD(insn, regs, f64_add(src1, src2));
      break;
    }
    default:
      truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
      break;
  }
}
/* Add handler: the shared add path with no bits flipped in rs2. */
DECLARE_EMULATION_FUNC(emulate_fadd)
{
  const int32_t rs2_xor = 0;

  emulate_any_fadd(regs, mcause, mepc, mstatus, insn, rs2_xor);
}
/* Subtract handler: the shared add path with INT32_MIN XORed into rs2 (its top bit flipped). */
DECLARE_EMULATION_FUNC(emulate_fsub)
{
  const int32_t rs2_xor = INT32_MIN;

  emulate_any_fadd(regs, mcause, mepc, mstatus, insn, rs2_xor);
}
/*
 * Multiply handler: rd = rs1 * rs2 via softfloat, in single or double
 * precision as selected by the instruction's precision field.  Other
 * precisions are reported through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fmul)
{
  switch (GET_PRECISION(insn)) {
    case PRECISION_S: {
      uint32_t src1 = GET_F32_RS1(insn, regs);
      uint32_t src2 = GET_F32_RS2(insn, regs);
      SET_F32_RD(insn, regs, f32_mul(src1, src2));
      break;
    }
    case PRECISION_D: {
      uint64_t src1 = GET_F64_RS1(insn, regs);
      uint64_t src2 = GET_F64_RS2(insn, regs);
      SET_F64_RD(insn, regs, f64_mul(src1, src2));
      break;
    }
    default:
      truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
      break;
  }
}
/*
 * Divide handler: rd = rs1 / rs2 via softfloat, in single or double
 * precision as selected by the instruction's precision field.  Other
 * precisions are reported through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fdiv)
{
  switch (GET_PRECISION(insn)) {
    case PRECISION_S: {
      uint32_t dividend = GET_F32_RS1(insn, regs);
      uint32_t divisor = GET_F32_RS2(insn, regs);
      SET_F32_RD(insn, regs, f32_div(dividend, divisor));
      break;
    }
    case PRECISION_D: {
      uint64_t dividend = GET_F64_RS1(insn, regs);
      uint64_t divisor = GET_F64_RS2(insn, regs);
      SET_F64_RD(insn, regs, f64_div(dividend, divisor));
      break;
    }
    default:
      truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
      break;
  }
}
/*
 * Square-root handler: rd = sqrt(rs1) via softfloat.  The 5-bit field at
 * instruction bits 24:20 must be zero; a non-zero field or a precision
 * other than S or D is reported through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fsqrt)
{
  if (((insn >> 20) & 0x1f) != 0) {
    truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
    return;
  }

  switch (GET_PRECISION(insn)) {
    case PRECISION_S:
      SET_F32_RD(insn, regs, f32_sqrt(GET_F32_RS1(insn, regs)));
      break;
    case PRECISION_D:
      SET_F64_RD(insn, regs, f64_sqrt(GET_F64_RS1(insn, regs)));
      break;
    default:
      truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
      break;
  }
}
/*
 * Sign-injection handler: rd takes every bit of rs1 except the top (sign)
 * bit, which is derived from the operands according to the rm field:
 *   rm == 0: sign of rs2
 *   rm == 1: inverted sign of rs2
 *   rm == 2: sign of rs1 XOR sign of rs2
 * rm values of 3 and above, and precisions other than S or D, are reported
 * through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fsgnj)
{
int rm = GET_RM(insn);
if (rm >= 3)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
// DO_FSGNJ works on the operand's own width (typeof/sizeof of rs1).
// rs1's sign is kept only when bit 1 of rm is set, then XORed with rm and
// rs2's sign.  Only bit 0 of that value survives the final shift into the
// top bit, so the stray bit 1 that rm == 2 leaves behind is discarded.
#define DO_FSGNJ(rs1, rs2, rm) ({ \
typeof(rs1) rs1_sign = (rs1) >> (8*sizeof(rs1)-1); \
typeof(rs1) rs2_sign = (rs2) >> (8*sizeof(rs1)-1); \
rs1_sign &= (rm) >> 1; \
rs1_sign ^= (rm) ^ rs2_sign; \
((rs1) << 1 >> 1) | (rs1_sign << (8*sizeof(rs1)-1)); })
if (GET_PRECISION(insn) == PRECISION_S) {
uint32_t rs1 = GET_F32_RS1(insn, regs);
uint32_t rs2 = GET_F32_RS2(insn, regs);
SET_F32_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm));
} else if (GET_PRECISION(insn) == PRECISION_D) {
uint64_t rs1 = GET_F64_RS1(insn, regs);
uint64_t rs2 = GET_F64_RS2(insn, regs);
SET_F64_RD(insn, regs, DO_FSGNJ(rs1, rs2, rm));
} else {
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
}
/*
 * Min/max handler.  rd receives rs1 when the quiet less-than test succeeds
 * or when rs2 is a NaN, and rs2 otherwise.  With rm == 0 the test is
 * rs1 < rs2; with rm == 1 the operands are swapped and the test is
 * rs2 < rs1.  rm values of 2 and above, and precisions other than S or D,
 * are reported through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fmin)
{
  int rm = GET_RM(insn);
  if (rm >= 2) {
    truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
    return;
  }

  switch (GET_PRECISION(insn)) {
    case PRECISION_S: {
      uint32_t src1 = GET_F32_RS1(insn, regs);
      uint32_t src2 = GET_F32_RS2(insn, regs);
      int take_src1;
      if (rm)
        take_src1 = f32_lt_quiet(src2, src1) || isNaNF32UI(src2);
      else
        take_src1 = f32_lt_quiet(src1, src2) || isNaNF32UI(src2);
      SET_F32_RD(insn, regs, take_src1 ? src1 : src2);
      break;
    }
    case PRECISION_D: {
      uint64_t src1 = GET_F64_RS1(insn, regs);
      uint64_t src2 = GET_F64_RS2(insn, regs);
      int take_src1;
      if (rm)
        take_src1 = f64_lt_quiet(src2, src1) || isNaNF64UI(src2);
      else
        take_src1 = f64_lt_quiet(src1, src2) || isNaNF64UI(src2);
      SET_F64_RD(insn, regs, take_src1 ? src1 : src2);
      break;
    }
    default:
      truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
      break;
  }
}
/*
 * Float-to-float conversion handler.  The precision field names the
 * destination format and the 5-bit field at bits 24:20 names the source:
 *   destination S with source field 1: rd = f64_to_f32(rs1)
 *   destination D with source field 0: rd = f32_to_f64(rs1)
 * Every other combination is reported through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fcvt_ff)
{
  int src_fmt = (insn >> 20) & 0x1f;

  if (GET_PRECISION(insn) == PRECISION_S && src_fmt == 1) {
    SET_F32_RD(insn, regs, f64_to_f32(GET_F64_RS1(insn, regs)));
    return;
  }

  if (GET_PRECISION(insn) == PRECISION_D && src_fmt == 0) {
    SET_F64_RD(insn, regs, f32_to_f64(GET_F32_RS1(insn, regs)));
    return;
  }

  truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
/*
 * Integer-to-float conversion handler: converts integer register rs1 to a
 * single- or double-precision value in FP register rd.
 *
 * The 5-bit field at bits 24:20 selects the source type (0 int32, 1 uint32
 * and, under __riscv64, 2 int64 and 3 uint64).  Signed inputs are split
 * into a sign flag and an unsigned magnitude; the magnitude is converted
 * with ui64_to_f64 and the sign bit is XORed back in afterwards.  For
 * single precision the double result is narrowed with f64_to_f32.
 * Unsupported precisions or source types go to truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fcvt_fi)
{
  if (GET_PRECISION(insn) != PRECISION_S && GET_PRECISION(insn) != PRECISION_D)
    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);

  int negative = 0;
  uint64_t uint_val = GET_RS1(insn, regs);

  switch ((insn >> 20) & 0x1f)
  {
    case 0: { // int32
      // Widen before negating: -(int32_t)INT32_MIN overflows, and an int
      // result would sign-extend into uint_val, giving a magnitude near
      // 2^64 instead of 2^31.  Unsigned negation is well defined.
      int64_t wide = (int32_t)uint_val;
      negative = wide < 0;
      uint_val = negative ? -(uint64_t)wide : (uint64_t)wide;
      break;
    }
    case 1: // uint32
      uint_val = (uint32_t)uint_val;
      break;
#ifdef __riscv64
    case 2: // int64
      negative = (int64_t)uint_val < 0;
      uint_val = negative ? -uint_val : uint_val;
      break;
    case 3: // uint64
      break;
#endif
    default:
      return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
  }

  uint64_t float64 = ui64_to_f64(uint_val);
  if (negative)
    float64 ^= INT64_MIN;  // set the sign bit of the double

  if (GET_PRECISION(insn) == PRECISION_S)
    SET_F32_RD(insn, regs, f64_to_f32(float64));
  else
    SET_F64_RD(insn, regs, float64);
}
/*
 * Float-to-integer conversion handler: converts FP register rs1 to an
 * integer in rd.
 *
 * The 5-bit field at bits 24:20 selects the destination type (0 int32,
 * 1 uint32 and, under __riscv64, 2 int64 and 3 uint64).  The input is
 * widened to double if needed, split into a sign flag and a magnitude, and
 * the magnitude is converted with f64_to_ui64 using the current rounding
 * mode.  Each case sets `limit`, the largest magnitude the destination can
 * hold, and `limit_result`, the value written to rd when the magnitude
 * exceeds it; that case also raises the softfloat invalid flag.
 * Unsupported destination types or precisions go to truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fcvt_if)
{
  int rs2_num = (insn >> 20) & 0x1f;
#ifdef __riscv64
  if (rs2_num >= 4)
    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
#else
  if (rs2_num >= 2)
    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
#endif

  int64_t float64;
  if (GET_PRECISION(insn) == PRECISION_S)
    float64 = f32_to_f64(GET_F32_RS1(insn, regs));
  else if (GET_PRECISION(insn) == PRECISION_D)
    float64 = GET_F64_RS1(insn, regs);
  else
    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);

  // The raw bits are held in a signed integer, so "< 0" tests the sign bit.
  int negative = 0;
  if (float64 < 0) {
    negative = 1;
    float64 ^= INT64_MIN;
  }
  uint64_t uint_val = f64_to_ui64(float64, softfloat_roundingMode, true);
  uint64_t result, limit, limit_result;

  switch (rs2_num)
  {
    case 0: // int32
      if (negative) {
        result = (int32_t)-uint_val;
        // The magnitude bound is 2^31, but the saturated value must be
        // INT32_MIN sign-extended so a 64-bit rd holds a valid int32.
        limit = (uint32_t)INT32_MIN;
        limit_result = (uint64_t)(int64_t)INT32_MIN;
      } else {
        result = (int32_t)uint_val;
        limit_result = limit = INT32_MAX;
      }
      break;

    case 1: // uint32
      limit = limit_result = UINT32_MAX;
      if (negative)
        result = limit = 0;
      else
        result = (uint32_t)uint_val;
      break;

    case 2: // int64
      if (negative) {
        result = (int64_t)-uint_val;
        limit_result = limit = (uint64_t)INT64_MIN;
      } else {
        result = (int64_t)uint_val;
        limit_result = limit = INT64_MAX;
      }
      break;

    case 3: // uint64
      limit = limit_result = UINT64_MAX;
      if (negative)
        result = limit = 0;
      else
        result = (uint64_t)uint_val;
      break;

    default:
      __builtin_unreachable();
  }

  if (uint_val > limit) {
    result = limit_result;
    softfloat_raiseFlags(softfloat_flag_invalid);
  }

  SET_FS_DIRTY();
  SET_RD(insn, regs, result);
}
/*
 * Comparison handler: writes the result of comparing rs1 and rs2 to integer
 * register rd.  The rm field selects the predicate:
 *   rm == 0: equal, and if that fails, less-than
 *   rm == 1: less-than only
 *   rm == 2: equal only
 * rm values of 3 and above, and precisions other than S or D, are reported
 * through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fcmp)
{
  int rm = GET_RM(insn);
  if (rm >= 3) {
    truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
    return;
  }

  uintptr_t outcome;

  switch (GET_PRECISION(insn)) {
    case PRECISION_S: {
      uint32_t src1 = GET_F32_RS1(insn, regs);
      uint32_t src2 = GET_F32_RS2(insn, regs);
      if (rm == 1) {
        outcome = f32_lt(src1, src2);
      } else {
        outcome = f32_eq(src1, src2);
        if (rm == 0 && !outcome)
          outcome = f32_lt(src1, src2);
      }
      break;
    }
    case PRECISION_D: {
      uint64_t src1 = GET_F64_RS1(insn, regs);
      uint64_t src2 = GET_F64_RS2(insn, regs);
      if (rm == 1) {
        outcome = f64_lt(src1, src2);
      } else {
        outcome = f64_eq(src1, src2);
        if (rm == 0 && !outcome)
          outcome = f64_lt(src1, src2);
      }
      break;
    }
    default:
      truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
      return;
  }

  SET_RD(insn, regs, outcome);
}
/*
 * FP-to-integer move handler: copies the bits of FP register rs1 into
 * integer register rd.  The 32-bit form (MATCH_FMV_X_S) is always
 * accepted; the 64-bit form (MATCH_FMV_X_D) only under __riscv64.  Any
 * other encoding is reported through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fmv_if)
{
  uintptr_t moved_bits;

  if ((insn & MASK_FMV_X_S) == MATCH_FMV_X_S) {
    moved_bits = GET_F32_RS1(insn, regs);
  }
#ifdef __riscv64
  else if ((insn & MASK_FMV_X_D) == MATCH_FMV_X_D) {
    moved_bits = GET_F64_RS1(insn, regs);
  }
#endif
  else {
    truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
    return;
  }

  SET_RD(insn, regs, moved_bits);
}
/*
 * Integer-to-FP move handler: copies the bits of integer register rs1 into
 * FP register rd.  The 32-bit form (MATCH_FMV_S_X) is always accepted.
 * The 64-bit form (MATCH_FMV_D_X) is accepted only under __riscv64, the
 * same guard emulate_fmv_if applies to the opposite direction: without it
 * rs1 is only 32 bits wide and cannot supply a full double.  Any other
 * encoding is reported through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fmv_fi)
{
  uintptr_t rs1 = GET_RS1(insn, regs);

  if ((insn & MASK_FMV_S_X) == MATCH_FMV_S_X)
    SET_F32_RD(insn, regs, rs1);
#ifdef __riscv64
  else if ((insn & MASK_FMV_D_X) == MATCH_FMV_D_X)
    SET_F64_RD(insn, regs, rs1);
#endif
  else
    return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
}
/*
 * Fused multiply-add family handler.  Instruction bits 3:2 are passed to
 * softfloat_mulAddF32/F64 as the operation selector together with rs1, rs2
 * and rs3; the result goes to FP register rd.  If the FS field of mstatus
 * is zero, or the precision is neither S nor D, the instruction is
 * reported through truly_illegal_insn().
 */
DECLARE_EMULATION_FUNC(emulate_fmadd)
{
  // if FPU is disabled, punt back to the OS
  if (unlikely((mstatus & MSTATUS_FS) == 0)) {
    truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
    return;
  }

  int op = (insn >> 2) & 3;
  SETUP_STATIC_ROUNDING(insn);

  switch (GET_PRECISION(insn)) {
    case PRECISION_S: {
      uint32_t src1 = GET_F32_RS1(insn, regs);
      uint32_t src2 = GET_F32_RS2(insn, regs);
      uint32_t src3 = GET_F32_RS3(insn, regs);
      SET_F32_RD(insn, regs, softfloat_mulAddF32(op, src1, src2, src3));
      break;
    }
    case PRECISION_D: {
      uint64_t src1 = GET_F64_RS1(insn, regs);
      uint64_t src2 = GET_F64_RS2(insn, regs);
      uint64_t src3 = GET_F64_RS3(insn, regs);
      SET_F64_RD(insn, regs, softfloat_mulAddF64(op, src1, src2, src3));
      break;
    }
    default:
      truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
      break;
  }
}

81
pk/fp_emulation.h

@ -0,0 +1,81 @@
// Accessors for the FP register file, FCSR and rounding mode used by the
// instruction emulation code.  Two flavours are provided: one for builds with
// __riscv_hard_float defined and one for builds without it.
#ifndef _RISCV_FP_EMULATION_H
#define _RISCV_FP_EMULATION_H
#include "emulation.h"
// Instruction bits 26:25 select the precision, bits 14:12 the rounding mode.
#define GET_PRECISION(insn) (((insn) >> 25) & 3)
#define GET_RM(insn) (((insn) >> 12) & 7)
#define PRECISION_S 0
#define PRECISION_D 1
#ifdef __riscv_hard_float
// Hard-float flavour.  `((insn) >> ((pos)-3)) & 0xf8` is the 5-bit register
// number found at bit `pos`, multiplied by 8.  The asm jumps to
// get_f32_reg + that offset, linking through t0, and the result comes back in
// a0.  Presumably get_f32_reg is a table of 8-byte stubs, one per FP
// register -- confirm against its definition.
# define GET_F32_REG(insn, pos, regs) ({ \
register int32_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \
uintptr_t tmp; \
asm ("1: auipc %0, %%pcrel_hi(get_f32_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \
value; })
// Same dispatch scheme for writes: the value travels in a0 and the jump
// target is put_f32_reg + offset.
# define SET_F32_REG(insn, pos, regs, val) ({ \
register uint32_t value asm("a0") = (val); \
uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \
uintptr_t tmp; \
asm volatile ("1: auipc %0, %%pcrel_hi(put_f32_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); })
// Writes 0 to FP register number i (the register number is placed at bit 3
// of a fake instruction word and extracted with pos == 3).
# define init_fp_reg(i) SET_F32_REG((i) << 3, 3, 0, 0)
// 64-bit read.  When uintptr_t is 4 bytes wide the value returned in a0 is
// dereferenced as a pointer to the 64-bit result; otherwise a0 is the result.
# define GET_F64_REG(insn, pos, regs) ({ \
register uintptr_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \
uintptr_t tmp; \
asm ("1: auipc %0, %%pcrel_hi(get_f64_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \
sizeof(uintptr_t) == 4 ? *(int64_t*)value : (int64_t)value; })
// 64-bit write.  When uintptr_t is 4 bytes wide a0 carries the address of a
// local copy of the value; otherwise a0 carries the value itself.
# define SET_F64_REG(insn, pos, regs, val) ({ \
uint64_t __val = (val); \
register uintptr_t value asm("a0") = sizeof(uintptr_t) == 4 ? (uintptr_t)&__val : (uintptr_t)__val; \
uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \
uintptr_t tmp; \
asm volatile ("1: auipc %0, %%pcrel_hi(put_f64_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); })
// FCSR and its sub-fields map directly onto the fcsr/frm/fflags CSRs.
# define GET_FCSR() read_csr(fcsr)
# define SET_FCSR(value) write_csr(fcsr, (value))
# define GET_FRM() read_csr(frm)
# define SET_FRM(value) write_csr(frm, (value))
# define GET_FFLAGS() read_csr(fflags)
# define SET_FFLAGS(value) write_csr(fflags, (value))
// Leaves the effective rounding mode in register tp, where
// softfloat_roundingMode reads it back.  If all three funct3 bits are set the
// frm CSR value is used; an rm field above 4 returns from the *enclosing*
// function through truly_illegal_insn; otherwise the rm field is used.
// Relies on regs, mcause, mepc and mstatus being in scope at the use site.
# define SETUP_STATIC_ROUNDING(insn) ({ \
register long tp asm("tp") = read_csr(frm); \
if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) ; \
else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \
else tp = GET_RM(insn); \
asm volatile ("":"+r"(tp)); })
# define softfloat_raiseFlags(which) set_csr(fflags, which)
# define softfloat_roundingMode ({ register int tp asm("tp"); tp; })
#else
// Soft-float flavour.  FP registers are 64-bit slots located 32 uintptr_t
// entries into `regs`; `((insn) >> ((pos)-3)) & 0xf8` is the byte offset of
// the selected slot.
# define GET_F64_REG(insn, pos, regs) (*(int64_t*)((void*)((regs) + 32) + (((insn) >> ((pos)-3)) & 0xf8)))
# define SET_F64_REG(insn, pos, regs, val) (GET_F64_REG(insn, pos, regs) = (val))
// A 32-bit access reinterprets the start of the 64-bit slot as an int32_t.
# define GET_F32_REG(insn, pos, regs) (*(int32_t*)&GET_F64_REG(insn, pos, regs))
# define SET_F32_REG(insn, pos, regs, val) (GET_F32_REG(insn, pos, regs) = (val))
// The emulated FCSR lives in the low 8 bits of register tp: flags in bits
// 4:0 and the rounding mode in bits 7:5.
# define GET_FCSR() ({ register int tp asm("tp"); tp & 0xFF; })
# define SET_FCSR(value) ({ asm volatile("add tp, x0, %0" :: "rI"((value) & 0xFF)); })
# define GET_FRM() (GET_FCSR() >> 5)
# define SET_FRM(value) SET_FCSR(GET_FFLAGS() | ((value) << 5))
# define GET_FFLAGS() (GET_FCSR() & 0x1F)
# define SET_FFLAGS(value) SET_FCSR((GET_FRM() << 5) | ((value) & 0x1F))
// Stores the effective rounding mode in tp bits 15:13, which is what
// softfloat_roundingMode (tp >> 13) reads.  If all three funct3 bits are set,
// `tp |= tp << 8` copies the emulated frm (bits 7:5) up there; an rm field
// above 4 returns from the *enclosing* function through truly_illegal_insn;
// otherwise the rm field is used.
# define SETUP_STATIC_ROUNDING(insn) ({ \
register int tp asm("tp"); tp &= 0xFF; \
if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) tp |= tp << 8; \
else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \
else tp |= GET_RM(insn) << 13; \
asm volatile ("":"+r"(tp)); })
# define softfloat_raiseFlags(which) ({ asm volatile ("or tp, tp, %0" :: "rI"(which)); })
# define softfloat_roundingMode ({ register int tp asm("tp"); tp >> 13; })
#endif
// Operand shorthands: rs1, rs2, rs3 and rd are read at instruction bits 15,
// 20, 27 and 7 respectively.
#define GET_F32_RS1(insn, regs) (GET_F32_REG(insn, 15, regs))
#define GET_F32_RS2(insn, regs) (GET_F32_REG(insn, 20, regs))
#define GET_F32_RS3(insn, regs) (GET_F32_REG(insn, 27, regs))
#define GET_F64_RS1(insn, regs) (GET_F64_REG(insn, 15, regs))
#define GET_F64_RS2(insn, regs) (GET_F64_REG(insn, 20, regs))
#define GET_F64_RS3(insn, regs) (GET_F64_REG(insn, 27, regs))
// Every write to an FP destination also sets the MSTATUS_FS bits in mstatus.
#define SET_F32_RD(insn, regs, val) (SET_F32_REG(insn, 7, regs, val), SET_FS_DIRTY())
#define SET_F64_RD(insn, regs, val) (SET_F64_REG(insn, 7, regs, val), SET_FS_DIRTY())
#define SET_FS_DIRTY() set_csr(mstatus, MSTATUS_FS)
#endif

10
pk/mentry.S

@ -86,15 +86,15 @@ trap_vector:
STORE t1, 6*REGBYTES(sp)
sll t1, a1, 2 # t1 <- mcause << 2
STORE t2, 7*REGBYTES(sp)
add t0, t0, t1 # t0 <- %hi(trap_table)[mcause]
add t1, t0, t1 # t1 <- %hi(trap_table)[mcause]
STORE s0, 8*REGBYTES(sp)
lw t0, %pcrel_lo(1b)(t0) # t0 <- trap_table[mcause]
lw t1, %pcrel_lo(1b)(t1) # t1 <- trap_table[mcause]
STORE s1, 9*REGBYTES(sp)
mv a0, sp # a0 <- regs
STORE a2,12*REGBYTES(sp)
csrr a2, mepc # a2 <- mepc
STORE a3,13*REGBYTES(sp)
csrrw t1, mscratch, x0 # t1 <- user sp
csrrw t0, mscratch, x0 # t0 <- user sp
STORE a4,14*REGBYTES(sp)
STORE a5,15*REGBYTES(sp)
STORE a6,16*REGBYTES(sp)
@ -113,7 +113,7 @@ trap_vector:
STORE t4,29*REGBYTES(sp)
STORE t5,30*REGBYTES(sp)
STORE t6,31*REGBYTES(sp)
STORE t1, 2*REGBYTES(sp) # sp
STORE t0, 2*REGBYTES(sp) # sp
#ifndef __riscv_hard_float
lw tp, (sp) # Move the emulated FCSR from x0's save slot into tp.
@ -121,7 +121,7 @@ trap_vector:
STORE x0, (sp) # Zero x0's save slot.
# Invoke the handler.
jalr t0
jalr t1
#ifndef __riscv_hard_float
sw tp, (sp) # Move the emulated FCSR from tp into x0's save slot.

1
pk/minit.c

@ -1,5 +1,6 @@
#include "vm.h"
#include "mtrap.h"
#include "fp_emulation.h"
uintptr_t mem_size;
uint32_t num_harts;

90
pk/mtrap.c

@ -4,77 +4,6 @@
#include "vm.h"
#include <errno.h>
// Illegal-instruction trap entry: fetches the faulting instruction and
// dispatches it to an emulation routine chosen by instruction bits 6:2.
//   regs   - saved register area handed on to the emulation routine
//   mcause - trap cause, forwarded unchanged
//   mepc   - address of the faulting instruction
void illegal_insn_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
{
// The dispatch table is emitted into .rodata as 32 32-bit words, one per
// value of instruction bits 6:2.  Slots without an emulation routine point
// at truly_illegal_insn.
asm (".pushsection .rodata\n"
"illegal_insn_trap_table:\n"
" .word truly_illegal_insn\n"
#ifdef PK_ENABLE_FP_EMULATION
" .word emulate_float_load\n"
#else
" .word truly_illegal_insn\n"
#endif
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
#ifdef PK_ENABLE_FP_EMULATION
" .word emulate_float_store\n"
#else
" .word truly_illegal_insn\n"
#endif
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word emulate_mul_div\n"
" .word truly_illegal_insn\n"
" .word emulate_mul_div32\n"
" .word truly_illegal_insn\n"
#ifdef PK_ENABLE_FP_EMULATION
" .word emulate_fmadd\n"
" .word emulate_fmadd\n"
" .word emulate_fmadd\n"
" .word emulate_fmadd\n"
" .word emulate_fp\n"
#else
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
#endif
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
#ifdef PK_ENABLE_FP_EMULATION
" .word emulate_system\n"
#else
" .word truly_illegal_insn\n"
#endif
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .word truly_illegal_insn\n"
" .popsection");
uintptr_t mstatus;
insn_t insn = get_insn(mepc, &mstatus);
// Only instructions whose two low bits are both set are dispatched through
// the table (presumably the 32-bit encodings -- confirm against the ISA).
if ((insn & 3) != 3)
return truly_illegal_insn(regs, mcause, mepc, mstatus, insn);
// Step past the instruction before emulating it; the handler still receives
// the original mepc value.
write_csr(mepc, mepc + 4);
extern int32_t illegal_insn_trap_table[];
// insn & 0x7c is bits 6:2 already multiplied by 4, i.e. the byte offset of
// the matching 32-bit table entry.
int32_t* pf = (void*)illegal_insn_trap_table + (insn & 0x7c);
emulation_func f = (emulation_func)(uintptr_t)*pf;
f(regs, mcause, mepc, mstatus, insn);
}
void __attribute__((noreturn)) bad_trap()
{
panic("machine mode: unhandlable trap %d @ %p", read_csr(mcause), read_csr(mepc));
@ -301,6 +230,25 @@ void mcall_trap(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc)
write_csr(mepc, mepc + 4);
}
// Forwards the current machine-mode trap to the supervisor trap vector.
//   epc     - value stored into sepc
//   mstatus - mstatus value the trap was taken with; its MPP/MPIE fields are
//             copied into SPP/SPIE before the updated word is written back
void redirect_trap(uintptr_t epc, uintptr_t mstatus)
{
  // Expose the trap to the supervisor and make mepc point at stvec.
  write_csr(sepc, epc);
  write_csr(scause, read_csr(mcause));
  write_csr(mepc, read_csr(stvec));

  uintptr_t prev_priv = EXTRACT_FIELD(mstatus, MSTATUS_MPP);
  uintptr_t saved_ie = EXTRACT_FIELD(mstatus, MSTATUS_MPIE);
  kassert(prev_priv <= PRV_S);

  // Build the new status word in a separate local: previous privilege and
  // interrupt-enable move into the S-level fields, then MPP becomes PRV_S
  // and MPIE is cleared.
  uintptr_t redirected = mstatus;
  redirected = INSERT_FIELD(redirected, MSTATUS_SPP, prev_priv);
  redirected = INSERT_FIELD(redirected, MSTATUS_SPIE, saved_ie);
  redirected = INSERT_FIELD(redirected, MSTATUS_MPP, PRV_S);
  redirected = INSERT_FIELD(redirected, MSTATUS_MPIE, 0);
  write_csr(mstatus, redirected);

  extern void __redirect_trap();
  __redirect_trap();
}
static void machine_page_fault(uintptr_t* regs, uintptr_t mepc)
{
// MPRV=1 iff this trap occurred while emulating an instruction on behalf

136
pk/mtrap.h

@ -9,142 +9,6 @@
#include "sbi.h"
// unpriv_mem_access(code, o0, o1, o2, i0[, i1[, i2]]) runs the assembly
// fragment `code` with MSTATUS_MPRV set and evaluates to the mstatus value
// read before the bit was set.  GET_MACRO selects the unpriv_mem_accessN
// wrapper matching the number of trailing arguments; the wrappers pad the
// missing inputs with zero-valued locals.
#define GET_MACRO(_1,_2,_3,_4,NAME,...) NAME
#define unpriv_mem_access(a, b, c, ...) GET_MACRO(__VA_ARGS__, unpriv_mem_access3, unpriv_mem_access2, unpriv_mem_access1, unpriv_mem_access0)(a, b, c, __VA_ARGS__)
#define unpriv_mem_access0(a, b, c, d) ({ uintptr_t z = 0, z1 = 0, z2 = 0; unpriv_mem_access_base(a, b, c, d, z, z1, z2); })
#define unpriv_mem_access1(a, b, c, d, e) ({ uintptr_t z = 0, z1 = 0; unpriv_mem_access_base(a, b, c, d, e, z, z1); })
#define unpriv_mem_access2(a, b, c, d, e, f) ({ uintptr_t z = 0; unpriv_mem_access_base(a, b, c, d, e, f, z); })
#define unpriv_mem_access3(a, b, c, d, e, f, g) unpriv_mem_access_base(a, b, c, d, e, f, g)
// The macro parameters o0..o2 / i0..i2 are substituted inside the [name]
// operand labels too, so `code` refers to each operand by the spelling of the
// argument passed in (e.g. %[insn]).  csrrs reads the old mstatus into a3
// while setting MPRV; the final csrw writes that old value back.  A local
// named `mepc` must be in scope at the use site; it is pinned to a2 as an
// extra input (presumably for the fault handler -- confirm).  unused1..3 are
// scratch names callers may pass for outputs they do not need.
#define unpriv_mem_access_base(code, o0, o1, o2, i0, i1, i2) ({ \
register uintptr_t mstatus asm ("a3") = MSTATUS_MPRV; \
register uintptr_t __mepc asm ("a2") = mepc; \
uintptr_t unused1, unused2, unused3 __attribute__((unused)); \
asm volatile ("csrrs %[mstatus], mstatus, %[mstatus]\n" \
code "\n" \
"csrw mstatus, %[mstatus]\n" \
: [o0] "=&r"(o0), [o1] "=&r"(o1), [o2] "=&r"(o2), \
[mstatus] "+&r"(mstatus) \
: [i0] "rJ"(i0), [i1] "rJ"(i1), [i2] "rJ"(i2), \
"r"(__mepc)); \
(mstatus); \
})
// A fetched instruction word, and the common signature of every emulation
// routine: (regs, mcause, mepc, mstatus, insn).
typedef uint32_t insn_t;
typedef void (*emulation_func)(uintptr_t*, uintptr_t, uintptr_t, uintptr_t, insn_t);
// Declares or defines an emulation routine with the parameter names that the
// accessor macros rely on.
#define DECLARE_EMULATION_FUNC(name) void name(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn)
void truly_illegal_insn(uintptr_t* regs, uintptr_t mcause, uintptr_t mepc, uintptr_t mstatus, insn_t insn);
void redirect_trap(uintptr_t epc, uintptr_t mstatus);
// Yields a pointer to the save slot of the integer register whose 5-bit
// number sits at bit `pos` of insn: shifting by (pos - LOG_REGBYTES) and
// masking produces the register number already scaled by the slot size.
// NOTE(review): `regs` is used unparenthesized in the cast.
#define GET_REG(insn, pos, regs) ({ \
int mask = (1 << (5+LOG_REGBYTES)) - (1 << LOG_REGBYTES); \
(uintptr_t*)((uintptr_t)regs + (((insn) >> ((pos) - LOG_REGBYTES)) & mask)); \
})
// rs1, rs2 and rd are read at instruction bits 15, 20 and 7.
#define GET_RS1(insn, regs) (*GET_REG(insn, 15, regs))
#define GET_RS2(insn, regs) (*GET_REG(insn, 20, regs))
#define SET_RD(insn, regs, val) (*GET_REG(insn, 7, regs) = (val))
// Sign-extended immediates: IMM_I is bits 31:20; IMM_S joins bits 31:25
// (upper part) with bits 11:7 (low five bits).
#define IMM_I(insn) ((int32_t)(insn) >> 20)
#define IMM_S(insn) (((int32_t)(insn) >> 25 << 5) | (int32_t)(((insn) >> 7) & 0x1f))
// Mask covering instruction bits 14:12.
#define MASK_FUNCT3 0x7000
// FP register / FCSR accessors for the emulation code, in a hard-float and a
// soft-float flavour.  Instruction bits 26:25 select the precision, bits
// 14:12 the rounding mode.
#define GET_PRECISION(insn) (((insn) >> 25) & 3)
#define GET_RM(insn) (((insn) >> 12) & 7)
#define PRECISION_S 0
#define PRECISION_D 1
#ifdef __riscv_hard_float
// Hard-float flavour: `((insn) >> ((pos)-3)) & 0xf8` is the register number
// at bit `pos` times 8; the asm jumps to get_*/put_* plus that offset,
// linking through t0, with the value carried in a0.
# define GET_F32_REG(insn, pos, regs) ({ \
register int32_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \
uintptr_t tmp; \
asm ("1: auipc %0, %%pcrel_hi(get_f32_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \
value; })
# define SET_F32_REG(insn, pos, regs, val) ({ \
register uint32_t value asm("a0") = (val); \
uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \
uintptr_t tmp; \
asm volatile ("1: auipc %0, %%pcrel_hi(put_f32_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); })
// Writes 0 to FP register number i.
# define init_fp_reg(i) SET_F32_REG((i) << 3, 3, 0, 0)
// When uintptr_t is 4 bytes wide, a0 carries a pointer to the 64-bit value
// instead of the value itself (both for reads and for writes).
# define GET_F64_REG(insn, pos, regs) ({ \
register uintptr_t value asm("a0") = ((insn) >> ((pos)-3)) & 0xf8; \
uintptr_t tmp; \
asm ("1: auipc %0, %%pcrel_hi(get_f64_reg); add %0, %0, %1; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp), "+&r"(value) :: "t0"); \
sizeof(uintptr_t) == 4 ? *(int64_t*)value : (int64_t)value; })
# define SET_F64_REG(insn, pos, regs, val) ({ \
uint64_t __val = (val); \
register uintptr_t value asm("a0") = sizeof(uintptr_t) == 4 ? (uintptr_t)&__val : (uintptr_t)__val; \
uintptr_t offset = ((insn) >> ((pos)-3)) & 0xf8; \
uintptr_t tmp; \
asm volatile ("1: auipc %0, %%pcrel_hi(put_f64_reg); add %0, %0, %2; jalr t0, %0, %%pcrel_lo(1b)" : "=&r"(tmp) : "r"(value), "r"(offset) : "t0"); })
# define GET_FCSR() read_csr(fcsr)
# define SET_FCSR(value) write_csr(fcsr, (value))
# define GET_FRM() read_csr(frm)
# define SET_FRM(value) write_csr(frm, (value))
# define GET_FFLAGS() read_csr(fflags)
# define SET_FFLAGS(value) write_csr(fflags, (value))
// Leaves the effective rounding mode in tp: the frm CSR if all funct3 bits
// are set, otherwise the rm field.  An rm field above 4 returns from the
// enclosing function through truly_illegal_insn.
# define SETUP_STATIC_ROUNDING(insn) ({ \
register long tp asm("tp") = read_csr(frm); \
if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) ; \
else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \
else tp = GET_RM(insn); \
asm volatile ("":"+r"(tp)); })
# define softfloat_raiseFlags(which) set_csr(fflags, which)
# define softfloat_roundingMode ({ register int tp asm("tp"); tp; })
#else
// Soft-float flavour: FP registers are an array of 64-bit slots starting at
// regs[32], indexed by the 5-bit register number at bit `pos`.
# define GET_F64_REG(insn, pos, regs) (((int64_t*)(&(regs)[32]))[((insn) >> (pos)) & 0x1f])
# define SET_F64_REG(insn, pos, regs, val) (GET_F64_REG(insn, pos, regs) = (val))
# define GET_F32_REG(insn, pos, regs) (*(int32_t*)&GET_F64_REG(insn, pos, regs))
# define SET_F32_REG(insn, pos, regs, val) (GET_F32_REG(insn, pos, regs) = (val))
// The emulated FCSR is the low 8 bits of tp: flags in bits 4:0, rounding
// mode in bits 7:5.
# define GET_FCSR() ({ register int tp asm("tp"); tp & 0xFF; })
# define SET_FCSR(value) ({ asm volatile("add tp, x0, %0" :: "rI"((value) & 0xFF)); })
# define GET_FRM() (GET_FCSR() >> 5)
# define SET_FRM(value) SET_FCSR(GET_FFLAGS() | ((value) << 5))
# define GET_FFLAGS() (GET_FCSR() & 0x1F)
# define SET_FFLAGS(value) SET_FCSR((GET_FRM() << 5) | ((value) & 0x1F))
// Stores the effective rounding mode in tp bits 15:13 (read back by
// softfloat_roundingMode as tp >> 13); an rm field above 4 returns from the
// enclosing function through truly_illegal_insn.
# define SETUP_STATIC_ROUNDING(insn) ({ \
register int tp asm("tp"); tp &= 0xFF; \
if (likely(((insn) & MASK_FUNCT3) == MASK_FUNCT3)) tp |= tp << 8; \
else if (GET_RM(insn) > 4) return truly_illegal_insn(regs, mcause, mepc, mstatus, insn); \
else tp |= GET_RM(insn) << 13; \
asm volatile ("":"+r"(tp)); })
# define softfloat_raiseFlags(which) ({ asm volatile ("or tp, tp, %0" :: "rI"(which)); })
# define softfloat_roundingMode ({ register int tp asm("tp"); tp >> 13; })
#endif
// rs1, rs2, rs3 and rd are read at instruction bits 15, 20, 27 and 7.
#define GET_F32_RS1(insn, regs) (GET_F32_REG(insn, 15, regs))
#define GET_F32_RS2(insn, regs) (GET_F32_REG(insn, 20, regs))
#define GET_F32_RS3(insn, regs) (GET_F32_REG(insn, 27, regs))
#define GET_F64_RS1(insn, regs) (GET_F64_REG(insn, 15, regs))
#define GET_F64_RS2(insn, regs) (GET_F64_REG(insn, 20, regs))
#define GET_F64_RS3(insn, regs) (GET_F64_REG(insn, 27, regs))
// Every write to an FP destination also sets the MSTATUS_FS bits in mstatus.
#define SET_F32_RD(insn, regs, val) (SET_F32_REG(insn, 7, regs, val), SET_FS_DIRTY())
#define SET_F64_RD(insn, regs, val) (SET_F64_REG(insn, 7, regs, val), SET_FS_DIRTY())
#define SET_FS_DIRTY() set_csr(mstatus, MSTATUS_FS)
// Fetches the instruction at mepc through unpriv_mem_access (i.e. with
// MSTATUS_MPRV set) and returns it.  The mstatus value produced by the access
// macro is stored through *mstatus.
static insn_t __attribute__((always_inline)) get_insn(uintptr_t mepc, uintptr_t* mstatus)
{
insn_t insn;
#ifdef __riscv_compressed
// Read the low halfword first; the upper halfword at mepc+2 is fetched and
// merged in only when the two low bits of the first halfword are both set.
int rvc_mask = 3, insn_hi;
*mstatus = unpriv_mem_access("lhu %[insn], 0(%[mepc]);"
"and %[insn_hi], %[insn], %[rvc_mask];"
"bne %[insn_hi], %[rvc_mask], 1f;"
"lh %[insn_hi], 2(%[mepc]);"
"sll %[insn_hi], %[insn_hi], 16;"
"or %[insn], %[insn], %[insn_hi];"
"1:",
insn, insn_hi, unused1, mepc, rvc_mask);
#else
// Without compressed support a single 32-bit load is used.
*mstatus = unpriv_mem_access("lw %[insn], 0(%[mepc])",
insn, unused1, unused2, mepc);
#endif
return insn;
}
// Reads CSR `reg` with a non-volatile asm statement, so the compiler is free
// to merge or drop repeated reads.  Presumably meant for CSRs whose value
// never changes at run time -- confirm before using it on anything else.
#define read_const_csr(reg) ({ unsigned long __tmp; \
asm ("csrr %0, " #reg : "=r"(__tmp)); \
__tmp; })

3
pk/pk.h

@ -41,9 +41,6 @@ void kassert_fail(const char* s) __attribute__((noreturn));
// MIN evaluates its arguments more than once; avoid side effects in them.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define CLAMP(a, lo, hi) MIN(MAX(a, lo), hi)
// Branch-prediction hints for the compiler.
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
// Bit-field helpers for a mask `which`: (which) & ~((which)-1) isolates the
// lowest set bit of the mask, so dividing by it shifts the masked field down
// to bit 0 and multiplying by it shifts a field value up into place.
#define EXTRACT_FIELD(val, which) (((val) & (which)) / ((which) & ~((which)-1)))
#define INSERT_FIELD(val, which, fieldval) (((val) & ~(which)) | ((fieldval) * ((which) & ~((which)-1))))

15
pk/pk.mk.in

@ -2,18 +2,27 @@ pk_subproject_deps = \
softfloat \
pk_hdrs = \
mtrap.h \
encoding.h \
atomic.h \
bits.h \
elf.h \
emulation.h \
encoding.h \
file.h \
fp_emulation.h \
frontend.h \
elf.h \
mcall.h \
mtrap.h \
pk.h \
sbi.h \
syscall.h \
unprivileged_memory.h \
vm.h \
pk_c_srcs = \
mtrap.c \
minit.c \
emulation.c \
fp_emulation.c \
sbi_impl.c \
init.c \
file.c \

79
pk/unprivileged_memory.h

@ -0,0 +1,79 @@
#ifndef _RISCV_MISALIGNED_H
#define _RISCV_MISALIGNED_H
#include "encoding.h"
#include <stdint.h>
// Defines `static inline type load_<type>(const type* addr, uintptr_t mepc)`,
// which performs the load instruction `insn` on *addr while MSTATUS_MPRV is
// set: csrrs sets the bit and captures the previous mstatus, and csrw writes
// that previous value back after the load.  mepc is pinned to a2 as an
// otherwise unused input (presumably for the fault handler -- confirm).
// NOTE(review): __mstatus is uninitialized yet constrained as "+&r"; csrrs
// overwrites it, so "=&r" looks like the intended constraint.  The asm is
// also not volatile although it modifies a CSR -- confirm this is deliberate.
#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \
static inline type load_##type(const type* addr, uintptr_t mepc) \
{ \
register uintptr_t __mepc asm ("a2") = mepc; \
register uintptr_t __mstatus asm ("a3"); \
type val; \
asm ("csrrs %0, mstatus, %3\n" \
#insn " %1, %2\n" \
"csrw mstatus, %0" \
: "+&r" (__mstatus), "=&r" (val) \
: "m" (*addr), "r" (MSTATUS_MPRV), "r" (__mepc)); \
return val; \
}
// Defines `static inline void store_<type>(type* addr, type val, uintptr_t
// mepc)`, which performs the store instruction `insn` of val to *addr while
// MSTATUS_MPRV is set, then restores the previous mstatus.  mepc is pinned to
// a2 as an otherwise unused input (presumably for the fault handler --
// confirm).
// NOTE(review): __mstatus is uninitialized yet constrained as "+&r"; csrrs
// overwrites it, so "=&r" looks like the intended constraint.
#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \
static inline void store_##type(type* addr, type val, uintptr_t mepc) \
{ \
register uintptr_t __mepc asm ("a2") = mepc; \
register uintptr_t __mstatus asm ("a3"); \
asm volatile ("csrrs %0, mstatus, %3\n" \
#insn " %1, %2\n" \
"csrw mstatus, %0" \
: "+&r" (__mstatus) \
: "r" (val), "m" (*addr), "r" (MSTATUS_MPRV), \
"r" (__mepc)); \
}
// Instantiate load_<type>/store_<type> for each supported access width, each
// paired with its load/store instruction.
DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint8_t, lbu)
DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint16_t, lhu)
DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int8_t, lb)
DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int16_t, lh)
DECLARE_UNPRIVILEGED_LOAD_FUNCTION(int32_t, lw)
DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint8_t, sb)
DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint16_t, sh)
DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint32_t, sw)
#ifdef __riscv64
// 64-bit builds: unsigned 32-bit loads use lwu, and the 64-bit accessors
// exist.
DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint32_t, lwu)
DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint64_t, ld)
DECLARE_UNPRIVILEGED_STORE_FUNCTION(uint64_t, sd)
#else
// Otherwise a plain lw serves the unsigned 32-bit load.
DECLARE_UNPRIVILEGED_LOAD_FUNCTION(uint32_t, lw)
#endif
// Fetches the instruction at mepc with MSTATUS_MPRV set and returns it.  The
// mstatus value captured by csrrs (i.e. before MPRV was set) is stored
// through *mstatus, and that same value is written back to the CSR once the
// fetch is done.  mepc is pinned to a2 and doubles as the load address.
static uint32_t __attribute__((always_inline)) get_insn(uintptr_t mepc, uintptr_t* mstatus)
{
register uintptr_t __mepc asm ("a2") = mepc;
register uintptr_t __mstatus asm ("a3");
uint32_t val;
#ifndef __riscv_compressed
// Without compressed support a single 32-bit load fetches the instruction.
asm ("csrrs %[mstatus], mstatus, %[mprv]\n"
"lw %[insn], (%[addr])\n"
"csrw mstatus, %[mstatus]"
: [mstatus] "+&r" (__mstatus), [insn] "=&r" (val)
: [mprv] "r" (MSTATUS_MPRV), [addr] "r" (__mepc));
#else
// Read the low halfword first; the halfword at mepc+2 is loaded, shifted
// left by 16 and added in only when the two low bits of the first halfword
// are both set.
uintptr_t rvc_mask = 3, tmp;
asm ("csrrs %[mstatus], mstatus, %[mprv]\n"
"lhu %[insn], (%[addr])\n"
"and %[tmp], %[insn], %[rvc_mask]\n"
"bne %[tmp], %[rvc_mask], 1f\n"
"lh %[tmp], 2(%[addr])\n"
"sll %[tmp], %[tmp], 16\n"
"add %[insn], %[insn], %[tmp]\n"
"1: csrw mstatus, %[mstatus]"
: [mstatus] "+&r" (__mstatus), [insn] "=&r" (val), [tmp] "=&r" (tmp)
: [mprv] "r" (MSTATUS_MPRV), [addr] "r" (__mepc),
[rvc_mask] "r" (rvc_mask));
#endif
*mstatus = __mstatus;
return val;
}
#endif

2
softfloat/specialize.h

@ -112,6 +112,6 @@ uint_fast64_t softfloat_commonNaNToF64UI( struct commonNaN );
*----------------------------------------------------------------------------*/
uint_fast64_t softfloat_propagateNaNF64UI( uint_fast64_t, uint_fast64_t );
#include "../pk/mtrap.h"
#include "fp_emulation.h"
#endif

Loading…
Cancel
Save