Browse Source

pk: support >2 GiB of user memory for RV64

Previously, the pk always ran from virtual address MEM_START, which
limited user processes to the addresses below MEM_START. Now the pk is
remapped into the negative (upper-half) virtual addresses, allowing user
processes to expand beyond MEM_START.
pull/237/head
Andrew Waterman 5 years ago
parent
commit
b9eba474fb
  1. 21
      pk/console.c
  2. 3
      pk/frontend.c
  3. 4
      pk/handlers.c
  4. 35
      pk/mmap.c
  5. 9
      pk/mmap.h
  6. 36
      pk/pk.c
  7. 2
      pk/syscall.c

21
pk/console.c

@ -1,6 +1,7 @@
// See LICENSE for license details. // See LICENSE for license details.
#include "pk.h" #include "pk.h"
#include "mmap.h"
#include "file.h" #include "file.h"
#include "frontend.h" #include "frontend.h"
#include <stdint.h> #include <stdint.h>
@ -23,21 +24,27 @@ void printk(const char* s, ...)
va_end(vl); va_end(vl);
} }
void dump_tf(trapframe_t* tf) static const char* get_regname(int r)
{ {
static const char* regnames[] = { static const char regnames[] = {
"z ", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "z \0" "ra\0" "sp\0" "gp\0" "tp\0" "t0\0" "t1\0" "t2\0"
"s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5", "s0\0" "s1\0" "a0\0" "a1\0" "a2\0" "a3\0" "a4\0" "a5\0"
"a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "a6\0" "a7\0" "s2\0" "s3\0" "s4\0" "s5\0" "s6\0" "s7\0"
"s8", "s9", "sA", "sB", "t3", "t4", "t5", "t6" "s8\0" "s9\0" "sA\0" "sB\0" "t3\0" "t4\0" "t5\0" "t6"
}; };
return &regnames[r * 3];
}
void dump_tf(trapframe_t* tf)
{
tf->gpr[0] = 0; tf->gpr[0] = 0;
for(int i = 0; i < 32; i+=4) for(int i = 0; i < 32; i+=4)
{ {
for(int j = 0; j < 4; j++) for(int j = 0; j < 4; j++)
printk("%s %lx%c",regnames[i+j],tf->gpr[i+j],j < 3 ? ' ' : '\n'); printk("%s %lx%c", get_regname(i+j), tf->gpr[i+j], j < 3 ? ' ' : '\n');
} }
printk("pc %lx va %lx insn %x sr %lx\n", tf->epc, tf->badvaddr, printk("pc %lx va %lx insn %x sr %lx\n", tf->epc, tf->badvaddr,
(uint32_t)tf->insn, tf->status); (uint32_t)tf->insn, tf->status);

3
pk/frontend.c

@ -5,6 +5,7 @@
#include "frontend.h" #include "frontend.h"
#include "syscall.h" #include "syscall.h"
#include "htif.h" #include "htif.h"
#include "mmap.h"
#include <stdint.h> #include <stdint.h>
long frontend_syscall(long n, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5, uint64_t a6) long frontend_syscall(long n, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3, uint64_t a4, uint64_t a5, uint64_t a6)
@ -23,7 +24,7 @@ long frontend_syscall(long n, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3
magic_mem[6] = a5; magic_mem[6] = a5;
magic_mem[7] = a6; magic_mem[7] = a6;
htif_syscall((uintptr_t)magic_mem); htif_syscall(kva2pa_maybe(magic_mem));
long ret = magic_mem[0]; long ret = magic_mem[0];

4
pk/handlers.c

@ -122,5 +122,7 @@ void handle_trap(trapframe_t* tf)
kassert(tf->cause < ARRAY_SIZE(trap_handlers) && trap_handlers[tf->cause]); kassert(tf->cause < ARRAY_SIZE(trap_handlers) && trap_handlers[tf->cause]);
trap_handlers[tf->cause](tf); trap_handler f = (void*)pa2kva(trap_handlers[tf->cause]);
f(tf);
} }

35
pk/mmap.c

@ -10,6 +10,8 @@
#include <stdbool.h> #include <stdbool.h>
#include <errno.h> #include <errno.h>
uintptr_t kva2pa_offset;
typedef struct vmr_t { typedef struct vmr_t {
struct vmr_t* next; struct vmr_t* next;
uintptr_t addr; uintptr_t addr;
@ -68,7 +70,9 @@ static uintptr_t __page_alloc()
node->next = NULL; node->next = NULL;
page_freelist_depth--; page_freelist_depth--;
return (uintptr_t)memset((void*)addr, 0, RISCV_PGSIZE); memset((void*)pa2kva(addr), 0, RISCV_PGSIZE);
return addr;
} }
static uintptr_t __page_alloc_assert() static uintptr_t __page_alloc_assert()
@ -103,7 +107,7 @@ static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
size_t offset, unsigned refcnt, int prot) size_t offset, unsigned refcnt, int prot)
{ {
if (vmr_freelist_head == NULL) { if (vmr_freelist_head == NULL) {
vmr_t* new_vmrs = (vmr_t*)__page_alloc(); vmr_t* new_vmrs = (vmr_t*)pa2kva(__page_alloc());
if (new_vmrs == NULL) if (new_vmrs == NULL)
return NULL; return NULL;
@ -161,7 +165,7 @@ static size_t pt_idx(uintptr_t addr, int level)
static pte_t* __walk_internal(uintptr_t addr, int create, int level) static pte_t* __walk_internal(uintptr_t addr, int create, int level)
{ {
pte_t* t = root_page_table; pte_t* t = (pte_t*)pa2kva(root_page_table);
for (int i = RISCV_PGLEVELS - 1; i > level; i--) { for (int i = RISCV_PGLEVELS - 1; i > level; i--) {
size_t idx = pt_idx(addr, i); size_t idx = pt_idx(addr, i);
if (unlikely(!(t[idx] & PTE_V))) { if (unlikely(!(t[idx] & PTE_V))) {
@ -174,7 +178,7 @@ static pte_t* __walk_internal(uintptr_t addr, int create, int level)
return 0; return 0;
} }
} }
t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT); t = (pte_t*)pa2kva(pte_ppn(t[idx]) << RISCV_PGSHIFT);
} }
return &t[pt_idx(addr, level)]; return &t[pt_idx(addr, level)];
} }
@ -251,8 +255,8 @@ static int __handle_page_fault(uintptr_t vaddr, int prot)
return -1; return -1;
else if (!(*pte & PTE_V)) else if (!(*pte & PTE_V))
{ {
uintptr_t kva = __page_alloc_assert(); uintptr_t ppn = __page_alloc_assert() / RISCV_PGSIZE;
uintptr_t ppn = kva / RISCV_PGSIZE; uintptr_t kva = pa2kva(ppn * RISCV_PGSIZE);
vmr_t* v = (vmr_t*)*pte; vmr_t* v = (vmr_t*)*pte;
*pte = pte_create(ppn, prot_to_type(PROT_READ|PROT_WRITE, 0)); *pte = pte_create(ppn, prot_to_type(PROT_READ|PROT_WRITE, 0));
@ -488,6 +492,10 @@ void populate_mapping(const void* start, size_t size, int prot)
uintptr_t pk_vm_init() uintptr_t pk_vm_init()
{ {
// PA space must fit within half of VA space
uintptr_t user_size = -KVA_START;
mem_size = MIN(mem_size, user_size);
extern char _end; extern char _end;
first_free_page = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE); first_free_page = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
free_pages = (mem_size - (first_free_page - MEM_START)) / RISCV_PGSIZE; free_pages = (mem_size - (first_free_page - MEM_START)) / RISCV_PGSIZE;
@ -498,19 +506,18 @@ uintptr_t pk_vm_init()
next_free_page = freelist_node_array_size / RISCV_PGSIZE; next_free_page = freelist_node_array_size / RISCV_PGSIZE;
root_page_table = (void*)__page_alloc_assert(); root_page_table = (void*)__page_alloc_assert();
__map_kernel_range(MEM_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC); __map_kernel_range(KVA_START, MEM_START, mem_size, PROT_READ|PROT_WRITE|PROT_EXEC);
current.mmap_max = current.brk_max = MEM_START; current.mmap_max = current.brk_max = user_size;
size_t mem_pages = mem_size >> RISCV_PGSHIFT;
size_t stack_size = MIN(mem_pages >> 5, 2048) * RISCV_PGSIZE;
size_t stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
kassert(stack_bottom != (uintptr_t)-1);
current.stack_top = stack_bottom + stack_size;
flush_tlb(); flush_tlb();
write_csr(sptbr, ((uintptr_t)root_page_table >> RISCV_PGSHIFT) | SATP_MODE_CHOICE); write_csr(sptbr, ((uintptr_t)root_page_table >> RISCV_PGSHIFT) | SATP_MODE_CHOICE);
uintptr_t kernel_stack_top = __page_alloc_assert() + RISCV_PGSIZE; uintptr_t kernel_stack_top = __page_alloc_assert() + RISCV_PGSIZE;
// relocate
kva2pa_offset = KVA_START - MEM_START;
page_freelist_node_array = (void*)pa2kva(page_freelist_node_array);
return kernel_stack_top; return kernel_stack_top;
} }

9
pk/mmap.h

@ -34,7 +34,12 @@ uintptr_t do_mremap(uintptr_t addr, size_t old_size, size_t new_size, int flags)
uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot); uintptr_t do_mprotect(uintptr_t addr, size_t length, int prot);
uintptr_t do_brk(uintptr_t addr); uintptr_t do_brk(uintptr_t addr);
#define kva2pa(va) ((uintptr_t)(va)) #define KVA_START ((uintptr_t)-1 << (VA_BITS-1))
#define is_uva(va) ((uintptr_t)(va) < MEM_START)
extern uintptr_t kva2pa_offset;
#define kva2pa(va) ((uintptr_t)(va) - kva2pa_offset)
#define pa2kva(pa) ((uintptr_t)(pa) + kva2pa_offset)
#define kva2pa_maybe(va) ((uintptr_t)(va) >= KVA_START ? kva2pa(va) : (uintptr_t)(va))
#define is_uva(va) ((uintptr_t)(va) < KVA_START)
#endif #endif

36
pk/pk.c

@ -6,6 +6,7 @@
#include "elf.h" #include "elf.h"
#include "mtrap.h" #include "mtrap.h"
#include "frontend.h" #include "frontend.h"
#include "bits.h"
#include "usermem.h" #include "usermem.h"
#include <stdbool.h> #include <stdbool.h>
@ -67,11 +68,11 @@ static size_t parse_args(arg_buf* args)
uint64_t* pk_argv = &args->buf[1]; uint64_t* pk_argv = &args->buf[1];
// pk_argv[0] is the proxy kernel itself. skip it and any flags. // pk_argv[0] is the proxy kernel itself. skip it and any flags.
size_t pk_argc = args->buf[0], arg = 1; size_t pk_argc = args->buf[0], arg = 1;
for ( ; arg < pk_argc && *(char*)(uintptr_t)pk_argv[arg] == '-'; arg++) for ( ; arg < pk_argc && *(char*)pa2kva(pk_argv[arg]) == '-'; arg++)
handle_option((const char*)(uintptr_t)pk_argv[arg]); handle_option((const char*)pa2kva(pk_argv[arg]));
for (size_t i = 0; arg + i < pk_argc; i++) for (size_t i = 0; arg + i < pk_argc; i++)
args->argv[i] = (char*)(uintptr_t)pk_argv[arg + i]; args->argv[i] = (char*)pa2kva(pk_argv[arg + i]);
return pk_argc - arg; return pk_argc - arg;
} }
@ -85,6 +86,12 @@ static void init_tf(trapframe_t* tf, long pc, long sp)
static void run_loaded_program(size_t argc, char** argv, uintptr_t kstack_top) static void run_loaded_program(size_t argc, char** argv, uintptr_t kstack_top)
{ {
size_t mem_pages = mem_size >> RISCV_PGSHIFT;
size_t stack_size = MIN(mem_pages >> 5, 2048) * RISCV_PGSIZE;
size_t stack_bottom = __do_mmap(current.mmap_max - stack_size, stack_size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 0, 0);
kassert(stack_bottom != (uintptr_t)-1);
current.stack_top = stack_bottom + stack_size;
// copy phdrs to user stack // copy phdrs to user stack
size_t stack_top = current.stack_top - current.phdr_size; size_t stack_top = current.stack_top - current.phdr_size;
memcpy_to_user((void*)stack_top, (void*)current.phdr, current.phdr_size); memcpy_to_user((void*)stack_top, (void*)current.phdr, current.phdr_size);
@ -167,15 +174,25 @@ static void run_loaded_program(size_t argc, char** argv, uintptr_t kstack_top)
start_user(&tf); start_user(&tf);
} }
static void rest_of_boot_loader(uintptr_t kstack_top) void rest_of_boot_loader(uintptr_t kstack_top);
asm ("\n\
.globl rest_of_boot_loader\n\
rest_of_boot_loader:\n\
mv sp, a0\n\
tail rest_of_boot_loader_2");
void rest_of_boot_loader_2(uintptr_t kstack_top)
{ {
arg_buf args; file_init();
static arg_buf args; // avoid large stack allocation
size_t argc = parse_args(&args); size_t argc = parse_args(&args);
if (!argc) if (!argc)
panic("tell me what ELF to load!"); panic("tell me what ELF to load!");
// load program named by argv[0] // load program named by argv[0]
long phdrs[128]; static long phdrs[128]; // avoid large stack allocation
current.phdr = (uintptr_t)phdrs; current.phdr = (uintptr_t)phdrs;
current.phdr_size = sizeof(phdrs); current.phdr_size = sizeof(phdrs);
load_elf(args.argv[0], &current); load_elf(args.argv[0], &current);
@ -185,14 +202,15 @@ static void rest_of_boot_loader(uintptr_t kstack_top)
void boot_loader(uintptr_t dtb) void boot_loader(uintptr_t dtb)
{ {
uintptr_t kernel_stack_top = pk_vm_init();
extern char trap_entry; extern char trap_entry;
write_csr(stvec, &trap_entry); write_csr(stvec, pa2kva(&trap_entry));
write_csr(sscratch, 0); write_csr(sscratch, 0);
write_csr(sie, 0); write_csr(sie, 0);
set_csr(sstatus, SSTATUS_FS | SSTATUS_VS); set_csr(sstatus, SSTATUS_FS | SSTATUS_VS);
file_init(); enter_supervisor_mode((void*)pa2kva(rest_of_boot_loader), pa2kva(kernel_stack_top), 0);
enter_supervisor_mode(rest_of_boot_loader, pk_vm_init(), 0);
} }
void boot_other_hart(uintptr_t dtb) void boot_other_hart(uintptr_t dtb)

2
pk/syscall.c

@ -662,5 +662,7 @@ long do_syscall(long a0, long a1, long a2, long a3, long a4, long a5, unsigned l
if (!f) if (!f)
panic("bad syscall #%ld!",n); panic("bad syscall #%ld!",n);
f = (void*)pa2kva(f);
return f(a0, a1, a2, a3, a4, a5, n); return f(a0, a1, a2, a3, a4, a5, n);
} }

Loading…
Cancel
Save