Browse Source

Support Linux ABI and (optionally) virtual memory

pull/2/head
Andrew Waterman 13 years ago
parent
commit
cc72987e65
  1. 12
      pk/atomic.h
  2. 147
      pk/console.c
  3. 78
      pk/elf.c
  4. 9
      pk/elf.h
  5. 1
      pk/entry.S
  6. 108
      pk/file.c
  7. 11
      pk/file.h
  8. 4
      pk/fp.c
  9. 33
      pk/handlers.c
  10. 220
      pk/init.c
  11. 14
      pk/pcr.h
  12. 39
      pk/pk.h
  13. 3
      pk/pk.mk.in
  14. 121
      pk/syscall.c
  15. 4
      pk/syscall.h
  16. 392
      pk/vm.c
  17. 26
      pk/vm.h

12
pk/atomic.h

@ -43,6 +43,18 @@ static inline long atomic_swap(atomic_t* a, long val)
#endif
}
// Compare-and-swap on an atomic_t: if *a holds `compare`, store `swap`.
// Returns the value observed in *a before any update, so the caller
// detects success by checking that the result equals `compare`.
static inline long atomic_cas(atomic_t* a, long compare, long swap)
{
#ifndef PK_ENABLE_ATOMICS
  // Fallback when atomics are not enabled: plain read followed by a set.
  long observed = atomic_read(a);
  if (observed != compare)
    return observed;
  atomic_set(a, swap);
  return observed;
#else
  return __sync_val_compare_and_swap(&a->val, compare, swap);
#endif
}
static inline void spinlock_lock(spinlock_t* lock)
{
do

147
pk/console.c

@ -0,0 +1,147 @@
#include "pk.h"
#include "file.h"
#include "syscall.h"
#include <stdint.h>
#include <stdarg.h>
#include <stdbool.h>
#include <string.h>
// Minimal printf-style formatter: expands format string `s` with the
// arguments in `vl` into `out` and NUL-terminates the result.
//
// Supported conversions:
//   %x   zero-padded hex, one digit per nibble of the argument type
//   %p   "0x" followed by the argument printed as a long hex value
//   %d   signed decimal
//   %s   NUL-terminated string
//   %c   single character
//   %%   literal percent sign
// An 'l' before %x or %d selects a long argument instead of an int.
// Any other conversion character panics with "bad fmt".
//
// `out` is not bounds-checked: the caller must supply a buffer large
// enough for the expanded text.
static void vsprintk(char* out, const char* s, va_list vl)
{
  bool format = false;   // true while inside a '%' conversion
  bool longarg = false;  // true once an 'l' modifier has been seen

  for ( ; *s; s++)
  {
    if (!format)
    {
      if (*s == '%')
        format = true;
      else
        *out++ = *s;
      continue;
    }

    switch (*s)
    {
      case 'l':
        longarg = true;
        break;
      case '%':
        *out++ = '%';
        longarg = false;
        format = false;
        break;
      case 'p':
        longarg = true;
        *out++ = '0';
        *out++ = 'x';
        /* fallthrough: the pointer is printed as a long hex number */
      case 'x':
      {
        // Unsigned, so the nibble shifts never involve a sign bit.
        unsigned long n = longarg ? (unsigned long)va_arg(vl, long)
                                  : (unsigned long)va_arg(vl, int);
        int nibbles = 2 * (int)(longarg ? sizeof(long) : sizeof(int));
        for (int i = nibbles - 1; i >= 0; i--)
        {
          int d = (int)((n >> (4*i)) & 0xF);
          *out++ = (char)(d < 10 ? '0'+d : 'a'+d-10);
        }
        longarg = false;
        format = false;
        break;
      }
      case 'd':
      {
        long n = longarg ? va_arg(vl, long) : va_arg(vl, int);
        // Work on the magnitude as unsigned: negating the most negative
        // long as a signed value would overflow.
        unsigned long mag = (unsigned long)n;
        if (n < 0)
        {
          mag = 0UL - mag;
          *out++ = '-';
        }
        long digits = 1;
        for (unsigned long rest = mag; rest /= 10; digits++)
          ;
        // Digits are produced least-significant first, so fill backwards.
        for (long i = digits - 1; i >= 0; i--)
        {
          out[i] = (char)('0' + mag % 10);
          mag /= 10;
        }
        out += digits;
        longarg = false;
        format = false;
        break;
      }
      case 's':
      {
        const char* s2 = va_arg(vl, const char*);
        while (*s2)
          *out++ = *s2++;
        longarg = false;
        format = false;
        break;
      }
      case 'c':
      {
        *out++ = (char)va_arg(vl, int);
        longarg = false;
        format = false;
        break;
      }
      default:
        panic("bad fmt");
    }
  }
  *out++ = '\0';
}
// Formats `s` with the arguments in `vl` and writes the text to stderr.
// The text is staged in a fixed 1024-byte stack buffer whose size is not
// checked against the length of the formatted output.
static void vprintk(const char* s, va_list vl)
{
  char buf[1024]; // XXX
  vsprintk(buf, s, vl);
  size_t len = strlen(buf);
  file_write(stderr, buf, len);
}
// Kernel printf: formats `s` with the variadic arguments and emits the
// result through vprintk().
void printk(const char* s, ...)
{
  va_list args;

  va_start(args, s);
  vprintk(s, args);
  va_end(args);
}
// Kernel sprintf: formats `s` with the variadic arguments into `out`.
// No length is passed, so `out` must be large enough for the result.
void sprintk(char* out, const char* s, ...)
{
  va_list args;

  va_start(args, s);
  vsprintk(out, s, args);
  va_end(args);
}
// Prints the contents of a trap frame: the 32 general-purpose registers,
// four per line, followed by sr, epc, badvaddr and the trapping insn.
// Note that gpr[0] in the frame is overwritten with 0 before printing.
void dump_tf(trapframe_t* tf)
{
  static const char* regnames[] = {
    "z ", "ra", "s0", "s1", "s2", "s3", "s4", "s5",
    "s6", "s7", "s8", "s9", "sA", "sB", "sp", "tp",
    "v0", "v1", "a0", "a1", "a2", "a3", "a4", "a5",
    "a6", "a7", "a8", "a9", "aA", "aB", "aC", "aD"
  };

  tf->gpr[0] = 0;

  for (int r = 0; r < 32; r++)
  {
    // Every fourth register ends the line; the others are space-separated.
    int sep = (r % 4 == 3) ? '\n' : ' ';
    printk("%s %lx%c", regnames[r], tf->gpr[r], sep);
  }

  printk("sr %lx pc %lx va %lx insn %x\n", tf->sr, tf->epc, tf->badvaddr,
         (uint32_t)tf->insn);
}
// Prints a formatted panic message and terminates via sys_exit(-1).
// Never returns.
void do_panic(const char* s, ...)
{
  va_list vl;
  va_start(vl, s);
  vprintk(s, vl);
  // Release the argument list before exiting: sys_exit() does not return,
  // so anything placed after it would never run.
  va_end(vl);
  sys_exit(-1);
}
// Reports a failed assertion: panics with "assertion failed: " followed by
// `s`, the text describing the failing condition.
void kassert_fail(const char* s)
{
do_panic("assertion failed: %s\n", s);
}

78
pk/elf.c

@ -1,72 +1,74 @@
// See LICENSE for license details.
#include "file.h"
#include "pk.h"
#include "pcr.h"
#include "vm.h"
#include <sys/stat.h>
#include <fcntl.h>
#include <elf.h>
#include <string.h>
#include "file.h"
#include "pk.h"
long load_elf(const char* fn, int* user64)
void load_elf(const char* fn, elf_info* info)
{
sysret_t ret = file_open(fn, strlen(fn)+1, O_RDONLY, 0);
sysret_t ret = file_open(fn, O_RDONLY, 0);
file_t* file = (file_t*)ret.result;
if(ret.result == -1)
if (ret.result == -1)
goto fail;
char buf[2048]; // XXX
int header_size = file_read(file, buf, sizeof(buf)).result;
const Elf64_Ehdr* eh64 = (const Elf64_Ehdr*)buf;
if(header_size < (int)sizeof(Elf64_Ehdr) ||
!(eh64->e_ident[0] == '\177' && eh64->e_ident[1] == 'E' &&
eh64->e_ident[2] == 'L' && eh64->e_ident[3] == 'F'))
Elf64_Ehdr eh64;
ssize_t ehdr_size = file_pread(file, &eh64, sizeof(eh64), 0).result;
if (ehdr_size < (ssize_t)sizeof(eh64) ||
!(eh64.e_ident[0] == '\177' && eh64.e_ident[1] == 'E' &&
eh64.e_ident[2] == 'L' && eh64.e_ident[3] == 'F'))
goto fail;
#define LOAD_ELF do { \
eh = (typeof(eh))buf; \
kassert(header_size >= eh->e_phoff + eh->e_phnum*sizeof(*ph)); \
ph = (typeof(ph))(buf+eh->e_phoff); \
eh = (typeof(eh))&eh64; \
size_t phdr_size = eh->e_phnum*sizeof(*ph); \
if (info->phdr_top - phdr_size < info->stack_bottom) \
goto fail; \
info->phdr = info->phdr_top - phdr_size; \
ssize_t ret = file_pread(file, (void*)info->phdr, phdr_size, eh->e_phoff).result; \
if (ret < (ssize_t)phdr_size) goto fail; \
info->entry = eh->e_entry; \
info->phnum = eh->e_phnum; \
info->phent = sizeof(*ph); \
ph = (typeof(ph))info->phdr; \
for(int i = 0; i < eh->e_phnum; i++, ph++) { \
if(ph->p_type == SHT_PROGBITS && ph->p_memsz) { \
extern char _end; \
if((char*)(long)ph->p_vaddr < &_end) \
{ \
long diff = &_end - (char*)(long)ph->p_vaddr; \
ph->p_vaddr += diff; \
ph->p_offset += diff; \
ph->p_memsz = diff >= ph->p_memsz ? 0 : ph->p_memsz - diff; \
ph->p_filesz = diff >= ph->p_filesz ? 0 : ph->p_filesz - diff; \
} \
if(file_pread(file, (char*)(long)ph->p_vaddr, ph->p_filesz, ph->p_offset).result != ph->p_filesz) \
info->brk_min = MAX(info->brk_min, ph->p_vaddr + ph->p_memsz); \
size_t vaddr = ROUNDDOWN(ph->p_vaddr, RISCV_PGSIZE), prepad = ph->p_vaddr - vaddr; \
size_t memsz = ph->p_memsz + prepad, filesz = ph->p_filesz + prepad; \
size_t offset = ph->p_offset - prepad; \
if (__do_mmap(vaddr, filesz, -1, MAP_FIXED|MAP_PRIVATE, file, offset) != vaddr) \
goto fail; \
memset((char*)(long)ph->p_vaddr+ph->p_filesz, 0, ph->p_memsz-ph->p_filesz); \
size_t mapped = ROUNDUP(filesz, RISCV_PGSIZE); \
if (memsz > mapped) \
if (__do_mmap(vaddr + mapped, memsz - mapped, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) != vaddr + mapped) \
goto fail; \
} \
} \
} while(0)
long entry;
*user64 = 0;
if (IS_ELF32(*eh64))
info->elf64 = IS_ELF64(eh64);
if (info->elf64)
{
Elf32_Ehdr* eh;
Elf32_Phdr* ph;
Elf64_Ehdr* eh;
Elf64_Phdr* ph;
LOAD_ELF;
entry = eh->e_entry;
}
else if (IS_ELF64(*eh64))
else if (IS_ELF32(eh64))
{
*user64 = 1;
Elf64_Ehdr* eh;
Elf64_Phdr* ph;
Elf32_Ehdr* eh;
Elf32_Phdr* ph;
LOAD_ELF;
entry = eh->e_entry;
}
else
goto fail;
file_decref(file);
return entry;
return;
fail:
panic("couldn't open ELF program: %s!", fn);

9
pk/elf.h

@ -1,7 +1,5 @@
// See LICENSE for license details.
// See LICENSE for details.
#ifndef _ELF_H
#define _ELF_H
@ -17,6 +15,13 @@
#define SHT_PROGBITS 1
#define SHT_NOBITS 8
#define AT_NULL 0
#define AT_PHDR 3
#define AT_PHENT 4
#define AT_PHNUM 5
#define AT_PAGESZ 6
#define AT_ENTRY 9
typedef struct {
uint8_t e_ident[16];
uint16_t e_type;

1
pk/entry.S

@ -140,6 +140,7 @@ trap_entry:
jal handle_trap
.bss
.align 4
.global stack_bot
.global stack_top
stack_bot:

108
pk/file.c

@ -6,66 +6,52 @@
#include "pk.h"
#include "frontend.h"
#include "pcr.h"
#include "vm.h"
#define MAX_FDS 32
file_t* fds[MAX_FDS];
static file_t* fds[MAX_FDS];
#define MAX_FILES 32
file_t files[MAX_FILES] = {[0 ... MAX_FILES-1] = {-1,{0}}};
static file_t files[MAX_FILES] = {[0 ... MAX_FILES-1] = {-1,{0}}};
file_t *stdout, *stdin, *stderr;
static void file_incref(file_t* f)
void file_incref(file_t* f)
{
atomic_add(&f->refcnt,1);
atomic_add(&f->refcnt, 1);
}
void file_decref(file_t* f)
{
if(atomic_add(&f->refcnt,-1) == 2)
if (atomic_add(&f->refcnt, -1) == 2)
{
if(f->kfd != -1)
{
frontend_syscall(SYS_close,f->kfd,0,0,0);
f->kfd = -1;
}
atomic_add(&f->refcnt,-1); // I think this could just be atomic_set(..,0)
int kfd = f->kfd;
mb();
atomic_set(&f->refcnt, 0);
frontend_syscall(SYS_close, kfd, 0, 0, 0);
}
}
static file_t* file_get_free()
{
for(int i = 0; i < MAX_FILES; i++)
{
if(atomic_read(&files[i].refcnt) == 0)
{
if(atomic_add(&files[i].refcnt,1) == 0)
{
atomic_add(&files[i].refcnt,1);
return &files[i];
}
file_decref(&files[i]);
}
}
for (file_t* f = files; f < files + MAX_FILES; f++)
if (atomic_read(&f->refcnt) == 0 && atomic_cas(&f->refcnt, 0, 2) == 0)
return f;
return NULL;
}
static int fd_get_free()
int file_dup(file_t* f)
{
for(int i = 0; i < MAX_FDS; i++)
if(fds[i] == NULL)
for (int i = 0; i < MAX_FDS; i++)
{
if (fds[i] == NULL && __sync_bool_compare_and_swap(&fds[i], 0, f))
{
file_incref(f);
return i;
}
}
return -1;
}
int file_dup(file_t* f)
{
int fd = fd_get_free();
if(fd == -1)
return -1;
file_incref(f);
fds[fd] = f;
return fd;
}
void file_init()
{
stdin = file_get_free();
@ -84,16 +70,28 @@ void file_init()
file_t* file_get(int fd)
{
return fd < 0 || fd >= MAX_FDS ? NULL : fds[fd];
file_t* f;
if (fd < 0 || fd >= MAX_FDS || (f = fds[fd]) == NULL)
return 0;
long old_cnt;
do {
old_cnt = atomic_read(&f->refcnt);
if (old_cnt == 0)
return 0;
} while (atomic_cas(&f->refcnt, old_cnt, old_cnt+1) != old_cnt);
return f;
}
sysret_t file_open(const char* fn, size_t len, int flags, int mode)
sysret_t file_open(const char* fn, int flags, int mode)
{
file_t* f = file_get_free();
if(!f)
return (sysret_t){-1,ENOMEM};
sysret_t ret = frontend_syscall(SYS_open,(long)fn,len,flags,mode);
size_t fn_size = strlen(fn)+1;
sysret_t ret = frontend_syscall(SYS_open, (long)fn, fn_size, flags, mode);
if(ret.result != -1)
{
f->kfd = ret.result;
@ -108,39 +106,47 @@ sysret_t file_open(const char* fn, size_t len, int flags, int mode)
int fd_close(int fd)
{
file_t* f = file_get(fd);
if(!f)
if (!f)
return -1;
int success = __sync_bool_compare_and_swap(&fds[fd], f, 0);
file_decref(f);
if (!success)
return -1;
fds[fd] = NULL;
file_decref(f);
return 0;
}
sysret_t file_read(file_t* f, char* buf, size_t size)
sysret_t file_read(file_t* f, void* buf, size_t size)
{
return frontend_syscall(SYS_read,f->kfd,(long)buf,size,0);
populate_mapping(buf, size, PROT_WRITE);
return frontend_syscall(SYS_read, f->kfd, (uintptr_t)buf, size, 0);
}
sysret_t file_pread(file_t* f, char* buf, size_t size, off_t offset)
sysret_t file_pread(file_t* f, void* buf, size_t size, off_t offset)
{
return frontend_syscall(SYS_pread,f->kfd,(long)buf,size,offset);
populate_mapping(buf, size, PROT_WRITE);
return frontend_syscall(SYS_pread, f->kfd, (uintptr_t)buf, size, offset);
}
sysret_t file_write(file_t* f, const char* buf, size_t size)
sysret_t file_write(file_t* f, const void* buf, size_t size)
{
return frontend_syscall(SYS_write,f->kfd,(long)buf,size,0);
populate_mapping(buf, size, PROT_READ);
return frontend_syscall(SYS_write, f->kfd, (uintptr_t)buf, size, 0);
}
sysret_t file_pwrite(file_t* f, const char* buf, size_t size, off_t offset)
sysret_t file_pwrite(file_t* f, const void* buf, size_t size, off_t offset)
{
return frontend_syscall(SYS_pwrite,f->kfd,(long)buf,size,offset);
populate_mapping(buf, size, PROT_READ);
return frontend_syscall(SYS_pwrite, f->kfd, (uintptr_t)buf, size, offset);
}
sysret_t file_stat(file_t* f, struct stat* s)
{
return frontend_syscall(SYS_fstat,f->kfd,(long)s,0,0);
populate_mapping(s, sizeof(*s), PROT_WRITE);
return frontend_syscall(SYS_fstat, f->kfd, (uintptr_t)s, 0, 0);
}
sysret_t file_lseek(file_t* f, size_t ptr, int dir)
{
return frontend_syscall(SYS_lseek,f->kfd,ptr,dir,0);
return frontend_syscall(SYS_lseek, f->kfd, ptr, dir, 0);
}

11
pk/file.h

@ -16,14 +16,15 @@ typedef struct file
extern file_t *stdin, *stdout, *stderr;
file_t* file_get(int fd);
sysret_t file_open(const char* fn, size_t len, int flags, int mode);
sysret_t file_open(const char* fn, int flags, int mode);
void file_decref(file_t*);
void file_incref(file_t*);
int file_dup(file_t*);
sysret_t file_pwrite(file_t* f, const char* buf, size_t n, off_t off);
sysret_t file_pread(file_t* f, char* buf, size_t n, off_t off);
sysret_t file_write(file_t* f, const char* buf, size_t n);
sysret_t file_read(file_t* f, char* buf, size_t n);
sysret_t file_pwrite(file_t* f, const void* buf, size_t n, off_t off);
sysret_t file_pread(file_t* f, void* buf, size_t n, off_t off);
sysret_t file_write(file_t* f, const void* buf, size_t n);
sysret_t file_read(file_t* f, void* buf, size_t n);
sysret_t file_stat(file_t* f, struct stat* s);
sysret_t file_lseek(file_t* f, size_t ptr, int dir);
int fd_close(int fd);

4
pk/fp.c

@ -21,10 +21,6 @@ static uint64_t get_fp_reg(unsigned int which, unsigned int dp);
static inline void
validate_address(trapframe_t* tf, long addr, int size, int store)
{
if(addr & (size-1))
store ? handle_misaligned_store(tf) : handle_misaligned_load(tf);
if(addr < USER_START)
store ? handle_fault_store(tf) : handle_fault_load(tf);
}
int emulate_fp(trapframe_t* tf)

33
pk/handlers.c

@ -3,6 +3,8 @@
#include "pcr.h"
#include "pk.h"
#include "config.h"
#include "syscall.h"
#include "vm.h"
int have_fp = 1; // initialized to 1 because it can't be in the .bss section!
int have_vector = 1;
@ -55,8 +57,6 @@ static void handle_illegal_instruction(trapframe_t* tf)
static void handle_fp_disabled(trapframe_t* tf)
{
setpcr(PCR_SR, SR_ET);
if(have_fp && !(mfpcr(PCR_SR) & SR_EF))
init_fp(tf);
else
@ -88,39 +88,46 @@ void handle_misaligned_store(trapframe_t* tf)
panic("Misaligned store!");
}
static void handle_fault_fetch(trapframe_t* tf)
static void segfault(trapframe_t* tf, uintptr_t addr, const char* type)
{
dump_tf(tf);
panic("Faulting instruction access!");
const char* who = (tf->sr & SR_PS) ? "Kernel" : "User";
panic("%s %s segfault @ %p", who, type, addr);
}
static void handle_fault_fetch(trapframe_t* tf)
{
if (handle_page_fault(tf->epc, PROT_EXEC) != 0)
segfault(tf, tf->epc, "fetch");
}
void handle_fault_load(trapframe_t* tf)
{
dump_tf(tf);
panic("Faulting load!");
if (handle_page_fault(tf->badvaddr, PROT_READ) != 0)
segfault(tf, tf->badvaddr, "load");
}
void handle_fault_store(trapframe_t* tf)
{
dump_tf(tf);
panic("Faulting store!");
if (handle_page_fault(tf->badvaddr, PROT_WRITE) != 0)
segfault(tf, tf->badvaddr, "store");
}
static void handle_syscall(trapframe_t* tf)
{
setpcr(PCR_SR, SR_ET);
long n = tf->gpr[16];
sysret_t ret = syscall(tf->gpr[18], tf->gpr[19], tf->gpr[20], tf->gpr[21], n);
sysret_t ret = syscall(tf->gpr[18], tf->gpr[19], tf->gpr[20], tf->gpr[21],
tf->gpr[22], tf->gpr[23], tf->gpr[16]);
tf->gpr[16] = ret.result;
tf->gpr[17] = ret.result == -1 ? ret.err : 0;
tf->gpr[21] = ret.err;
advance_pc(tf);
}
void handle_trap(trapframe_t* tf)
{
setpcr(PCR_SR, SR_ET);
typedef void (*trap_handler)(trapframe_t*);
const static trap_handler trap_handlers[] = {

220
pk/init.c

@ -3,189 +3,97 @@
#include "pcr.h"
#include "pk.h"
#include "file.h"
#include "vm.h"
#include "frontend.h"
#include <stdarg.h>
#include "elf.h"
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
static void vsprintk(char* out, const char* s, va_list vl)
{
bool format = false;
bool longarg = false;
for( ; *s; s++)
{
if(format)
{
switch(*s)
{
case 'l':
longarg = true;
break;
case 'x':
{
long n = longarg ? va_arg(vl,long) : va_arg(vl,int);
for(int i = 2*(longarg ? sizeof(long) : sizeof(int))-1; i >= 0; i--)
{
int d = (n >> (4*i)) & 0xF;
*out++ = (d < 10 ? '0'+d : 'a'+d-10);
}
longarg = false;
format = false;
break;
}
case 'd':
{
long n = longarg ? va_arg(vl,long) : va_arg(vl,int);
if(n < 0)
{
n = -n;
*out++ = '-';
}
long digits = 1;
for(long nn = n ; nn /= 10; digits++);
for(int i = digits-1; i >= 0; i--)
{
out[i] = '0' + n%10;
n /= 10;
}
out += digits;
longarg = false;
format = false;
break;
}
case 's':
{
const char* s2 = va_arg(vl,const char*);
while(*s2)
*out++ = *s2++;
longarg = false;
format = false;
break;
}
case 'c':
{
*out++ = (char)va_arg(vl,int);
longarg = false;
format = false;
break;
}
default:
panic("bad fmt");
}
}
else if(*s == '%')
format = true;
else
*out++ = *s;
}
*out++ = '\0';
}
void printk(const char* s, ...)
{
va_list vl;
va_start(vl,s);
char out[1024]; // XXX
vsprintk(out,s,vl);
file_write(stderr,out,strlen(out));
va_end(vl);
}
void sprintk(char* out, const char* s, ...)
{
va_list vl;
va_start(vl,s);
vsprintk(out,s,vl);
va_end(vl);
}
void dump_tf(trapframe_t* tf)
{
static const char* regnames[] = {
"z ", "ra", "s0", "s1", "s2", "s3", "s4", "s5",
"s6", "s7", "s8", "s9", "sA", "sB", "sp", "tp",
"v0", "v1", "a0", "a1", "a2", "a3", "a4", "a5",
"a6", "a7", "a8", "a9", "aA", "aB", "aC", "aD"
};
tf->gpr[0] = 0;
for(int i = 0; i < 32; i+=4)
{
for(int j = 0; j < 4; j++)
printk("%s %lx%c",regnames[i+j],tf->gpr[i+j],j < 3 ? ' ' : '\n');
}
printk("sr %lx pc %lx va %lx insn %x\n",tf->sr,tf->epc,tf->badvaddr,
(uint32_t)tf->insn);
}
elf_info current;
void init_tf(trapframe_t* tf, long pc, long sp, int user64)
{
memset(tf,0,sizeof(*tf));
if(sizeof(void*) != 8)
kassert(!user64);
tf->sr = (mfpcr(PCR_SR) & (SR_IM | SR_S64)) | SR_S | SR_EC;
tf->sr = (mfpcr(PCR_SR) & (SR_IM | SR_S64 | SR_VM)) | SR_S | SR_EC;
if(user64)
tf->sr |= SR_U64;
tf->gpr[14] = sp;
tf->epc = pc;
}
static void bss_init()
static void user_init()
{
// front-end server zeroes the bss automagically
}
struct args {
uint64_t argc;
uint64_t argv[];
};
struct args
{
uint64_t argc;
uint64_t argv[];
};
const int argc_argv_size = 1024;
size_t stack_top = current.stack_top;
struct args* args = (struct args*)(stack_top - argc_argv_size);
populate_mapping(args, argc_argv_size, PROT_WRITE);
sysret_t r = frontend_syscall(SYS_getmainvars, (long)args, argc_argv_size, 0, 0);
kassert(r.result == 0);
static struct args* stack_init(unsigned long* stack_top)
{
*stack_top -= USER_MAINVARS_SIZE;
// argv[0] is the proxy kernel itself. skip it.
args->argv[0] = args->argc - 1;
args = (struct args*)args->argv;
stack_top = (uintptr_t)args;
// load program named by argv[0]
current.phdr_top = stack_top;
load_elf((char*)args->argv[0], &current);
struct {
long key;
long value;
} aux[] = {
{AT_ENTRY, current.entry},
{AT_PHNUM, current.phnum},
{AT_PHENT, current.phent},
{AT_PHDR, current.phdr},
{AT_PAGESZ, RISCV_PGSIZE},
{AT_NULL, 0}
};
struct args* args = (struct args*)(*stack_top - sizeof(args->argc));
sysret_t r = frontend_syscall(SYS_getmainvars, (long)args, USER_MAINVARS_SIZE, 0, 0);
kassert(r.result == 0);
// chop off argv[0]
args->argv[0] = args->argc-1;
return (struct args*)args->argv;
}
// place argc, argv, envp, auxp on stack
#define PUSH_ARG(type, value) do { \
*((type*)sp) = value; \
sp += sizeof(type); \
} while (0)
#define STACK_INIT(type) do { \
unsigned naux = sizeof(aux)/sizeof(aux[0]); \
stack_top -= (1 + args->argc + 1 + 1 + 2*naux) * sizeof(type); \
stack_top &= -16; \
long sp = stack_top; \
PUSH_ARG(type, args->argc); \
for (unsigned i = 0; i < args->argc; i++) \
PUSH_ARG(type, args->argv[i]); \
PUSH_ARG(type, 0); /* argv[argc] = NULL */ \
PUSH_ARG(type, 0); /* envp[0] = NULL */ \
for (unsigned i = 0; i < naux; i++) { \
PUSH_ARG(type, aux[i].key); \
PUSH_ARG(type, aux[i].value); \
} \
} while (0)
if (current.elf64)
STACK_INIT(uint64_t);
else
STACK_INIT(uint32_t);
static void jump_usrstart(const char* fn, long sp)
{
trapframe_t tf;
int user64;
long start = load_elf(fn, &user64);
init_tf(&tf, current.entry, stack_top, current.elf64);
__clear_cache(0, 0);
init_tf(&tf, start, sp, user64);
pop_tf(&tf);
}
uint32_t mem_mb;
void boot()
{
bss_init();
file_init();
// word 0 of memory contains # of MB of memory
mem_mb = *(uint32_t*)0;
unsigned long stack_top = 0x80000000;
if (mem_mb < stack_top / (1024 * 1024))
stack_top = mem_mb * (1024 * 1024);
struct args* args = stack_init(&stack_top);
jump_usrstart((char*)(long)args->argv[0], stack_top);
vm_init();
user_init();
}

14
pk/pcr.h

@ -64,6 +64,16 @@
#ifdef __riscv
#ifdef __riscv64
# define RISCV_PGLEVELS 3
# define RISCV_PGSHIFT 13
#else
# define RISCV_PGLEVELS 2
# define RISCV_PGSHIFT 12
#endif
#define RISCV_PGLEVEL_BITS 10
#define RISCV_PGSIZE (1 << RISCV_PGSHIFT)
#define ASM_CR(r) _ASM_CR(r)
#define _ASM_CR(r) cr##r
@ -85,6 +95,10 @@
asm volatile ("clearpcr %0,cr%2,%1" : "=r"(__tmp) : "i"(val), "i"(reg)); \
__tmp; })
#define rdcycle() ({ unsigned long __tmp; \
asm volatile ("rdcycle %0" : "=r"(__tmp)); \
__tmp; })
#endif
#endif

39
pk/pk.h

@ -3,13 +3,10 @@
#ifndef _PK_H
#define _PK_H
#define USER_MAINVARS_SIZE 0x1000
#define USER_START 0x10000
#ifndef __ASSEMBLER__
#include <stdint.h>
#include <machine/syscall.h>
#include <string.h>
typedef struct
{
@ -21,8 +18,15 @@ typedef struct
long insn;
} trapframe_t;
#define panic(s,...) do { printk(s"\n", ##__VA_ARGS__); sys_exit(-1); } while(0)
#define kassert(cond) do { if(!(cond)) panic("assertion failed: "#cond); } while(0)
#define panic(s,...) do { do_panic(s"\n", ##__VA_ARGS__); } while(0)
#define kassert(cond) do { if(!(cond)) kassert_fail(""#cond); } while(0)
void do_panic(const char* s, ...) __attribute__((noreturn));
void kassert_fail(const char* s) __attribute__((noreturn));
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define CLAMP(a, lo, hi) MIN(MAX(a, lo), hi)
#define ROUNDUP(a, b) ((((a)-1)/(b)+1)*(b))
#define ROUNDDOWN(a, b) ((a)/(b)*(b))
#ifdef __cplusplus
extern "C" {
@ -48,10 +52,25 @@ void handle_fault_load(trapframe_t*);
void handle_fault_store(trapframe_t*);
void boot();
void sys_exit(int code) __attribute__((noreturn));
sysret_t syscall(long a0, long a1, long a2, long a3, long n);
long load_elf(const char* fn, int* user64);
typedef struct {
int elf64;
int phent;
int phnum;
size_t user_min;
size_t entry;
size_t brk_min;
size_t brk;
size_t brk_max;
size_t mmap_max;
size_t stack_bottom;
size_t phdr;
size_t phdr_top;
size_t stack_top;
} elf_info;
extern elf_info current;
void load_elf(const char* fn, elf_info* info);
static inline void advance_pc(trapframe_t* tf)
{

3
pk/pk.mk.in

@ -12,6 +12,7 @@ pk_hdrs = \
frontend.h \
riscv-opc.h \
elf.h \
vm.h \
pk_c_srcs = \
init.c \
@ -22,6 +23,8 @@ pk_c_srcs = \
fp.c \
int.c \
elf.c \
console.c \
vm.c \
pk_asm_srcs = \
entry.S \

121
pk/syscall.c

@ -1,44 +1,53 @@
// See LICENSE for license details.
#include <machine/syscall.h>
#include <string.h>
#include <errno.h>
#include "syscall.h"
#include "pk.h"
#include "pcr.h"
#include "file.h"
#include "frontend.h"
#include "vm.h"
#include <string.h>
#include <errno.h>
typedef sysret_t (*syscall_t)(long,long,long,long,long);
typedef sysret_t (*syscall_t)(long, long, long, long, long, long, long);
void sys_exit(int code)
{
frontend_syscall(SYS_exit,code,0,0,0);
panic("exit didn't exit!");
frontend_syscall(SYS_exit, code, 0, 0, 0);
while (1);
}
sysret_t sys_read(int fd, char* buf, size_t n)
{
sysret_t r = {-1,EBADF};
file_t* f = file_get(fd);
if(!f)
return r;
return file_read(f,buf,n);
if (f)
{
r = file_read(f, buf, n);
file_decref(f);
}
return r;
}
sysret_t sys_write(int fd, const char* buf, size_t n)
{
sysret_t r = {-1,EBADF};
file_t* f = file_get(fd);
if(!f)
return r;
return file_write(f,buf,n);
if (f)
{
r = file_write(f, buf, n);
file_decref(f);
}
return r;
}
sysret_t sys_open(const char* name, size_t len, int flags, int mode)
sysret_t sys_open(const char* name, int flags, int mode)
{
sysret_t ret = file_open(name, len, flags, mode);
sysret_t ret = file_open(name, flags, mode);
if(ret.result == -1)
return ret;
@ -50,62 +59,98 @@ sysret_t sys_open(const char* name, size_t len, int flags, int mode)
sysret_t sys_close(int fd)
{
return (sysret_t){fd_close(fd),EBADF};
int ret = fd_close(fd);
return (sysret_t){ret, ret & EBADF};
}
sysret_t sys_fstat(int fd, void* st)
{
sysret_t r = {-1,EBADF};
file_t* f = file_get(fd);
if(!f)
return r;
return file_stat(f,st);
if (f)
{
r = file_stat(f, st);
file_decref(f);
}
return r;
}
sysret_t sys_lseek(int fd, size_t ptr, int dir)
{
sysret_t r = {-1,EBADF};
file_t* f = file_get(fd);
if(!f)
return r;
return file_lseek(f,ptr,dir);
if (f)
{
r = file_lseek(f, ptr, dir);
file_decref(f);
}
return r;
}
sysret_t sys_stat(const char* name, size_t len, void* st)
sysret_t sys_stat(const char* name, void* st)
{
return frontend_syscall(SYS_stat,(long)name,len,(long)st,0);
size_t name_size = strlen(name)+1;
populate_mapping(st, sizeof(struct stat), PROT_WRITE);
return frontend_syscall(SYS_stat, (uintptr_t)name, name_size, (uintptr_t)st, 0);
}
sysret_t sys_lstat(const char* name, size_t len, void* st)
sysret_t sys_lstat(const char* name, void* st)
{
return frontend_syscall(SYS_lstat,(long)name,len,(long)st,0);
size_t name_size = strlen(name)+1;
populate_mapping(st, sizeof(struct stat), PROT_WRITE);
return frontend_syscall(SYS_lstat, (uintptr_t)name, name_size, (uintptr_t)st, 0);
}
sysret_t sys_link(const char* old_name, size_t old_len,
const char* new_name, size_t new_len)
sysret_t sys_link(const char* old_name, const char* new_name)
{
return frontend_syscall(SYS_link,(long)old_name,old_len,
(long)new_name,new_len);
size_t old_size = strlen(old_name)+1;
size_t new_size = strlen(new_name)+1;
return frontend_syscall(SYS_link, (uintptr_t)old_name, old_size,
(uintptr_t)new_name, new_size);
}
sysret_t sys_unlink(const char* name, size_t len)
{
return frontend_syscall(SYS_unlink,(long)name,len,0,0);
size_t name_size = strlen(name)+1;
return frontend_syscall(SYS_unlink, (uintptr_t)name, name_size, 0, 0);
}
sysret_t sys_brk(size_t pos)
{
if(pos / (1024 * 1024) >= mem_mb)
return (sysret_t){-1, ENOMEM};
return do_brk(pos);
}
// uname: fills `buf` with six consecutive 65-byte string fields and
// returns success.  Fields 0 and 2 are "Proxy Kernel" and "3.4.5"; the
// others are empty strings.
// NOTE(review): presumably the layout of struct utsname (sysname,
// nodename, release, version, machine, domainname) -- confirm.
sysret_t sys_uname(void* buf)
{
  const int sz = 65;
  const char* const values[] = {
    "Proxy Kernel", "", "3.4.5", "", "", ""
  };
  char* field = buf;

  for (int i = 0; i < 6; i++)
    strcpy(field + i*sz, values[i]);

  return (sysret_t){0,0};
}
sysret_t syscall(long a0, long a1, long a2, long a3, long n)
// Always reports ID 0 with no error.  The syscall table also installs this
// handler for SYS_geteuid, SYS_getgid and SYS_getegid.
sysret_t sys_getuid()
{
return (sysret_t){0,0};
}
// mmap system call: forwards all six arguments unchanged to do_mmap().
sysret_t sys_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset)
{
return do_mmap(addr, length, prot, flags, fd, offset);
}
sysret_t syscall(long a0, long a1, long a2, long a3, long a4, long a5, long n)
{
const static void* syscall_table[] = {
[SYS_exit] = sys_exit,
[SYS_exit_group] = sys_exit,
[SYS_read] = sys_read,
[SYS_write] = sys_write,
[SYS_open] = sys_open,
@ -117,10 +162,18 @@ sysret_t syscall(long a0, long a1, long a2, long a3, long n)
[SYS_link] = sys_link,
[SYS_unlink] = sys_unlink,
[SYS_brk] = sys_brk,
[SYS_uname] = sys_uname,
[SYS_getuid] = sys_getuid,
[SYS_geteuid] = sys_getuid,
[SYS_getgid] = sys_getuid,
[SYS_getegid] = sys_getuid,
[SYS_mmap] = sys_mmap,
};
if(n >= ARRAY_SIZE(syscall_table) || !syscall_table[n])
panic("bad syscall #%ld!",n);
return ((syscall_t)syscall_table[n])(a0, a1, a2, a3, n);
sysret_t r = ((syscall_t)syscall_table[n])(a0, a1, a2, a3, a4, a5, n);
printk("syscall %d %x %x %x = %x\n", n, a0, a1, a2, r.result);
return r;
}

4
pk/syscall.h

@ -1,4 +1,6 @@
// See LICENSE for license details.
#include <machine/syscall.h>
void sys_exit(int code) __attribute__((noreturn));
sysret_t syscall(long a0, long a1, long a2, long a3, long a4, long a5, long n);

392
pk/vm.c

@ -0,0 +1,392 @@
#include "vm.h"
#include "file.h"
#include "atomic.h"
#include "pcr.h"
#include "pk.h"
#include <stdint.h>
#include <errno.h>
// Descriptor of one mapped virtual memory region, as recorded by
// __vmr_alloc().  A slot with refcnt == 0 is free.
typedef struct {
// start address passed to __vmr_alloc()
uintptr_t addr;
// length of the region in bytes
size_t length;
// backing file, or NULL when the region has none
file_t* file;
// offset into `file` corresponding to `addr`
size_t offset;
// reference count; __do_mmap() initialises it to the region's page count
size_t refcnt;
// protection bits (PROT_*) applied when a page is populated
int prot;
} vmr_t;
// Number of slots in the static region table.
#define MAX_VMR 32
// Lock taken by handle_page_fault(), do_mmap() and do_brk().
spinlock_t vm_lock = SPINLOCK_INIT;
// Region table; searched linearly by __vmr_alloc().
static vmr_t vmrs[MAX_VMR];
// A page table entry: either a hardware entry or, for a page that has been
// mapped but not yet populated, a pointer to its vmr_t (see __do_mmap).
typedef uintptr_t pte_t;
// Top-level page table, allocated in vm_init().
static pte_t* root_page_table;
// Base address of the page pool used by __page_alloc().
static uintptr_t first_free_page;
// Index of the next unallocated page in the pool.
static size_t next_free_page;
// Total number of pages in the pool.
static size_t free_pages;
// Non-zero when the SR_VM bit could be set during vm_init().
static int have_vm;
// Hands out the next page of the pool, zero-filled.
// Returns the page's address, or 0 once the pool is exhausted.
// Pages are never returned to the pool.
static uintptr_t __page_alloc()
{
  if (next_free_page != free_pages)
  {
    uintptr_t page = first_free_page + RISCV_PGSIZE * next_free_page;
    next_free_page++;
    memset((void*)page, 0, RISCV_PGSIZE);
    return page;
  }
  return 0;
}
// Claims the first free slot (refcnt == 0) of the region table and fills
// it with the given description.  Returns the slot, or NULL when all
// MAX_VMR slots are in use.
static vmr_t* __vmr_alloc(uintptr_t addr, size_t length, file_t* file,
                          size_t offset, size_t refcnt, int prot)
{
  for (size_t i = 0; i < MAX_VMR; i++)
  {
    vmr_t* slot = &vmrs[i];
    if (slot->refcnt != 0)
      continue;

    *slot = (vmr_t){
      .addr = addr,
      .length = length,
      .file = file,
      .offset = offset,
      .refcnt = refcnt,
      .prot = prot,
    };
    return slot;
  }
  return NULL;
}
// Drops `dec` references from region `v`.  When the count reaches zero the
// slot becomes free and the reference on the backing file, if any, is
// released.
static void __vmr_decref(vmr_t* v, size_t dec)
{
  v->refcnt -= dec;
  if (v->refcnt != 0)
    return;

  if (v->file)
    file_decref(v->file);
}
// Non-zero when bit 1 (mask 2) of the entry is set -- the bit that
// super_pte_create() sets in every mapping entry it builds.
static int pte_valid(pte_t pte)
{
return pte & 2;
}
// Extracts the page number stored in an entry (the bits above the page
// offset).
static size_t pte_ppn(pte_t pte)
{
return pte >> RISCV_PGSHIFT;
}
// Non-zero when bit 0 (mask 1) of the entry is set -- the bit that
// ptd_create() sets in entries pointing to a next-level table.
static int ptd_valid(pte_t pte)
{
return pte & 1;
}
// Builds a directory entry referring to the next-level table located at
// page number `ppn`; bit 0 marks it as such (see ptd_valid).
static pte_t ptd_create(uintptr_t ppn)
{
return ppn << RISCV_PGSHIFT | 1;
}
// Converts an address to its page number by dropping the page-offset bits.
static uintptr_t ppn(uintptr_t addr)
{
return addr >> RISCV_PGSHIFT;
}
// Returns the index into the page table at `level` that `addr` selects.
// Level 0 is the last level; each level consumes RISCV_PGLEVEL_BITS bits
// of the address above the page offset.
static size_t pt_idx(uintptr_t addr, int level)
{
  const int shift = RISCV_PGLEVEL_BITS*level + RISCV_PGSHIFT;
  const size_t mask = (1 << RISCV_PGLEVEL_BITS) - 1;
  return (addr >> shift) & mask;
}
// Translates a combination of PROT_* bits (used as the index) into the
// 3-bit permission field placed in a page table entry:
// PROT_READ -> 4, PROT_WRITE -> 2, PROT_EXEC -> 1.
static int prot2perm[] = {
[0] = 0,
[PROT_READ] = 4,
[PROT_WRITE] = 2,
[PROT_WRITE|PROT_READ] = 6,
[PROT_EXEC] = 1,
[PROT_EXEC|PROT_READ] = 5,
[PROT_EXEC|PROT_WRITE] = 3,
[PROT_EXEC|PROT_WRITE|PROT_READ] = 7
};
// Builds a mapping entry for page number `ppn` at page-table `level`.
// The kernel permission field (from `kprot`) is placed at bit 7, the user
// permission field (from `uprot`) at bit 4, and bit 1 is set (see
// pte_valid).  Only the low three bits of each prot value are used.
static pte_t super_pte_create(uintptr_t ppn, int kprot, int uprot, int level)
{
  int kperm = prot2perm[kprot & 7];
  int uperm = prot2perm[uprot & 7];
  int perm = (kperm << 7) | (uperm << 4) | 2;
  int shift = RISCV_PGLEVEL_BITS*level + RISCV_PGSHIFT;
  return (ppn << shift) | perm;
}
// Builds a last-level (level 0) mapping entry for page number `ppn` with
// kernel permissions `kprot` and user permissions `uprot`.
static pte_t pte_create(uintptr_t ppn, int kprot, int uprot)
{
return super_pte_create(ppn, kprot, uprot, 0);
}
// Walks the page table from the root down to the level-0 entry for `addr`.
//
// When an intermediate table is missing and `create` is non-zero, a fresh
// zeroed table is allocated from the page pool and linked in; with
// `create` zero the walk stops.
//
// Returns a pointer to the level-0 entry, or 0 when an intermediate table
// is missing and was not (or could not be) created.  Asserts that no
// intermediate slot holds a mapping entry.
static __attribute__((always_inline)) pte_t* __walk_internal(uintptr_t addr, int create)
{
  pte_t* t = root_page_table;
  for (unsigned i = RISCV_PGLEVELS-1; i > 0; i--)
  {
    size_t idx = pt_idx(addr, i);
    kassert(!pte_valid(t[idx]));
    if (!ptd_valid(t[idx]))
    {
      if (!create)
        return 0;
      uintptr_t page = __page_alloc();
      if (page == 0)
        return 0;
      t[idx] = ptd_create(ppn(page));
    }
    // Tables are addressed directly by the page number stored in the entry.
    t = (pte_t*)(pte_ppn(t[idx]) << RISCV_PGSHIFT);
  }
  return &t[pt_idx(addr, 0)];
}
// Looks up the level-0 entry for `addr` without allocating intermediate
// tables; returns 0 if one is missing.
static pte_t* __walk(uintptr_t addr)
{
return __walk_internal(addr, 0);
}
// Looks up the level-0 entry for `addr`, allocating intermediate tables as
// needed; returns 0 only if the page pool is exhausted.
static pte_t* __walk_create(uintptr_t addr)
{
return __walk_internal(addr, 1);
}
// Returns non-zero when nothing is mapped at `vaddr`: either no level-0
// entry exists for it, or the entry is still zero.
static int __va_avail(uintptr_t vaddr)
{
  pte_t* entry = __walk(vaddr);
  if (entry == 0)
    return 1;
  return *entry == 0;
}
// Finds `npage` consecutive unmapped pages between current.brk and
// current.mmap_max.  Returns the start address of the range, or 0 when no
// such range exists.
static uintptr_t __vm_alloc(size_t npage)
{
  // Reject empty requests and requests larger than the whole window.
  // Without this check the subtraction below wraps around and the scan
  // would run far past mmap_max.
  if (npage == 0 || npage > current.mmap_max / RISCV_PGSIZE)
    return 0;

  uintptr_t start = current.brk, end = current.mmap_max - npage*RISCV_PGSIZE;
  for (uintptr_t a = start; a <= end; a += RISCV_PGSIZE)
  {
    if (!__va_avail(a))
      continue;

    // Check the candidate range from its last page backwards.  If a mapped
    // page is found, `a` is left pointing at it, so the outer loop resumes
    // the search just past the obstruction.
    uintptr_t first = a, last = a + (npage-1) * RISCV_PGSIZE;
    for (a = last; a > first && __va_avail(a); a -= RISCV_PGSIZE)
      ;
    if (a > first)
      continue;
    return a;
  }
  return 0;
}
// Rewrites the PTBR control register with its current value.
// NOTE(review): presumably this write is what invalidates cached
// translations on this hardware -- confirm against the ISA documentation.
static void flush_tlb()
{
mtpcr(PCR_PTBR, mfpcr(PCR_PTBR));
}
// Resolves a fault on the page containing `vaddr` for an access of kind
// `prot` (PROT_* bits).  Caller must hold vm_lock.
//
// Returns 0 when the page is (now) mapped with the requested permissions,
// -1 when nothing is mapped there or the permissions do not allow the
// access.
static int __handle_page_fault(uintptr_t vaddr, int prot)
{
// Work on the page-aligned address from here on.
uintptr_t vpn = vaddr >> RISCV_PGSHIFT;
vaddr = vpn << RISCV_PGSHIFT;
pte_t* pte = __walk(vaddr);
if (pte == 0 || *pte == 0)
return -1;
else if (!pte_valid(*pte))
{
// A non-zero entry without the valid bit is a vmr_t pointer left by
// __do_mmap(): the page is mapped but has not been populated yet.
kassert(vaddr < current.stack_top && vaddr >= current.user_min);
// The physical page number is taken to be the virtual page number.
uintptr_t ppn = vpn;
vmr_t* v = (vmr_t*)*pte;
// Map the page kernel read/write (no user access) while it is filled.
*pte = pte_create(ppn, PROT_READ|PROT_WRITE, 0);
if (v->file)
{
// Read at most one page, limited to what remains of the region, and
// zero the rest of the page.
size_t flen = MIN(RISCV_PGSIZE, v->length - (vaddr - v->addr));
// NOTE(review): the read happens inside the kassert argument; this relies
// on kassert always evaluating its condition.
kassert(flen == file_pread(v->file, (void*)vaddr, flen, vaddr - v->addr + v->offset).result);
if (flen < RISCV_PGSIZE)
memset((void*)vaddr + flen, 0, RISCV_PGSIZE - flen);
}
else
memset((void*)vaddr, 0, RISCV_PGSIZE);
// Install the final entry with the region's protection for both kernel
// and user.
// NOTE(review): v->refcnt is not decremented once the entry stops
// pointing at v, so region slots look like they are never reclaimed --
// confirm whether that is intended.
*pte = pte_create(ppn, v->prot, v->prot);
}
// The access is allowed only if every permission bit it needs is present.
pte_t perms = pte_create(0, prot, prot);
if ((*pte & perms) != perms)
return -1;
flush_tlb();
return 0;
}
// Locked entry point for page-fault handling: runs __handle_page_fault()
// under vm_lock.  Returns 0 if the fault was resolved, non-zero otherwise.
int handle_page_fault(uintptr_t vaddr, int prot)
{
spinlock_lock(&vm_lock);
int ret = __handle_page_fault(vaddr, prot);
spinlock_unlock(&vm_lock);
return ret;
}
// Maps `length` bytes at `addr` (MAP_FIXED) or at an address chosen by
// __vm_alloc(), optionally backed by file `f` starting at `offset`.
// Caller must hold vm_lock.
//
// Pages are populated lazily by the page-fault handler, except when
// virtual memory is unavailable or MAP_POPULATE is given, in which case
// they are populated immediately.
//
// On success returns the start address and, if `f` is non-NULL, takes a
// reference on it for the new region.  Returns (uintptr_t)-1 on failure,
// leaving the region table and the file's reference count untouched.
uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* f, off_t offset)
{
  size_t npage = (length-1)/RISCV_PGSIZE+1;

  // Settle the address before recording the region, so that the region
  // always stores the address that is actually mapped and no region has
  // to be rolled back when the address is rejected.
  if (flags & MAP_FIXED)
  {
    if ((addr & (RISCV_PGSIZE-1)) || addr < current.user_min ||
        addr + length > current.stack_top || addr + length < addr)
      return (uintptr_t)-1;
  }
  else if ((addr = __vm_alloc(npage)) == 0)
    return (uintptr_t)-1;

  vmr_t* v = __vmr_alloc(addr, length, f, offset, npage, prot);
  if (!v)
    return (uintptr_t)-1;

  // The region now refers to f; this is the reference that
  // __vmr_decref() releases when the region is dropped.
  if (f) file_incref(f);

  // Mark every page as "mapped, not yet populated" by storing the region
  // pointer in its page table entry.
  for (uintptr_t a = addr; a < addr + length; a += RISCV_PGSIZE)
  {
    pte_t* pte = __walk_create(a);
    kassert(pte);
    kassert(*pte == 0); // TODO __do_munmap
    *pte = (pte_t)v;
  }

  if (!have_vm || (flags & MAP_POPULATE))
    for (uintptr_t a = addr; a < addr + length; a += RISCV_PGSIZE)
      kassert(__handle_page_fault(a, prot) == 0);

  return addr;
}
// mmap entry point used by the system call layer.
//
// Only MAP_PRIVATE mappings with a non-zero length and a page-aligned
// offset are accepted (EINVAL otherwise).  Unless MAP_ANONYMOUS is set,
// `fd` must name an open file (EBADF otherwise).
//
// Returns the mapped address with err 0 on success, or result -1 with
// err ENOMEM when the mapping could not be established.
sysret_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset)
{
  if (!(flags & MAP_PRIVATE) || length == 0 || (offset & (RISCV_PGSIZE-1)))
    return (sysret_t){-1, EINVAL};

  file_t* f = NULL;
  if (!(flags & MAP_ANONYMOUS) && (f = file_get(fd)) == NULL)
    return (sysret_t){-1, EBADF};

  spinlock_lock(&vm_lock);
  addr = __do_mmap(addr, length, prot, flags, f, offset);
  // Keep the heap from growing into the new mapping.  A failed mapping
  // yields the largest address value and so never lowers the limit.
  if (addr < current.brk_max)
    current.brk_max = addr;
  spinlock_unlock(&vm_lock);

  // Drop the reference obtained by file_get().
  if (f) file_decref(f);

  // Report failure with an error code rather than result -1 and err 0.
  if (addr == (uintptr_t)-1)
    return (sysret_t){-1, ENOMEM};
  return (sysret_t){addr, 0};
}
// Moves the program break towards `addr`.  Caller must hold vm_lock.
//
// The request is limited to [current.brk_min, current.brk_max]; the heap
// is then extended with an anonymous mapping up to the next page boundary.
// Shrinking the heap is not implemented and trips an assertion.
// Returns the (limited) break value.
size_t __do_brk(size_t addr)
{
  size_t limited;
  if (addr < current.brk_min)
    limited = current.brk_min;
  else if (addr > current.brk_max)
    limited = current.brk_max;
  else
    limited = addr;

  // First call: the heap starts at the first page boundary at or above
  // brk_min.
  if (current.brk == 0)
    current.brk = ROUNDUP(current.brk_min, RISCV_PGSIZE);

  size_t newbrk_page = ROUNDUP(limited, RISCV_PGSIZE);
  if (current.brk > newbrk_page)
  {
    kassert(0); // TODO __do_munmap
  }
  else if (current.brk < newbrk_page)
  {
    kassert(__do_mmap(current.brk, newbrk_page - current.brk, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) == current.brk);
  }

  current.brk = newbrk_page;
  return limited;
}
// brk entry point used by the system call layer: runs __do_brk() under
// vm_lock and returns the resulting break value with err 0.
sysret_t do_brk(size_t addr)
{
spinlock_lock(&vm_lock);
addr = __do_brk(addr);
spinlock_unlock(&vm_lock);
return (sysret_t){addr, 0};
}
// Identity-maps the range [paddr, paddr+len) page by page with kernel
// permissions `prot` and no user permissions, creating page tables as
// needed.
static void __map_kernel_range(uintptr_t paddr, size_t len, int prot)
{
  const pte_t perms = pte_create(0, prot, 0);
  const uintptr_t limit = paddr + len;

  uintptr_t a = paddr;
  while (a < limit)
  {
    pte_t* pte = __walk_create(a);
    kassert(pte);
    // The entry's address part is the address itself.
    *pte = a | perms;
    a += RISCV_PGSIZE;
  }
}
// Touches one word in every page of [start, start+size) so that each page
// is accessed before the range is handed to the frontend.  With
// PROT_WRITE in `prot` the word is written (an atomic add of 0, which
// leaves the data unchanged); otherwise it is only read.
void populate_mapping(const void* start, size_t size, int prot)
{
  const uintptr_t first = ROUNDDOWN((uintptr_t)start, RISCV_PGSIZE);
  const uintptr_t stop = (uintptr_t)start + size;
  const int writing = prot & PROT_WRITE;

  for (uintptr_t a = first; a < stop; a += RISCV_PGSIZE)
  {
    atomic_t* atom = (atomic_t*)(a & -sizeof(atomic_t));
    if (writing)
      atomic_add(atom, 0);
    else
      atomic_read(atom);
  }
}
// Sets up the address-space layout recorded in `current` and, when the
// hardware accepts it, builds the initial page tables and turns on virtual
// memory.
void vm_init()
{
extern char _end;
// User space begins at the first page boundary after the kernel image.
current.user_min = ROUNDUP((uintptr_t)&_end, RISCV_PGSIZE);
current.brk_min = current.user_min;
current.brk = 0;
// The 32-bit word at address 0 is read as the memory size in megabytes.
uint32_t mem_mb = *(volatile uint32_t*)0;
if (mem_mb == 0)
{
// No memory size reported: leave all limits at zero.
current.stack_bottom = 0;
current.stack_top = 0;
current.brk_max = 0;
current.mmap_max = 0;
}
else
{
uintptr_t max_addr = (uintptr_t)mem_mb << 20;
size_t mem_pages = max_addr >> RISCV_PGSHIFT;
const size_t min_free_pages = 2*RISCV_PGLEVELS;
const size_t min_stack_pages = 8;
const size_t max_stack_pages = 128;
kassert(mem_pages > min_free_pages + min_stack_pages);
// Reserve a pool of pages at the top of memory for __page_alloc().
free_pages = MAX(mem_pages >> (RISCV_PGLEVEL_BITS-1), min_free_pages);
// The stack gets 1/32 of memory, limited to between 8 and 128 pages.
size_t stack_pages = CLAMP(mem_pages/32, min_stack_pages, max_stack_pages);
first_free_page = max_addr - free_pages * RISCV_PGSIZE;
uintptr_t root_page_table_paddr = __page_alloc();
kassert(root_page_table_paddr);
root_page_table = (pte_t*)root_page_table_paddr;
// Identity-map everything below user_min for the kernel (read/write/exec).
__map_kernel_range(0, current.user_min, PROT_READ|PROT_WRITE|PROT_EXEC);
mtpcr(PCR_PTBR, root_page_table_paddr);
// Probe for VM support: set SR_VM, read it back, then clear it again.
setpcr(PCR_SR, SR_VM);
have_vm = mfpcr(PCR_SR) & SR_VM;
clearpcr(PCR_SR, SR_VM);
// The stack sits directly below the page pool.
size_t stack_size = RISCV_PGSIZE * stack_pages;
current.stack_top = first_free_page;
uintptr_t stack_bot = current.stack_top - stack_size;
if (have_vm)
{
// Give the kernel access to the page pool, map the stack as an anonymous
// region, then enable VM.
__map_kernel_range(first_free_page, free_pages * RISCV_PGSIZE, PROT_READ|PROT_WRITE);
kassert(__do_mmap(stack_bot, stack_size, -1, MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, 0, 0) == stack_bot);
setpcr(PCR_SR, SR_VM);
}
current.stack_bottom = stack_bot;
// Heap and mmap area end one page below the stack.
stack_bot -= RISCV_PGSIZE; // guard page
current.mmap_max = current.brk_max = stack_bot;
}
}

26
pk/vm.h

@ -0,0 +1,26 @@
#ifndef _VM_H
#define _VM_H
#include "syscall.h"
#include "file.h"
#include <string.h>
#include <stdint.h>
#include <sys/types.h>
#define PROT_READ 1
#define PROT_WRITE 2
#define PROT_EXEC 4
#define MAP_PRIVATE 0x2
#define MAP_FIXED 0x10
#define MAP_ANONYMOUS 0x20
#define MAP_POPULATE 0x8000
// Initialises the address-space layout and, if supported, virtual memory.
void vm_init();
// Resolves a fault at vaddr for an access of kind prot (PROT_*);
// returns 0 on success, non-zero if the access is not allowed.
int handle_page_fault(uintptr_t vaddr, int prot);
// Touches every page of [start, start+size); writes when prot includes
// PROT_WRITE, reads otherwise.
void populate_mapping(const void* start, size_t size, int prot);
// Maps a region; caller must hold the VM lock.  Returns the mapped address
// or (uintptr_t)-1 on failure.
uintptr_t __do_mmap(uintptr_t addr, size_t length, int prot, int flags, file_t* file, off_t offset);
// mmap as seen by the system call layer (takes a file descriptor).
sysret_t do_mmap(uintptr_t addr, size_t length, int prot, int flags, int fd, off_t offset);
// brk as seen by the system call layer; returns the resulting break.
sysret_t do_brk(uintptr_t addr);
#endif
Loading…
Cancel
Save