Skip to content
This repository was archived by the owner on Jun 9, 2020. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/noah.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ struct proc {
khash_t(pfutex) *pfutex; /* TODO: modify khash and make this field being non-pointer */
};
struct fileinfo fileinfo;
gaddr_t vsyscall_page;
};

extern struct proc proc;
Expand Down
87 changes: 85 additions & 2 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <sys/sysctl.h>

#include <mach-o/dyld.h>
#include <sys/mman.h>

static int
get_cpuid_count (unsigned int leaf,
Expand Down Expand Up @@ -79,6 +80,84 @@ handle_syscall(void)
return 0;
}

/* Fixed guest linear address of the x86_64 vsyscall page. */
#define VSYSCALL_PAGE_ADDR 0xffffffffff600000
/* Size in bytes of the vsyscall page (one 4 KiB page). */
#define VSYSCALL_PAGE_SIZE 0x1000

/*
 * Report whether a guest linear address lies inside the vsyscall page, i.e.
 * in the half-open range
 * [VSYSCALL_PAGE_ADDR, VSYSCALL_PAGE_ADDR + VSYSCALL_PAGE_SIZE).
 *
 * gladdr: guest linear address to classify.
 *
 * Returns true when the address is inside the page, false otherwise.
 */
static inline bool
is_vsyscall(gaddr_t gladdr)
{
  /* The end bound is exclusive: the first byte past the page is not part
   * of it. */
  return gladdr >= VSYSCALL_PAGE_ADDR
      && gladdr < VSYSCALL_PAGE_ADDR + VSYSCALL_PAGE_SIZE;
}

/* vsyscall (and its latter-day replacement, the vDSO) is a way to implement
* fast paths for frequently called syscalls like `gettimeofday` and `time`
* without incurring the overhead of a context switch into the kernel.
*
* Darwin/XNU has similar functionality in the form of the COMMPAGE:
* https://wiki.darlinghq.org/documentation:commpage
*
* Currently, instead of providing a fast path, we rely on vsyscall emulation
* by executing the syscall in the way all syscalls are currently implemented.
* This is similar to what the Linux kernel does as well:
* https://github.com/torvalds/linux/blob/v4.20/arch/x86/entry/vsyscall/vsyscall_emu_64.S
*/

static inline bool
handle_vsyscall(gaddr_t gladdr)
{
if (!is_vsyscall(gladdr))
return false;

// Define a location in the process' address space to execute the syscall
if (proc.vsyscall_page == 0) {
// raw OP code for `syscall;retq`
char data[3] = {0x0f,0x05,0xC3};

proc.vsyscall_page = do_mmap(0, sizeof(data), PROT_WRITE | PROT_READ,
LINUX_PROT_READ | LINUX_PROT_EXEC, LINUX_MAP_ANONYMOUS |
LINUX_MAP_PRIVATE, -1, 0);

printk("allocated %llx for vsyscall_page\n", proc.vsyscall_page);


copy_to_user(proc.vsyscall_page, data, sizeof(data));
}

bool handled = false;

// These are the hardcoded offsets on x86_64, I see no reason to be more
// clever than this here given this is likely to be our only emulation
// target
switch(gladdr) {
case VSYSCALL_PAGE_ADDR:
vmm_write_register(HV_X86_RAX, 96 /* gettimeofday */);
handled = true;
break;
case VSYSCALL_PAGE_ADDR + 0x400:
vmm_write_register(HV_X86_RAX, 201 /* time */);
handled = true;
break;
case VSYSCALL_PAGE_ADDR + 0x800:
vmm_write_register(HV_X86_RAX, 309 /* getcpu */);
handled = true;
break;
default:
printk("page fault for vsyscall -- 0x%llx\n", gladdr);
break;
}

if (handled) {
// set RIP to our vsyscall emulation, where the CPU will end up upon
// resumption
vmm_write_register(HV_X86_RIP, proc.vsyscall_page);
}

return handled;
}

int
task_run()
{
Expand Down Expand Up @@ -243,8 +322,11 @@ main_loop(int return_on_sigret)
/* FIXME */
uint64_t gladdr;
vmm_read_vmcs(VMCS_RO_EXIT_QUALIFIC, &gladdr);
printk("page fault: caused by guest linear address 0x%llx\n", gladdr);
send_signal(getpid(), LINUX_SIGSEGV);
if (!handle_vsyscall(gladdr)) {
printk("page fault: caused by guest linear address 0x%llx\n", gladdr);
send_signal(getpid(), LINUX_SIGSEGV);
}
break;
}
case X86_VEC_UD: {
uint64_t instlen, rip;
Expand Down Expand Up @@ -535,6 +617,7 @@ init_first_proc(const char *root)
.nr_tasks = 1,
.lock = PTHREAD_RWLOCK_INITIALIZER,
.mm = malloc(sizeof(struct mm)),
.vsyscall_page = 0,
};
INIT_LIST_HEAD(&proc.tasks);
list_add(&task.head, &proc.tasks);
Expand Down
1 change: 1 addition & 0 deletions src/proc/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ prepare_newproc(void)
destroy_mm(proc.mm); // munlock is also done by unmapping mm
init_mm(proc.mm);
init_reg_state();
proc.vsyscall_page = 0;
reset_signal_state();
// TODO: destroy LDT if it is implemented

Expand Down