Remove the frontend appserver code
[akaros.git] / kern / src / syscall.c
index b5cd624..d4ffa34 100644 (file)
@@ -23,8 +23,6 @@
 #include <kmalloc.h>
 #include <profiler.h>
 #include <stdio.h>
-#include <frontend.h>
-#include <colored_caches.h>
 #include <hashtable.h>
 #include <bitmask.h>
 #include <vfs.h>
 #include <kprof.h>
 #include <termios.h>
 #include <manager.h>
+#include <ros/procinfo.h>
 
-/* Tracing Globals */
-int systrace_flags = 0;
-struct systrace_record *systrace_buffer = 0;
-uint32_t systrace_bufidx = 0;
-size_t systrace_bufsize = 0;
-spinlock_t systrace_lock = SPINLOCK_INITIALIZER_IRQSAVE;
+static int execargs_stringer(struct proc *p, char *d, size_t slen,
+                            char *path, size_t path_l,
+                            char *argenv, size_t argenv_l);
 
-static bool __trace_this_proc(struct proc *p)
-{
-       return (systrace_flags & SYSTRACE_ON) &&
-               ((systrace_flags & SYSTRACE_ALLPROC) || is_traced_proc(p));
-}
+/* Global, used by the kernel monitor for syscall debugging. */
+bool systrace_loud = FALSE;
 
-static size_t systrace_fill_pretty_buf(struct systrace_record *trace)
+/* Helper, given the trace record, pretty-print the trace's contents into the
+ * trace's pretty buf.  'entry' says whether we're an entry record or not
+ * (exit).  Returns the number of bytes put into the pretty_buf.
+ *
+ * The output is a header (start/end timestamps, syscall number and name, the
+ * six args, the retval or "---", pid, core and vcore), followed by
+ * printdump()'s output for trace->datalen bytes of trace->data, followed by a
+ * newline.
+ *
+ * NOTE(review): syscall_table[] is indexed by trace->syscallno with no range
+ * check in this function, so the record must carry a validated number. */
+static size_t systrace_fill_pretty_buf(struct systrace_record *trace,
+                                       bool entry)
 {
        size_t len = 0;
-       struct timespec ts_start;
-       struct timespec ts_end;
-       char what = 'X';
-       tsc2timespec(trace->start_timestamp, &ts_start);
-       tsc2timespec(trace->end_timestamp, &ts_end);
-       if (trace->end_timestamp == 0)
-               what = 'E';
-
-       len = snprintf(trace->pretty_buf, SYSTR_PRETTY_BUF_SZ - len,
-                  "%c [%7d.%09d]-[%7d.%09d] Syscall %3d (%12s):(0x%llx, 0x%llx, "
-                  "0x%llx, 0x%llx, 0x%llx, 0x%llx) ret: 0x%llx proc: %d core: %d "
-                  "vcore: %d data: ",
-                  what,
-                  ts_start.tv_sec,
-                  ts_start.tv_nsec,
-                  ts_end.tv_sec,
-                  ts_end.tv_nsec,
-                  trace->syscallno,
-                  syscall_table[trace->syscallno].name,
-                  trace->arg0,
-                  trace->arg1,
-                  trace->arg2,
-                  trace->arg3,
-                  trace->arg4,
-                  trace->arg5,
-                  trace->retval,
-                  trace->pid,
-                  trace->coreid,
-                  trace->vcoreid);
-
-       len += printdump(trace->pretty_buf + len,
-                        trace->datalen,
+       struct timespec ts_start = tsc2timespec(trace->start_timestamp);
+       struct timespec ts_end = tsc2timespec(trace->end_timestamp);
+
+       /* Slightly different formats between entry and exit.  Entry has retval set
+        * to ---, and begins with E.  Exit begins with X. */
+       if (entry) {
+               len = snprintf(trace->pretty_buf, SYSTR_PRETTY_BUF_SZ - len,
+                     "E [%7d.%09d]-[%7d.%09d] Syscall %3d (%12s):(0x%llx, 0x%llx, "
+                     "0x%llx, 0x%llx, 0x%llx, 0x%llx) ret: --- proc: %d core: %d "
+                     "vcore: %d data: ",
+                              ts_start.tv_sec,
+                              ts_start.tv_nsec,
+                              ts_end.tv_sec,
+                              ts_end.tv_nsec,
+                              trace->syscallno,
+                              syscall_table[trace->syscallno].name,
+                              trace->arg0,
+                              trace->arg1,
+                              trace->arg2,
+                              trace->arg3,
+                              trace->arg4,
+                              trace->arg5,
+                              trace->pid,
+                              trace->coreid,
+                              trace->vcoreid);
+       } else {
+               len = snprintf(trace->pretty_buf, SYSTR_PRETTY_BUF_SZ - len,
+                     "X [%7d.%09d]-[%7d.%09d] Syscall %3d (%12s):(0x%llx, 0x%llx, "
+                     "0x%llx, 0x%llx, 0x%llx, 0x%llx) ret: 0x%llx proc: %d core: %d "
+                     "vcore: %d data: ",
+                              ts_start.tv_sec,
+                              ts_start.tv_nsec,
+                              ts_end.tv_sec,
+                              ts_end.tv_nsec,
+                              trace->syscallno,
+                              syscall_table[trace->syscallno].name,
+                              trace->arg0,
+                              trace->arg1,
+                              trace->arg2,
+                              trace->arg3,
+                              trace->arg4,
+                              trace->arg5,
+                              trace->retval,
+                              trace->pid,
+                              trace->coreid,
+                              trace->vcoreid);
+       }
+       len += printdump(trace->pretty_buf + len, trace->datalen,
                         SYSTR_PRETTY_BUF_SZ - len - 1,
                         trace->data);
        len += snprintf(trace->pretty_buf + len, SYSTR_PRETTY_BUF_SZ - len, "\n");
        return len;
 }
 
-/* On enter, we have !trace, a sysc, and retval is meaningless.  On exit, we had
- * trace, retval and !sysc */
-static struct systrace_record *sctrace(struct systrace_record *trace,
-                                       struct proc *p, struct syscall *sysc,
-                                       long retval)
+/* Says whether the tracer may block while emitting a record for syscall number
+ * sysc_num: FALSE for the calls listed in never_block[], TRUE for all others.
+ *
+ * If some syscalls block, then they can really hurt the user and the kernel.
+ * For instance, if you blocked another call because the trace queue is full,
+ * the 2LS will want to yield the vcore, but then *that* call would block too.
+ * Since that caller was in vcore context, the core will just spin forever.
+ *
+ * Even worse, some syscalls operate on the calling core or current context,
+ * thus accessing pcpui.  If we block, then that old context is gone.  Worse, we
+ * could migrate and then be operating on a different core.  Imagine
+ * SYS_halt_core.  Doh! */
+static bool sysc_can_block(unsigned int sysc_num)
+{
+       static const unsigned int never_block[] = {
+               SYS_proc_yield,
+               SYS_fork,
+               SYS_exec,
+               SYS_pop_ctx,
+               SYS_getvcoreid,
+               SYS_halt_core,
+               SYS_vc_entry,
+               SYS_change_vcore,
+               SYS_change_to_m,
+       };
+       size_t i;
+
+       for (i = 0; i < sizeof(never_block) / sizeof(never_block[0]); i++) {
+               if (sysc_num == never_block[i])
+                       return FALSE;
+       }
+       return TRUE;
+}
+
+/* Helper: spits out our trace to the various sinks.
+ *
+ * Pretty-prints the record (entry or exit format, per 'entry'), then:
+ * - if 'strace' is non-NULL, writes the text to strace->q, using qiwrite() when
+ *   strace->drop_overflow is set or the syscall must not block, and the
+ *   blocking qwrite() otherwise;
+ * - if systrace_loud is set, also printk()s the text.
+ * Errors thrown along the way are caught here; we just return. */
+static void systrace_output(struct systrace_record *trace,
+                            struct strace *strace, bool entry)
 {
-       int n;
-       uintreg_t cp = 0;
-       int datalen = 0;
+       ERRSTACK(1);
+       size_t pretty_len;
 
-       assert(p->strace);
+       /* qio ops can throw, especially the blocking qwrite.  I had it block on the
+        * outbound path of sys_proc_destroy().  The rendez immediately throws.
+        * On a throw we drop this record's output and return quietly. */
+       if (waserror()) {
+               poperror();
+               return;
+       }
+       pretty_len = systrace_fill_pretty_buf(trace, entry);
+       if (strace) {
+               /* At this point, we're going to emit the trace (entry or exit).  It's
+                * just a question of whether or not we block while doing it. */
+               if (strace->drop_overflow || !sysc_can_block(trace->syscallno))
+                       qiwrite(strace->q, trace->pretty_buf, pretty_len);
+               else
+                       qwrite(strace->q, trace->pretty_buf, pretty_len);
+       }
+       /* The loud sink is independent of whether the proc has an strace. */
+       if (systrace_loud)
+               printk("%s", trace->pretty_buf);
+       poperror();
+}
 
-       if (!trace) {
-               /* We're using qiwrite, which has no flow control.  We'll do it
-                * manually. */
-               if (qfull(p->strace->q)) {
-                       atomic_inc(&p->strace->nr_drops);
-                       return NULL;
-               }
-               // TODO: could we allocb and then write that block?
-               // Still, if we're tracing, we take a hit, and this is so
-               // much more efficient than strace it's not clear we care.
-               trace = kmalloc(SYSTR_BUF_SZ, 0);
+/* Decides whether the syscall described by the user's sysc should be traced
+ * for p.  Returns TRUE if a trace record should be started, FALSE otherwise.
+ *
+ * Syscall numbers above MAX_SYSCALL_NR are never traced, not even when
+ * systrace_loud is set: the pretty-printer indexes syscall_table[] by the
+ * number.  Out-of-range numbers are rejected before the queue-full check, so
+ * they are not counted in nr_drops. */
+static bool should_strace(struct proc *p, struct syscall *sysc)
+{
+       unsigned int sysc_num;
 
-               if (!trace) {
+       /* Nobody is listening: bail out before touching the user's sysc. */
+       if (!systrace_loud && (!p->strace || !p->strace->tracing))
+               return FALSE;
+       /* TOCTTOU concerns - sysc is __user. */
+       sysc_num = ACCESS_ONCE(sysc->num);
+       /* Must come before the systrace_loud shortcut, or a bogus number from
+        * userspace reaches the syscall_table[] lookup. */
+       if (sysc_num > MAX_SYSCALL_NR)
+               return FALSE;
+       if (systrace_loud)
+               return TRUE;
+       if (qfull(p->strace->q)) {
+               if (p->strace->drop_overflow || !sysc_can_block(sysc_num)) {
                        atomic_inc(&p->strace->nr_drops);
-                       return NULL;
-               }
-               /* Avoiding the atomic op.  We sacrifice accuracy for less overhead. */
-               p->strace->appx_nr_sysc++;
-
-               int coreid, vcoreid;
-               struct proc *p = current;
-
-               coreid = core_id();
-               vcoreid = proc_get_vcoreid(p);
-
-               // TODO: functionalize this, if we decide this
-               // approach is OK.
-               trace->start_timestamp = read_tsc();
-               trace->end_timestamp = 0;
-               trace->syscallno = sysc->num;
-               trace->arg0 = sysc->arg0;
-               trace->arg1 = sysc->arg1;
-               trace->arg2 = sysc->arg2;
-               trace->arg3 = sysc->arg3;
-               trace->arg4 = sysc->arg4;
-               trace->arg5 = sysc->arg5;
-               trace->pid = p->pid;
-               trace->coreid = coreid;
-               trace->vcoreid = vcoreid;
-               trace->pretty_buf = (char*)trace + sizeof(struct systrace_record);
-               trace->datalen = 0;
-               trace->data[0] = 0;
-               switch (sysc->num) {
-               case SYS_write:
-                       cp = sysc->arg1;
-                       datalen = sysc->arg2;
-                       break;
-               case SYS_openat:
-                       cp = sysc->arg1;
-                       datalen = sysc->arg2;
-                       break;
-               }
-       } else {
-               trace->end_timestamp = read_tsc();
-               trace->retval = retval;
-               switch (trace->syscallno) {
-               case SYS_read:
-                       cp = trace->arg1;
-                       datalen = retval < 0 ? 0 : retval;
-                       break;
+                       return FALSE;
                }
        }
+       return test_bit(sysc_num, p->strace->trace_set);
+}
 
-       trace->datalen = MIN(sizeof(trace->data), datalen);
-       memmove(trace->data, (void *)cp, trace->datalen);
-       n = systrace_fill_pretty_buf(trace);
-       qiwrite(p->strace->q, trace->pretty_buf, n);
-       return trace;
+/* Helper, copies up to len bytes from user address u_data to the end of
+ * trace->data, clamped to the room that is left, and advances trace->datalen
+ * by the amount copied.
+ *
+ * If the user copy fails, datalen is left alone, so bytes that were never
+ * written do not get dumped into the trace.  This assumes copy_from_user()
+ * returns nonzero on failure, the way memcpy_from_user() is checked elsewhere
+ * in this file - TODO confirm. */
+static void copy_tracedata_from_user(struct systrace_record *trace,
+                                     long u_data, size_t len)
+{
+       size_t copy_amt;
+
+       copy_amt = MIN(sizeof(trace->data) - trace->datalen, len);
+       if (copy_from_user(trace->data + trace->datalen, (void*)u_data,
+                          copy_amt))
+               return;
+       trace->datalen += copy_amt;
+}
+
+/* Helper: appends printf-style formatted text to the end of trace->data.  The
+ * text is only counted in trace->datalen when snprintf_error() does not flag
+ * the vsnprintf() result for the room that was left. */
+static void snprintf_to_trace(struct systrace_record *trace, const char *fmt,
+                              ...)
+{
+       size_t room = sizeof(trace->data) - trace->datalen;
+       va_list args;
+       int nr_written;
+
+       va_start(args, fmt);
+       nr_written = vsnprintf((char*)trace->data + trace->datalen, room, fmt,
+                              args);
+       va_end(args);
+       if (snprintf_error(nr_written, room))
+               return;
+       trace->datalen += nr_written;
 }
 
+/* Starts a trace for p running sysc, attaching it to kthread.  Pairs with
+ * systrace_finish_trace(). */
 static void systrace_start_trace(struct kthread *kthread, struct syscall *sysc)
 {
-       struct systrace_record *trace;
-       int coreid, vcoreid;
        struct proc *p = current;
+       struct systrace_record *trace;
 
-       if (p->strace_on)
-               kthread->strace = sctrace(NULL, p, sysc, 0);
-       else
-               kthread->strace = 0;
-
-       /* TODO: merge these two types of tracing, or just remove this old one */
-       if (!__trace_this_proc(p))
+       kthread->strace = 0;
+       if (!should_strace(p, sysc))
                return;
-       assert(!kthread->trace);        /* catch memory leaks */
-       coreid = core_id();
-       vcoreid = proc_get_vcoreid(p);
-       if (systrace_flags & SYSTRACE_LOUD) {
-               printk("ENTER [%16llu] Syscall %3d (%12s):(0x%llx, 0x%llx, 0x%llx, "
-                      "0x%llx, 0x%llx, 0x%llx) proc: %d core: %d vcore: %d\n",
-                      read_tsc(),
-                      sysc->num, syscall_table[sysc->num].name,
-                          sysc->arg0, sysc->arg1, sysc->arg2, sysc->arg3, sysc->arg4,
-                          sysc->arg5, p->pid, coreid, vcoreid);
-       }
-       trace = kmalloc(SYSTR_BUF_SZ, 0);
-       if (!trace)
-               return;
-       kthread->trace = trace;
+       /* TODO: consider a block_alloc and qpass, though note that we actually
+        * write the same trace in twice (entry and exit). */
+       trace = kpages_alloc(SYSTR_BUF_SZ, MEM_ATOMIC);
+       if (p->strace) {
+               if (!trace) {
+                       atomic_inc(&p->strace->nr_drops);
+                       return;
+               }
+               /* Avoiding the atomic op.  We sacrifice accuracy for less overhead. */
+               p->strace->appx_nr_sysc++;
+       } else {
+               if (!trace)
+                       return;
+       }
+       /* If you ever need to debug just one strace function, this is a
+        * handy way to do it: just bail out if it's not the one you
+        * want.
+        * if (sysc->num != SYS_exec)
+        * return; */
        trace->start_timestamp = read_tsc();
+       trace->end_timestamp = 0;
        trace->syscallno = sysc->num;
        trace->arg0 = sysc->arg0;
        trace->arg1 = sysc->arg1;
@@ -209,48 +242,117 @@ static void systrace_start_trace(struct kthread *kthread, struct syscall *sysc)
        trace->arg3 = sysc->arg3;
        trace->arg4 = sysc->arg4;
        trace->arg5 = sysc->arg5;
+       trace->retval = 0;
        trace->pid = p->pid;
-       trace->coreid = coreid;
-       trace->vcoreid = vcoreid;
+       trace->coreid = core_id();
+       trace->vcoreid = proc_get_vcoreid(p);
        trace->pretty_buf = (char*)trace + sizeof(struct systrace_record);
        trace->datalen = 0;
        trace->data[0] = 0;
-}
 
+       switch (sysc->num) {
+       case SYS_write:
+               copy_tracedata_from_user(trace, sysc->arg1, sysc->arg2);
+               break;
+       case SYS_openat:
+       case SYS_chdir:
+       case SYS_rmdir:
+       case SYS_nmount:
+               copy_tracedata_from_user(trace, sysc->arg1, sysc->arg2);
+               break;
+       case SYS_stat:
+       case SYS_lstat:
+       case SYS_access:
+       case SYS_unlink:
+       case SYS_mkdir:
+       case SYS_wstat:
+               copy_tracedata_from_user(trace, sysc->arg0, sysc->arg1);
+               break;
+       case SYS_link:
+       case SYS_symlink:
+       case SYS_rename:
+       case SYS_nbind:
+               copy_tracedata_from_user(trace, sysc->arg0, sysc->arg1);
+               snprintf_to_trace(trace, " -> ");
+               copy_tracedata_from_user(trace, sysc->arg2, sysc->arg3);
+               break;
+       case SYS_nunmount:
+               copy_tracedata_from_user(trace, sysc->arg2, sysc->arg3);
+               break;
+       case SYS_exec:
+               trace->datalen = execargs_stringer(current,
+                                                  (char *)trace->data,
+                                                  sizeof(trace->data),
+                                                  (char *)sysc->arg0,
+                                                  sysc->arg1,
+                                                  (char *)sysc->arg2,
+                                                  sysc->arg3);
+               break;
+       case SYS_proc_create:
+               trace->datalen = execargs_stringer(current,
+                                                  (char *)trace->data,
+                                                  sizeof(trace->data),
+                                                  (char *)sysc->arg0,
+                                                  sysc->arg1,
+                                                  (char *)sysc->arg2,
+                                                  sysc->arg3);
+               break;
+       }
+       systrace_output(trace, p->strace, TRUE);
+
+       kthread->strace = trace;
+}
+
+/* Finishes the trace on kthread for p, with retval being the return from the
+ * syscall we're tracing.  Pairs with systrace_start_trace().
+ *
+ * Does nothing if no trace was started.  Otherwise stamps the end time and
+ * retval, grabs the syscall's output data for calls whose data only exists on
+ * exit, emits the exit record, and frees the record. */
 static void systrace_finish_trace(struct kthread *kthread, long retval)
 {
-       struct systrace_record *trace = kthread->trace;
-       size_t pretty_len;
+       struct proc *p = current;
+       struct systrace_record *trace;
 
-       if (trace) {
-               trace->end_timestamp = read_tsc();
-               trace->retval = retval;
-               kthread->trace = 0;
-               pretty_len = systrace_fill_pretty_buf(trace);
-               kprof_tracedata_write(trace->pretty_buf, pretty_len);
-               if (systrace_flags & SYSTRACE_LOUD)
-                       printk("EXIT %s", trace->pretty_buf);
-               kfree(trace);
-       }
-       /* TODO: merge with or remove the old tracer */
-       if (kthread->strace) {
-               sctrace(kthread->strace, current, 0, retval);
-               kfree(kthread->strace);
-               kthread->strace = 0;
+       if (!kthread->strace)
+               return;
+       trace = kthread->strace;
+       trace->end_timestamp = read_tsc();
+       trace->retval = retval;
+
+       /* Only try to do the trace data if we didn't do it on entry */
+       if (!trace->datalen) {
+               switch (trace->syscallno) {
+               case SYS_read:
+                       if (retval <= 0)
+                               break;
+                       copy_tracedata_from_user(trace, trace->arg1, retval);
+                       break;
+               case SYS_readlink:
+                       if (retval <= 0)
+                               break;
+                       copy_tracedata_from_user(trace, trace->arg0, trace->arg1);
+                       snprintf_to_trace(trace, " -> ");
+                       /* Like SYS_read: only retval bytes of the output buffer are
+                        * meaningful, not its whole capacity (arg3).  Assumes arg2 is
+                        * readlink's output buffer and retval the number of bytes
+                        * stored in it - TODO confirm against sys_readlink(). */
+                       copy_tracedata_from_user(trace, trace->arg2, retval);
+                       break;
+               }
        }
+
+       systrace_output(trace, p->strace, FALSE);
+       kpages_free(kthread->strace, SYSTR_BUF_SZ);
+       kthread->strace = 0;
 }
 
 #ifdef CONFIG_SYSCALL_STRING_SAVING
 
 static void alloc_sysc_str(struct kthread *kth)
 {
-       kth->name = kmalloc(SYSCALL_STRLEN, KMALLOC_WAIT);
+       kth->name = kmalloc(SYSCALL_STRLEN, MEM_ATOMIC);
+       if (!kth->name)
+               return;
        kth->name[0] = 0;
 }
 
 static void free_sysc_str(struct kthread *kth)
 {
        char *str = kth->name;
+
        kth->name = 0;
        kfree(str);
 }
@@ -258,7 +360,9 @@ static void free_sysc_str(struct kthread *kth)
 #define sysc_save_str(...)                                                     \
 {                                                                              \
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];                     \
-       snprintf(pcpui->cur_kthread->name, SYSCALL_STRLEN, __VA_ARGS__);           \
+                                                                               \
+       if (pcpui->cur_kthread->name)                                              \
+               snprintf(pcpui->cur_kthread->name, SYSCALL_STRLEN, __VA_ARGS__);       \
 }
 
 #else
@@ -285,7 +389,7 @@ static void finish_sysc(struct syscall *sysc, struct proc *p)
         * to not muck with the flags while we're signalling. */
        atomic_or(&sysc->flags, SC_K_LOCK | SC_DONE);
        __signal_syscall(sysc, p);
-       atomic_and(&sysc->flags, ~SC_K_LOCK); 
+       atomic_and(&sysc->flags, ~SC_K_LOCK);
 }
 
 /* Helper that "finishes" the current async syscall.  This should be used with
@@ -355,6 +459,7 @@ void set_errstr(const char *fmt, ...)
 {
        va_list ap;
 
+       assert(fmt);
        va_start(ap, fmt);
        vset_errstr(fmt, ap);
        va_end(ap);
@@ -374,8 +479,9 @@ void set_error(int error, const char *fmt, ...)
 
        set_errno(error);
 
+       assert(fmt);
        va_start(ap, fmt);
-       vset_errstr(fmt != NULL ? fmt: errno_to_string(error), ap);
+       vset_errstr(fmt, ap);
        va_end(ap);
 }
 
@@ -461,12 +567,10 @@ static int sys_null(void)
 
 /* Diagnostic function: blocks the kthread/syscall, to help userspace test its
  * async I/O handling. */
-static int sys_block(struct proc *p, unsigned int usec)
+static int sys_block(struct proc *p, unsigned long usec)
 {
-       /* Note printing takes a few ms, so your printds won't be perfect. */
-       printd("[kernel] sys_block(), sleeping at %llu\n", read_tsc());
+       sysc_save_str("block for %lu usec", usec);
        kthread_usleep(usec);
-       printd("[kernel] sys_block(), waking up at %llu\n", read_tsc());
        return 0;
 }
 
@@ -511,91 +615,18 @@ static int sys_nanosleep(struct proc *p,
         * years, which should be sufficiently long enough to ensure we don't
         * overflow). */
        if (waserror()) {
-               tsc2timespec(read_tsc() - tsc, &krem);
+               krem = tsc2timespec(read_tsc() - tsc);
                if (rem && memcpy_to_user(p, rem, &krem, sizeof(struct timespec)))
                        set_errno(EFAULT);
                poperror();
                return -1;
        }
+       sysc_save_str("nanosleep for %d usec", usec);
        kthread_usleep(usec);
        poperror();
        return 0;
 }
 
-// Writes 'val' to 'num_writes' entries of the well-known array in the kernel
-// address space.  It's just #defined to be some random 4MB chunk (which ought
-// to be boot_alloced or something).  Meant to grab exclusive access to cache
-// lines, to simulate doing something useful.
-static int sys_cache_buster(struct proc *p, uint32_t num_writes,
-                             uint32_t num_pages, uint32_t flags)
-{
-       #define BUSTER_ADDR             0xd0000000L  // around 512 MB deep
-       #define MAX_WRITES              1048576*8
-       #define MAX_PAGES               32
-       #define INSERT_ADDR     (UINFO + 2*PGSIZE) // should be free for these tests
-       uint32_t* buster = (uint32_t*)BUSTER_ADDR;
-       static spinlock_t buster_lock = SPINLOCK_INITIALIZER;
-       uint64_t ticks = -1;
-       page_t* a_page[MAX_PAGES];
-
-       /* Strided Accesses or Not (adjust to step by cachelines) */
-       uint32_t stride = 1;
-       if (flags & BUSTER_STRIDED) {
-               stride = 16;
-               num_writes *= 16;
-       }
-
-       /* Shared Accesses or Not (adjust to use per-core regions)
-        * Careful, since this gives 8MB to each core, starting around 512MB.
-        * Also, doesn't separate memory for core 0 if it's an async call.
-        */
-       if (!(flags & BUSTER_SHARED))
-               buster = (uint32_t*)(BUSTER_ADDR + core_id() * 0x00800000);
-
-       /* Start the timer, if we're asked to print this info*/
-       if (flags & BUSTER_PRINT_TICKS)
-               ticks = start_timing();
-
-       /* Allocate num_pages (up to MAX_PAGES), to simulate doing some more
-        * realistic work.  Note we don't write to these pages, even if we pick
-        * unshared.  Mostly due to the inconvenience of having to match up the
-        * number of pages with the number of writes.  And it's unnecessary.
-        */
-       if (num_pages) {
-               spin_lock(&buster_lock);
-               for (int i = 0; i < MIN(num_pages, MAX_PAGES); i++) {
-                       upage_alloc(p, &a_page[i],1);
-                       page_insert(p->env_pgdir, a_page[i], (void*)INSERT_ADDR + PGSIZE*i,
-                                   PTE_USER_RW);
-                       page_decref(a_page[i]);
-               }
-               spin_unlock(&buster_lock);
-       }
-
-       if (flags & BUSTER_LOCKED)
-               spin_lock(&buster_lock);
-       for (int i = 0; i < MIN(num_writes, MAX_WRITES); i=i+stride)
-               buster[i] = 0xdeadbeef;
-       if (flags & BUSTER_LOCKED)
-               spin_unlock(&buster_lock);
-
-       if (num_pages) {
-               spin_lock(&buster_lock);
-               for (int i = 0; i < MIN(num_pages, MAX_PAGES); i++) {
-                       page_remove(p->env_pgdir, (void*)(INSERT_ADDR + PGSIZE * i));
-                       page_decref(a_page[i]);
-               }
-               spin_unlock(&buster_lock);
-       }
-
-       /* Print info */
-       if (flags & BUSTER_PRINT_TICKS) {
-               ticks = stop_timing(ticks);
-               printk("%llu,", ticks);
-       }
-       return 0;
-}
-
 static int sys_cache_invalidate(void)
 {
        #ifdef CONFIG_X86
@@ -606,34 +637,6 @@ static int sys_cache_invalidate(void)
 
 /* sys_reboot(): called directly from dispatch table. */
 
-/* Print a string to the system console. */
-static ssize_t sys_cputs(struct proc *p, const char *string,
-                         size_t strlen)
-{
-       char *t_string;
-       t_string = user_strdup_errno(p, string, strlen);
-       if (!t_string)
-               return -1;
-       printk("%.*s", strlen, t_string);
-       user_memdup_free(p, t_string);
-       return (ssize_t)strlen;
-}
-
-// Read a character from the system console.
-// Returns the character.
-/* TODO: remove me */
-static uint16_t sys_cgetc(struct proc *p)
-{
-       uint16_t c;
-
-       // The cons_get_any_char() primitive doesn't wait for a character,
-       // but the sys_cgetc() system call does.
-       while ((c = cons_get_any_char()) == 0)
-               cpu_relax();
-
-       return c;
-}
-
 /* Returns the id of the physical core this syscall is executed on. */
 static uint32_t sys_getpcoreid(void)
 {
@@ -649,22 +652,14 @@ static size_t sys_getvcoreid(struct proc *p)
 
 /************** Process management syscalls **************/
 
-/* Returns the calling process's pid */
-static pid_t sys_getpid(struct proc *p)
-{
-       return p->pid;
-}
-
 /* Helper for proc_create and fork */
 static void inherit_strace(struct proc *parent, struct proc *child)
 {
-       if (parent->strace && parent->strace_inherit) {
+       if (parent->strace && parent->strace->inherit) {
                /* Refcnt on both, put in the child's ->strace. */
                kref_get(&parent->strace->users, 1);
                kref_get(&parent->strace->procs, 1);
                child->strace = parent->strace;
-               child->strace_on = TRUE;
-               child->strace_inherit = TRUE;
        }
 }
 
@@ -688,42 +683,44 @@ static int sys_proc_create(struct proc *p, char *path, size_t path_l,
        /* TODO: 9ns support */
        program = do_file_open(t_path, O_READ, 0);
        if (!program)
-               goto error_user_memdup;
-
+               goto error_with_path;
+       if (!is_valid_elf(program)) {
+               set_errno(ENOEXEC);
+               goto error_with_file;
+       }
        /* Check the size of the argenv array, error out if too large. */
        if ((argenv_l < sizeof(struct argenv)) || (argenv_l > ARG_MAX)) {
                set_error(EINVAL, "The argenv array has an invalid size: %lu\n",
                                  argenv_l);
-               goto error_user_memdup;
+               goto error_with_file;
        }
        /* Copy the argenv array into a kernel buffer. Delay processing of the
         * array to load_elf(). */
        kargenv = user_memdup_errno(p, argenv, argenv_l);
        if (!kargenv) {
-               set_errstr("Failed to copy in the args");
-               goto error_user_memdup;
+               set_error(EINVAL, "Failed to copy in the args");
+               goto error_with_file;
        }
        /* Unpack the argenv array into more usable variables. Integrity checking
         * done along side this as well. */
        if (unpack_argenv(kargenv, argenv_l, &argc, &argv, &envc, &envp)) {
-               set_errstr("Failed to unpack the args");
-               goto error_unpack;
+               set_error(EINVAL, "Failed to unpack the args");
+               goto error_with_kargenv;
        }
-
        /* TODO: need to split the proc creation, since you must load after setting
         * args/env, since auxp gets set up there. */
        //new_p = proc_create(program, 0, 0);
        if (proc_alloc(&new_p, current, flags)) {
-               set_errstr("Failed to alloc new proc");
-               goto error_proc_alloc;
+               set_error(ENOMEM, "Failed to alloc new proc");
+               goto error_with_kargenv;
        }
        inherit_strace(p, new_p);
        /* close the CLOEXEC ones, even though this isn't really an exec */
        close_fdt(&new_p->open_files, TRUE);
        /* Load the elf. */
        if (load_elf(new_p, program, argc, argv, envc, envp)) {
-               set_errstr("Failed to load elf");
-               goto error_load_elf;
+               set_error(EINVAL, "Failed to load elf");
+               goto error_with_proc;
        }
        /* progname is argv0, which accounts for symlinks */
        proc_set_progname(new_p, argc ? argv[0] : NULL);
@@ -735,18 +732,17 @@ static int sys_proc_create(struct proc *p, char *path, size_t path_l,
        profiler_notify_new_process(new_p);
        proc_decref(new_p);     /* give up the reference created in proc_create() */
        return pid;
-error_load_elf:
-       set_errno(EINVAL);
+error_with_proc:
        /* proc_destroy will decref once, which is for the ref created in
         * proc_create().  We don't decref again (the usual "+1 for existing"),
         * since the scheduler, which usually handles that, hasn't heard about the
         * process (via __proc_ready()). */
        proc_destroy(new_p);
-error_proc_alloc:
-       kref_put(&program->f_kref);
-error_unpack:
+error_with_kargenv:
        user_memdup_free(p, kargenv);
-error_user_memdup:
+error_with_file:
+       kref_put(&program->f_kref);
+error_with_path:
        free_path(p, t_path);
        return -1;
 }
@@ -788,7 +784,6 @@ static error_t sys_proc_destroy(struct proc *p, pid_t pid, int exitcode)
                printd("[%d] destroying proc %d\n", p->pid, p_to_die->pid);
        }
        proc_destroy(p_to_die);
-       /* we only get here if we weren't the one to die */
        proc_decref(p_to_die);
        return 0;
 }
@@ -823,6 +818,7 @@ static ssize_t sys_fork(env_t* e)
 {
        uintptr_t temp;
        int ret;
+       struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
 
        // TODO: right now we only support fork for single-core processes
        if (e->state != PROC_RUNNING_S) {
@@ -842,13 +838,9 @@ static ssize_t sys_fork(env_t* e)
                set_errno(EINVAL);
                return -1;
        }
+       assert(pcpui->cur_proc == pcpui->owning_proc);
        copy_current_ctx_to(&env->scp_ctx);
 
-       env->cache_colors_map = cache_colors_map_alloc();
-       for (int i = 0; i < llc_cache->num_colors; i++)
-               if (GET_BITMASK_BIT(e->cache_colors_map,i))
-                       cache_color_alloc(llc_cache, env->cache_colors_map);
-
        /* Make the new process have the same VMRs as the older.  This will copy the
         * contents of non MAP_SHARED pages to the new VMRs. */
        if (duplicate_vmrs(e, env)) {
@@ -866,7 +858,6 @@ static ssize_t sys_fork(env_t* e)
        switch_back(env, temp);
 
        /* Copy some state from the original proc into the new proc. */
-       env->heap_top = e->heap_top;
        env->env_flags = e->env_flags;
 
        inherit_strace(e, env);
@@ -874,7 +865,7 @@ static ssize_t sys_fork(env_t* e)
        /* In general, a forked process should be a fresh process, and we copy over
         * whatever stuff is needed between procinfo/procdata. */
        *env->procdata = *e->procdata;
-       env->procinfo->heap_bottom = e->procinfo->heap_bottom;
+       env->procinfo->program_end = e->procinfo->program_end;
 
        /* FYI: once we call ready, the proc is open for concurrent usage */
        __proc_ready(env);
@@ -892,11 +883,65 @@ static ssize_t sys_fork(env_t* e)
        return ret;
 }
 
+/* Builds a printable description of sys_exec's arguments in the slen-byte
+ * buffer d: the user path (clamped to slen), then "[argc]{arg0, arg1, }" or a
+ * short note naming the step that failed.  Returns s - d, i.e. how far the
+ * output cursor advanced.  Assumes that d is pointing to zero'd storage or
+ * storage that does not require null termination or provides the null. */
+static int execargs_stringer(struct proc *p, char *d, size_t slen,
+                            char *path, size_t path_l,
+                            char *argenv, size_t argenv_l)
+{
+       int argc, envc, i;
+       char **argv, **envp;
+       struct argenv *kargenv;
+       char *s = d;
+       char *e = d + slen;
+
+       if (path_l > slen)
+               path_l = slen;
+       if (memcpy_from_user(p, d, path, path_l)) {
+               s = seprintf(s, e, "Invalid exec path");
+               return s - d;
+       }
+       s += path_l;
+
+       /* Cloned from sys_exec on purpose: the helper we tried needed almost 10
+        * arguments, so it was not worth having.  No cpp macros, please. */
+       /* Check the size of the argenv array, error out if too large. */
+       if ((argenv_l < sizeof(struct argenv)) || (argenv_l > ARG_MAX)) {
+               s = seprintf(s, e, "The argenv array has an invalid size: %lu\n",
+                                 argenv_l);
+               return s - d;
+       }
+       /* Copy the argenv array into a kernel buffer. */
+       kargenv = user_memdup_errno(p, argenv, argenv_l);
+       if (!kargenv) {
+               s = seprintf(s, e, "Failed to copy in the args and environment");
+               return s - d;
+       }
+       /* Unpack the argenv array into more usable variables. Integrity checking
+        * done along side this as well. */
+       if (unpack_argenv(kargenv, argenv_l, &argc, &argv, &envc, &envp)) {
+               s = seprintf(s, e, "Failed to unpack the args");
+               user_memdup_free(p, kargenv);
+               return s - d;
+       }
+       /* Only argv is rendered; envc/envp are unpacked but never printed. */
+       s = seprintf(s, e, "[%d]{", argc);
+       for (i = 0; i < argc; i++)
+               s = seprintf(s, e, "%s, ", argv[i]);
+       s = seprintf(s, e, "}");
+
+       user_memdup_free(p, kargenv);
+       return s - d;
+}
+
 /* Load the binary "path" into the current process, and start executing it.
  * argv and envp are magically bundled in procinfo for now.  Keep in sync with
  * glibc's sysdeps/ros/execve.c.  Once past a certain point, this function won't
  * return.  It assumes (and checks) that it is current.  Don't give it an extra
- * refcnt'd *p (syscall won't do that). 
+ * refcnt'd *p (syscall won't do that).
  * Note: if someone batched syscalls with this call, they could clobber their
  * old memory (and will likely PF and die).  Don't do it... */
 static int sys_exec(struct proc *p, char *path, size_t path_l,
@@ -928,16 +973,8 @@ static int sys_exec(struct proc *p, char *path, size_t path_l,
        }
        /* Preemptively copy out the cur_ctx, in case we fail later (easier on
         * cur_ctx if we do this now) */
+       assert(pcpui->cur_proc == pcpui->owning_proc);
        copy_current_ctx_to(&p->scp_ctx);
-       /* Clear the current_ctx.  We won't be returning the 'normal' way.  Even if
-        * we want to return with an error, we need to go back differently in case
-        * we succeed.  This needs to be done before we could possibly block, but
-        * unfortunately happens before the point of no return.
-        *
-        * Note that we will 'hard block' if we block at all.  We can't return to
-        * userspace and then asynchronously finish the exec later. */
-       clear_owning_proc(core_id());
-
        /* Check the size of the argenv array, error out if too large. */
        if ((argenv_l < sizeof(struct argenv)) || (argenv_l > ARG_MAX)) {
                set_error(EINVAL, "The argenv array has an invalid size: %lu\n",
@@ -965,6 +1002,14 @@ static int sys_exec(struct proc *p, char *path, size_t path_l,
        /* This could block: */
        /* TODO: 9ns support */
        program = do_file_open(t_path, O_READ, 0);
+       /* Clear the current_ctx.  We won't be returning the 'normal' way.  Even if
+        * we want to return with an error, we need to go back differently in case
+        * we succeed.  This needs to be done before we could possibly block, but
+        * unfortunately happens before the point of no return.
+        *
+        * Note that we will 'hard block' if we block at all.  We can't return to
+        * userspace and then asynchronously finish the exec later. */
+       clear_owning_proc(core_id());
        if (!program)
                goto early_error;
        if (!is_valid_elf(program)) {
@@ -976,7 +1021,7 @@ static int sys_exec(struct proc *p, char *path, size_t path_l,
        proc_replace_binary_path(p, t_path);
        proc_set_progname(p, argc ? argv[0] : NULL);
        proc_init_procdata(p);
-       p->procinfo->heap_bottom = 0;
+       p->procinfo->program_end = 0;
        /* When we destroy our memory regions, accessing cur_sysc would PF */
        pcpui->cur_kthread->sysc = 0;
        unmap_and_destroy_vmrs(p);
@@ -1035,7 +1080,7 @@ all_out:
 static pid_t try_wait(struct proc *parent, struct proc *child, int *ret_status,
                       int options)
 {
-       if (child->state == PROC_DYING) {
+       if (proc_is_dying(child)) {
                /* Disown returns -1 if it's already been disowned or we should o/w
                 * abort.  This can happen if we have concurrent waiters, both with
                 * pointers to the child (only one should reap).  Note that if we don't
@@ -1097,7 +1142,7 @@ static pid_t wait_one(struct proc *parent, struct proc *child, int *ret_status,
                /* If we're dying, then we don't need to worry about waiting.  We don't
                 * do this yet, but we'll need this outlet when we deal with orphaned
                 * children and having init inherit them. */
-               if (parent->state == PROC_DYING)
+               if (proc_is_dying(parent))
                        goto out_unlock;
                /* Any child can wake us up, but we check for the particular child we
                 * care about */
@@ -1127,7 +1172,7 @@ static pid_t wait_any(struct proc *parent, int *ret_status, int options)
        while (!retval) {
                cpu_relax();
                cv_wait(&parent->child_wait);
-               if (parent->state == PROC_DYING)
+               if (proc_is_dying(parent))
                        goto out_unlock;
                /* Any child can wake us up from the CV.  This is a linear try_wait
                 * scan.  If we have a lot of children, we could optimize this. */
@@ -1158,6 +1203,7 @@ static pid_t sys_waitpid(struct proc *parent, pid_t pid, int *status,
        pid_t retval = 0;
        int ret_status = 0;
 
+       sysc_save_str("waitpid on %d", pid);
        /* -1 is the signal for 'any child' */
        if (pid == -1) {
                retval = wait_any(parent, &ret_status, options);
@@ -1309,6 +1355,19 @@ static int sys_self_notify(struct proc *p, uint32_t vcoreid,
        return 0;
 }
 
+static int sys_send_event(struct proc *p, struct event_queue *ev_q,
+                          struct event_msg *u_msg, uint32_t vcoreid)
+{
+       struct event_msg kmsg = {0};    /* kernel copy of the user's message */
+
+       if (!memcpy_from_user(p, &kmsg, u_msg, sizeof(kmsg))) {
+               send_event(p, ev_q, &kmsg, vcoreid);
+               return 0;
+       }
+       set_errno(EINVAL);      /* u_msg could not be copied in */
+       return -1;
+}
+
 /* Puts the calling core into vcore context, if it wasn't already, via a
  * self-IPI / active notification.  Barring any weird unmappings, we just send
  * ourselves a __notify. */
@@ -1336,7 +1395,7 @@ static int sys_vc_entry(struct proc *p)
  * is trying to halt.  The core need not abort the halt for notif_pending for
  * the vcore, only for a __notify or other RKM.  Anyone setting notif_pending
  * should then attempt to __notify (o/w it's probably a bug). */
-static int sys_halt_core(struct proc *p, unsigned int usec)
+static int sys_halt_core(struct proc *p, unsigned long usec)
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
        struct preempt_data *vcpd;
@@ -1430,17 +1489,82 @@ static int sys_pop_ctx(struct proc *p, struct user_context *ctx)
        return 0;
 }
 
-/* Initializes a process to run virtual machine contexts, returning the number
- * initialized, optionally setting errno */
-static int sys_vmm_setup(struct proc *p, unsigned int nr_guest_pcores,
-                         struct vmm_gpcore_init *gpcis, int flags)
+static int sys_vmm_add_gpcs(struct proc *p, unsigned int nr_more_gpcs,
+                            struct vmm_gpcore_init *gpcis)
 {
-       return vmm_struct_init(p, nr_guest_pcores, gpcis, flags);
+       ERRSTACK(1);
+       struct vmm *vmm = &p->vmm;
+
+       qlock(&vmm->qlock);     /* held across the init and the add below */
+       if (waserror()) {       /* error path: drop the lock and fail */
+               qunlock(&vmm->qlock);
+               poperror();
+               return -1;
+       }
+       __vmm_struct_init(p);
+       __vmm_add_gpcs(p, nr_more_gpcs, gpcis);
+       qunlock(&vmm->qlock);
+       poperror();
+       return nr_more_gpcs;    /* success: the requested count is returned */
+}
+
+static int sys_vmm_poke_guest(struct proc *p, int guest_pcoreid)
+{
+       return vmm_poke_guest(p, guest_pcoreid);  /* result passed through */
+}
+
+static int sys_vmm_ctl(struct proc *p, int cmd, unsigned long arg1,
+                       unsigned long arg2, unsigned long arg3,
+                       unsigned long arg4)
+{
+       ERRSTACK(1);
+       int ret;
+       struct vmm *vmm = &p->vmm;
+
+       /* Gets or sets the VMM's exits or flags, selected by cmd; only arg1 is
+        * used.  The qlock guards concurrent setters and non-atomic gets. */
+       qlock(&vmm->qlock);
+       if (waserror()) {       /* error path: drop the lock and fail */
+               qunlock(&vmm->qlock);
+               poperror();
+               return -1;
+       }
+       __vmm_struct_init(p);
+       switch (cmd) {
+       case VMM_CTL_GET_EXITS:
+               if (vmm->amd)
+                       error(ENOTSUP, "AMD VMMs unsupported");
+               ret = vmx_ctl_get_exits(&vmm->vmx);
+               break;
+       case VMM_CTL_SET_EXITS:
+               if (arg1 & ~VMM_CTL_ALL_EXITS)
+                       error(EINVAL, "Bad vmm_ctl_exits %lx (%lx)", arg1,
+                             (unsigned long)VMM_CTL_ALL_EXITS);
+               if (vmm->amd)
+                       error(ENOTSUP, "AMD VMMs unsupported");
+               ret = vmx_ctl_set_exits(&vmm->vmx, arg1);
+               break;
+       case VMM_CTL_GET_FLAGS:
+               ret = vmm->flags;
+               break;
+       case VMM_CTL_SET_FLAGS:
+               if (arg1 & ~VMM_CTL_ALL_FLAGS)
+                       error(EINVAL, "Bad vmm_ctl flags.  Got 0x%lx, allowed 0x%lx\n",
+                             arg1, (unsigned long)VMM_CTL_ALL_FLAGS);
+               vmm->flags = arg1;
+               ret = 0;
+               break;
+       default:
+               error(EINVAL, "Bad vmm_ctl cmd %d", cmd);
+       }
+       qunlock(&vmm->qlock);
+       poperror();
+       return ret;
 }
 
 /* Pokes the ksched for the given resource for target_pid.  If the target pid
  * == 0, we just poke for the calling process.  The common case is poking for
- * self, so we avoid the lookup. 
+ * self, so we avoid the lookup.
  *
  * Not sure if you could harm someone via asking the kernel to look at them, so
  * we'll do a 'controls' check for now.  In the future, we might have something
@@ -1491,7 +1615,6 @@ static unsigned long sys_populate_va(struct proc *p, uintptr_t va,
 static intreg_t sys_read(struct proc *p, int fd, void *buf, size_t len)
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
-       struct systrace_record *t = pcpui->cur_kthread->trace;
        ssize_t ret;
        struct file *file = get_file_from_fd(&p->open_files, fd);
        sysc_save_str("read on fd %d", fd);
@@ -1512,21 +1635,15 @@ static intreg_t sys_read(struct proc *p, int fd, void *buf, size_t len)
                /* plan9, should also handle errors (EBADF) */
                ret = sysread(fd, buf, len);
        }
-
-       if ((ret > 0) && t) {
-               t->datalen = MIN(sizeof(t->data), ret);
-               memcpy(t->data, buf, t->datalen);
-       }
-
        return ret;
 }
 
 static intreg_t sys_write(struct proc *p, int fd, const void *buf, size_t len)
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
-       struct systrace_record *t = pcpui->cur_kthread->trace;
        ssize_t ret;
        struct file *file = get_file_from_fd(&p->open_files, fd);
+
        sysc_save_str("write on fd %d", fd);
        /* VFS */
        if (file) {
@@ -1542,13 +1659,7 @@ static intreg_t sys_write(struct proc *p, int fd, const void *buf, size_t len)
                /* plan9, should also handle errors */
                ret = syswrite(fd, (void*)buf, len);
        }
-
-       if (t) {
-               t->datalen = MIN(sizeof(t->data), len);
-               memcpy(t->data, buf, t->datalen);
-       }
        return ret;
-
 }
 
 /* Checks args/reads in the path, opens the file (relative to fromfd if the path
@@ -1620,19 +1731,9 @@ static intreg_t sys_close(struct proc *p, int fd)
        }
        /* 9ns, should also handle errors (bad FD, etc) */
        retval = sysclose(fd);
-       if (retval < 0) {
-               /* no one checks their retvals.  a double close will cause problems. */
-               printk("[kernel] sys_close failed: proc %d fd %d.  Check your rets.\n",
-                      p->pid, fd);
-       }
        return retval;
 }
 
-/* kept around til we remove the last ufe */
-#define ufe(which,a0,a1,a2,a3) \
-       frontend_syscall_errno(p,APPSERVER_SYSCALL_##which,\
-                          (int)(a0),(int)(a1),(int)(a2),(int)(a3))
-
 static intreg_t sys_fstat(struct proc *p, int fd, struct kstat *u_stat)
 {
        struct kstat *kbuf;
@@ -1837,10 +1938,8 @@ static intreg_t sys_llseek(struct proc *p, int fd, off_t offset_hi,
                ret = file->f_op->llseek(file, tempoff, &retoff, whence);
                kref_put(&file->f_kref);
        } else {
-               /* won't return here if error ... */
-               ret = sysseek(fd, tempoff, whence);
-               retoff = ret;
-               ret = 0;
+               retoff = sysseek(fd, tempoff, whence);
+               ret = (retoff < 0);
        }
 
        if (ret)
@@ -1916,7 +2015,7 @@ intreg_t sys_readlink(struct proc *p, char *path, size_t path_l,
        path_d = lookup_dentry(t_path, 0);
        if (!path_d){
                int n = 2048;
-               buf = kmalloc(n*2, KMALLOC_WAIT);
+               buf = kmalloc(n*2, MEM_WAIT);
                struct dir *d = (void *)&buf[n];
                /* try 9ns. */
                if (sysstat(t_path, buf, n) > 0) {
@@ -2036,46 +2135,6 @@ intreg_t sys_rmdir(struct proc *p, const char *path, size_t path_l)
        return retval;
 }
 
-intreg_t sys_pipe(struct proc *p, int *u_pipefd, int flags)
-{
-       int pipefd[2] = {0};
-       int retval = syspipe(pipefd);
-
-       if (retval)
-               return -1;
-       if (memcpy_to_user_errno(p, u_pipefd, pipefd, sizeof(pipefd))) {
-               sysclose(pipefd[0]);
-               sysclose(pipefd[1]);
-               set_errno(EFAULT);
-               return -1;
-       }
-       return 0;
-}
-
-intreg_t sys_gettimeofday(struct proc *p, int *buf)
-{
-       static spinlock_t gtod_lock = SPINLOCK_INITIALIZER;
-       static int t0 = 0;
-
-       spin_lock(&gtod_lock);
-       if(t0 == 0)
-
-#if (defined CONFIG_APPSERVER)
-       t0 = ufe(time,0,0,0,0);
-#else
-       // Nanwan's birthday, bitches!!
-       t0 = 1242129600;
-#endif
-       spin_unlock(&gtod_lock);
-
-       long long dt = read_tsc();
-       /* TODO: This probably wants its own function, using a struct timeval */
-       long kbuf[2] = {t0+dt/system_timing.tsc_freq,
-           (dt%system_timing.tsc_freq)*1000000/system_timing.tsc_freq};
-
-       return memcpy_to_user_errno(p,buf,kbuf,sizeof(kbuf));
-}
-
 intreg_t sys_tcgetattr(struct proc *p, int fd, void *termios_p)
 {
        int retval = 0;
@@ -2269,7 +2328,7 @@ static int vfs_wstat(struct file *file, uint8_t *stat_m, size_t stat_sz,
        int m_sz;
        int retval = 0;
 
-       dir = kzmalloc(sizeof(struct dir) + stat_sz, KMALLOC_WAIT);
+       dir = kzmalloc(sizeof(struct dir) + stat_sz, MEM_WAIT);
        m_sz = convM2D(stat_m, stat_sz, &dir[0], (char*)&dir[1]);
        if (m_sz != stat_sz) {
                set_error(EINVAL, ERROR_FIXME);
@@ -2350,7 +2409,6 @@ intreg_t sys_rename(struct proc *p, char *old_path, size_t old_path_l,
                     char *new_path, size_t new_path_l)
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
-       struct systrace_record *t = pcpui->cur_kthread->trace;
        ERRSTACK(1);
        int mountpointlen = 0;
        char *from_path = copy_in_path(p, old_path, old_path_l);
@@ -2361,9 +2419,6 @@ intreg_t sys_rename(struct proc *p, char *old_path, size_t old_path_l,
        if ((!from_path) || (!to_path))
                return -1;
        printd("sys_rename :%s: to :%s: : ", from_path, to_path);
-       if (t) {
-               t->datalen = snprintf((char *)t->data, sizeof(t->data), "Rename :%s: to :%s:", from_path, to_path);
-       }
 
        /* we need a fid for the wstat. */
        /* TODO: maybe wrap the 9ns stuff better.  sysrename maybe? */
@@ -2408,7 +2463,7 @@ intreg_t sys_rename(struct proc *p, char *old_path, size_t old_path_l,
        printd("Newchan: %C\n", newchan);
        printd("Newchan: mchan %C\n", newchan->mchan);
 
-       if ((newchan->dev != oldchan->dev) || 
+       if ((newchan->dev != oldchan->dev) ||
                (newchan->type != oldchan->type)) {
                printd("Old chan and new chan do not match\n");
                set_errno(ENODEV);
@@ -2463,7 +2518,7 @@ intreg_t sys_rename(struct proc *p, char *old_path, size_t old_path_l,
        };
        printk("syswstat returns %d\n", retval);
 
-done: 
+done:
        free_path(p, from_path);
        free_path(p, to_path);
        cclose(oldchan);
@@ -2550,18 +2605,14 @@ static intreg_t sys_tap_fds(struct proc *p, struct fd_tap_req *tap_reqs,
 const struct sys_table_entry syscall_table[] = {
        [SYS_null] = {(syscall_t)sys_null, "null"},
        [SYS_block] = {(syscall_t)sys_block, "block"},
-       [SYS_cache_buster] = {(syscall_t)sys_cache_buster, "buster"},
        [SYS_cache_invalidate] = {(syscall_t)sys_cache_invalidate, "wbinv"},
        [SYS_reboot] = {(syscall_t)reboot, "reboot!"},
-       [SYS_cputs] = {(syscall_t)sys_cputs, "cputs"},
-       [SYS_cgetc] = {(syscall_t)sys_cgetc, "cgetc"},
        [SYS_getpcoreid] = {(syscall_t)sys_getpcoreid, "getpcoreid"},
        [SYS_getvcoreid] = {(syscall_t)sys_getvcoreid, "getvcoreid"},
-       [SYS_getpid] = {(syscall_t)sys_getpid, "getpid"},
        [SYS_proc_create] = {(syscall_t)sys_proc_create, "proc_create"},
        [SYS_proc_run] = {(syscall_t)sys_proc_run, "proc_run"},
        [SYS_proc_destroy] = {(syscall_t)sys_proc_destroy, "proc_destroy"},
-       [SYS_yield] = {(syscall_t)sys_proc_yield, "proc_yield"},
+       [SYS_proc_yield] = {(syscall_t)sys_proc_yield, "proc_yield"},
        [SYS_change_vcore] = {(syscall_t)sys_change_vcore, "change_vcore"},
        [SYS_fork] = {(syscall_t)sys_fork, "fork"},
        [SYS_exec] = {(syscall_t)sys_exec, "exec"},
@@ -2574,13 +2625,16 @@ const struct sys_table_entry syscall_table[] = {
        [SYS_provision] = {(syscall_t)sys_provision, "provision"},
        [SYS_notify] = {(syscall_t)sys_notify, "notify"},
        [SYS_self_notify] = {(syscall_t)sys_self_notify, "self_notify"},
+       [SYS_send_event] = {(syscall_t)sys_send_event, "send_event"},
        [SYS_vc_entry] = {(syscall_t)sys_vc_entry, "vc_entry"},
        [SYS_halt_core] = {(syscall_t)sys_halt_core, "halt_core"},
 #ifdef CONFIG_ARSC_SERVER
        [SYS_init_arsc] = {(syscall_t)sys_init_arsc, "init_arsc"},
 #endif
        [SYS_change_to_m] = {(syscall_t)sys_change_to_m, "change_to_m"},
-       [SYS_vmm_setup] = {(syscall_t)sys_vmm_setup, "vmm_setup"},
+       [SYS_vmm_add_gpcs] = {(syscall_t)sys_vmm_add_gpcs, "vmm_add_gpcs"},
+       [SYS_vmm_poke_guest] = {(syscall_t)sys_vmm_poke_guest, "vmm_poke_guest"},
+       [SYS_vmm_ctl] = {(syscall_t)sys_vmm_ctl, "vmm_ctl"},
        [SYS_poke_ksched] = {(syscall_t)sys_poke_ksched, "poke_ksched"},
        [SYS_abort_sysc] = {(syscall_t)sys_abort_sysc, "abort_sysc"},
        [SYS_abort_sysc_fd] = {(syscall_t)sys_abort_sysc_fd, "abort_sysc_fd"},
@@ -2608,8 +2662,6 @@ const struct sys_table_entry syscall_table[] = {
        [SYS_getcwd] = {(syscall_t)sys_getcwd, "getcwd"},
        [SYS_mkdir] = {(syscall_t)sys_mkdir, "mkdir"},
        [SYS_rmdir] = {(syscall_t)sys_rmdir, "rmdir"},
-       [SYS_pipe] = {(syscall_t)sys_pipe, "pipe"},
-       [SYS_gettimeofday] = {(syscall_t)sys_gettimeofday, "gettime"},
        [SYS_tcgetattr] = {(syscall_t)sys_tcgetattr, "tcgetattr"},
        [SYS_tcsetattr] = {(syscall_t)sys_tcsetattr, "tcsetattr"},
        [SYS_setuid] = {(syscall_t)sys_setuid, "setuid"},
@@ -2689,6 +2741,7 @@ void run_local_syscall(struct syscall *sysc)
        }
        pcpui->cur_kthread->sysc = sysc;        /* let the core know which sysc it is */
        systrace_start_trace(pcpui->cur_kthread, sysc);
+       pcpui = &per_cpu_info[core_id()];       /* reload again */
        alloc_sysc_str(pcpui->cur_kthread);
        /* syscall() does not return for exec and yield, so put any cleanup in there
         * too. */
@@ -2698,6 +2751,7 @@ void run_local_syscall(struct syscall *sysc)
        pcpui = &per_cpu_info[core_id()];
        free_sysc_str(pcpui->cur_kthread);
        systrace_finish_trace(pcpui->cur_kthread, sysc->retval);
+       pcpui = &per_cpu_info[core_id()];       /* reload again */
        /* Some 9ns paths set errstr, but not errno.  glibc will ignore errstr.
         * this is somewhat hacky, since errno might get set unnecessarily */
        if ((current_errstr()[0] != 0) && (!sysc->err))
@@ -2711,7 +2765,6 @@ void run_local_syscall(struct syscall *sysc)
  * at least one, it will run it directly. */
 void prep_syscalls(struct proc *p, struct syscall *sysc, unsigned int nr_syscs)
 {
-       int retval;
        /* Careful with pcpui here, we could have migrated */
        if (!nr_syscs) {
                printk("[kernel] No nr_sysc, probably a bug, user!\n");
@@ -2747,110 +2800,6 @@ void __signal_syscall(struct syscall *sysc, struct proc *p)
        }
 }
 
-/* Syscall tracing */
-static void __init_systrace(void)
-{
-       systrace_buffer = kmalloc(MAX_SYSTRACES*sizeof(struct systrace_record), 0);
-       if (!systrace_buffer)
-               panic("Unable to alloc a trace buffer\n");
-       systrace_bufidx = 0;
-       systrace_bufsize = MAX_SYSTRACES;
-       /* Note we never free the buffer - it's around forever.  Feel free to change
-        * this if you want to change the size or something dynamically. */
-}
-
-/* If you call this while it is running, it will change the mode */
-void systrace_start(bool silent)
-{
-       static bool init = FALSE;
-       spin_lock_irqsave(&systrace_lock);
-       if (!init) {
-               __init_systrace();
-               init = TRUE;
-       }
-       systrace_flags = silent ? SYSTRACE_ON : SYSTRACE_ON | SYSTRACE_LOUD;
-       spin_unlock_irqsave(&systrace_lock);
-}
-
-int systrace_reg(bool all, struct proc *p)
-{
-       spin_lock_irqsave(&systrace_lock);
-       if (all) {
-               printk("Tracing syscalls for all processes\n");
-               systrace_flags |= SYSTRACE_ALLPROC;
-       } else {
-               set_traced_proc(p, TRUE);
-
-               printk("Tracing syscalls for process %d\n", p->pid);
-       }
-       spin_unlock_irqsave(&systrace_lock);
-       return 0;
-}
-
-int systrace_trace_pid(struct proc *p)
-{
-       if (systrace_reg(false, p))
-               error(EFAIL, "no more processes");
-       systrace_start(true);
-       return 0;
-}
-
-void systrace_stop(void)
-{
-       spin_lock_irqsave(&systrace_lock);
-       systrace_flags = 0;
-       spin_unlock_irqsave(&systrace_lock);
-}
-
-/* If you registered a process specifically, then you need to dereg it
- * specifically.  Or just fully stop, which will do it for all. */
-int systrace_dereg(bool all, struct proc *p)
-{
-       spin_lock_irqsave(&systrace_lock);
-       if (all) {
-               printk("No longer tracing syscalls for all processes.\n");
-               systrace_flags &= ~SYSTRACE_ALLPROC;
-       } else {
-               set_traced_proc(p, FALSE);
-
-               printk("No longer tracing syscalls for process %d\n", p->pid);
-       }
-       spin_unlock_irqsave(&systrace_lock);
-       return 0;
-}
-
-/* Regardless of locking, someone could be writing into the buffer */
-void systrace_print(bool all, struct proc *p)
-{
-       spin_lock_irqsave(&systrace_lock);
-       /* if you want to be clever, you could make this start from the earliest
-        * timestamp and loop around.  Careful of concurrent writes. */
-       for (int i = 0; i < systrace_bufsize; i++)
-               if (systrace_buffer[i].start_timestamp)
-                       printk("[%16llu] Syscall %3d (%12s):(%p, %p, %p, %p, %p,"
-                              "%p) proc: %d core: %d vcore: %d\n",
-                              systrace_buffer[i].start_timestamp,
-                              systrace_buffer[i].syscallno,
-                              syscall_table[systrace_buffer[i].syscallno].name,
-                              systrace_buffer[i].arg0,
-                              systrace_buffer[i].arg1,
-                              systrace_buffer[i].arg2,
-                              systrace_buffer[i].arg3,
-                              systrace_buffer[i].arg4,
-                              systrace_buffer[i].arg5,
-                              systrace_buffer[i].pid,
-                              systrace_buffer[i].coreid,
-                              systrace_buffer[i].vcoreid);
-       spin_unlock_irqsave(&systrace_lock);
-}
-
-void systrace_clear_buffer(void)
-{
-       spin_lock_irqsave(&systrace_lock);
-       memset(systrace_buffer, 0, sizeof(struct systrace_record) * MAX_SYSTRACES);
-       spin_unlock_irqsave(&systrace_lock);
-}
-
 bool syscall_uses_fd(struct syscall *sysc, int fd)
 {
        switch (sysc->num) {