Integrate 'sofar' into sized_allocs
[akaros.git] / kern / drivers / dev / proc.c
index 99c08cd..c6479fc 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * This file is part of the UCB release of Plan 9. It is subject to the license
  * terms in the LICENSE file found in the top-level directory of this
  * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
@@ -17,8 +17,6 @@
  * rather than excise code that won't work, I'm bracketing it with
  * #if 0 until we know we don't want it
  */
-#include <vfs.h>
-#include <kfs.h>
 #include <slab.h>
 #include <kmalloc.h>
 #include <kref.h>
 #include <cpio.h>
 #include <pmap.h>
 #include <smp.h>
+#include <umem.h>
+#include <arch/vmm/vmm.h>
+#include <ros/vmm.h>
+
+struct dev procdevtab;
+
+/* Returns the name field of this driver's dev table entry (procdevtab). */
+static char *devname(void)
+{
+       char *name = procdevtab.name;
+
+       return name;
+}
 
 enum {
        Qdir,
        Qtrace,
        Qtracepids,
+       Qself,
        Qns,
        Qargs,
        Qctl,
        Qfd,
        Qfpregs,
        Qkregs,
+       Qmaps,
        Qmem,
        Qnote,
        Qnoteid,
        Qnotepg,
        Qproc,
        Qregs,
+       Quser,
        Qsegment,
        Qstatus,
+       Qstrace,
+       Qstrace_traceset,
+       Qvmstatus,
        Qtext,
        Qwait,
        Qprofile,
@@ -58,22 +72,10 @@ enum {
 enum {
        CMclose,
        CMclosefiles,
-       CMfixedpri,
        CMhang,
-       CMkill,
-       CMnohang,
-       CMnoswap,
-       CMpri,
-       CMprivate,
-       CMprofile,
-       CMstart,
-       CMstartstop,
-       CMstartsyscall,
-       CMstop,
-       CMwaitstop,
-       CMwired,
-       CMtrace,
-       CMcore,
+       CMstraceme,     /* paired with "straceme" in proccmd[] */
+       CMstraceall,    /* paired with "straceall" in proccmd[] */
+       CMstrace_drop,  /* paired with "strace_drop" in proccmd[] */
 };
 
 enum {
@@ -94,6 +96,7 @@ struct dirtab procdir[] = {
        {"fd", {Qfd}, 0, 0444},
        {"fpregs", {Qfpregs}, 0, 0000},
        //  {"kregs",   {Qkregs},   sizeof(Ureg),       0600},
+       {"maps", {Qmaps}, 0, 0000},
        {"mem", {Qmem}, 0, 0000},
        {"note", {Qnote}, 0, 0000},
        {"noteid", {Qnoteid}, 0, 0664},
@@ -101,8 +104,12 @@ struct dirtab procdir[] = {
        {"ns", {Qns}, 0, 0444},
        {"proc", {Qproc}, 0, 0400},
        //  {"regs",        {Qregs},    sizeof(Ureg),       0000},
+       {"user", {Quser}, 0, 0444},
        {"segment", {Qsegment}, 0, 0444},
        {"status", {Qstatus}, STATSIZE, 0444},
+       {"strace", {Qstrace}, 0, 0444},
+       {"strace_traceset", {Qstrace_traceset}, 0, 0666},
+       {"vmstatus", {Qvmstatus}, 0, 0444},
        {"text", {Qtext}, 0, 0000},
        {"wait", {Qwait}, 0, 0400},
        {"profile", {Qprofile}, 0, 0400},
@@ -113,23 +120,11 @@ struct dirtab procdir[] = {
 static
 struct cmdtab proccmd[] = {
        {CMclose, "close", 2},
-       {CMclosefiles, "closefiles", 1},
-       {CMfixedpri, "fixedpri", 2},
-       {CMhang, "hang", 1},
-       {CMnohang, "nohang", 1},
-       {CMnoswap, "noswap", 1},
-       {CMkill, "kill", 1},
-       {CMpri, "pri", 2},
-       {CMprivate, "private", 1},
-       {CMprofile, "profile", 1},
-       {CMstart, "start", 1},
-       {CMstartstop, "startstop", 1},
-       {CMstartsyscall, "startsyscall", 1},
-       {CMstop, "stop", 1},
-       {CMwaitstop, "waitstop", 1},
-       {CMwired, "wired", 2},
-       {CMtrace, "trace", 0},
-       {CMcore, "core", 2},
+       {CMclosefiles, "closefiles", 0}, /* NOTE(review): third field is presumably the expected arg count, 0 meaning "not checked" - confirm against struct cmdtab */
+       {CMhang, "hang", 0},
+       {CMstraceme, "straceme", 0},
+       {CMstraceall, "straceall", 0},
+       {CMstrace_drop, "strace_drop", 2},
 };
 
 /*
@@ -194,24 +189,31 @@ procgen(struct chan *c, char *name, struct dirtab *tab, int unused, int s,
        uint32_t path, perm, len;
        if (s == DEVDOTDOT) {
                mkqid(&qid, Qdir, 0, QTDIR);
-               devdir(c, qid, "#p", 0, eve, 0555, dp);
+               devdir(c, qid, devname(), 0, eve.name, 0555, dp);
                return 1;
        }
 
        if (c->qid.path == Qdir) {
                if (s == 0) {
-                       strncpy(get_cur_genbuf(), "trace", GENBUF_SZ);
+                       strlcpy(get_cur_genbuf(), "trace", GENBUF_SZ);
                        mkqid(&qid, Qtrace, -1, QTFILE);
-                       devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
+                       devdir(c, qid, get_cur_genbuf(), 0, eve.name, 0444, dp);
                        return 1;
                }
                if (s == 1) {
-                       strncpy(get_cur_genbuf(), "tracepids", GENBUF_SZ);
+                       strlcpy(get_cur_genbuf(), "tracepids", GENBUF_SZ);
                        mkqid(&qid, Qtracepids, -1, QTFILE);
-                       devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
+                       devdir(c, qid, get_cur_genbuf(), 0, eve.name, 0444, dp);
                        return 1;
                }
-               s -= 2;
+               if (s == 2) {
+                       p = current;
+                       strlcpy(get_cur_genbuf(), "self", GENBUF_SZ);
+                       mkqid(&qid, (p->pid + 1) << QSHIFT, p->pid, QTDIR);
+                       devdir(c, qid, get_cur_genbuf(), 0, p->user.name, DMDIR | 0555, dp);
+                       return 1;
+               }
+               s -= 3;
                if (name != NULL) {
                        /* ignore s and use name to find pid */
                        pid = strtol(name, &ename, 10);
@@ -239,24 +241,24 @@ procgen(struct chan *c, char *name, struct dirtab *tab, int unused, int s,
                 */
                if (name != NULL && strcmp(name, get_cur_genbuf()) != 0) {
                        printk("pid-name mismatch, name: %s, pid %d\n", name, pid);
-                       kref_put(&p->p_kref);
+                       proc_decref(p);
                        return -1;
                }
                mkqid(&qid, (s + 1) << QSHIFT, pid, QTDIR);
-               devdir(c, qid, get_cur_genbuf(), 0, p->user, DMDIR | 0555, dp);
-               kref_put(&p->p_kref);
+               devdir(c, qid, get_cur_genbuf(), 0, p->user.name, DMDIR | 0555, dp);
+               proc_decref(p);
                return 1;
        }
        if (c->qid.path == Qtrace) {
-               strncpy(get_cur_genbuf(), "trace", GENBUF_SZ);
+               strlcpy(get_cur_genbuf(), "trace", GENBUF_SZ);
                mkqid(&qid, Qtrace, -1, QTFILE);
-               devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
+               devdir(c, qid, get_cur_genbuf(), 0, eve.name, 0444, dp);
                return 1;
        }
        if (c->qid.path == Qtracepids) {
-               strncpy(get_cur_genbuf(), "tracepids", GENBUF_SZ);
+               strlcpy(get_cur_genbuf(), "tracepids", GENBUF_SZ);
                mkqid(&qid, Qtracepids, -1, QTFILE);
-               devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
+               devdir(c, qid, get_cur_genbuf(), 0, eve.name, 0444, dp);
                return 1;
        }
        if (s >= ARRAY_SIZE(procdir))
@@ -295,8 +297,8 @@ procgen(struct chan *c, char *name, struct dirtab *tab, int unused, int s,
 #endif
 
        mkqid(&qid, path | tab->qid.path, c->qid.vers, QTFILE);
-       devdir(c, qid, tab->name, len, p->user, perm, dp);
-       kref_put(&p->p_kref);
+       devdir(c, qid, tab->name, len, p->user.name, perm, dp);
+       proc_decref(p);
        return 1;
 }
 
@@ -352,16 +354,16 @@ static void procinit(void)
 
 static struct chan *procattach(char *spec)
 {
-       return devattach('p', spec);
+       return devattach(devname(), spec); /* name string comes from procdevtab.name */
 }
 
 static struct walkqid *procwalk(struct chan *c, struct chan *nc, char **name,
-                                                               int nname)
+                                                               unsigned int nname) /* forwarded unchanged to devwalk() */
 {
        return devwalk(c, nc, name, nname, 0, 0, procgen);
 }
 
-static int procstat(struct chan *c, uint8_t * db, int n)
+static size_t procstat(struct chan *c, uint8_t *db, size_t n) /* defers to devstat(), passing procgen */
 {
        return devstat(c, db, n, 0, 0, procgen);
 }
@@ -378,14 +380,74 @@ static void nonone(struct proc *p)
 #if 0
        if (p == up)
                return;
-       if (strcmp(current->user, "none") != 0)
+       if (strcmp(current->user.name, "none") != 0)
                return;
        if (iseve())
                return;
-       error(Eperm);
+       error(EPERM, ERROR_FIXME);
 #endif
 }
 
+struct bm_helper {
+       struct sized_alloc                      *sza; /* buffer build_maps_cb() prints into */
+       size_t                                          buflen; /* size estimate build_maps() hands to sized_kzmalloc() */
+};
+
+/* enumerate_vmrs() callback for build_maps()'s sizing pass: every VMR visited
+ * grows the helper's buflen by a rough per-line allowance.  @vmr itself is not
+ * examined; @arg is the struct bm_helper being filled in. */
+static void get_needed_sz_cb(struct vm_region *vmr, void *arg)
+{
+       struct bm_helper *helper = arg;
+
+       /* ballpark estimate of a line */
+       helper->buflen = helper->buflen + 150;
+}
+
+/* enumerate_vmrs() callback for build_maps()'s formatting pass: appends one
+ * line describing @vmr to the sized_alloc held by the struct bm_helper passed
+ * as @arg.  The line carries the address range, r/w/x and private/shared
+ * flags, file offset, major:minor, inode number, and finally the backing path
+ * ("[heap]" for a VMR without a file), padded so the path starts at a fixed
+ * column. */
+static void build_maps_cb(struct vm_region *vmr, void *arg)
+{
+       struct bm_helper *bmh = (struct bm_helper*)arg;
+       struct sized_alloc *sza = bmh->sza;
+       size_t old_sofar;
+       int pad;
+       char path_buf[MAX_FILENAME_SZ];
+       char *path;
+       unsigned long inode_nr;
+
+       if (vmr_has_file(vmr)) {
+               path = foc_abs_path(vmr->__vm_foc, path_buf, sizeof(path_buf));
+               inode_nr = 0; /* TODO: do we care about this? */
+       } else {
+               strlcpy(path_buf, "[heap]", sizeof(path_buf));
+               path = path_buf;
+               inode_nr = 0;
+       }
+
+       /* Remember where this line starts so the prefix length can be measured */
+       old_sofar = sza->sofar;
+       /* inode_nr is an unsigned long, so it is printed with %lu (not %d).
+        * NOTE(review): %08x assumes the offset argument is int-sized; confirm
+        * the type of vm_foff and widen the conversion if needed. */
+       sza_printf(sza, "%08lx-%08lx %c%c%c%c %08x %02d:%02d %lu ",
+                       vmr->vm_base, vmr->vm_end,
+                       vmr->vm_prot & PROT_READ    ? 'r' : '-',
+                       vmr->vm_prot & PROT_WRITE   ? 'w' : '-',
+                       vmr->vm_prot & PROT_EXEC    ? 'x' : '-',
+                       vmr->vm_flags & MAP_PRIVATE ? 'p' : 's',
+                       vmr_has_file(vmr) ? vmr->vm_foff : 0,
+                       vmr_has_file(vmr) ? 1 : 0,      /* VFS == 1 for major */
+                       0,
+                       inode_nr);
+       /* Align the filename to the 74th char, like Linux (73 chars so far).
+        * A '*' field width must be an int, while the sofar difference is
+        * size_t-wide: do the subtraction as an int and clamp it, so a prefix
+        * longer than 73 chars simply gets no padding. */
+       pad = 73 - (int)(sza->sofar - old_sofar);
+       if (pad < 0)
+               pad = 0;
+       sza_printf(sza, "%*s", pad, "");
+       sza_printf(sza, "%s\n", path);
+}
+
+/* Builds the text for @p's maps file in two passes over its VMRs: the first
+ * sizes a buffer, the second formats one line per VMR into it.  Returns the
+ * resulting sized_alloc. */
+static struct sized_alloc *build_maps(struct proc *p)
+{
+       /* Sizing guess: start with 150 bytes of slack; get_needed_sz_cb() adds a
+        * bit more for each VMR. */
+       struct bm_helper helper = { .buflen = 150 };
+
+       enumerate_vmrs(p, get_needed_sz_cb, &helper);
+       helper.sza = sized_kzmalloc(helper.buflen, MEM_WAIT);
+       enumerate_vmrs(p, build_maps_cb, &helper);
+       return helper.sza;
+}
+
 static struct chan *procopen(struct chan *c, int omode)
 {
        ERRSTACK(2);
@@ -398,28 +460,28 @@ static struct chan *procopen(struct chan *c, int omode)
                return devopen(c, omode, 0, 0, procgen);
 
        if (QID(c->qid) == Qtrace) {
-               error("proc: Qtrace: not yet");
+               error(ENOSYS, ERROR_FIXME);
 #if 0
                if (omode != OREAD)
-                       error(Eperm);
+                       error(EPERM, ERROR_FIXME);
                lock(&tlock);
                if (waserror()) {
                        unlock(&tlock);
                        nexterror();
                }
                if (topens > 0)
-                       error("already open");
+                       error(EFAIL, "already open");
                topens++;
                if (tevents == NULL) {
                        tevents = (Traceevent *) kzmalloc(sizeof(Traceevent) * Nevents,
-                                                                                         KMALLOC_WAIT);
+                                                                                         MEM_WAIT);
                        if (tevents == NULL)
-                               error(Enomem);
-                       tpids = kzmalloc(Ntracedpids * 20, KMALLOC_WAIT);
+                               error(ENOMEM, ERROR_FIXME);
+                       tpids = kzmalloc(Ntracedpids * 20, MEM_WAIT);
                        if (tpids == NULL) {
                                kfree(tpids);
                                tpids = NULL;
-                               error(Enomem);
+                               error(ENOMEM, ERROR_FIXME);
                        }
                        tpidsc = tpids;
                        tpidse = tpids + Ntracedpids * 20;
@@ -437,10 +499,10 @@ static struct chan *procopen(struct chan *c, int omode)
 #endif
        }
        if (QID(c->qid) == Qtracepids) {
-               error("Proc: Qtracepids: not yet");
+               error(ENOSYS, ERROR_FIXME);
 #if 0
                if (omode != OREAD)
-                       error(Eperm);
+                       error(EPERM, ERROR_FIXME);
                c->mode = openmode(omode);
                c->flag |= COPEN;
                c->offset = 0;
@@ -448,30 +510,30 @@ static struct chan *procopen(struct chan *c, int omode)
 #endif
        }
        if ((p = pid2proc(SLOT(c->qid))) == NULL)
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
        //qlock(&p->debug);
        if (waserror()) {
                //qunlock(&p->debug);
-               kref_put(&p->p_kref);
+               proc_decref(p);
                nexterror();
        }
        pid = PID(c->qid);
        if (p->pid != pid)
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
 
        omode = openmode(omode);
 
        switch (QID(c->qid)) {
                case Qtext:
-                       error("notyet");
+                       error(ENOSYS, ERROR_FIXME);
 /*
                        if (omode != OREAD)
-                               error(Eperm);
+                               error(EPERM, ERROR_FIXME);
                        tc = proctext(c, p);
                        tc->offset = 0;
                        poperror();
                        qunlock(&p->debug);
-                       kref_put(&p->p_kref);
+                       proc_decref(p);
                        cclose(c);
                        return tc;
 */
@@ -479,18 +541,18 @@ static struct chan *procopen(struct chan *c, int omode)
                case Qsegment:
                case Qprofile:
                case Qfd:
-                       if (omode != OREAD)
-                               error(Eperm);
+                       if (omode != O_READ)
+                               error(EPERM, ERROR_FIXME);
                        break;
 
                case Qnote:
 //          if (p->privatemem)
-                       error(Eperm);
+                       error(EPERM, ERROR_FIXME);
                        break;
 
                case Qmem:
 //          if (p->privatemem)
-                       error(Eperm);
+                       error(EPERM, ERROR_FIXME);
                        //nonone(p);
                        break;
 
@@ -506,33 +568,62 @@ static struct chan *procopen(struct chan *c, int omode)
                        break;
 
                case Qns:
-                       if (omode != OREAD)
-                               error(Eperm);
-                       c->aux = kzmalloc(sizeof(struct mntwalk), KMALLOC_WAIT);
+                       if (omode != O_READ)
+                               error(EPERM, ERROR_FIXME);
+                       c->aux = kzmalloc(sizeof(struct mntwalk), MEM_WAIT);
                        break;
+               case Quser:
                case Qstatus:
+               case Qvmstatus:
                case Qctl:
                        break;
+
+               case Qstrace:
+                       if (!p->strace)
+                               error(ENOENT, "Process does not have tracing enabled");
+                       spin_lock(&p->strace->lock);
+                       if (p->strace->tracing) {
+                               spin_unlock(&p->strace->lock);
+                               error(EBUSY, "Process is already being traced");
+                       }
+                       /* It's not critical that we reopen before setting tracing, but it's
+                        * a little cleaner (concurrent syscalls could be trying to use the
+                        * queue before it was reopened, and they'd throw). */
+                       qreopen(p->strace->q);
+                       p->strace->tracing = TRUE;
+                       spin_unlock(&p->strace->lock);
+                       /* the ref we are upping is the one we put in __proc_free, which is
+                        * the one we got from CMstrace{on,me}.  We have a ref on p, so we
+                        * know we won't free until we decref the proc. */
+                       kref_get(&p->strace->users, 1);
+                       c->aux = p->strace;
+                       break;
+               case Qstrace_traceset:
+                       if (!p->strace)
+                               error(ENOENT, "Process does not have tracing enabled");
+                       kref_get(&p->strace->users, 1);
+                       c->aux = p->strace;
+                       break;
+               case Qmaps:
+                       c->aux = build_maps(p);
+                       break;
                case Qnotepg:
-                       error("not yet");
+                       error(ENOSYS, ERROR_FIXME);
 #if 0
                        nonone(p);
                        pg = p->pgrp;
                        if (pg == NULL)
-                               error(Eprocdied);
+                               error(ESRCH, ERROR_FIXME);
                        if (omode != OWRITE || pg->pgrpid == 1)
-                               error(Eperm);
+                               error(EPERM, ERROR_FIXME);
                        c->pgrpid.path = pg->pgrpid + 1;
                        c->pgrpid.vers = p->noteid;
 #endif
                        break;
 
                default:
-                       poperror();
-                       //qunlock(&p->debug);
-                       kref_put(&p->p_kref);
                        printk("procopen %#llux\n", c->qid.path);
-                       error(Egreg);
+                       error(EINVAL, ERROR_FIXME);
        }
 
        /* Affix pid to qid */
@@ -543,63 +634,63 @@ static struct chan *procopen(struct chan *c, int omode)
        /* TODO: think about what we really want here.  In akaros, we wouldn't have
         * our pid changed like that. */
        if (p->pid != pid)
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
 
        tc = devopen(c, omode, 0, 0, procgen);
        poperror();
        //qunlock(&p->debug);
-       kref_put(&p->p_kref);
+       proc_decref(p);
        return tc;
 }
 
-static int procwstat(struct chan *c, uint8_t * db, int n)
+static size_t procwstat(struct chan *c, uint8_t *db, size_t n)
 {
        ERRSTACK(2);
-       error("procwwstat: not yet");
+       error(ENOSYS, ERROR_FIXME);
 #if 0
        struct proc *p;
        struct dir *d;
 
        if (c->qid.type & QTDIR)
-               error(Eperm);
+               error(EPERM, ERROR_FIXME);
 
        if (QID(c->qid) == Qtrace)
                return devwstat(c, db, n);
 
        if ((p = pid2proc(SLOT(c->qid))) == NULL)
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
        nonone(p);
        d = NULL;
        qlock(&p->debug);
        if (waserror()) {
                qunlock(&p->debug);
-               kref_put(&p->p_kref);
+               proc_decref(p);
                kfree(d);
                nexterror();
        }
 
        if (p->pid != PID(c->qid))
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
 
-       if (strcmp(current->user, p->user) != 0 && strcmp(current->user, eve) != 0)
-               error(Eperm);
+       if (strcmp(current->user.name, p->user.name) != 0 && !iseve())
+               error(EPERM, ERROR_FIXME);
 
-       d = kzmalloc(sizeof(struct dir) + n, KMALLOC_WAIT);
+       d = kzmalloc(sizeof(struct dir) + n, MEM_WAIT);
        n = convM2D(db, n, &d[0], (char *)&d[1]);
        if (n == 0)
-               error(Eshortstat);
-       if (!emptystr(d->uid) && strcmp(d->uid, p->user) != 0) {
-               if (strcmp(current->user, eve) != 0)
-                       error(Eperm);
+               error(ENOENT, ERROR_FIXME);
+       if (!emptystr(d->uid) && strcmp(d->uid, p->user.name) != 0) {
+               if (!iseve())
+                       error(EPERM, ERROR_FIXME);
                else
-                       kstrdup(&p->user, d->uid);
+                       proc_set_username(p, d->uid);
        }
-       if (d->mode != ~0UL)
+       if (d->mode != -1)
                p->procmode = d->mode & 0777;
 
        poperror();
        qunlock(&p->debug);
-       kref_put(&p->p_kref);
+       proc_decref(p);
        kfree(d);
 
        return n;
@@ -710,8 +801,31 @@ static void procclose(struct chan *c)
                 */
                spin_unlock(&tlock);
        }
+       if (QID(c->qid) == Qsyscall) {
+               if (c->aux)
+                       qclose(c->aux);
+               c->aux = NULL;
+       }
        if (QID(c->qid) == Qns && c->aux != 0)
                kfree(c->aux);
+       if (QID(c->qid) == Qmaps && c->aux != 0)
+               kfree(c->aux);
+       if (QID(c->qid) == Qstrace && c->aux != 0) {
+               struct strace *s = c->aux;
+
+               assert(c->flag & COPEN);        /* only way aux should have been set */
+               s->tracing = FALSE;
+               qhangup(s->q, NULL);
+               kref_put(&s->users);
+               c->aux = NULL;
+       }
+       if (QID(c->qid) == Qstrace_traceset && c->aux != 0) {
+               struct strace *s = c->aux;
+
+               assert(c->flag & COPEN);
+               kref_put(&s->users);
+               c->aux = NULL;
+       }
 }
 
 void int2flag(int flag, char *s)
@@ -742,11 +856,11 @@ static char *argcpy(char *s, char *p)
        if (n > 128)
                n = 128;
        if (n <= 0) {
-               t = kzmalloc(1, KMALLOC_WAIT);
+               t = kzmalloc(1, MEM_WAIT);
                *t = 0;
                return t;
        }
-       t = kzmalloc(n, KMALLOC_WAIT);
+       t = kzmalloc(n, MEM_WAIT);
        tp = t;
        te = t + n;
 
@@ -780,17 +894,20 @@ static int eventsavailable(void *)
        return tproduced > tconsumed;
 }
 #endif
-static long procread(struct chan *c, void *va, long n, int64_t off)
+
+static size_t procread(struct chan *c, void *va, size_t n, off64_t off)
 {
-       ERRSTACK(5);
+       ERRSTACK(1);
        struct proc *p;
        long l, r;
        int i, j, navail, pid, rsize;
-       char flag[10], *sps, *srv, statbuf[512];
+       char flag[10], *sps, *srv;
        uintptr_t offset, u;
        int tesz;
        uint8_t *rptr;
        struct mntwalk *mw;
+       struct strace *s;
+       struct sized_alloc *sza;
 
        if (c->qid.type & QTDIR) {
                int nn;
@@ -801,339 +918,94 @@ static long procread(struct chan *c, void *va, long n, int64_t off)
        }
 
        offset = off;
-#if 0
-       if (QID(c->qid) == Qtrace) {
-               if (!eventsavailable(NULL))
-                       return 0;
-
-               rptr = va;
-               tesz = BIT32SZ + BIT32SZ + BIT64SZ + BIT32SZ;
-               navail = tproduced - tconsumed;
-               if (navail > n / tesz)
-                       navail = n / tesz;
-               while (navail > 0) {
-                       PBIT32(rptr, tevents[tconsumed & Emask].pid);
-                       rptr += BIT32SZ;
-                       PBIT32(rptr, tevents[tconsumed & Emask].etype);
-                       rptr += BIT32SZ;
-                       PBIT64(rptr, tevents[tconsumed & Emask].time);
-                       rptr += BIT64SZ;
-                       PBIT32(rptr, tevents[tconsumed & Emask].core);
-                       rptr += BIT32SZ;
-                       tconsumed++;
-                       navail--;
-               }
-               return rptr - (uint8_t *) va;
+       /* Some files in proc don't need to grab the proc reference.  For strace,
+        * we already have the chan open, and all we want to do is read the queue,
+        * which exists because of our kref on it. */
+       switch (QID(c->qid)) {
+       case Qstrace:
+               s = c->aux;
+               n = qread(s->q, va, n);
+               return n;
+       case Qstrace_traceset:
+               s = c->aux;
+               return readmem(offset, va, n, s->trace_set,
+                              bitmap_size(MAX_SYSCALL_NR));
        }
 
-       if (QID(c->qid) == Qtracepids)
-               if (tpids == NULL)
-                       return 0;
-               else
-                       return readstr(off, va, n, tpids);
-#endif
        if ((p = pid2proc(SLOT(c->qid))) == NULL)
-               error(Eprocdied);
+               error(ESRCH, "%d: no such process", SLOT(c->qid));
        if (p->pid != PID(c->qid)) {
-               kref_put(&p->p_kref);
-               error(Eprocdied);
+               proc_decref(p);
+               error(ESRCH, "weird: p->pid is %d, PID(c->qid) is %d: mismatch",
+                     p->pid, PID(c->qid));
        }
        switch (QID(c->qid)) {
                default:
-                       kref_put(&p->p_kref);
+                       proc_decref(p);
                        break;
-#if 0
-#warning check refcnting in here
-               case Qargs:
-                       qlock(&p->debug);
-                       j = procargs(p, current->genbuf, sizeof current->genbuf);
-                       qunlock(&p->debug);
-                       kref_put(&p->p_kref);
-                       if (offset >= j)
-                               return 0;
-                       if (offset + n > j)
-                               n = j - offset;
-                       memmove(va, &current->genbuf[offset], n);
-                       return n;
-
-               case Qsyscall:
-                       if (p->syscalltrace == NULL)
-                               return 0;
-                       return readstr(offset, va, n, p->syscalltrace);
-
-               case Qcore:
-                       i = 0;
-                       ac = p->ac;
-                       wired = p->wired;
-                       if (ac != NULL)
-                               i = ac->machno;
-                       else if (wired != NULL)
-                               i = wired->machno;
-                       snprint(statbuf, sizeof statbuf, "%d\n", i);
-                       return readstr(offset, va, n, statbuf);
-
-               case Qmem:
-                       if (offset < KZERO
-                               || (offset >= USTKTOP - USTKSIZE && offset < USTKTOP)) {
-                               r = procctlmemio(p, offset, n, va, 1);
-                               kref_put(&p->p_kref);
-                               return r;
-                       }
+               case Quser: {
+                               int i;
 
-                       if (!iseve()) {
-                               kref_put(&p->p_kref);
-                               error(Eperm);
-                       }
-
-                       /* validate kernel addresses */
-                       if (offset < PTR2UINT(end)) {
-                               if (offset + n > PTR2UINT(end))
-                                       n = PTR2UINT(end) - offset;
-                               memmove(va, UINT2PTR(offset), n);
-                               kref_put(&p->p_kref);
-                               return n;
-                       }
-                       for (i = 0; i < nelem(conf.mem); i++) {
-                               cm = &conf.mem[i];
-                               /* klimit-1 because klimit might be zero! */
-                               if (cm->kbase <= offset && offset <= cm->klimit - 1) {
-                                       if (offset + n >= cm->klimit - 1)
-                                               n = cm->klimit - offset;
-                                       memmove(va, UINT2PTR(offset), n);
-                                       kref_put(&p->p_kref);
-                                       return n;
-                               }
-                       }
-                       kref_put(&p->p_kref);
-                       error(Ebadarg);
-
-               case Qprofile:
-                       s = p->seg[TSEG];
-                       if (s == 0 || s->profile == 0)
-                               error("profile is off");
-                       i = (s->top - s->base) >> LRESPROF;
-                       i *= sizeof(*s->profile);
-                       if (offset >= i) {
-                               kref_put(&p->p_kref);
-                               return 0;
-                       }
-                       if (offset + n > i)
-                               n = i - offset;
-                       memmove(va, ((char *)s->profile) + offset, n);
-                       kref_put(&p->p_kref);
-                       return n;
-
-               case Qnote:
-                       qlock(&p->debug);
-                       if (waserror()) {
-                               qunlock(&p->debug);
-                               kref_put(&p->p_kref);
-                               nexterror();
-                       }
-                       if (p->pid != PID(c->qid))
-                               error(Eprocdied);
-                       if (n < 1)      /* must accept at least the '\0' */
-                               error(Etoosmall);
-                       if (p->nnote == 0)
-                               n = 0;
-                       else {
-                               i = strlen(p->note[0].msg) + 1;
-                               if (i > n)
-                                       i = n;
-                               rptr = va;
-                               memmove(rptr, p->note[0].msg, i);
-                               rptr[i - 1] = '\0';
-                               p->nnote--;
-                               memmove(p->note, p->note + 1, p->nnote * sizeof(Note));
-                               n = i;
-                       }
-                       if (p->nnote == 0)
-                               p->notepending = 0;
-                       poperror();
-                       qunlock(&p->debug);
-                       kref_put(&p->p_kref);
-                       return n;
-
-               case Qproc:
-                       if (offset >= sizeof(struct proc)) {
-                               kref_put(&p->p_kref);
-                               return 0;
-                       }
-                       if (offset + n > sizeof(struct proc))
-                               n = sizeof(struct proc) - offset;
-                       memmove(va, ((char *)p) + offset, n);
-                       kref_put(&p->p_kref);
-                       return n;
-
-               case Qregs:
-                       rptr = (uint8_t *) p->dbgreg;
-                       rsize = sizeof(Ureg);
-regread:
-                       if (rptr == 0) {
-                               kref_put(&p->p_kref);
-                               error(Enoreg);
-                       }
-                       if (offset >= rsize) {
-                               kref_put(&p->p_kref);
-                               return 0;
-                       }
-                       if (offset + n > rsize)
-                               n = rsize - offset;
-                       memmove(va, rptr + offset, n);
-                       kref_put(&p->p_kref);
-                       return n;
-
-               case Qkregs:
-                       memset(&kur, 0, sizeof(Ureg));
-                       setkernur(&kur, p);
-                       rptr = (uint8_t *) & kur;
-                       rsize = sizeof(Ureg);
-                       goto regread;
-
-               case Qfpregs:
-                       r = fpudevprocio(p, va, n, offset, 0);
-                       kref_put(&p->p_kref);
-                       return r;
-
-               case Qstatus:
-                       if (offset >= STATSIZE) {
-                               kref_put(&p->p_kref);
-                               return 0;
-                       }
-                       if (offset + n > STATSIZE)
-                               n = STATSIZE - offset;
-
-                       sps = p->psstate;
-                       if (sps == 0)
-                               sps = statename[p->state];
-                       memset(statbuf, ' ', sizeof statbuf);
-                       j = 2 * KNAMELEN + 12;
-                       snprint(statbuf, j + 1, "%-*.*s%-*.*s%-12.11s",
-                                       KNAMELEN, KNAMELEN - 1, p->text,
-                                       KNAMELEN, KNAMELEN - 1, p->user, sps);
-
-                       for (i = 0; i < 6; i++) {
-                               l = p->time[i];
-                               if (i == TReal)
-                                       l = sys->ticks - l;
-                               l = TK2MS(l);
-                               readnum(0, statbuf + j + NUMSIZE * i, NUMSIZE, l, NUMSIZE);
-                       }
-                       /* ignore stack, which is mostly non-existent */
-                       u = 0;
-                       for (i = 1; i < NSEG; i++) {
-                               s = p->seg[i];
-                               if (s)
-                                       u += s->top - s->base;
-                       }
-                       readnum(0, statbuf + j + NUMSIZE * 6, NUMSIZE, u >> 10u, NUMSIZE);      /* wrong size */
-                       readnum(0, statbuf + j + NUMSIZE * 7, NUMSIZE, p->basepri, NUMSIZE);
-                       readnum(0, statbuf + j + NUMSIZE * 8, NUMSIZE, p->priority,
-                                       NUMSIZE);
-
-                       /*
-                        * NIX: added # of traps, syscalls, and iccs
-                        */
-                       readnum(0, statbuf + j + NUMSIZE * 9, NUMSIZE, p->ntrap, NUMSIZE);
-                       readnum(0, statbuf + j + NUMSIZE * 10, NUMSIZE, p->nintr, NUMSIZE);
-                       readnum(0, statbuf + j + NUMSIZE * 11, NUMSIZE, p->nsyscall,
-                                       NUMSIZE);
-                       readnum(0, statbuf + j + NUMSIZE * 12, NUMSIZE, p->nicc, NUMSIZE);
-                       readnum(0, statbuf + j + NUMSIZE * 13, NUMSIZE, p->nactrap,
-                                       NUMSIZE);
-                       readnum(0, statbuf + j + NUMSIZE * 14, NUMSIZE, p->nacsyscall,
-                                       NUMSIZE);
-                       memmove(va, statbuf + offset, n);
-                       kref_put(&p->p_kref);
-                       return n;
-
-               case Qsegment:
-                       j = 0;
-                       for (i = 0; i < NSEG; i++) {
-                               sg = p->seg[i];
-                               if (sg == 0)
-                                       continue;
-                               j += sprint(statbuf + j, "%-6s %c%c %p %p %4d\n",
-                                                       sname[sg->type & SG_TYPE],
-                                                       sg->type & SG_RONLY ? 'R' : ' ',
-                                                       sg->profile ? 'P' : ' ',
-                                                       sg->base, sg->top, sg->ref);
-                       }
-                       kref_put(&p->p_kref);
-                       if (offset >= j)
-                               return 0;
-                       if (offset + n > j)
-                               n = j - offset;
-                       if (n == 0 && offset == 0)
-                               exhausted("segments");
-                       memmove(va, &statbuf[offset], n);
-                       return n;
-
-               case Qwait:
-                       if (!canqlock(&p->qwaitr)) {
-                               kref_put(&p->p_kref);
-                               error(Einuse);
-                       }
-
-                       if (waserror()) {
-                               qunlock(&p->qwaitr);
-                               kref_put(&p->p_kref);
-                               nexterror();
-                       }
-
-                       lock(&p->exl);
-                       if (up == p && p->nchild == 0 && p->waitq == 0) {
-                               unlock(&p->exl);
-                               error(Enochild);
-                       }
-                       pid = p->pid;
-                       while (p->waitq == 0) {
-                               unlock(&p->exl);
-                               rendez_sleep(&p->waitr, haswaitq, p);
-                               if (p->pid != pid)
-                                       error(Eprocdied);
-                               lock(&p->exl);
+                               i = readstr(off, va, n, p->user.name);
+                               proc_decref(p);
+                               return i;
                        }
-                       wq = p->waitq;
-                       p->waitq = wq->next;
-                       p->nwait--;
-                       unlock(&p->exl);
-
-                       poperror();
-                       qunlock(&p->qwaitr);
-                       kref_put(&p->p_kref);
-                       n = snprint(va, n, "%d %lu %lud %lud %q",
-                                               wq->w.pid,
-                                               wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal],
-                                               wq->w.msg);
-                       kfree(wq);
-                       return n;
-#endif
                case Qstatus:{
-                               /* the extra 2 is paranoia */
-                               char buf[8 + 1 + PROC_PROGNAME_SZ + 1 + 10 + 1 + 6 + 2];
-                               snprintf(buf, sizeof(buf),
+                               /* the old code grew the stack and was hideous.
+                                * status is not a high frequency operation; just malloc. */
+                               char *buf = kmalloc(4096, MEM_WAIT);
+                               char *s = buf, *e = buf + 4096;
+                               int i;
+
+                               s = seprintf(s, e,
                                         "%8d %-*s %-10s %6d", p->pid, PROC_PROGNAME_SZ,
                                         p->progname, procstate2str(p->state),
                                         p->ppid);
-                               kref_put(&p->p_kref);
-                               return readstr(off, va, n, buf);
+                               if (p->strace)
+                                       s = seprintf(s, e, " %d trace users %d traced procs",
+                                                    kref_refcnt(&p->strace->users),
+                                                    kref_refcnt(&p->strace->procs));
+                               proc_decref(p);
+                               i = readstr(off, va, n, buf);
+                               kfree(buf);
+                               return i;
                        }
 
+               case Qvmstatus:
+                       {
+                               size_t buflen = 50 * 65 + 2;
+                               char *buf = kmalloc(buflen, MEM_WAIT);
+                               int i, offset;
+                               offset = 0;
+                               offset += snprintf(buf + offset, buflen - offset, "{\n");
+                               for (i = 0; i < 65; i++) {
+                                       if (p->vmm.vmexits[i] != 0) {
+                                               offset += snprintf(buf + offset, buflen - offset,
+                                                                  "\"%s\":\"%lld\",\n",
+                                                                  VMX_EXIT_REASON_NAMES[i],
+                                                                  p->vmm.vmexits[i]);
+                                       }
+                               }
+                               offset += snprintf(buf + offset, buflen - offset, "}\n");
+                               proc_decref(p);
+                               n = readstr(off, va, n, buf);
+                               kfree(buf);
+                               return n;
+                       }
                case Qns:
                        //qlock(&p->debug);
                        if (waserror()) {
                                //qunlock(&p->debug);
-                               kref_put(&p->p_kref);
+                               proc_decref(p);
                                nexterror();
                        }
                        if (p->pgrp == NULL || p->pid != PID(c->qid))
-                               error(Eprocdied);
+                               error(ESRCH, ERROR_FIXME);
                        mw = c->aux;
                        if (mw->cddone) {
                                poperror();
                                //qunlock(&p->debug);
-                               kref_put(&p->p_kref);
+                               proc_decref(p);
                                return 0;
                        }
                        mntscan(mw, p);
@@ -1142,7 +1014,7 @@ regread:
                                i = snprintf(va, n, "cd %s\n", p->dot->name->s);
                                poperror();
                                //qunlock(&p->debug);
-                               kref_put(&p->p_kref);
+                               proc_decref(p);
                                return i;
                        }
                        int2flag(mw->cm->mflag, flag);
@@ -1158,21 +1030,15 @@ regread:
                                                         mw->cm->to->name->s, mw->mh->from->name->s);
                        poperror();
                        //qunlock(&p->debug);
-                       kref_put(&p->p_kref);
+                       proc_decref(p);
+                       return i;
+               case Qmaps:
+                       sza = c->aux;
+                       i = readstr(off, va, n, sza->buf);
+                       proc_decref(p);
                        return i;
-#if 0
-               case Qnoteid:
-                       r = readnum(offset, va, n, p->noteid, NUMSIZE);
-                       kref_put(&p->p_kref);
-                       return r;
-               case Qfd:
-                       r = procfds(p, va, n, offset);
-                       kref_put(&p->p_kref);
-                       return r;
-#endif
        }
-
-       error(Egreg);
+       error(EINVAL, "QID %d did not match any QIDs for #proc", QID(c->qid));
        return 0;       /* not reached */
 }
 
@@ -1211,27 +1077,28 @@ static void mntscan(struct mntwalk *mw, struct proc *p)
        runlock(&pg->ns);
 }
 
-static long procwrite(struct chan *c, void *va, long n, int64_t off)
+static size_t procwrite(struct chan *c, void *va, size_t n, off64_t off)
 {
        ERRSTACK(2);
 
        struct proc *p, *t;
        int i, id, l;
        char *args;
-       uintptr_t offset;
+       uintptr_t offset = off;
+       struct strace *s;
 
        if (c->qid.type & QTDIR)
-               error(Eisdir);
+               error(EISDIR, ERROR_FIXME);
 
        if ((p = pid2proc(SLOT(c->qid))) == NULL)
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
 
        if (waserror()) {
-               kref_put(&p->p_kref);
+               proc_decref(p);
                nexterror();
        }
        if (p->pid != PID(c->qid))
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
 
        offset = off;
 
@@ -1239,13 +1106,13 @@ static long procwrite(struct chan *c, void *va, long n, int64_t off)
 #if 0
                case Qargs:
                        if (n == 0)
-                               error(Eshort);
+                               error(EINVAL, ERROR_FIXME);
                        if (n >= sizeof buf - strlen(p->text) - 1)
-                               error(Etoobig);
+                               error(E2BIG, ERROR_FIXME);
                        l = snprintf(buf, sizeof buf, "%s [%s]", p->text, (char *)va);
-                       args = kzmalloc(l + 1, KMALLOC_WAIT);
+                       args = kzmalloc(l + 1, MEM_WAIT);
                        if (args == NULL)
-                               error(Enomem);
+                               error(ENOMEM, ERROR_FIXME);
                        memmove(args, buf, l);
                        args[l] = 0;
                        kfree(p->args);
@@ -1256,7 +1123,7 @@ static long procwrite(struct chan *c, void *va, long n, int64_t off)
 
                case Qmem:
                        if (p->state != Stopped)
-                               error(Ebadctl);
+                               error(EINVAL, ERROR_FIXME);
 
                        n = procctlmemio(p, offset, n, va, 0);
                        break;
@@ -1267,7 +1134,7 @@ static long procwrite(struct chan *c, void *va, long n, int64_t off)
                        else if (offset + n > sizeof(Ureg))
                                n = sizeof(Ureg) - offset;
                        if (p->dbgreg == 0)
-                               error(Enoreg);
+                               error(ENODATA, ERROR_FIXME);
                        setregisters(p->dbgreg, (char *)(p->dbgreg) + offset, va, n);
                        break;
 
@@ -1278,39 +1145,42 @@ static long procwrite(struct chan *c, void *va, long n, int64_t off)
                case Qctl:
                        procctlreq(p, va, n);
                        break;
-
+               case Qstrace_traceset:
+                       s = c->aux;
+                       if (n + offset > bitmap_size(MAX_SYSCALL_NR))
+                               error(EINVAL, "strace_traceset: Short write (%llu at off %llu)",
+                                     n, offset);
+                       if (memcpy_from_user(current, (void*)s->trace_set + offset, va, n))
+                               error(EFAULT, "strace_traceset: Bad addr (%p + %llu)", va, n);
+                       break;
                default:
-                       poperror();
-                       kref_put(&p->p_kref);
-                       error("unknown qid %#llux in procwrite\n", c->qid.path);
+                       error(EFAIL, "unknown qid %#llux in procwrite\n", c->qid.path);
        }
        poperror();
-       kref_put(&p->p_kref);
+       proc_decref(p);
        return n;
-
 }
 
 struct dev procdevtab __devtab = {
-       'p',
-       "proc",
-
-       devreset,
-       procinit,
-       devshutdown,
-       procattach,
-       procwalk,
-       procstat,
-       procopen,
-       devcreate,
-       procclose,
-       procread,
-       devbread,
-       procwrite,
-       devbwrite,
-       devremove,
-       procwstat,
-       devpower,
-       devchaninfo,
+       .name = "proc",
+
+       .reset = devreset,
+       .init = procinit,
+       .shutdown = devshutdown,
+       .attach = procattach,
+       .walk = procwalk,
+       .stat = procstat,
+       .open = procopen,
+       .create = devcreate,
+       .close = procclose,
+       .read = procread,
+       .bread = devbread,
+       .write = procwrite,
+       .bwrite = devbwrite,
+       .remove = devremove,
+       .wstat = procwstat,
+       .power = devpower,
+       .chaninfo = devchaninfo,
 };
 
 #if 0
@@ -1323,15 +1193,15 @@ static struct chan *proctext(struct chan *c, struct proc *p)
 
        s = p->seg[TSEG];
        if (s == 0)
-               error(Enonexist);
+               error(ENOENT, ERROR_FIXME);
        if (p->state == Dead)
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
 
        lock(s);
        i = s->image;
        if (i == 0) {
                unlock(s);
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
        }
        unlock(s);
 
@@ -1343,19 +1213,19 @@ static struct chan *proctext(struct chan *c, struct proc *p)
 
        tc = i->c;
        if (tc == 0)
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
 
        /* TODO: what do you want here?  you can't get a kref and have the new val
         * be 1.  Here is the old code: if (kref_get(&tc->ref, 1) == 1 || ... ) */
        if (kref_refcnt(&tc->ref, 1) == 1 || (tc->flag & COPEN) == 0
                || tc->mode != OREAD) {
                cclose(tc);
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
        }
 
        if (p->pid != PID(c->qid)) {
                cclose(tc);
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
        }
 
        poperror();
@@ -1372,7 +1242,7 @@ void procstopwait(struct proc *p, int ctl)
        int pid;
 
        if (p->pdbg)
-               error(Einuse);
+               error(EBUSY, ERROR_FIXME);
        if (procstopped(p) || p->state == Broken)
                return;
 
@@ -1391,27 +1261,15 @@ void procstopwait(struct proc *p, int ctl)
        poperror();
        qlock(&p->debug);
        if (p->pid != pid)
-               error(Eprocdied);
+               error(ESRCH, ERROR_FIXME);
 }
 
 #endif
 static void procctlcloseone(struct proc *p, int fd)
 {
 // TODO: resolve this and sys_close
-       struct file *file = get_file_from_fd(&p->open_files, fd);
-       int retval = 0;
-       printd("%s %d\n", __func__, fd);
-       /* VFS */
-       if (file) {
-               put_file_from_fd(&p->open_files, fd);
-               kref_put(&file->f_kref);        /* Drop the ref from get_file */
-               return;
-       }
-       /* 9ns, should also handle errors (bad FD, etc) */
-       retval = sysclose(fd);
+       sysclose(fd);
        return;
-
-       //sys_close(p, fd);
 }
 
 void procctlclosefiles(struct proc *p, int all, int fd)
@@ -1425,15 +1283,38 @@ void procctlclosefiles(struct proc *p, int all, int fd)
                procctlcloseone(p, fd);
 }
 
+static void strace_shutdown(struct kref *a)
+{
+       struct strace *strace = container_of(a, struct strace, procs);
+       static const char base_msg[] = "# Traced ~%lu syscs, Dropped %lu";
+       size_t msg_len = NUMSIZE64 * 2 + sizeof(base_msg);
+       char *msg = kmalloc(msg_len, 0);
+
+       if (msg)
+               snprintf(msg, msg_len, base_msg, strace->appx_nr_sysc,
+                        atomic_read(&strace->nr_drops));
+       qhangup(strace->q, msg);
+       kfree(msg);
+}
+
+static void strace_release(struct kref *a)
+{
+       struct strace *strace = container_of(a, struct strace, users);
+
+       qfree(strace->q);
+       kfree(strace);
+}
+
 static void procctlreq(struct proc *p, char *va, int n)
 {
-       ERRSTACK(2);
+       ERRSTACK(1);
        int8_t irq_state = 0;
        int npc, pri, core;
        struct cmdbuf *cb;
        struct cmdtab *ct;
        int64_t time;
        char *e;
+       struct strace *strace;
 
        cb = parsecmd(va, n);
        if (waserror()) {
@@ -1444,36 +1325,68 @@ static void procctlreq(struct proc *p, char *va, int n)
        ct = lookupcmd(cb, proccmd, ARRAY_SIZE(proccmd));
 
        switch (ct->index) {
-               default:
-                       error("nope\n");
-                       break;
-               case CMtrace:
-                       systrace_trace_pid(p);
-                       break;
-               case CMclose:
-                       procctlclosefiles(p, 0, atoi(cb->f[1]));
-                       break;
-               case CMclosefiles:
-                       procctlclosefiles(p, 1, 0);
-                       break;
+       case CMstraceall:
+       case CMstraceme:
+       case CMstrace_drop:
+               /* common allocation.  if we inherited, we might have one already */
+               if (!p->strace) {
+                       strace = kzmalloc(sizeof(*p->strace), MEM_WAIT);
+                       spinlock_init(&strace->lock);
+                       bitmap_set(strace->trace_set, 0, MAX_SYSCALL_NR);
+                       strace->q = qopen(65536, Qmsg, NULL, NULL);
+                       /* The queue is reopened and hung up whenever we open the Qstrace
+                        * file.  This hangup might not be necessary, but is safer. */
+                       qhangup(strace->q, NULL);
+                       /* both of these refs are put when the proc is freed.  procs is for
+                        * every process that has this p->strace.  users is procs + every
+                        * user (e.g. from open()).
+                        *
+                        * it is possible to kref_put the procs kref in proc_destroy, which
+                        * would make strace's job easier (no need to do an async wait on
+                        * the child), and we wouldn't need to decref p in
+                        * procread(Qstrace).  But the downside is that proc_destroy races
+                        * with us here with the kref initialization. */
+                       kref_init(&strace->procs, strace_shutdown, 1);
+                       kref_init(&strace->users, strace_release, 1);
+                       if (!atomic_cas_ptr((void**)&p->strace, 0, strace)) {
+                               /* someone else won the race and installed strace. */
+                               qfree(strace->q);
+                               kfree(strace);
+                               error(EAGAIN, "Concurrent strace init, try again");
+                       }
+               }
+               break;
+       }
+
+       /* actually do the command. */
+       switch (ct->index) {
+       default:
+               error(EFAIL, "Command not implemented");
+               break;
+       case CMclose:
+               procctlclosefiles(p, 0, atoi(cb->f[1]));
+               break;
+       case CMclosefiles:
+               procctlclosefiles(p, 1, 0);
+               break;
 #if 0
-                       we may want this.Let us pause a proc.case CMhang:p->hang = 1;
-                       break;
+               we may want this.Let us pause a proc.case CMhang:p->hang = 1;
+               break;
 #endif
-               case CMkill:
-                       p = pid2proc(strtol(cb->f[1], 0, 0));
-                       if (!p)
-                               error("No such proc\n");
-
-                       enable_irqsave(&irq_state);
-                       proc_destroy(p);
-                       disable_irqsave(&irq_state);
-                       proc_decref(p);
-                       /* this is a little ghetto. it's not fully free yet, but we are also
-                        * slowing it down by messing with it, esp with the busy waiting on a
-                        * hyperthreaded core. */
-                       spin_on(p->env_cr3);
-                       break;
+       case CMstraceme:
+               p->strace->inherit = FALSE;
+               break;
+       case CMstraceall:
+               p->strace->inherit = TRUE;
+               break;
+       case CMstrace_drop:
+               if (!strcmp(cb->f[1], "on"))
+                       p->strace->drop_overflow = TRUE;
+               else if (!strcmp(cb->f[1], "off"))
+                       p->strace->drop_overflow = FALSE;
+               else
+                       error(EINVAL, "strace_drop takes on|off %s", cb->f[1]);
+               break;
        }
        poperror();
        kfree(cb);
@@ -1500,7 +1413,7 @@ procctlmemio(struct proc *p, uintptr_t offset, int n, void *va, int read)
        for (;;) {
                s = seg(p, offset, 1);
                if (s == 0)
-                       error(Ebadarg);
+                       error(EINVAL, ERROR_FIXME);
 
                if (offset + n >= s->top)
                        n = s->top - offset;