New and easy strace framework.
author Ronald G. Minnich <rminnich@gmail.com>
Mon, 25 Jan 2016 22:26:22 +0000 (14:26 -0800)
committer Barret Rhoden <brho@cs.berkeley.edu>
Wed, 3 Feb 2016 23:31:54 +0000 (18:31 -0500)
echo straceall > /proc/pid/ctl
cat /proc/pid/strace

echo straceme > /proc/pid/ctl instead if you don't want inheritance.
echo straceoff > /proc/pid/ctl to stop it.

That's it. strace acts like a file.

and you'll see syscall info (enter and exit) for the process.

So strace is now spelled cat, dd, grep, or, well,
anything that reads files.

Inheritance is working.

But this is a very efficient way to trace processes, even better than
the tracer I wrote for Plan 9.
A single read from strace can read many system call records.

This now dumps read, write, and open information.

Included is a sample strace program which works. You can even
strace a shell now and watch the children.

Signed-off-by: Ronald G. Minnich <rminnich@gmail.com>
[ various fixups, side by side with Ron! ]
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/drivers/dev/proc.c
kern/include/env.h
kern/include/kdebug.h
kern/include/kthread.h
kern/include/syscall.h
kern/src/hexdump.c
kern/src/ns/sysfile.c
kern/src/process.c
kern/src/syscall.c
tests/strace.c [new file with mode: 0644]

index 102545f..b5c39d5 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * This file is part of the UCB release of Plan 9. It is subject to the license
  * terms in the LICENSE file found in the top-level directory of this
  * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
@@ -57,6 +57,7 @@ enum {
        Qregs,
        Qsegment,
        Qstatus,
+       Qstrace,
        Qvmstatus,
        Qtext,
        Qwait,
@@ -80,13 +81,16 @@ enum {
        CMstartstop,
        CMstartsyscall,
        CMstop,
+       CMtrace,
        CMwaitstop,
        CMwired,
-       CMtrace,
        CMcore,
        CMvminit,
        CMvmstart,
        CMvmkill,
+       CMstraceme,
+       CMstraceall,
+       CMstraceoff,
 };
 
 enum {
@@ -116,6 +120,7 @@ struct dirtab procdir[] = {
        //  {"regs",        {Qregs},    sizeof(Ureg),       0000},
        {"segment", {Qsegment}, 0, 0444},
        {"status", {Qstatus}, STATSIZE, 0444},
+       {"strace", {Qstrace}, 0, 0666},
        {"vmstatus", {Qvmstatus}, 0, 0444},
        {"text", {Qtext}, 0, 0000},
        {"wait", {Qwait}, 0, 0400},
@@ -140,15 +145,18 @@ struct cmdtab proccmd[] = {
        {CMstartstop, "startstop", 1},
        {CMstartsyscall, "startsyscall", 1},
        {CMstop, "stop", 1},
+       {CMtrace, "trace", 0},
        {CMwaitstop, "waitstop", 1},
        {CMwired, "wired", 2},
-       {CMtrace, "trace", 0},
        {CMcore, "core", 2},
        {CMcore, "core", 2},
        {CMcore, "core", 2},
        {CMvminit, "vminit", 0},
        {CMvmstart, "vmstart", 0},
        {CMvmkill, "vmkill", 0},
+       {CMstraceme, "straceme", 0},
+       {CMstraceall, "straceall", 0},
+       {CMstraceoff, "straceoff", 0},
 };
 
 /*
@@ -533,6 +541,16 @@ static struct chan *procopen(struct chan *c, int omode)
                case Qvmstatus:
                case Qctl:
                        break;
+
+               case Qstrace:
+                       if (!p->strace)
+                               error(ENOENT, "Process does not have tracing enabled");
+                       /* the ref we are upping is the one we put in __proc_free, which is
+                        * the one we got from CMstrace{all,me}.  We have a ref on p, so we
+                        * know we won't free until we decref the proc. */
+                       kref_get(&p->strace->users, 1);
+                       c->aux = p->strace;
+                       break;
                case Qnotepg:
                        error(ENOSYS, ERROR_FIXME);
 #if 0
@@ -548,9 +566,6 @@ static struct chan *procopen(struct chan *c, int omode)
                        break;
 
                default:
-                       poperror();
-                       //qunlock(&p->debug);
-                       kref_put(&p->p_kref);
                        printk("procopen %#llux\n", c->qid.path);
                        error(EINVAL, ERROR_FIXME);
        }
@@ -730,8 +745,19 @@ static void procclose(struct chan *c)
                 */
                spin_unlock(&tlock);
        }
+       if (QID(c->qid) == Qsyscall) {
+               if (c->aux)
+                       qclose(c->aux);
+               c->aux = NULL;
+       }
        if (QID(c->qid) == Qns && c->aux != 0)
                kfree(c->aux);
+       if (QID(c->qid) == Qstrace && c->aux != 0) {
+               struct strace *s = c->aux;
+
+               kref_put(&s->users);
+               c->aux = NULL;
+       }
 }
 
 void int2flag(int flag, char *s)
@@ -811,6 +837,7 @@ static long procread(struct chan *c, void *va, long n, int64_t off)
        int tesz;
        uint8_t *rptr;
        struct mntwalk *mw;
+       struct strace *s;
 
        if (c->qid.type & QTDIR) {
                int nn;
@@ -853,10 +880,11 @@ static long procread(struct chan *c, void *va, long n, int64_t off)
                        return readstr(off, va, n, tpids);
 #endif
        if ((p = pid2proc(SLOT(c->qid))) == NULL)
-               error(ESRCH, ERROR_FIXME);
+               error(ESRCH, "%d: no such process", SLOT(c->qid));
        if (p->pid != PID(c->qid)) {
                kref_put(&p->p_kref);
-               error(ESRCH, ERROR_FIXME);
+               error(ESRCH, "weird: p->pid is %d, PID(c->qid) is %d: mismatch",
+                     p->pid, PID(c->qid));
        }
        switch (QID(c->qid)) {
                default:
@@ -1129,15 +1157,34 @@ regread:
                        kfree(wq);
                        return n;
 #endif
+               case Qstrace:
+                       s = c->aux;
+                       /* We need to decref, so that p can get freed while we are blocked
+                        * on the qread.  When p is freed, __proc_free puts the strace's
+                        * procs ref; once that hits zero the queue is hung up (qhangup),
+                        * which wakes us up. */
+                       kref_put(&p->p_kref);
+                       n = qread(s->q, va, n);
+                       return n;
+
                case Qstatus:{
-                               /* the extra 2 is paranoia */
-                               char buf[8 + 1 + PROC_PROGNAME_SZ + 1 + 10 + 1 + 6 + 2];
-                               snprintf(buf, sizeof(buf),
+                               /* the old code grew the stack and was hideous.
+                                * status is not a high frequency operation; just malloc. */
+                               char *buf = kmalloc(4096, KMALLOC_WAIT);
+                               char *s = buf, *e = buf + 4096;
+                               int i;
+
+                               s = seprintf(s, e,
                                         "%8d %-*s %-10s %6d", p->pid, PROC_PROGNAME_SZ,
                                         p->progname, procstate2str(p->state),
                                         p->ppid);
+                               if (p->strace)
+                                       s = seprintf(s, e, " %d trace users %d traced procs",
+                                                    kref_refcnt(&p->strace->users),
+                                                    kref_refcnt(&p->strace->procs));
                                kref_put(&p->p_kref);
-                               return readstr(off, va, n, buf);
+                               i = readstr(off, va, n, buf);
+                               kfree(buf);
+                               return i;
                        }
 
                case Qvmstatus:
@@ -1210,7 +1257,8 @@ regread:
 #endif
        }
 
-       error(EINVAL, ERROR_FIXME);
+
+       error(EINVAL, "QID %d did not match any QIDs for #proc", QID(c->qid));
        return 0;       /* not reached */
 }
 
@@ -1317,15 +1365,21 @@ static long procwrite(struct chan *c, void *va, long n, int64_t off)
                        procctlreq(p, va, n);
                        break;
 
+               /* this lets you write a marker into the data stream,
+                * which is a very powerful tool. */
+               case Qstrace:
+                       assert(c->aux);
+                       /* it is possible that the q hungup and is closed.  that would be
+                        * the case if all of the procs closed and decref'd.  if the q is
+                        * closed, qwrite() will throw an error. */
+                       n = qwrite(((struct strace*)c->aux)->q, va, n);
+                       break;
                default:
-                       poperror();
-                       kref_put(&p->p_kref);
                        error(EFAIL, "unknown qid %#llux in procwrite\n", c->qid.path);
        }
        poperror();
        kref_put(&p->p_kref);
        return n;
-
 }
 
 struct dev procdevtab __devtab = {
@@ -1462,6 +1516,21 @@ void procctlclosefiles(struct proc *p, int all, int fd)
                procctlcloseone(p, fd);
 }
 
+/* Release callback for the 'procs' kref of a struct strace (installed with
+ * kref_init in procctlreq).  Runs when the last traced process drops its
+ * reference: the trace queue is hung up, but nothing is freed here. */
+static void strace_shutdown(struct kref *a)
+{
+       struct queue *traceq = container_of(a, struct strace, procs)->q;
+
+       qhangup(traceq, "No more traces");
+}
+
+/* Release callback for the 'users' kref of a struct strace (installed with
+ * kref_init in procctlreq).  Runs when the last user reference is dropped:
+ * frees the trace queue first, then the strace structure itself. */
+static void strace_release(struct kref *a)
+{
+       struct strace *st = container_of(a, struct strace, users);
+       struct queue *traceq = st->q;
+
+       qfree(traceq);
+       kfree(st);
+}
+
 static void procctlreq(struct proc *p, char *va, int n)
 {
        ERRSTACK(1);
@@ -1471,6 +1540,7 @@ static void procctlreq(struct proc *p, char *va, int n)
        struct cmdtab *ct;
        int64_t time;
        char *e;
+       struct strace *strace;
 
        cb = parsecmd(va, n);
        if (waserror()) {
@@ -1481,40 +1551,81 @@ static void procctlreq(struct proc *p, char *va, int n)
        ct = lookupcmd(cb, proccmd, ARRAY_SIZE(proccmd));
 
        switch (ct->index) {
-               case CMvmstart:
-               case CMvmkill:
-               default:
-                       error(EFAIL, "nope\n");
-                       break;
-               case CMtrace:
-                       systrace_trace_pid(p);
-                       break;
-               case CMclose:
-                       procctlclosefiles(p, 0, atoi(cb->f[1]));
-                       break;
-               case CMclosefiles:
-                       procctlclosefiles(p, 1, 0);
-                       break;
+       case CMstraceall:
+       case CMstraceme:
+               /* common allocation.  if we inherited, we might have one already */
+               if (!p->strace) {
+                       strace = kmalloc(sizeof(*p->strace), KMALLOC_WAIT);
+                       strace->q = qopen(65536, Qdropoverflow|Qcoalesce, NULL, NULL);
+                       /* both of these refs are put when the proc is freed.  procs is for
+                        * every process that has this p->strace.  users is procs + every
+                        * user (e.g. from open()).
+                        *
+                        * it is possible to kref_put the procs kref in proc_destroy, which
+                        * would make strace's job easier (no need to do an async wait on
+                        * the child), and we wouldn't need to decref p in
+                        * procread(Qstrace).  But the downside is that proc_destroy races
+                        * with us here with the kref initialization. */
+                       kref_init(&strace->procs, strace_shutdown, 1);
+                       kref_init(&strace->users, strace_release, 1);
+                       if (!atomic_cas_ptr((void**)&p->strace, 0, strace)) {
+                               /* someone else won the race and installed strace. */
+                               qfree(strace->q);
+                               kfree(strace);
+                               error(EAGAIN, "Concurrent strace init, try again");
+                       }
+               }
+               break;
+       }
+
+       /* actually do the command. */
+       switch (ct->index) {
+       case CMvmstart:
+       case CMvmkill:
+       default:
+               error(EFAIL, "Command not implemented");
+               break;
+       case CMtrace:
+               systrace_trace_pid(p);
+               break;
+       case CMclose:
+               procctlclosefiles(p, 0, atoi(cb->f[1]));
+               break;
+       case CMclosefiles:
+               procctlclosefiles(p, 1, 0);
+               break;
 #if 0
-                       we may want this.Let us pause a proc.case CMhang:p->hang = 1;
-                       break;
+               we may want this.Let us pause a proc.case CMhang:p->hang = 1;
+               break;
 #endif
-               case CMkill:
-                       p = pid2proc(strtol(cb->f[1], 0, 0));
-                       if (!p)
-                               error(EFAIL, "No such proc\n");
-
-                       enable_irqsave(&irq_state);
-                       proc_destroy(p);
-                       disable_irqsave(&irq_state);
-                       proc_decref(p);
-                       /* this is a little ghetto. it's not fully free yet, but we are also
-                        * slowing it down by messing with it, esp with the busy waiting on a
-                        * hyperthreaded core. */
-                       spin_on(p->env_cr3);
-                       break;
-               case CMvminit:
-                       break;
+       case CMkill:
+               p = pid2proc(strtol(cb->f[1], 0, 0));
+               if (!p)
+                       error(EFAIL, "No such proc\n");
+
+               enable_irqsave(&irq_state);
+               proc_destroy(p);
+               disable_irqsave(&irq_state);
+               proc_decref(p);
+               /* this is a little ghetto. it's not fully free yet, but we are also
+                * slowing it down by messing with it, esp with the busy waiting on a
+                * hyperthreaded core. */
+               spin_on(p->env_cr3);
+               break;
+       case CMvminit:
+               break;
+       case CMstraceme:
+               p->strace_on = TRUE;
+               p->strace_inherit = FALSE;
+               break;
+       case CMstraceall:
+               p->strace_on = TRUE;
+               p->strace_inherit = TRUE;
+               break;
+       case CMstraceoff:
+               p->strace_on = FALSE;
+               p->strace_inherit = FALSE;
+               break;
        }
        poperror();
        kfree(cb);
index 66fa3b3..4630787 100644 (file)
@@ -118,6 +118,10 @@ struct proc {
 
        /* VMMCP */
        struct vmm vmm;
+
+       struct strace                           *strace;
+       bool                                            strace_on;
+       bool                                            strace_inherit;
 };
 
 /* Til we remove all Env references */
index 0a11754..bf4f474 100644 (file)
@@ -62,7 +62,7 @@ void __print_func_exit(const char *func, const char *file);
 #define print_func_exit() __print_func_exit(__FUNCTION__, __FILE__)
 void hexdump(void *v, int length);
 void pahexdump(uintptr_t pa, int length);
-int printdump(char *buf, int buflen, uint8_t *data);
+int printdump(char *buf, int numprint, int buflen, uint8_t *data);
 
 extern bool printx_on;
 void set_printx(int mode);
index 0776992..0b9f4bf 100644 (file)
@@ -46,6 +46,7 @@ struct kthread {
        char                                            *name;
        char                                            generic_buf[GENBUF_SZ];
        struct systrace_record          *trace;
+       struct systrace_record          *strace;
 };
 
 /* Semaphore for kthreads to sleep on.  0 or less means you need to sleep */
index 94407e1..8ecdc4f 100644 (file)
@@ -5,6 +5,8 @@
 
 #include <ros/common.h>
 #include <process.h>
+#include <kref.h>
+#include <ns.h>
 
 #define SYSTRACE_ON                                    0x01
 #define SYSTRACE_LOUD                          0x02
@@ -40,6 +42,15 @@ struct systrace_record {
        uint8_t                 data[SYSTR_RECORD_SZ - sizeof(struct systrace_record_anon)];
 };
 
+/* Tracing state shared by every process that is traced through the same
+ * /proc/<pid>/strace stream.  A process points at it via proc->strace and
+ * children may inherit the same pointer (see inherit_strace()).  Lifetime is
+ * governed by two reference counts: 'procs' and 'users'. */
+struct strace {
+       /* NOTE(review): opened/tracing/inherit are not referenced by the code
+        * visible in this change; the live on/inherit flags are kept in
+        * struct proc (strace_on, strace_inherit).  Confirm whether these are
+        * still needed. */
+       bool opened;
+       bool tracing;
+       bool inherit;
+       struct kref procs; /* when procs goes to zero, q is hung up. */
+       struct kref users; /* when users goes to zero, q and struct are freed. */
+       struct queue *q;   /* queue the formatted syscall records are written to */
+};
+
 /* Syscall table */
 typedef intreg_t (*syscall_t)(struct proc *, uintreg_t, uintreg_t, uintreg_t,
                               uintreg_t, uintreg_t, uintreg_t);
index 02281e4..6b81d42 100644 (file)
@@ -72,11 +72,17 @@ void pahexdump(uintptr_t pa, int len)
 }
 
 /* Print a string, with printables preserved, and \xxx where not possible. */
-int printdump(char *buf, int buflen, uint8_t *data)
+int printdump(char *buf, int numprint, int buflen, uint8_t *data)
 {
        int ret = 0;
        int ix = 0;
-       while (ret < buflen) {
+
+       if (buflen < 1)
+               return ret;
+       buf[ret++] = '\'';
+       /* we want 2 bytes left in the buf (which is ret < buflen - 1), one for the
+        * char, and one for the \' after the loop. */
+       while (ix < numprint && ret < (buflen - 1)) {
                if (isprint(data[ix])) {
                        buf[ret++] = data[ix];
                } else if (ret < buflen - 4) {
@@ -87,5 +93,6 @@ int printdump(char *buf, int buflen, uint8_t *data)
                }
                ix++;
        }
+       buf[ret++] = '\'';
        return ret;
 }
index 11f3639..6bad857 100644 (file)
@@ -79,7 +79,8 @@ struct chan *fdtochan(struct fd_table *fdt, int fd, int mode, int chkmnt,
        if ((mode & c->mode) != mode) {
                if (iref)
                        cclose(c);
-               error(EBADF, "FD access mode failure: chan mode 0x%x, wanted 0x%x",
+               error(EBADF,
+                     "FD access mode failure: chan mode 0x%x, wanted 0x%x (opened with 0 instead of O_READ?)",
                      c->mode, mode);
        }
        return c;
index 38f54d7..dd561ca 100644 (file)
@@ -211,7 +211,7 @@ struct proc *pid_nth(unsigned int n)
                 * so continue
                 */
 
-               if (kref_get_not_zero(&p->p_kref, 1)){
+               if (kref_get_not_zero(&p->p_kref, 1)) {
                        /* this one counts */
                        if (! n){
                                printd("pid_nth: at end, p %p\n", p);
@@ -220,7 +220,7 @@ struct proc *pid_nth(unsigned int n)
                        kref_put(&p->p_kref);
                        n--;
                }
-               if (!hashtable_iterator_advance(iter)){
+               if (!hashtable_iterator_advance(iter)) {
                        p = NULL;
                        break;
                }
@@ -468,6 +468,10 @@ static void __proc_free(struct kref *kref)
        assert(kref_refcnt(&p->p_kref) == 0);
        assert(TAILQ_EMPTY(&p->alarmset.list));
 
+       if (p->strace) {
+               kref_put(&p->strace->procs);
+               kref_put(&p->strace->users);
+       }
        __vmm_struct_cleanup(p);
        p->progname[0] = 0;
        free_path(p, p->binary_path);
index db74250..1615dc5 100644 (file)
@@ -54,13 +54,17 @@ static size_t systrace_fill_pretty_buf(struct systrace_record *trace)
        size_t len = 0;
        struct timespec ts_start;
        struct timespec ts_end;
+       char what = 'X';
        tsc2timespec(trace->start_timestamp, &ts_start);
        tsc2timespec(trace->end_timestamp, &ts_end);
+       if (trace->end_timestamp == 0)
+               what = 'E';
 
        len = snprintf(trace->pretty_buf, SYSTR_PRETTY_BUF_SZ - len,
-                  "[%7d.%09d]-[%7d.%09d] Syscall %3d (%12s):(0x%llx, 0x%llx, "
+                  "%c [%7d.%09d]-[%7d.%09d] Syscall %3d (%12s):(0x%llx, 0x%llx, "
                   "0x%llx, 0x%llx, 0x%llx, 0x%llx) ret: 0x%llx proc: %d core: %d "
                   "vcore: %d data: ",
+                  what,
                   ts_start.tv_sec,
                   ts_start.tv_nsec,
                   ts_end.tv_sec,
@@ -77,25 +81,95 @@ static size_t systrace_fill_pretty_buf(struct systrace_record *trace)
                   trace->pid,
                   trace->coreid,
                   trace->vcoreid);
-       /* if we have extra data, print it out on the next line, lined up nicely.
-        * this is only useful for looking at the dump in certain terminals.  if we
-        * have a tool that processes the info, we shouldn't do this. */
-       if (trace->datalen)
-               len += snprintf(trace->pretty_buf + len, SYSTR_PRETTY_BUF_SZ - len,
-                               "\n%67s", "");
+
        len += printdump(trace->pretty_buf + len,
-                        MIN(trace->datalen, SYSTR_PRETTY_BUF_SZ - len - 1),
+                        trace->datalen,
+                        SYSTR_PRETTY_BUF_SZ - len - 1,
                         trace->data);
        len += snprintf(trace->pretty_buf + len, SYSTR_PRETTY_BUF_SZ - len, "\n");
        return len;
 }
 
+/* Writes one formatted strace record for syscall 'sysc' of process 'p' into
+ * p->strace->q.
+ *
+ * Called twice per traced syscall:
+ *  - entry: 'trace' is NULL.  A record is allocated and filled from 'sysc'
+ *    (end_timestamp is left at 0).  For SYS_write and SYS_openat the buffer at
+ *    arg1 with length arg2 is captured as data.
+ *  - exit: 'trace' is the record returned by the entry call.  The end
+ *    timestamp and return value are filled in.  For SYS_read the bytes
+ *    actually read (retval, if non-negative) are captured from arg1.
+ *
+ * Returns the record (new or passed in), or NULL if the entry allocation
+ * failed.  The caller owns the record and must kfree() it. */
+static struct systrace_record *sctrace(struct systrace_record *trace,
+                                       struct proc *p, struct syscall *sysc)
+{
+       int n;
+       uintreg_t cp = 0;
+       int datalen = 0;
+
+       assert(p->strace);
+
+       if (!trace) {
+               // TODO: could we allocb and then write that block?
+               // Still, if we're tracing, we take a hit, and this is so
+               // much more efficient than strace it's not clear we care.
+               trace = kmalloc(SYSTR_BUF_SZ, 0);
+
+               if (!trace)
+                       return NULL;
+
+               // TODO: functionalize this, if we decide this
+               // approach is OK.
+               trace->start_timestamp = read_tsc();
+               trace->end_timestamp = 0;
+               trace->syscallno = sysc->num;
+               trace->arg0 = sysc->arg0;
+               trace->arg1 = sysc->arg1;
+               trace->arg2 = sysc->arg2;
+               trace->arg3 = sysc->arg3;
+               trace->arg4 = sysc->arg4;
+               trace->arg5 = sysc->arg5;
+               /* Use the proc we were handed; the old code shadowed 'p' with a
+                * second local initialized from 'current'. */
+               trace->pid = p->pid;
+               trace->coreid = core_id();
+               trace->vcoreid = proc_get_vcoreid(p);
+               /* The pretty-print buffer lives right after the record header in
+                * the same allocation. */
+               trace->pretty_buf = (char*)trace + sizeof(struct systrace_record);
+               trace->datalen = 0;
+               trace->data[0] = 0;
+               switch (sysc->num) {
+               case SYS_write:
+                       cp = sysc->arg1;
+                       datalen = sysc->arg2;
+                       break;
+               case SYS_openat:
+                       cp = sysc->arg1;
+                       datalen = sysc->arg2;
+                       break;
+               }
+       } else {
+               trace->end_timestamp = read_tsc();
+               trace->retval = sysc->retval;
+               switch (sysc->num) {
+               case SYS_read:
+                       cp = sysc->arg1;
+                       datalen = sysc->retval < 0 ? 0 : sysc->retval;
+                       break;
+               }
+
+       }
+
+       /* The length comes from user-controlled syscall arguments.  A negative
+        * value would turn into a huge unsigned one in the MIN() below and make
+        * us copy a full buffer; a NULL pointer with a length would make us
+        * dereference address 0.  Capture nothing in either case. */
+       if (datalen < 0 || !cp)
+               datalen = 0;
+       trace->datalen = MIN(sizeof(trace->data), datalen);
+       /* NOTE(review): cp is a raw user pointer and is not validated before
+        * the copy; consider a checked user-copy helper here. */
+       memmove(trace->data, (void *)cp, trace->datalen);
+       n = systrace_fill_pretty_buf(trace);
+       qwrite(p->strace->q, trace->pretty_buf, n);
+       return trace;
+}
+
 static void systrace_start_trace(struct kthread *kthread, struct syscall *sysc)
 {
        struct systrace_record *trace;
        int coreid, vcoreid;
        struct proc *p = current;
 
+       if (p->strace_on)
+               kthread->strace = sctrace(NULL, p, sysc);
+
+       /* TODO: merge these two types of tracing, or just remove this old one */
        if (!__trace_this_proc(p))
                return;
        assert(!kthread->trace);        /* catch memory leaks */
@@ -133,6 +207,7 @@ static void systrace_finish_trace(struct kthread *kthread, long retval)
 {
        struct systrace_record *trace = kthread->trace;
        size_t pretty_len;
+
        if (trace) {
                trace->end_timestamp = read_tsc();
                trace->retval = retval;
@@ -143,6 +218,12 @@ static void systrace_finish_trace(struct kthread *kthread, long retval)
                        printk("EXIT %s", trace->pretty_buf);
                kfree(trace);
        }
+       /* TODO: merge with or remove the old tracer */
+       if (kthread->strace) {
+               sctrace(kthread->strace, current, kthread->sysc);
+               kfree(kthread->strace);
+               kthread->strace = 0;
+       }
 }
 
 #ifdef CONFIG_SYSCALL_STRING_SAVING
@@ -560,6 +641,19 @@ static pid_t sys_getpid(struct proc *p)
        return p->pid;
 }
 
+/* Helper for proc_create and fork: hands the parent's strace down to the
+ * child when the parent is traced with inheritance enabled.  The child gets
+ * its own reference on both the users and procs counts and starts out with
+ * tracing and inheritance turned on.  Otherwise the child is left untouched. */
+static void inherit_strace(struct proc *parent, struct proc *child)
+{
+       /* Nothing to pass on unless the parent is traced and inheriting. */
+       if (!parent->strace || !parent->strace_inherit)
+               return;
+
+       /* Refcnt on both, put in the child's ->strace. */
+       kref_get(&parent->strace->users, 1);
+       kref_get(&parent->strace->procs, 1);
+       child->strace = parent->strace;
+       child->strace_inherit = TRUE;
+       child->strace_on = TRUE;
+}
+
 /* Creates a process from the file 'path'.  The process is not runnable by
  * default, so it needs it's status to be changed so that the next call to
  * schedule() will try to run it. */
@@ -609,6 +703,7 @@ static int sys_proc_create(struct proc *p, char *path, size_t path_l,
                set_errstr("Failed to alloc new proc");
                goto error_proc_alloc;
        }
+       inherit_strace(p, new_p);
        /* close the CLOEXEC ones, even though this isn't really an exec */
        close_fdt(&new_p->open_files, TRUE);
        /* Load the elf. */
@@ -760,6 +855,8 @@ static ssize_t sys_fork(env_t* e)
        env->heap_top = e->heap_top;
        env->env_flags = e->env_flags;
 
+       inherit_strace(e, env);
+
        /* In general, a forked process should be a fresh process, and we copy over
         * whatever stuff is needed between procinfo/procdata. */
        *env->procdata = *e->procdata;
@@ -1821,7 +1918,7 @@ intreg_t sys_readlink(struct proc *p, char *path, size_t path_l,
 
        if (symname){
                copy_amt = strnlen(symname, buf_l - 1) + 1;
-               if (! memcpy_to_user_errno(p, u_buf, symname, copy_amt))
+               if (!memcpy_to_user_errno(p, u_buf, symname, copy_amt))
                        ret = copy_amt - 1;
        }
        if (path_d)
@@ -2322,7 +2419,7 @@ intreg_t sys_rename(struct proc *p, char *old_path, size_t old_path_l,
        }
 
        mlen = convD2M(&dir, mbuf, sizeof(mbuf));
-       if (! mlen) {
+       if (!mlen) {
                printk("convD2M failed\n");
                set_errno(EINVAL);
                goto done;
@@ -2515,6 +2612,7 @@ const struct sys_table_entry syscall_table[] = {
        [SYS_tap_fds] = {(syscall_t)sys_tap_fds, "tap_fds"},
 };
 const int max_syscall = sizeof(syscall_table)/sizeof(syscall_table[0]);
+
 /* Executes the given syscall.
  *
  * Note tf is passed in, which points to the tf of the context on the kernel
@@ -2566,6 +2664,7 @@ intreg_t syscall(struct proc *p, uintreg_t sc_num, uintreg_t a0, uintreg_t a1,
 void run_local_syscall(struct syscall *sysc)
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+       struct proc *p = pcpui->cur_proc;
 
        /* In lieu of pinning, we just check the sysc and will PF on the user addr
         * later (if the addr was unmapped).  Which is the plan for all UMEM. */
diff --git a/tests/strace.c b/tests/strace.c
new file mode 100644 (file)
index 0000000..1432c89
--- /dev/null
@@ -0,0 +1,93 @@
+/* Copyright (c) 2016 Google Inc., All Rights Reserved.
+ * Ron Minnich <rminnich@google.com>
+ * See LICENSE for details. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <parlib/parlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <iplib/iplib.h>
+#include <iplib/icmp.h>
+#include <ctype.h>
+#include <pthread.h>
+#include <parlib/spinlock.h>
+#include <parlib/timing.h>
+#include <parlib/tsc-compat.h>
+#include <parlib/printf-ext.h>
+#include <benchutil/alarm.h>
+#include <ndblib/ndb.h>
+#include <fcntl.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+
+/* Prints the command-line synopsis on stderr and exits with status 1. */
+void usage(void)
+{
+       fputs("usage: strace command [args...]\n", stderr);
+       exit(1);
+}
+
+/* strace: runs a command under the kernel's strace facility and copies the
+ * trace stream to stdout.
+ *
+ * Steps: create (but do not yet run) the child, start an asynchronous
+ * waitpid on it, write "straceall" to /proc/<pid>/ctl, open
+ * /proc/<pid>/strace for reading, run the child, then relay everything read
+ * from the strace file to fd 1 until read returns 0 or an error.
+ *
+ * A command that does not start with '/' or '.' is looked up in /bin.
+ * Exits non-zero if any setup step or a write to stdout fails. */
+int main(int argc, char **argv, char **envp)
+{
+       int fd;
+       int pid;
+       int amt;
+       size_t cmdlen;
+       static char p[2 * MAX_PATH_LEN];
+       static char buf[16384];
+       struct syscall sysc;
+       char *prog_name;
+
+       if (argc < 2)
+               usage();
+       prog_name = argv[1];
+       if ((*argv[1] != '/') && (*argv[1] != '.')) {
+               snprintf(p, sizeof(p), "/bin/%s", argv[1]);
+               prog_name = p;
+       }
+
+       pid = sys_proc_create(prog_name, strlen(prog_name), argv + 1, envp,
+                             PROC_DUP_FGRP);
+       if (pid < 0) {
+               perror("proc_create");
+               exit(-1);
+       }
+       /* We need to wait on the child asynchronously.  If we hold a ref (as the
+        * parent), the child won't proc_free and that won't hangup/wake us from a
+        * read. */
+       syscall_async(&sysc, SYS_waitpid, pid, NULL, 0, 0, 0, 0);
+
+       snprintf(p, sizeof(p), "/proc/%d/ctl", pid);
+       fd = open(p, O_WRITE);
+       if (fd < 0) {
+               fprintf(stderr, "open %s: %r\n", p);
+               exit(1);
+       }
+
+       snprintf(p, sizeof(p), "straceall");
+       cmdlen = strlen(p);
+       /* Compare as ssize_t: comparing write()'s result against a size_t
+        * converts -1 to a huge unsigned value, so a failed write would never
+        * be seen as "short". */
+       if (write(fd, p, cmdlen) != (ssize_t)cmdlen) {
+               fprintf(stderr, "write to ctl %s %d: %r\n", p, fd);
+               exit(1);
+       }
+       close(fd);
+
+       snprintf(p, sizeof(p), "/proc/%d/strace", pid);
+       fd = open(p, O_READ);
+       if (fd < 0) {
+               fprintf(stderr, "open %s: %r\n", p);
+               exit(1);
+       }
+
+       /* now that we've set up the tracing, we can run the process.  isn't it
+        * great that the process doesn't immediately start when you make it? */
+       sys_proc_run(pid);
+
+       while ((amt = read(fd, buf, sizeof(buf))) > 0) {
+               if (write(1, buf, amt) < amt) {
+                       fprintf(stderr, "Write to stdout: %r\n");
+                       exit(1);
+               }
+       }
+       /* ESRCH is not reported: it is treated as the normal way for the
+        * stream to end. */
+       if ((amt < 0) && (errno != ESRCH))
+               fprintf(stderr, "Read fd %d for %s: %r\n", fd, p);
+       close(fd);
+       return 0;
+}