net: Use chan flag O_NONBLOCK for nonblocking
[akaros.git] / kern / src / net / devip.c
index 88f46c3..48f67cd 100644 (file)
@@ -1,4 +1,31 @@
-// INFERNO
+/* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+ * Portions Copyright © 1997-1999 Vita Nuova Limited
+ * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
+ *                                (www.vitanuova.com)
+ * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+ *
+ * Modified for the Akaros operating system:
+ * Copyright (c) 2013-2014 The Regents of the University of California
+ * Copyright (c) 2013-2015 Google Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE. */
+
 #include <vfs.h>
 #include <kfs.h>
 #include <slab.h>
 #include <smp.h>
 #include <ip.h>
 
+struct dev ipdevtab;
+
+static char *devname(void)
+{
+       return ipdevtab.name;   /* name field of ipdevtab, the struct dev declared above */
+}
+
 enum {
        Qtopdir = 1,                            /* top level directory */
        Qtopbase,
        Qarp = Qtopbase,
-       Qbootp,
        Qndb,
        Qiproute,
        Qiprouter,
@@ -67,6 +100,12 @@ extern char *eve;
 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
 static void closeconv(struct conv *);
 
+static struct conv *chan2conv(struct chan *chan)
+{
+       /* ipfs[dev] -> p[PROTO(qid)] -> conv[CONV(qid)]; no NULL checks en route */
+       return ipfs[chan->dev]->p[PROTO(chan->qid)]->conv[CONV(chan->qid)];
+}
+
 static inline int founddevdir(struct chan *c, struct qid q, char *n,
                                                          int64_t length, char *user, long perm,
                                                          struct dir *db)
@@ -79,7 +118,7 @@ static int topdirgen(struct chan *c, struct dir *dp)
 {
        struct qid q;
        mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-       snprintf(get_cur_genbuf(), GENBUF_SZ, "#I%lu", c->dev);
+       snprintf(get_cur_genbuf(), GENBUF_SZ, "#%s%lu", devname(), c->dev);
        return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
 }
 
@@ -90,7 +129,7 @@ static int ip3gen(struct chan *c, int i, struct dir *dp)
        struct conv *cv;
        char *p;
 
-       cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+       cv = chan2conv(c);
        if (cv->owner == NULL)
                kstrdup(&cv->owner, eve);
        mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
@@ -159,11 +198,6 @@ static int ip1gen(struct chan *c, int i, struct dir *dp)
                case Qarp:
                        p = "arp";
                        break;
-               case Qbootp:
-                       if (bootp == NULL)
-                               return 0;
-                       p = "bootp";
-                       break;
                case Qndb:
                        p = "ndb";
                        len = strlen(f->ndb);
@@ -212,7 +246,6 @@ ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
                        s -= f->np;
                        return ip1gen(c, s + Qtopbase, dp);
                case Qarp:
-               case Qbootp:
                case Qndb:
                case Qlog:
                case Qiproute:
@@ -284,7 +317,7 @@ static struct Fs *ipgetfs(int dev)
 
        qlock(&fslock);
        if (ipfs[dev] == NULL) {
-               f = kzmalloc(sizeof(struct Fs), KMALLOC_WAIT);
+               f = kzmalloc(sizeof(struct Fs), MEM_WAIT);
                rwinit(&f->rwlock);
                qlock_init(&f->iprouter.qlock);
                ip_init(f);
@@ -324,10 +357,10 @@ static struct chan *ipattach(char *spec)
 
        dev = atoi(spec);
        if (dev >= Nfs)
-               error("bad specification");
+               error(EFAIL, "bad specification");
 
        ipgetfs(dev);
-       c = devattach('I', spec);
+       c = devattach(devname(), spec);
        mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
        c->dev = dev;
 
@@ -362,12 +395,6 @@ static int should_wake(void *arg)
        return cv->incall != NULL;
 }
 
-static int m2p[] = {
-       [OREAD] 4,
-       [OWRITE] 2,
-       [ORDWR] 6
-};
-
 static struct chan *ipopen(struct chan *c, int omode)
 {
        ERRSTACK(2);
@@ -376,7 +403,8 @@ static struct chan *ipopen(struct chan *c, int omode)
        int perm;
        struct Fs *f;
 
-       perm = m2p[omode & 3];
+       /* perm is a lone rwx, not the rwx------ from the conversion */
+       perm = omode_to_rwx(omode) >> 6;
 
        f = ipfs[c->dev];
 
@@ -384,9 +412,9 @@ static struct chan *ipopen(struct chan *c, int omode)
                default:
                        break;
                case Qndb:
-                       if (omode & (OWRITE | OTRUNC) && !iseve())
-                               error(Eperm);
-                       if ((omode & (OWRITE | OTRUNC)) == (OWRITE | OTRUNC))
+                       if (omode & (O_WRITE | O_TRUNC) && !iseve())
+                               error(EPERM, ERROR_FIXME);
+                       if ((omode & (O_WRITE | O_TRUNC)) == (O_WRITE | O_TRUNC))
                                f->ndb[0] = 0;
                        break;
                case Qlog:
@@ -404,18 +432,17 @@ static struct chan *ipopen(struct chan *c, int omode)
                case Qremote:
                case Qlocal:
                case Qstats:
-               case Qbootp:
                case Qipselftab:
-                       if (!IS_RDONLY(omode))
-                               error(Eperm);
+                       if (omode & O_WRITE)
+                               error(EPERM, ERROR_FIXME);
                        break;
                case Qsnoop:
-                       if (!IS_RDONLY(omode))
-                               error(Eperm);
+                       if (omode & O_WRITE)
+                               error(EPERM, ERROR_FIXME);
                        p = f->p[PROTO(c->qid)];
                        cv = p->conv[CONV(c->qid)];
                        if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
-                               error(Eperm);
+                               error(EPERM, ERROR_FIXME);
                        atomic_inc(&cv->snoopers);
                        break;
                case Qclone:
@@ -429,12 +456,9 @@ static struct chan *ipopen(struct chan *c, int omode)
                        qunlock(&p->qlock);
                        poperror();
                        if (cv == NULL) {
-                               error(Enodev);
+                               error(ENODEV, ERROR_FIXME);
                                break;
                        }
-                       /* we only honor nonblock on a clone */
-                       if (c->flag & CNONBLOCK)
-                               Fsconvnonblock(cv, TRUE);
                        mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
                        break;
                case Qdata:
@@ -451,9 +475,9 @@ static struct chan *ipopen(struct chan *c, int omode)
                        }
                        if ((perm & (cv->perm >> 6)) != perm) {
                                if (strcmp(ATTACHER(c), cv->owner) != 0)
-                                       error(Eperm);
+                                       error(EPERM, ERROR_FIXME);
                                if ((perm & cv->perm) != perm)
-                                       error(Eperm);
+                                       error(EPERM, ERROR_FIXME);
 
                        }
                        cv->inuse++;
@@ -467,16 +491,32 @@ static struct chan *ipopen(struct chan *c, int omode)
                        break;
                case Qlisten:
                        cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+                       /* No permissions or Announce checks required.  We'll see if that's
+                        * a good idea or not. (the perm check would do nothing, as is,
+                        * since an O_PATH perm is 0).
+                        *
+                        * But we probably want to incref to keep the conversation around
+                        * until this FD/chan is closed.  #ip is a little weird in that
+                        * objects never really go away (high water mark for convs, you can
+                        * always find them in the ns).  I think it is possible to
+                        * namec/ipgen a chan, then have that conv close, then have that
+                        * chan be opened.  You can probably do this with a data file. */
+                       if (omode & O_PATH) {
+                               qlock(&cv->qlock);
+                               cv->inuse++;
+                               qunlock(&cv->qlock);
+                               break;
+                       }
                        if ((perm & (cv->perm >> 6)) != perm) {
                                if (strcmp(ATTACHER(c), cv->owner) != 0)
-                                       error(Eperm);
+                                       error(EPERM, ERROR_FIXME);
                                if ((perm & cv->perm) != perm)
-                                       error(Eperm);
+                                       error(EPERM, ERROR_FIXME);
 
                        }
 
                        if (cv->state != Announced)
-                               error("not announced");
+                               error(EFAIL, "not announced");
 
                        if (waserror()) {
                                closeconv(cv);
@@ -490,7 +530,7 @@ static struct chan *ipopen(struct chan *c, int omode)
                        while (nc == NULL) {
                                /* give up if we got a hangup */
                                if (qisclosed(cv->rq))
-                                       error("listen hungup");
+                                       error(EFAIL, "listen hungup");
 
                                qlock(&cv->listenq);
                                if (waserror()) {
@@ -500,10 +540,8 @@ static struct chan *ipopen(struct chan *c, int omode)
                                /* we can peek at incall without grabbing the cv qlock.  if
                                 * anything is there, it'll remain there until we dequeue it.
                                 * no one else can, since we hold the listenq lock */
-                               if (cv->nonblock && !cv->incall) {
-                                       set_errno(EAGAIN);
-                                       error("listen queue empty");
-                               }
+                               if ((c->flag & O_NONBLOCK) && !cv->incall)
+                                       error(EAGAIN, "listen queue empty");
                                /* wait for a connect */
                                rendez_sleep(&cv->listenr, should_wake, cv);
 
@@ -515,9 +553,6 @@ static struct chan *ipopen(struct chan *c, int omode)
                                        cv->incall = nc->next;
                                        mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
                                        kstrdup(&cv->owner, ATTACHER(c));
-                                       /* TODO: If we want to support something like accept4(),
-                                        * where the new conversations are nonblocking right away,
-                                        * we can do so here. */
                                }
                                qunlock(&cv->qlock);
 
@@ -545,7 +580,7 @@ static int ipwstat(struct chan *c, uint8_t * dp, int n)
        f = ipfs[c->dev];
        switch (TYPE(c->qid)) {
                default:
-                       error(Eperm);
+                       error(EPERM, ERROR_FIXME);
                        break;
                case Qctl:
                case Qdata:
@@ -559,11 +594,11 @@ static int ipwstat(struct chan *c, uint8_t * dp, int n)
        }
        n = convM2D(dp, n, d, (char *)&d[1]);
        if (n == 0)
-               error(Eshortstat);
+               error(ENODATA, ERROR_FIXME);
        p = f->p[PROTO(c->qid)];
        cv = p->conv[CONV(c->qid)];
        if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
-               error(Eperm);
+               error(EPERM, ERROR_FIXME);
        if (!emptystr(d->uid))
                kstrdup(&cv->owner, d->uid);
        if (d->mode != ~0UL)
@@ -590,8 +625,9 @@ static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
                case Qdata:
                        proto = f->p[PROTO(ch->qid)];
                        conv = proto->conv[CONV(ch->qid)];
-                       snprintf(ret, ret_l, "Qdata, proto %s, conv idx %d", proto->name,
-                                        conv->x);
+                       snprintf(ret, ret_l, "Qdata, %s proto %s, conv idx %d",
+                                SLIST_EMPTY(&conv->data_taps) ? "untapped" : "tapped",
+                                proto->name, conv->x);
                        break;
                case Qarp:
                        ret = "Qarp";
@@ -599,6 +635,13 @@ static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
                case Qiproute:
                        ret = "Qiproute";
                        break;
+               case Qlisten:
+                       proto = f->p[PROTO(ch->qid)];
+                       conv = proto->conv[CONV(ch->qid)];
+                       snprintf(ret, ret_l, "Qlisten, %s proto %s, conv idx %d",
+                                SLIST_EMPTY(&conv->listen_taps) ? "untapped" : "tapped",
+                                proto->name, conv->x);
+                       break;
                case Qlog:
                        ret = "Qlog";
                        break;
@@ -666,6 +709,7 @@ static void ipclose(struct chan *c)
                case Qdata:
                case Qctl:
                case Qerr:
+               case Qlisten:
                        if (c->flag & COPEN)
                                closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
                        break;
@@ -697,15 +741,13 @@ static long ipread(struct chan *ch, void *a, long n, int64_t off)
        p = a;
        switch (TYPE(ch->qid)) {
                default:
-                       error(Eperm);
+                       error(EPERM, ERROR_FIXME);
                case Qtopdir:
                case Qprotodir:
                case Qconvdir:
                        return devdirread(ch, a, n, 0, 0, ipgen);
                case Qarp:
                        return arpread(f->arp, a, offset, n);
-               case Qbootp:
-                       return bootpread(a, offset, n);
                case Qndb:
                        return readstr(offset, a, n, f->ndb);
                case Qiproute:
@@ -753,14 +795,15 @@ static long ipread(struct chan *ch, void *a, long n, int64_t off)
                        x = f->p[PROTO(ch->qid)];
                        c = x->conv[CONV(ch->qid)];
                        sofar = (*x->state) (c, buf, Statelen - 2);
-                       sofar += snprintf(buf + sofar, Statelen - 2 - sofar, "nonblock %s\n",
-                                         c->nonblock ? "on" : "off");
                        rv = readstr(offset, p, n, buf);
                        kfree(buf);
                        return rv;
                case Qdata:
                        c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
-                       return qread(c->rq, a, n);
+                       if (ch->flag & O_NONBLOCK)
+                               return qread_nonblock(c->rq, a, n);
+                       else
+                               return qread(c->rq, a, n);
                case Qerr:
                        c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
                        return qread(c->eq, a, n);
@@ -770,7 +813,7 @@ static long ipread(struct chan *ch, void *a, long n, int64_t off)
                case Qstats:
                        x = f->p[PROTO(ch->qid)];
                        if (x->stats == NULL)
-                               error("stats not implemented");
+                               error(EFAIL, "stats not implemented");
                        buf = kzmalloc(Statelen, 0);
                        (*x->stats) (x, buf, Statelen);
                        rv = readstr(offset, p, n, buf);
@@ -782,15 +825,14 @@ static long ipread(struct chan *ch, void *a, long n, int64_t off)
 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
 {
        struct conv *c;
-       struct Proto *x;
-       struct Fs *f;
 
        switch (TYPE(ch->qid)) {
                case Qdata:
-                       f = ipfs[ch->dev];
-                       x = f->p[PROTO(ch->qid)];
-                       c = x->conv[CONV(ch->qid)];
-                       return qbread(c->rq, n);
+                       c = chan2conv(ch);
+                       if (ch->flag & O_NONBLOCK)
+                               return qbread_nonblock(c->rq, n);
+                       else
+                               return qbread(c->rq, n);
                default:
                        return devbread(ch, n, offset);
        }
@@ -807,7 +849,7 @@ static void setladdr(struct conv *c)
 /*
  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
  */
-static char *setluniqueport(struct conv *c, int lport)
+static void setluniqueport(struct conv *c, int lport)
 {
        struct Proto *p;
        struct conv *xp;
@@ -828,12 +870,11 @@ static char *setluniqueport(struct conv *c, int lport)
                        && ipcmp(xp->raddr, c->raddr) == 0
                        && ipcmp(xp->laddr, c->laddr) == 0) {
                        qunlock(&p->qlock);
-                       return "address in use";
+                       error(EFAIL, "address in use");
                }
        }
        c->lport = lport;
        qunlock(&p->qlock);
-       return NULL;
 }
 
 /*
@@ -865,7 +906,7 @@ static void setlport(struct conv *c)
                                *pp = 600;
                } else
                        while (*pp < 5000)
-                               *pp = nrand(1 << 15);
+                               urandom_read(pp, sizeof(*pp));
 
                found = 0;
                for (x = 0; x < p->nc; x++) {
@@ -887,15 +928,12 @@ static void setlport(struct conv *c)
  *  set a local address and port from a string of the form
  *     [address!]port[!r]
  */
-static char *setladdrport(struct conv *c, char *str, int announcing)
+static void setladdrport(struct conv *c, char *str, int announcing)
 {
        char *p;
-       char *rv;
        uint16_t lport;
        uint8_t addr[IPaddrlen];
 
-       rv = NULL;
-
        /*
         *  ignore restricted part if it exists.  it's
         *  meaningless on local ports.
@@ -922,32 +960,31 @@ static char *setladdrport(struct conv *c, char *str, int announcing)
                        if (ipforme(c->p->f, addr))
                                ipmove(c->laddr, addr);
                        else
-                               return "not a local IP address";
+                               error(EFAIL, "not a local IP address");
                }
        }
 
        /* one process can get all connections */
        if (announcing && strcmp(p, "*") == 0) {
                if (!iseve())
-                       error(Eperm);
-               return setluniqueport(c, 0);
+                       error(EPERM, ERROR_FIXME);
+               setluniqueport(c, 0);
        }
 
        lport = atoi(p);
        if (lport <= 0)
                setlport(c);
        else
-               rv = setluniqueport(c, lport);
-       return rv;
+               setluniqueport(c, lport);
 }
 
-static char *setraddrport(struct conv *c, char *str)
+static void setraddrport(struct conv *c, char *str)
 {
        char *p;
 
        p = strchr(str, '!');
        if (p == NULL)
-               return "malformed address";
+               error(EFAIL, "malformed address");
        *p++ = 0;
        parseip(c->raddr, str);
        c->rport = atoi(p);
@@ -956,33 +993,25 @@ static char *setraddrport(struct conv *c, char *str)
                if (strstr(p, "!r") != NULL)
                        c->restricted = 1;
        }
-       return NULL;
 }
 
 /*
  *  called by protocol connect routine to set addresses
  */
-char *Fsstdconnect(struct conv *c, char *argv[], int argc)
+void Fsstdconnect(struct conv *c, char *argv[], int argc)
 {
-       char *p;
-
        switch (argc) {
                default:
-                       return "bad args to connect";
+                       error(EINVAL, "bad args to %s", __func__);
                case 2:
-                       p = setraddrport(c, argv[1]);
-                       if (p != NULL)
-                               return p;
+                       setraddrport(c, argv[1]);
                        setladdr(c);
                        setlport(c);
                        break;
                case 3:
-                       p = setraddrport(c, argv[1]);
-                       if (p != NULL)
-                               return p;
-                       p = setladdrport(c, argv[2], 0);
-                       if (p != NULL)
-                               return p;
+                       setraddrport(c, argv[1]);
+                       setladdrport(c, argv[2], 0);
+                       break;
        }
 
        if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
@@ -991,8 +1020,6 @@ char *Fsstdconnect(struct conv *c, char *argv[], int argc)
                c->ipversion = V4;
        else
                c->ipversion = V6;
-
-       return NULL;
 }
 
 /*
@@ -1009,14 +1036,12 @@ static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
        char *p;
 
        if (c->state != 0)
-               error(Econinuse);
+               error(EBUSY, ERROR_FIXME);
        c->state = Connecting;
        c->cerr[0] = '\0';
        if (x->connect == NULL)
-               error("connect not supported");
-       p = x->connect(c, cb->f, cb->nf);
-       if (p != NULL)
-               error(p);
+               error(EFAIL, "connect not supported");
+       x->connect(c, cb->f, cb->nf);
 
        qunlock(&c->qlock);
        if (waserror()) {
@@ -1028,21 +1053,22 @@ static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
        poperror();
 
        if (c->cerr[0] != '\0')
-               error(c->cerr);
+               error(EFAIL, c->cerr);
 }
 
 /*
  *  called by protocol announce routine to set addresses
  */
-char *Fsstdannounce(struct conv *c, char *argv[], int argc)
+void Fsstdannounce(struct conv *c, char *argv[], int argc)
 {
        memset(c->raddr, 0, sizeof(c->raddr));
        c->rport = 0;
        switch (argc) {
                default:
-                       return "bad args to announce";
+                       error(EINVAL, "bad args to announce");  /* presumably does not return; else this falls into case 2 */
                case 2:
-                       return setladdrport(c, argv[1], 1);
+                       setladdrport(c, argv[1], 1);    /* 1 is setladdrport's 'announcing' argument */
+                       break;
        }
 }
 
@@ -1060,14 +1086,12 @@ static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
        char *p;
 
        if (c->state != 0)
-               error(Econinuse);
+               error(EBUSY, ERROR_FIXME);
        c->state = Announcing;
        c->cerr[0] = '\0';
        if (x->announce == NULL)
-               error("announce not supported");
-       p = x->announce(c, cb->f, cb->nf);
-       if (p != NULL)
-               error(p);
+               error(EFAIL, "announce not supported");
+       x->announce(c, cb->f, cb->nf);
 
        qunlock(&c->qlock);
        if (waserror()) {
@@ -1079,55 +1103,54 @@ static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
        poperror();
 
        if (c->cerr[0] != '\0')
-               error(c->cerr);
+               error(EFAIL, c->cerr);
 }
 
 /*
  *  called by protocol bind routine to set addresses
  */
-char *Fsstdbind(struct conv *c, char *argv[], int argc)
+void Fsstdbind(struct conv *c, char *argv[], int argc)
 {
        switch (argc) {
                default:
-                       return "bad args to bind";
+                       error(EINVAL, "bad args to bind");      /* presumably does not return; else this falls into case 2 */
                case 2:
-                       return setladdrport(c, argv[1], 0);
+                       setladdrport(c, argv[1], 0);    /* 0 is setladdrport's 'announcing' argument */
+                       break;
        }
 }
 
-void Fsconvnonblock(struct conv *cv, bool onoff)
-{
-       qnonblock(cv->wq, onoff);
-       qnonblock(cv->rq, onoff);
-       cv->nonblock = onoff;
-}
-
 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
 {
-       char *p;
-
        if (x->bind == NULL)
-               p = Fsstdbind(c, cb->f, cb->nf);
+               Fsstdbind(c, cb->f, cb->nf);    /* fallback when the proto supplies no bind op */
        else
-               p = x->bind(c, cb->f, cb->nf);
-       if (p != NULL)
-               error(p);
+               x->bind(c, cb->f, cb->nf);      /* result no longer checked here; the old error(p) is removed */
 }
 
-static void nonblockctlmsg(struct conv *c, struct cmdbuf *cb)
+static void shutdownctlmsg(struct conv *cv, struct cmdbuf *cb)
 {
        if (cb->nf < 2)
                goto err;
-       if (!strcmp(cb->f[1], "on"))
-               Fsconvnonblock(c, TRUE);
-       else if (!strcmp(cb->f[1], "off"))
-               Fsconvnonblock(c, FALSE);
-       else
+       if (!strcmp(cb->f[1], "rd")) {  /* "shutdown rd": hang up the read queue only */
+               qhangup(cv->rq, "shutdown");
+               if (cv->p->shutdown)    /* the proto's shutdown op is optional */
+                       cv->p->shutdown(cv, SHUT_RD);
+       } else if (!strcmp(cb->f[1], "wr")) {   /* "shutdown wr": hang up the write queue only */
+               qhangup(cv->wq, "shutdown");
+               if (cv->p->shutdown)
+                       cv->p->shutdown(cv, SHUT_WR);
+       } else if (!strcmp(cb->f[1], "rdwr")) { /* "shutdown rdwr": hang up both queues */
+               qhangup(cv->rq, "shutdown");
+               qhangup(cv->wq, "shutdown");
+               if (cv->p->shutdown)
+                       cv->p->shutdown(cv, SHUT_RDWR);
+       } else {
                goto err;
+       }
        return;
 err:
-       set_errno(EINVAL);
-       error("nonblock [on|off]");
+       error(EINVAL, "shutdown [rd|wr|rdwr]"); /* usage text lists the keywords accepted above */
 }
 
 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
@@ -1162,15 +1185,14 @@ static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
 
        switch (TYPE(ch->qid)) {
                default:
-                       error(Eperm);
+                       error(EPERM, ERROR_FIXME);
                case Qdata:
                        x = f->p[PROTO(ch->qid)];
                        c = x->conv[CONV(ch->qid)];
-
-                       if (c->wq == NULL)
-                               error(Eperm);
-
-                       qwrite(c->wq, a, n);
+                       if (ch->flag & O_NONBLOCK)
+                               qwrite_nonblock(c->wq, a, n);
+                       else
+                               qwrite(c->wq, a, n);
                        break;
                case Qarp:
                        return arpwrite(f, a, n);
@@ -1193,15 +1215,15 @@ static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
                                nexterror();
                        }
                        if (cb->nf < 1)
-                               error("short control request");
+                               error(EFAIL, "short control request");
                        if (strcmp(cb->f[0], "connect") == 0)
                                connectctlmsg(x, c, cb);
                        else if (strcmp(cb->f[0], "announce") == 0)
                                announcectlmsg(x, c, cb);
                        else if (strcmp(cb->f[0], "bind") == 0)
                                bindctlmsg(x, c, cb);
-                       else if (strcmp(cb->f[0], "nonblock") == 0)
-                               nonblockctlmsg(c, cb);
+                       else if (strcmp(cb->f[0], "shutdown") == 0)
+                               shutdownctlmsg(c, cb);
                        else if (strcmp(cb->f[0], "ttl") == 0)
                                ttlctlmsg(c, cb);
                        else if (strcmp(cb->f[0], "tos") == 0)
@@ -1210,32 +1232,30 @@ static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
                                c->ignoreadvice = 1;
                        else if (strcmp(cb->f[0], "addmulti") == 0) {
                                if (cb->nf < 2)
-                                       error("addmulti needs interface address");
+                                       error(EFAIL, "addmulti needs interface address");
                                if (cb->nf == 2) {
                                        if (!ipismulticast(c->raddr))
-                                               error("addmulti for a non multicast address");
+                                               error(EFAIL, "addmulti for a non multicast address");
                                        parseip(ia, cb->f[1]);
                                        ipifcaddmulti(c, c->raddr, ia);
                                } else {
                                        parseip(ma, cb->f[2]);
                                        if (!ipismulticast(ma))
-                                               error("addmulti for a non multicast address");
+                                               error(EFAIL, "addmulti for a non multicast address");
                                        parseip(ia, cb->f[1]);
                                        ipifcaddmulti(c, ma, ia);
                                }
                        } else if (strcmp(cb->f[0], "remmulti") == 0) {
                                if (cb->nf < 2)
-                                       error("remmulti needs interface address");
+                                       error(EFAIL, "remmulti needs interface address");
                                if (!ipismulticast(c->raddr))
-                                       error("remmulti for a non multicast address");
+                                       error(EFAIL, "remmulti for a non multicast address");
                                parseip(ia, cb->f[1]);
                                ipifcremmulti(c, c->raddr, ia);
                        } else if (x->ctl != NULL) {
-                               p = x->ctl(c, cb->f, cb->nf);
-                               if (p != NULL)
-                                       error(p);
+                               x->ctl(c, cb->f, cb->nf);
                        } else
-                               error("unknown control request");
+                               error(EFAIL, "unknown control request");
                        qunlock(&c->qlock);
                        kfree(cb);
                        poperror();
@@ -1246,50 +1266,158 @@ static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
 {
        struct conv *c;
-       struct Proto *x;
-       struct Fs *f;
        int n;
 
        switch (TYPE(ch->qid)) {
                case Qdata:
-                       f = ipfs[ch->dev];
-                       x = f->p[PROTO(ch->qid)];
-                       c = x->conv[CONV(ch->qid)];
-
-                       if (c->wq == NULL)
-                               error(Eperm);
-
+                       c = chan2conv(ch);      /* conv whose wq receives the block */
                        if (bp->next)
                                bp = concatblock(bp);
                        n = BLEN(bp);
-                       qbwrite(c->wq, bp);
+                       if (ch->flag & O_NONBLOCK)      /* nonblocking is per-chan */
+                               qbwrite_nonblock(c->wq, bp);
+                       else
+                               qbwrite(c->wq, bp);
                        return n;
                default:
                        return devbwrite(ch, bp, offset);
        }
 }
 
+static void ip_wake_cb(struct queue *q, void *data, int filter)
+{
+       struct conv *conv = (struct conv*)data;
+       struct fd_tap *tap_i;
+       /* Wake callback installed by iptapfd(); passes 'filter' on to the conv's
+        * data taps, except for events on the opposite end of a queue.  E.g. we
+        * want to know when the WQ is writable; our own writes make it readable,
+        * which must not trigger a tap.  qio doesn't know how/why we use a queue,
+        * or even who the ends are (hence the callbacks). */
+       if ((filter & FDTAP_FILT_READABLE) && (q == conv->wq))
+               return;
+       if ((filter & FDTAP_FILT_WRITABLE) && (q == conv->rq))
+               return;
+       /* At this point, we have an event we want to send to our taps (if any).
+        * The lock protects list integrity and the existence of the tap.
+        *
+        * Previously, I thought of using the conv qlock.  That actually breaks, due
+        * to weird usages of the qlock (someone holds it for a long time, blocking
+        * the inbound wakeup from etherread4).
+        *
+        * I opted for a spinlock for a couple of reasons:
+        * - fire_tap should not block.  Ideally it'll be fast too (it's mostly a
+        * send_event).
+        * - our callers might not want to block.  A lot of network wakeups will
+        * come from network processes (etherread4) or are otherwise unrelated to
+        * this particular conversation.  I'd rather do something like fire off a
+        * KMSG than block those.
+        * - if fire_tap takes a while, holding the lock only slows down other
+        * events on this *same* conversation, or other tap registration.  Not a
+        * huge deal. */
+       spin_lock(&conv->tap_lock);
+       SLIST_FOREACH(tap_i, &conv->data_taps, link)
+               fire_tap(tap_i, filter);
+       spin_unlock(&conv->tap_lock);
+}
+
+int iptapfd(struct chan *chan, struct fd_tap *tap, int cmd)
+{
+       struct conv *conv = chan2conv(chan);
+       int ret;
+       /* Add/remove 'tap' on a Qdata/Qlisten chan; 0, or -1 with errno set. */
+       #define DEVIP_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
+                                      FDTAP_FILT_HANGUP | FDTAP_FILT_PRIORITY |   \
+                                      FDTAP_FILT_ERROR)
+       #define DEVIP_LEGAL_LISTEN_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
+       /* NOTE(review): errstrs below print ints (e.g. cmd) with %p - confirm. */
+       switch (TYPE(chan->qid)) {
+               case Qdata:
+                       if (tap->filter & ~DEVIP_LEGAL_DATA_TAPS) {
+                               set_errno(ENOSYS);
+                               set_errstr("Unsupported #%s data tap %p, must be %p", devname(),
+                                          tap->filter, DEVIP_LEGAL_DATA_TAPS);
+                               return -1;
+                       }
+                       spin_lock(&conv->tap_lock);     /* guards the tap lists */
+                       switch (cmd) {
+                               case (FDTAP_CMD_ADD):
+                                       if (SLIST_EMPTY(&conv->data_taps)) {    /* first tap */
+                                               qio_set_wake_cb(conv->rq, ip_wake_cb, conv);
+                                               qio_set_wake_cb(conv->wq, ip_wake_cb, conv);
+                                       }
+                                       SLIST_INSERT_HEAD(&conv->data_taps, tap, link);
+                                       ret = 0;
+                                       break;
+                               case (FDTAP_CMD_REM):
+                                       SLIST_REMOVE(&conv->data_taps, tap, fd_tap, link);
+                                       if (SLIST_EMPTY(&conv->data_taps)) {    /* last tap gone */
+                                               qio_set_wake_cb(conv->rq, 0, conv);
+                                               qio_set_wake_cb(conv->wq, 0, conv);
+                                       }
+                                       ret = 0;
+                                       break;
+                               default:
+                                       set_errno(ENOSYS);
+                                       set_errstr("Unsupported #%s data tap command %p",
+                                                  devname(), cmd);
+                                       ret = -1;
+                       }
+                       spin_unlock(&conv->tap_lock);
+                       return ret;
+               case Qlisten:
+                       if (tap->filter & ~DEVIP_LEGAL_LISTEN_TAPS) {
+                               set_errno(ENOSYS);
+                               set_errstr("Unsupported #%s listen tap %p, must be %p",
+                                          devname(), tap->filter, DEVIP_LEGAL_LISTEN_TAPS);
+                               return -1;
+                       }
+                       spin_lock(&conv->tap_lock);
+                       switch (cmd) {  /* no wake cb here; list add/remove only */
+                               case (FDTAP_CMD_ADD):
+                                       SLIST_INSERT_HEAD(&conv->listen_taps, tap, link);
+                                       ret = 0;
+                                       break;
+                               case (FDTAP_CMD_REM):
+                                       SLIST_REMOVE(&conv->listen_taps, tap, fd_tap, link);
+                                       ret = 0;
+                                       break;
+                               default:
+                                       set_errno(ENOSYS);
+                                       set_errstr("Unsupported #%s listen tap command %p",
+                                                  devname(), cmd);
+                                       ret = -1;
+                       }
+                       spin_unlock(&conv->tap_lock);
+                       return ret;
+               default:
+                       set_errno(ENOSYS);
+                       set_errstr("Can't tap #%s file type %d", devname(),
+                                  TYPE(chan->qid));
+                       return -1;
+       }
+}
+
 struct dev ipdevtab __devtab = {
-       'I',
-       "ip",
-
-       ipreset,
-       ipinit,
-       devshutdown,
-       ipattach,
-       ipwalk,
-       ipstat,
-       ipopen,
-       devcreate,
-       ipclose,
-       ipread,
-       ipbread,
-       ipwrite,
-       ipbwrite,
-       devremove,
-       ipwstat,
-       devpower,
-       ipchaninfo,
+       .name = "ip",
+       /* Slots without an ip* handler below are filled with dev* functions. */
+       .reset = ipreset,
+       .init = ipinit,
+       .shutdown = devshutdown,
+       .attach = ipattach,
+       .walk = ipwalk,
+       .stat = ipstat,
+       .open = ipopen,
+       .create = devcreate,
+       .close = ipclose,
+       .read = ipread,
+       .bread = ipbread,
+       .write = ipwrite,
+       .bwrite = ipbwrite,
+       .remove = devremove,
+       .wstat = ipwstat,
+       .power = devpower,
+       .chaninfo = ipchaninfo,
+       .tapfd = iptapfd,       /* FD taps on Qdata and Qlisten chans */
 };
 
 int Fsproto(struct Fs *f, struct Proto *p)
@@ -1344,11 +1472,14 @@ retry:
                if (c == NULL) {
                        c = kzmalloc(sizeof(struct conv), 0);
                        if (c == NULL)
-                               error(Enomem);
+                               error(ENOMEM, ERROR_FIXME);
                        qlock_init(&c->qlock);
                        qlock_init(&c->listenq);
                        rendez_init(&c->cr);
                        rendez_init(&c->listenr);
+                       SLIST_INIT(&c->data_taps);      /* already = 0; set to be futureproof */
+                       SLIST_INIT(&c->listen_taps);
+                       spinlock_init(&c->tap_lock);
                        qlock(&c->qlock);
                        c->p = p;
                        c->x = pp - p->conv;
@@ -1356,13 +1487,14 @@ retry:
                                c->ptcl = kzmalloc(p->ptclsize, 0);
                                if (c->ptcl == NULL) {
                                        kfree(c);
-                                       error(Enomem);
+                                       error(ENOMEM, ERROR_FIXME);
                                }
                        }
                        *pp = c;
                        p->ac++;
                        c->eq = qopen(1024, Qmsg, 0, 0);
                        (*p->create) (c);
+                       assert(c->rq && c->wq);
                        break;
                }
                if (canqlock(&c->qlock)) {
@@ -1395,7 +1527,6 @@ retry:
        c->restricted = 0;
        c->ttl = MAXTTL;
        c->tos = DFLTTOS;
-       c->nonblock = FALSE;
        qreopen(c->rq);
        qreopen(c->wq);
        qreopen(c->eq);
@@ -1407,10 +1538,9 @@ retry:
 int Fsconnected(struct conv *c, char *msg)
 {
        if (msg != NULL && *msg != '\0')
-               strncpy(c->cerr, msg, sizeof(c->cerr));
+               strlcpy(c->cerr, msg, sizeof(c->cerr));
 
        switch (c->state) {
-
                case Announcing:
                        c->state = Announced;
                        break;
@@ -1437,6 +1567,17 @@ struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
        return f->t2p[proto];
 }
 
+static void fire_listener_taps(struct conv *conv)
+{
+       struct fd_tap *tap_i;   /* cursor over conv->listen_taps */
+       if (SLIST_EMPTY(&conv->listen_taps))    /* peeked without tap_lock */
+               return;
+       spin_lock(&conv->tap_lock);
+       SLIST_FOREACH(tap_i, &conv->listen_taps, link)
+               fire_tap(tap_i, FDTAP_FILT_READABLE);   /* notify each listen tap */
+       spin_unlock(&conv->tap_lock);
+}
+
 /*
  *  called with protocol locked
  */
@@ -1474,6 +1615,7 @@ struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
        qunlock(&c->qlock);
 
        rendez_wakeup(&c->listenr);
+       fire_listener_taps(c);
 
        return nc;
 }
@@ -1481,9 +1623,9 @@ struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
 {
        if (off > strlen(f->ndb))
-               error(Eio);
+               error(EIO, ERROR_FIXME);
        if (off + n >= sizeof(f->ndb) - 1)
-               error(Eio);
+               error(EIO, ERROR_FIXME);
        memmove(f->ndb + off, a, n);
        f->ndb[off + n] = 0;
        f->ndbvers++;