9ns: make kstrdup() actually atomic
[akaros.git] / kern / src / ns / chan.c
index 3b9193b..af83d1b 100644 (file)
@@ -26,8 +26,6 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE. */
 
-#include <vfs.h>
-#include <kfs.h>
 #include <slab.h>
 #include <kmalloc.h>
 #include <kref.h>
@@ -40,6 +38,8 @@
 #include <smp.h>
 #include <syscall.h>
 
+struct chan *kern_slash;
+
 char *channame(struct chan *c)
 {      /* DEBUGGING */
        if (c == NULL)
@@ -65,7 +65,7 @@ struct {
 typedef struct Elemlist Elemlist;
 
 struct Elemlist {
-       char *name;                                     /* copy of name, so '/' can be overwritten */
+       char *name;             /* copy of name, so '/' can be overwritten */
        int ARRAY_SIZEs;
        char **elems;
        int *off;
@@ -108,7 +108,8 @@ void kstrdup(char **p, char *s)
        char *t, *prev;
 
        n = strlen(s) + 1;
-       /* if it's a user, we can wait for memory; if not, something's very wrong */
+       /* if it's a user, we can wait for memory; if not, something's very
+        * wrong */
        if (current) {
                t = kzmalloc(n, 0);
        } else {
@@ -117,8 +118,8 @@ void kstrdup(char **p, char *s)
                        panic("kstrdup: no memory");
        }
        memmove(t, s, n);
-       prev = *p;
-       *p = t;
+
+       prev = atomic_swap_ptr((void**)p, t);
        kfree(prev);
 }
 
@@ -158,6 +159,14 @@ static void chan_release(struct kref *kref)
 {
        struct chan *c = container_of(kref, struct chan, ref);
        ERRSTACK(1);
+
+       /* We can be called from RCU callbacks, but close methods can block.  In
+        * those cases, and any other context that cannot block, we need to
+        * defer our work to a kernel message. */
+       if (!can_block(this_pcpui_ptr())) {
+               run_as_rkm(chan_release, kref);
+               return;
+       }
        /* this style discards the error from close().  picture it as
         * if (waserror()) { } else { close(); } chanfree_no_matter_what();  */
        if (!waserror()) {
@@ -193,8 +202,8 @@ struct chan *newchan(void)
                qlock_init(&c->umqlock);
        }
 
-       /* if you get an error before associating with a dev, cclose skips calling
-        * the dev's close */
+       /* if you get an error before associating with a dev, cclose skips
+        * calling the dev's close */
        c->type = -1;
        c->flag = 0;
        kref_init(&c->ref, chan_release, 1);
@@ -271,7 +280,8 @@ struct cname *addelem(struct cname *n, char *s)
                n->s = t;
                n->alen = a;
        }
-       if (n->len > 0 && n->s[n->len - 1] != '/' && s[0] != '/')       /* don't insert extra slash if one is present */
+       /* don't insert extra slash if one is present */
+       if (n->len > 0 && n->s[n->len - 1] != '/' && s[0] != '/')
                n->s[n->len++] = '/';
        memmove(n->s + n->len, s, i + 1);
        n->len += i;
@@ -321,7 +331,7 @@ void cclose(struct chan *c)
                return;
 
        if (c->flag & CFREE)
-               panic("cclose %p", getcallerpc(&c));
+               panic("double cclose %p.  maybe kfunc channame and hexdump", c);
 
        kref_put(&c->ref);
 }
@@ -383,6 +393,7 @@ int eqchantdqid(struct chan *a, int type, int dev, struct qid qid, int pathonly)
 static void mh_release(struct kref *kref)
 {
        struct mhead *mh = container_of(kref, struct mhead, ref);
+
        mh->mount = (struct mount *)0xCafeBeef;
        kfree(mh);
 }
@@ -415,7 +426,18 @@ int cmount(struct chan *new, struct chan *old, int flag, char *spec)
        struct mhead *m, **l, *mh;
        struct mount *nm, *f, *um, **h;
 
-       if (QTDIR & (old->qid.type ^ new->qid.type))
+       /* Can't bind pointing to a symlink, since it vastly complicates namec
+        * and walk.  In particular, walk() only follows mounts on the
+        * intermediate path elements.  Grep 'ntry - 1'.  Because of that,
+        * walk() can end on a symlink.  Having domount() follow symlinks is a
+        * pain: undomount. */
+       if (new->qid.type & QTSYMLINK)
+               error(EINVAL, "cannot bind a symlink");
+
+       /* Can bind anything onto a symlink's name.  Otherwise, both the old and
+        * the new must agree on whether or not it is a directory. */
+       if (!(old->qid.type & QTSYMLINK) &&
+           (QTDIR & (old->qid.type ^ new->qid.type)))
                error(EINVAL, ERROR_FIXME);
 
        if (old->umh)
@@ -607,13 +629,44 @@ struct chan *cclone(struct chan *c)
        return nc;
 }
 
-int
-findmount(struct chan **cp,
-                 struct mhead **mp, int type, int dev, struct qid qid)
+/* Helper: is something mounted on the chan? */
+static bool is_mount_point(struct chan *c)
 {
        struct pgrp *pg;
        struct mhead *m;
+       int type = c->type;
+       int dev = c->dev;
+       struct qid qid = c->qid;
 
+       if (!current)
+               return false;
+       pg = current->pgrp;
+       rlock(&pg->ns);
+       for (m = MOUNTH(pg, qid); m; m = m->hash) {
+               rlock(&m->lock);
+               if (!m->from) {
+                       runlock(&m->lock);
+                       continue;
+               }
+               if (eqchantdqid(m->from, type, dev, qid, 1)) {
+                       runlock(&m->lock);
+                       runlock(&pg->ns);
+                       return true;
+               }
+               runlock(&m->lock);
+       }
+       runlock(&pg->ns);
+       return false;
+}
+
+int findmount(struct chan **cp, struct mhead **mp, int type, int dev,
+             struct qid qid)
+{
+       struct pgrp *pg;
+       struct mhead *m;
+
+       if (!current)
+               return 0;
        pg = current->pgrp;
        rlock(&pg->ns);
        for (m = MOUNTH(pg, qid); m; m = m->hash) {
@@ -658,6 +711,8 @@ struct chan *undomount(struct chan *c, struct cname *name)
        struct mount *t;
        struct mhead **h, **he, *f;
 
+       if (!current)
+               return c;
        pg = current->pgrp;
        rlock(&pg->ns);
        if (waserror()) {
@@ -673,12 +728,14 @@ struct chan *undomount(struct chan *c, struct cname *name)
                        for (t = f->mount; t; t = t->next) {
                                if (eqchan(c, t->to, 1)) {
                                        /*
-                                        * We want to come out on the left hand side of the mount
-                                        * point using the element of the union that we entered on.
-                                        * To do this, find the element that has a from name of
-                                        * c->name->s.
+                                        * We want to come out on the left hand
+                                        * side of the mount point using the
+                                        * element of the union that we entered
+                                        * on.  To do this, find the element
+                                        * that has a from name of c->name->s.
                                         */
-                                       if (strcmp(t->head->from->name->s, name->s) != 0)
+                                       if (strcmp(t->head->from->name->s,
+                                                  name->s) != 0)
                                                continue;
                                        nc = t->head->from;
                                        chan_incref(nc);
@@ -717,15 +774,17 @@ int walk(struct chan **cp, char **names, int nnames, struct walk_helper *wh,
        /*
         * While we haven't gotten all the way down the path:
         *    1. step through a mount point, if any
-        *    2. send a walk request for initial dotdot or initial prefix without dotdot
+        *    2. send a walk request for initial dotdot or initial prefix
+        *    without dotdot
         *    3. move to the first mountpoint along the way.
         *    4. repeat.
         *
-        * An invariant is that each time through the loop, c is on the undomount
-        * side of the mount point, and c's name is cname.
+        * An invariant is that each time through the loop, c is on the
+        * undomount side of the mount point, and c's name is cname.
         */
        for (nhave = 0; nhave < nnames; nhave += n) {
-               /* We only allow symlink when they are first and it's .. (see below) */
+               /* We only allow symlinks when they are first and it's .. (see
+                * below) */
                if ((c->qid.type & (QTDIR | QTSYMLINK)) == 0) {
                        if (nerror)
                                *nerror = nhave;
@@ -761,7 +820,8 @@ int walk(struct chan **cp, char **names, int nnames, struct walk_helper *wh,
                type = c->type;
                dev = c->dev;
 
-               if ((wq = devtab[type].walk(c, NULL, names + nhave, ntry)) == NULL) {
+               if ((wq = devtab[type].walk(c, NULL, names + nhave, ntry)) ==
+                   NULL) {
                        /* try a union mount, if any */
                        if (mh && wh->can_mount) {
                                /*
@@ -770,8 +830,12 @@ int walk(struct chan **cp, char **names, int nnames, struct walk_helper *wh,
                                rlock(&mh->lock);
                                for (f = mh->mount->next; f; f = f->next)
                                        if ((wq =
-                                                devtab[f->to->type].walk(f->to, NULL, names + nhave,
-                                                                                                 ntry)) != NULL)
+                                            devtab[f->to->type].walk(f->to,
+                                                                     NULL,
+                                                                     names +
+                                                                     nhave,
+                                                                     ntry)) !=
+                                           NULL)
                                                break;
                                runlock(&mh->lock);
                                if (f != NULL) {
@@ -802,20 +866,27 @@ int walk(struct chan **cp, char **names, int nnames, struct walk_helper *wh,
                        nc = NULL;
                        if (wh->can_mount)
                                for (i = 0; i < wq->nqid && i < ntry - 1; i++)
-                                       if (findmount(&nc, &nmh, type, dev, wq->qid[i]))
+                                       if (findmount(&nc, &nmh, type, dev,
+                                                     wq->qid[i]))
                                                break;
                        if (nc == NULL) {       /* no mount points along path */
                                if (wq->clone == NULL) {
                                        cclose(c);
                                        cnameclose(cname);
-                                       if (wq->nqid == 0 || (wq->qid[wq->nqid - 1].type & QTDIR)) {
+                                       if (wq->nqid == 0 ||
+                                           (wq->qid[wq->nqid - 1].type &
+                                            QTDIR)) {
                                                if (nerror)
-                                                       *nerror = nhave + wq->nqid + 1;
-                                               set_error(ENOENT, "walk failed");
+                                                       *nerror = nhave +
+                                                               wq->nqid + 1;
+                                               set_error(ENOENT,
+                                                         "walk failed");
                                        } else {
                                                if (nerror)
-                                                       *nerror = nhave + wq->nqid;
-                                               set_error(ENOTDIR, "walk failed");
+                                                       *nerror = nhave +
+                                                               wq->nqid;
+                                               set_error(ENOTDIR,
+                                                         "walk failed");
                                        }
                                        kfree(wq);
                                        if (mh != NULL)
@@ -823,24 +894,7 @@ int walk(struct chan **cp, char **names, int nnames, struct walk_helper *wh,
                                        return -1;
                                }
                                n = wq->nqid;
-                               if (wq->clone->qid.type & QTSYMLINK) {
-                                       nc = walk_symlink(wq->clone, wh, nnames - nhave - n);
-                                       if (!nc) {
-                                               /* walk_symlink() set error.  This seems to be the
-                                                * standard walk() error-cleanup. */
-                                               if (nerror)
-                                                       *nerror = nhave + wq->nqid;
-                                               cclose(c);
-                                               cclose(wq->clone);
-                                               cnameclose(cname);
-                                               kfree(wq);
-                                               if (mh != NULL)
-                                                       putmhead(mh);
-                                               return -1;
-                                       }
-                               } else {
-                                       nc = wq->clone;
-                               }
+                               nc = wq->clone;
                        } else {        /* stopped early, at a mount point */
                                if (wq->clone != NULL) {
                                        cclose(wq->clone);
@@ -849,6 +903,26 @@ int walk(struct chan **cp, char **names, int nnames, struct walk_helper *wh,
                                lastmountpoint = nc;
                                n = i + 1;
                        }
+                       if (nc->qid.type & QTSYMLINK) {
+                               struct chan *old_nc = nc;
+
+                               nc = walk_symlink(old_nc, wh,
+                                                 nnames - nhave - n);
+                               if (!nc) {
+                                       /* walk_symlink() set error.
+                                        * This seems to be the standard
+                                        * walk() error-cleanup. */
+                                       if (nerror)
+                                               *nerror = nhave + n;
+                                       cclose(c);
+                                       cclose(old_nc);
+                                       cnameclose(cname);
+                                       kfree(wq);
+                                       if (mh != NULL)
+                                               putmhead(mh);
+                                       return -1;
+                               }
+                       }
                        for (i = 0; i < n; i++)
                                cname = addelem(cname, names[nhave + i]);
                }
@@ -1020,9 +1094,6 @@ void *memrchr(void *va, int c, long n)
  * Since the functions that open Aaccess (sysstat, syswstat, sys_stat)
  * do not use the struct cname*, this avoids an unnecessary clone.
  *
- * Acreatechan will never open. It will do all the tests and return a chan
- * for the directory where an open will succeed.
- *
  * The classic namec() is broken into a front end to get the starting point and
  * a __namec_from, which does the guts of the lookup.  */
 static struct chan *__namec_from(struct chan *c, char *aname, int amode,
@@ -1031,7 +1102,7 @@ static struct chan *__namec_from(struct chan *c, char *aname, int amode,
 {
        ERRSTACK(2);
        int len, npath;
-       struct chan *cnew;
+       struct chan *cnew, *renamee;
        struct cname *cname;
        Elemlist e;
        struct mhead *m;
@@ -1061,24 +1132,38 @@ static struct chan *__namec_from(struct chan *c, char *aname, int amode,
 
        if (e.mustbedir)
                omode &= ~O_NOFOLLOW;
-       /*
-        * On create, ....
-        */
-       if ((amode == Acreate) || (amode == Acreatechan)) {
+
+       switch (amode) {
+       case Acreate:
                /* perm must have DMDIR if last element is / or /. */
                if (e.mustbedir && !(perm & DMDIR)) {
                        npath = e.ARRAY_SIZEs;
                        error(EINVAL, "create without DMDIR");
                }
-
                /* don't try to walk the last path element just yet. */
                if (e.ARRAY_SIZEs == 0)
                        error(EEXIST, ERROR_FIXME);
                e.ARRAY_SIZEs--;
-               /* We're dropping the last element, which O_NOFOLLOW applied to.  Not
-                * sure if there are any legit reasons to have O_NOFOLLOW with create.*/
+               /* We're dropping the last element, which O_NOFOLLOW applied to.
+                * Not sure if there are any legit reasons to have O_NOFOLLOW
+                * with create.*/
                omode &= ~O_NOFOLLOW;
+               break;
+       case Arename:
+               if (e.ARRAY_SIZEs == 0)
+                       error(EINVAL, "rename needs at least one name");
+               e.ARRAY_SIZEs--;
+               omode &= ~O_NOFOLLOW;
+               break;
+       /* the differences between stat and lstat (Aaccess) are handled in
+        * sysfile.c */
+       case Abind:
+       case Amount:
+       case Aremove:
+               omode |= O_NOFOLLOW;
+               break;
        }
+
        if (omode & O_NOFOLLOW)
                wh->no_follow = true;
 
@@ -1087,26 +1172,29 @@ static struct chan *__namec_from(struct chan *c, char *aname, int amode,
                        printd("namec %s walk error npath=%d\n", aname, npath);
                        error(EFAIL, "walk failed");
                }
-               /* Old plan 9 errors would jump here for the magic error parsing. */
+               /* Old plan 9 errors would jump here for the magic error
+                * parsing. */
 NameError:
                if (current_errstr()[0]) {
-                       /* errstr is set, we'll just stick with it and error out */
-                       longjmp(&get_cur_errbuf()->jmpbuf, 1);
+                       /* errstr is set, just stick with it and error out */
+                       error_jmp();
                } else {
                        error(EFAIL, "Name to chan lookup failed");
                }
-               /* brho: skipping the namec custom error string business, since it hides
-                * the underlying failure.  implement this if you want the old stuff. */
+               /* brho: skipping the namec custom error string business, since
+                * it hides the underlying failure.  implement this if you want
+                * the old stuff. */
 #if 0
                strlcpy(tmperrbuf, current->errstr, sizeof(tmperrbuf));
-               len = prefix + e.off[npath]; // prefix was name - aname, the start pt
-               if (len < ERRMAX / 3 || (name = memrchr(aname, '/', len)) == NULL
-                       || name == aname)
-                       snprintf(get_cur_genbuf(), sizeof current->genbuf, "%.*s", len,
-                                        aname);
+               // prefix was name - aname, the start pt
+               len = prefix + e.off[npath];
+               if (len < ERRMAX / 3 || (name = memrchr(aname, '/', len)) ==
+                   NULL || name == aname)
+                       snprintf(get_cur_genbuf(), sizeof current->genbuf,
+                                "%.*s", len, aname);
                else
-                       snprintf(get_cur_genbuf(), sizeof current->genbuf, "...%.*s",
-                                        (int)(len - (name - aname)), name);
+                       snprintf(get_cur_genbuf(), sizeof current->genbuf,
+                                "...%.*s", (int)(len - (name - aname)), name);
                snprintf(current->errstr, ERRMAX, "%#q %s", get_cur_genbuf(),
                                 tmperrbuf);
 #endif
@@ -1123,217 +1211,262 @@ NameError:
        }
 
        switch (amode) {
-               case Aaccess:
-                       if (wh->can_mount)
-                               domount(&c, NULL);
-                       break;
-
-               case Abind:
-                       m = NULL;
-                       if (wh->can_mount)
-                               domount(&c, &m);
-                       if (c->umh != NULL)
-                               putmhead(c->umh);
-                       c->umh = m;
-                       break;
-
-               case Aremove:
-               case Aopen:
+       case Aaccess:
+               if (wh->can_mount)
+                       domount(&c, NULL);
+               break;
+
+       case Abind:
+               m = NULL;
+               if (wh->can_mount)
+                       domount(&c, &m);
+               if (c->umh != NULL)
+                       putmhead(c->umh);
+               c->umh = m;
+               break;
+
+       case Aremove:
+       case Aopen:
 Open:
-                       /* save the name; domount might change c */
-                       cname = c->name;
-                       kref_get(&cname->ref, 1);
-                       m = NULL;
-                       if (wh->can_mount)
-                               domount(&c, &m);
+               /* save the name; domount might change c */
+               cname = c->name;
+               kref_get(&cname->ref, 1);
+               m = NULL;
+               if (wh->can_mount)
+                       domount(&c, &m);
 
-                       /* our own copy to open or remove */
-                       c = cunique(c);
+               /* our own copy to open or remove */
+               c = cunique(c);
 
-                       /* now it's our copy anyway, we can put the name back */
-                       cnameclose(c->name);
-                       c->name = cname;
-
-                       switch (amode) {
-                               case Aremove:
-                                       putmhead(m);
-                                       break;
+               /* now it's our copy anyway, we can put the name back */
+               cnameclose(c->name);
+               c->name = cname;
 
-                               case Aopen:
-                               case Acreate:
-                                       if (c->umh != NULL) {
-                                               printd("cunique umh\n");
-                                               putmhead(c->umh);
-                                               c->umh = NULL;
-                                       }
+               switch (amode) {
+               case Aremove:
+                       putmhead(m);
+                       break;
 
-                                       /* only save the mount head if it's a multiple element union */
-                                       if (m && m->mount && m->mount->next)
-                                               c->umh = m;
-                                       else
-                                               putmhead(m);
-                                       if (omode == O_EXEC)
-                                               c->flag &= ~CCACHE;
-                                       /* here is where convert omode/vfs flags to c->flags.
-                                        * careful, O_CLOEXEC and O_REMCLO are in there.  might need
-                                        * to change that. */
-                                       c->flag |= omode & CEXTERNAL_FLAGS;
-                                       c = devtab[c->type].open(c,
-                                                                omode & ~O_CLOEXEC);
-                                       /* if you get this from a dev, in the dev's open, you are
-                                        * probably saving mode directly, without passing it through
-                                        * openmode. */
-                                       if (c->mode & O_TRUNC)
-                                               error(EFAIL, "Device %s open failed to clear O_TRUNC",
-                                                     devtab[c->type].name);
-                                       break;
+               case Aopen:
+               case Acreate:
+                       if (c->umh != NULL) {
+                               printd("cunique umh\n");
+                               putmhead(c->umh);
+                               c->umh = NULL;
                        }
-                       break;
 
-               case Atodir:
-                       /*
-                        * Directories (e.g. for cd) are left before the mount point,
-                        * so one may mount on / or . and see the effect.
-                        */
-                       if (!(c->qid.type & QTDIR))
-                               error(ENOTDIR, ERROR_FIXME);
+                       /* only save the mount head if it's a multiple element
+                        * union */
+                       if (m && m->mount && m->mount->next)
+                               c->umh = m;
+                       else
+                               putmhead(m);
+                       /* here is where we convert omode/vfs flags to
+                        * c->flags.  careful, O_CLOEXEC and O_REMCLO are in
+                        * there.  might need to change that. */
+                       c->flag |= omode & CEXTERNAL_FLAGS;
+                       c = devtab[c->type].open(c,
+                                                omode & ~O_CLOEXEC);
+                       /* if you get this from a dev, in the dev's open, you
+                        * are probably saving mode directly, without passing it
+                        * through openmode. */
+                       if (c->mode & O_TRUNC)
+                               error(EFAIL,
+                                     "Device %s open failed to clear O_TRUNC",
+                                     devtab[c->type].name);
                        break;
+               }
+               break;
 
-               case Amount:
-                       /*
-                        * When mounting on an already mounted upon directory,
-                        * one wants subsequent mounts to be attached to the
-                        * original directory, not the replacement.  Don't domount.
-                        */
-                       break;
+       case Atodir:
+               /*
+                * Directories (e.g. for cd) are left before the mount point,
+                * so one may mount on / or . and see the effect.
+                */
+               if (!(c->qid.type & QTDIR))
+                       error(ENOTDIR, ERROR_FIXME);
+               break;
 
-               case Acreatechan:
-                       /*
-                        * We've walked to the place where it *could* be created.
-                        * Return that chan.
-                        */
-                       break;
+       case Amount:
+               /*
+                * When mounting on an already mounted upon directory,
+                * one wants subsequent mounts to be attached to the
+                * original directory, not the replacement.  Don't domount.
+                */
+               break;
+
+       case Arename:
+               /* We already walked to the parent of new_path, which is in c.
+                * We're a lot like create here - need to find mounts, etc.  On
+                * the way out, we putmhead if we have an m, and clean up our
+                * chans.  On success, c becomes cnew (thus close the old c).
+                * On failure, we just close cnew. */
+               if (!(c->qid.type & QTDIR))
+                       error(ENOTDIR, "rename target parent is not a dir");
+               e.ARRAY_SIZEs++;
+               m = NULL;
+               cnew = NULL;
+               if (waserror()) {
+                       /* rename or createdir failed */
+                       cclose(cnew);
+                       if (m)
+                               putmhead(m);
+                       nexterror();    /* safe since we're in a waserror() */
+               }
+               if (wh->can_mount && findmount(&cnew, &m, c->type, c->dev,
+                                              c->qid)) {
+                       cnew = createdir(cnew, m);
+               } else {
+                       cnew = c;
+                       chan_incref(cnew);
+               }
+               cnew = cunique(cnew);
+               cnameclose(cnew->name);
+               cnew->name = c->name;
+               kref_get(&cnew->name->ref, 1);
+               /* At this point, we have our new_path parent chan (cnew) and
+                * the renamee chan */
+               renamee = ext;
+               if (cnew->type != renamee->type)
+                       error(EXDEV, "can't rename across device types");
+
+               devtab[cnew->type].rename(renamee, cnew,
+                                         e.elems[e.ARRAY_SIZEs - 1], 0);
+               poperror();
+
+               if (m)
+                       putmhead(m);
+               cclose(c);
+               c = cnew;
+               c->name = addelem(c->name, e.elems[e.ARRAY_SIZEs - 1]);
+               break;
 
-               case Acreate:
-                       /*
-                        * We've already walked all but the last element.
-                        * If the last exists, try to open it OTRUNC.
-                        * If omode&OEXCL is set, just give up.
-                        */
-                       e.ARRAY_SIZEs++;
-                       if (walk(&c, e.elems + e.ARRAY_SIZEs - 1, 1, wh, NULL) == 0) {
-                               if (omode & O_EXCL)
-                                       error(EEXIST, ERROR_FIXME);
-                               omode |= O_TRUNC;
-                               goto Open;
+       case Acreate:
+               /*
+                * We've already walked all but the last element.
+                * If the last exists, try to open it OTRUNC.
+                * If omode&OEXCL is set, just give up.
+                */
+               e.ARRAY_SIZEs++;
+               if (walk(&c, e.elems + e.ARRAY_SIZEs - 1, 1, wh, NULL) == 0) {
+                       if (omode & O_EXCL)
+                               error(EEXIST, ERROR_FIXME);
+                       omode |= O_TRUNC;
+                       goto Open;
+               }
+
+               /*
+                * The semantics of the create(2) system call are that if the
+                * file exists and can be written, it is to be opened with
+                * truncation.  On the other hand, the create(5) message fails
+                * if the file exists.
+                *
+                * If we get two create(2) calls happening simultaneously, they
+                * might both get here and send create(5) messages, but only one
+                * of the messages will succeed.  To provide the expected
+                * create(2) semantics, the call with the failed message needs
+                * to try the above walk again, opening for truncation.  This
+                * correctly solves the create/create race, in the sense that
+                * any observable outcome can be explained as one happening
+                * before the other.  The create/create race is quite common.
+                * For example, it happens when two rc subshells simultaneously
+                * update the same environment variable.
+                *
+                * The implementation still admits a create/create/remove race:
+                * (A) walk to file, fails
+                * (B) walk to file, fails
+                * (A) create file, succeeds, returns
+                * (B) create file, fails
+                * (A) remove file, succeeds, returns
+                * (B) walk to file, return failure.
+                *
+                * This is hardly as common as the create/create race, and is
+                * really not too much worse than what might happen if (B) got a
+                * hold of a file descriptor and then the file was removed --
+                * either way (B) can't do anything with the result of the
+                * create call.  So we don't care about this race.
+                *
+                * Applications that care about more fine-grained decision of
+                * the races can use the OEXCL flag to get at the underlying
+                * create(5) semantics; by default we provide the common case.
+                *
+                * We need to stay behind the mount point in case we
+                * need to do the first walk again (should the create fail).
+                *
+                * We also need to cross the mount point and find the directory
+                * in the union in which we should be creating.
+                *
+                * The channel staying behind is c, the one moving forward is
+                * cnew.
+                */
+               m = NULL;
+               cnew = NULL;    /* is this assignment necessary? */
+               /* discard error */
+               if (!waserror()) {      /* try create */
+                       if (wh->can_mount &&
+                           findmount(&cnew, &m, c->type, c->dev, c->qid))
+                               cnew = createdir(cnew, m);
+                       else {
+                               cnew = c;
+                               chan_incref(cnew);
                        }
 
                        /*
-                        * The semantics of the create(2) system call are that if the
-                        * file exists and can be written, it is to be opened with truncation.
-                        * On the other hand, the create(5) message fails if the file exists.
-                        * If we get two create(2) calls happening simultaneously,
-                        * they might both get here and send create(5) messages, but only
-                        * one of the messages will succeed.  To provide the expected create(2)
-                        * semantics, the call with the failed message needs to try the above
-                        * walk again, opening for truncation.  This correctly solves the
-                        * create/create race, in the sense that any observable outcome can
-                        * be explained as one happening before the other.
-                        * The create/create race is quite common.  For example, it happens
-                        * when two rc subshells simultaneously update the same
-                        * environment variable.
-                        *
-                        * The implementation still admits a create/create/remove race:
-                        * (A) walk to file, fails
-                        * (B) walk to file, fails
-                        * (A) create file, succeeds, returns
-                        * (B) create file, fails
-                        * (A) remove file, succeeds, returns
-                        * (B) walk to file, return failure.
-                        *
-                        * This is hardly as common as the create/create race, and is really
-                        * not too much worse than what might happen if (B) got a hold of a
-                        * file descriptor and then the file was removed -- either way (B) can't do
-                        * anything with the result of the create call.  So we don't care about this race.
-                        *
-                        * Applications that care about more fine-grained decision of the races
-                        * can use the OEXCL flag to get at the underlying create(5) semantics;
-                        * by default we provide the common case.
-                        *
-                        * We need to stay behind the mount point in case we
-                        * need to do the first walk again (should the create fail).
-                        *
-                        * We also need to cross the mount point and find the directory
-                        * in the union in which we should be creating.
-                        *
-                        * The channel staying behind is c, the one moving forward is cnew.
+                        * We need our own copy of the Chan because we're about
+                        * to send a create, which will move it.  Once we have
+                        * our own copy, we can fix the name, which might be
+                        * wrong if findmount gave us a new Chan.
                         */
-                       m = NULL;
-                       cnew = NULL;    /* is this assignment necessary? */
-                       /* discard error */
-                       if (!waserror()) {      /* try create */
-                               if (wh->can_mount && findmount(&cnew, &m, c->type, c->dev,
-                                                              c->qid))
-                                       cnew = createdir(cnew, m);
-                               else {
-                                       cnew = c;
-                                       chan_incref(cnew);
-                               }
-
-                               /*
-                                * We need our own copy of the Chan because we're
-                                * about to send a create, which will move it.  Once we have
-                                * our own copy, we can fix the name, which might be wrong
-                                * if findmount gave us a new Chan.
-                                */
-                               cnew = cunique(cnew);
-                               cnameclose(cnew->name);
-                               cnew->name = c->name;
-                               kref_get(&cnew->name->ref, 1);
-
-                               cnew->flag |= omode & CEXTERNAL_FLAGS;
-                               devtab[cnew->type].create(cnew, e.elems[e.ARRAY_SIZEs - 1],
-                                                                                 omode & ~(O_EXCL | O_CLOEXEC),
-                                                                                 perm, ext);
-                               poperror();
-
-                               if (m)
-                                       putmhead(m);
-                               cclose(c);
-                               c = cnew;
-                               c->name = addelem(c->name, e.elems[e.ARRAY_SIZEs - 1]);
-                               break;
-                       }
+                       cnew = cunique(cnew);
+                       cnameclose(cnew->name);
+                       cnew->name = c->name;
+                       kref_get(&cnew->name->ref, 1);
+
+                       cnew->flag |= omode & CEXTERNAL_FLAGS;
+                       devtab[cnew->type].create(cnew,
+                                                 e.elems[e.ARRAY_SIZEs - 1],
+                                                 omode & ~(O_EXCL | O_CLOEXEC),
+                                                 perm, ext);
+                       poperror();
 
-                       /* create failed */
-                       cclose(cnew);
                        if (m)
                                putmhead(m);
-                       if (omode & O_EXCL)
-                               nexterror();    /* safe since we're in a waserror() */
-                       poperror();     /* matching the if(!waserror) */
-
-                       /* save error, so walk doesn't clobber our existing errstr */
-                       strlcpy(tmperrbuf, current_errstr(), sizeof(tmperrbuf));
-                       saved_errno = get_errno();
-                       /* note: we depend that walk does not error */
-                       if (walk(&c, e.elems + e.ARRAY_SIZEs - 1, 1, wh, NULL) < 0) {
-                               set_errno(saved_errno);
-                               /* Report the error we had originally */
-                               error(EFAIL, tmperrbuf);
-                       }
-                       strlcpy(current_errstr(), tmperrbuf, MAX_ERRSTR_LEN);
-                       omode |= O_TRUNC;
-                       goto Open;
+                       cclose(c);
+                       c = cnew;
+                       c->name = addelem(c->name, e.elems[e.ARRAY_SIZEs - 1]);
+                       break;
+               }
+
+               /* create failed */
+               cclose(cnew);
+               if (m)
+                       putmhead(m);
+               if (omode & O_EXCL)
+                       nexterror();    /* safe since we're in a waserror() */
+               poperror();     /* matching the if(!waserror) */
+
+               /* save error, so walk doesn't clobber our existing errstr */
+               strlcpy(tmperrbuf, current_errstr(), sizeof(tmperrbuf));
+               saved_errno = get_errno();
+               /* note: we depend on walk not throwing an error */
+               if (walk(&c, e.elems + e.ARRAY_SIZEs - 1, 1, wh, NULL) < 0) {
+                       set_errno(saved_errno);
+                       /* Report the error we had originally */
+                       error(EFAIL, tmperrbuf);
+               }
+               strlcpy(current_errstr(), tmperrbuf, MAX_ERRSTR_LEN);
+               omode |= O_TRUNC;
+               goto Open;
 
-               default:
-                       panic("unknown namec access %d\n", amode);
+       default:
+               panic("unknown namec access %d\n", amode);
        }
 
        poperror();
 
        if (e.ARRAY_SIZEs > 0)
-               strlcpy(get_cur_genbuf(), e.elems[e.ARRAY_SIZEs - 1], GENBUF_SZ);
+               strlcpy(get_cur_genbuf(), e.elems[e.ARRAY_SIZEs - 1],
+                       GENBUF_SZ);
        else
                strlcpy(get_cur_genbuf(), ".", GENBUF_SZ);
 
@@ -1360,65 +1493,71 @@ struct chan *namec(char *name, int amode, int omode, uint32_t perm, void *ext)
         * evaluate starting there.
         */
        switch (name[0]) {
-               case '/':
-                       /* TODO: kernel walkers will crash here */
-                       assert(current && current->slash);
+       case '/':
+               if (current)
                        c = current->slash;
-                       chan_incref(c);
-                       break;
-
-               case '#':
-                       wh.can_mount = false;
-                       devname = get_cur_genbuf();
-                       devname[0] = '\0';
-                       n = 0;
-                       name++; /* drop the # */
-                       while ((*name != '\0') && (*name != '/')) {
-                               if (n >= GENBUF_SZ - 1)
-                                       error(ENAMETOOLONG, ERROR_FIXME);
-                               devname[n++] = *name++;
-                       }
-                       devname[n] = '\0';
-                       /* for a name #foo.spec, devname = foo\0, devspec = spec\0.
-                        * genbuf contains foo\0spec\0.  for no spec, devspec = \0 */
-                       devspec = strchr(devname, '.');
-                       if (devspec) {
-                               *devspec = '\0';
-                               devspec++;
-                       } else {
-                               devspec = &devname[n];
-                       }
-                       if (!strcmp(devname, "mnt"))
-                               error(EINVAL, ERROR_FIXME);
-                       /* TODO: deal with this "nodevs" business. */
-                       #if 0
-                       /*
-                        *  the nodevs exceptions are
-                        *  |  it only gives access to pipes you create
-                        *  e  this process's environment
-                        *  s  private file2chan creation space
-                        *  D private secure sockets name space
-                        *  a private TLS name space
-                        */
-                       if (current->pgrp->nodevs &&
-                               //          (utfrune("|esDa", r) == NULL
-                               ((strchr("|esDa", get_cur_genbuf()[1]) == NULL)
-                                || (get_cur_genbuf()[1] == 's' // || r == 's'
-                                        && get_cur_genbuf()[n] != '\0')))
-                               error(EINVAL, ERROR_FIXME);
-                       #endif
-                       devtype = devno(devname, 1);
-                       if (devtype == -1)
-                               error(EFAIL, "Unknown #device %s (spec %s)", devname, devspec);
-                       c = devtab[devtype].attach(devspec);
-                       break;
-               default:
-                       /* this case also covers \0 */
-                       c = current->dot;
-                       if (!c)
-                               panic("no dot!");
-                       chan_incref(c);
-                       break;
+               else
+                       c = kern_slash;
+               chan_incref(c);
+               break;
+
+       case '#':
+               wh.can_mount = false;
+               devname = get_cur_genbuf();
+               devname[0] = '\0';
+               n = 0;
+               name++; /* drop the # */
+               while ((*name != '\0') && (*name != '/')) {
+                       if (n >= GENBUF_SZ - 1)
+                               error(ENAMETOOLONG, ERROR_FIXME);
+                       devname[n++] = *name++;
+               }
+               devname[n] = '\0';
+               /* for a name #foo.spec, devname = foo\0, devspec = spec\0.
+                * genbuf contains foo\0spec\0.  for no spec, devspec = \0 */
+               devspec = strchr(devname, '.');
+               if (devspec) {
+                       *devspec = '\0';
+                       devspec++;
+               } else {
+                       devspec = &devname[n];
+               }
+               /* These devices have special attach functions that treat the
+                * char * as a blob pointer */
+               if (!strcmp(devname, "mnt"))
+                       error(EINVAL, "can't namec-attach #mnt");
+               if (!strcmp(devname, "gtfs"))
+                       error(EINVAL, "can't namec-attach #gtfs");
+               /* TODO: deal with this "nodevs" business. */
+               #if 0
+               /*
+                *  the nodevs exceptions are
+                *  |  it only gives access to pipes you create
+                *  e  this process's environment
+                *  s  private file2chan creation space
+                *  D private secure sockets name space
+                *  a private TLS name space
+                */
+               if (current->pgrp->nodevs &&
+                       //          (utfrune("|esDa", r) == NULL
+                       ((strchr("|esDa", get_cur_genbuf()[1]) == NULL)
+                        || (get_cur_genbuf()[1] == 's' // || r == 's'
+                                && get_cur_genbuf()[n] != '\0')))
+                       error(EINVAL, ERROR_FIXME);
+               #endif
+               devtype = devno(devname, 1);
+               if (devtype == -1)
+                       error(EFAIL, "Unknown #device %s (spec %s)", devname,
+                             devspec);
+               c = devtab[devtype].attach(devspec);
+               break;
+       default:
+               /* this case also covers \0 */
+               c = current->dot;
+               if (!c)
+                       panic("no dot!");
+               chan_incref(c);
+               break;
        }
        return __namec_from(c, name, amode, omode, perm, &wh, ext);
 }
@@ -1490,7 +1629,8 @@ void validname(char *aname, int slashok)
                } else {
                        if (isfrog[c])
                                if (!slashok || c != '/') {
-                                       error(EINVAL, "%s (%p), at char %c", aname, aname, c);
+                                       error(EINVAL, "%s (%p), at char %c",
+                                             aname, aname, c);
                                }
                        name++;
                }
@@ -1554,8 +1694,12 @@ static struct chan *walk_symlink(struct chan *symlink, struct walk_helper *wh,
        struct dir *dir;
        char *link_name, *link_store;
        struct chan *from;
+       bool old_nofollow;
        Elemlist e = {0};
 
+       /* mildly expensive: need to rlock the namespace */
+       if (is_mount_point(symlink))
+               return symlink;
        if (!nr_names_left && wh->no_follow)
                return symlink;
        if (wh->nr_loops >= WALK_MAX_NR_LOOPS) {
@@ -1576,9 +1720,10 @@ static struct chan *walk_symlink(struct chan *symlink, struct walk_helper *wh,
        kfree(dir);
 
        if (link_name[0] == '/') {
-               /* TODO: kernel walkers will crash here, just like namec() */
-               assert(current && current->slash);
-               from = current->slash;
+               if (current)
+                       from = current->slash;
+               else
+                       from = kern_slash;
        } else {
                from = symlink;
                link_name -= 3;
@@ -1593,18 +1738,27 @@ static struct chan *walk_symlink(struct chan *symlink, struct walk_helper *wh,
        kfree(link_store);
 
        wh->nr_loops++;
+       /* no_follow applies to the outermost walk, i.e. the one that the
+        * original namec performs.  At this point, we've decided that we're
+        * going to try and follow a symlink: even if it's no_follow, that only
+        * applies to the last link in the original path.  Our sub-walks are not
+        * no_follow.
+        *
+        * Note the other wh vars need to stay with the walk: nr_loops,
+        * since it's our method of detecting symlink loops, and can_mount,
+        * which is a property of the overall namec() call. */
+       old_nofollow = wh->no_follow;
+       wh->no_follow = false;
        if (walk(&from, e.elems, e.ARRAY_SIZEs, wh, NULL) < 0) {
                cclose(from);
                from = NULL;
        } else {
+               /* We can still have a successful walk and have the new 'from'
+                * be a symlink.  We'd need walk_symlink to return a symlink
+                * chan, which happens if the symlink is a mount point. */
                cclose(symlink);
-               if (from->qid.type & QTSYMLINK) {
-                       symlink = from;
-                       from = walk_symlink(symlink, wh, nr_names_left);
-                       if (!from)
-                               cclose(symlink);
-               }
        }
+       wh->no_follow = old_nofollow;
        wh->nr_loops--;
 
        kfree(e.name);