9ns: make kstrdup() actually atomic
[akaros.git] / kern / src / ns / chan.c
index f7d28d2..af83d1b 100644 (file)
@@ -1,6 +1,31 @@
-// INFERNO
-#include <vfs.h>
-#include <kfs.h>
+/* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+ * Portions Copyright © 1997-1999 Vita Nuova Limited
+ * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
+ *                                (www.vitanuova.com)
+ * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
+ *
+ * Modified for the Akaros operating system:
+ * Copyright (c) 2013-2014 The Regents of the University of California
+ * Copyright (c) 2013-2015 Google Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE. */
+
 #include <slab.h>
 #include <kmalloc.h>
 #include <kref.h>
 #include <smp.h>
 #include <syscall.h>
 
-char*
-channame(struct chan *c)               /* DEBUGGING */
-{
-       if(c == NULL)
+struct chan *kern_slash;
+
+char *channame(struct chan *c)
+{      /* DEBUGGING */
+       if (c == NULL)
                return "<NULL chan>";
-       if(c->name == NULL)
+       if (c->name == NULL)
                return "<NULL name>";
-       if(c->name->s == NULL)
+       if (c->name->s == NULL)
                return "<NULL name.s>";
        return c->name->s;
 }
 
-enum
-{
-       CNAMESLOP       = 20
+enum {
+       CNAMESLOP = 20
 };
 
-struct
-{
+struct {
        spinlock_t lock;
-       int     fid;
-       struct chan     *free;
-       struct chan     *list;
-}chanalloc;
+       int fid;
+       struct chan *free;
+       struct chan *list;
+} chanalloc;
 
 typedef struct Elemlist Elemlist;
 
-struct Elemlist
-{
-       char    *name;  /* copy of name, so '/' can be overwritten */
-       int     ARRAY_SIZEs;
-       char    **elems;
-       int     *off;
-       int     mustbedir;
+struct Elemlist {
+       char *name;             /* copy of name, so '/' can be overwritten */
+       int ARRAY_SIZEs;
+       char **elems;
+       int *off;
+       int mustbedir;
+};
+
+struct walk_helper {
+       bool can_mount;
+       bool no_follow;
+       unsigned int nr_loops;
 };
+#define WALK_MAX_NR_LOOPS 8
+
+static struct chan *walk_symlink(struct chan *symlink, struct walk_helper *wh,
+                                 unsigned int nr_names_left);
 
 #define SEP(c) ((c) == 0 || (c) == '/')
-void cleancname(struct cname*);
+void cleancname(struct cname *);
 
-int
-isdotdot(char *p)
+int isdotdot(char *p)
 {
-       return p[0]=='.' && p[1]=='.' && p[2]=='\0';
+       return p[0] == '.' && p[1] == '.' && p[2] == '\0';
 }
 
-int
-emptystr(char *s)
+int emptystr(char *s)
 {
-       if(s == NULL)
+       if (s == NULL)
                return 1;
-       if(s[0] == '\0')
+       if (s[0] == '\0')
                return 1;
        return 0;
 }
@@ -71,65 +102,78 @@ emptystr(char *s)
 /*
  * Atomically replace *p with copy of s
  */
-void
-kstrdup(char **p, char *s)
+void kstrdup(char **p, char *s)
 {
        int n;
        char *t, *prev;
 
-       n = strlen(s)+1;
-       /* if it's a user, we can wait for memory; if not, something's very wrong */
+       n = strlen(s) + 1;
+       /* if it's a user, we can wait for memory; if not, something's very
+        * wrong */
        if (current) {
                t = kzmalloc(n, 0);
-       }else{
+       } else {
                t = kzmalloc(n, 0);
-               if(t == NULL)
+               if (t == NULL)
                        panic("kstrdup: no memory");
        }
        memmove(t, s, n);
-       prev = *p;
-       *p = t;
+
+       prev = atomic_swap_ptr((void**)p, t);
        kfree(prev);
 }
 
-void
-chandevreset(void)
+void chandevreset(void)
 {
        int i;
 
-       for(i=0; devtab[i] != NULL; i++)
-               devtab[i]->reset();
+       for (i = 0; &devtab[i] < __devtabend; i++) {
+               if (devtab[i].reset)
+                       devtab[i].reset();
+       }
 }
 
-void
-chandevinit(void)
+void chandevinit(void)
 {
        int i;
 
-       for(i=0; devtab[i] != NULL; i++)
-               devtab[i]->init();
+       for (i = 0; &devtab[i] < __devtabend; i++) {
+               if (devtab[i].init)
+                       devtab[i].init();
+       }
 }
 
-void
-chandevshutdown(void)
+void chandevshutdown(void)
 {
        int i;
-       
+
        /* shutdown in reverse order */
-       for(i=0; devtab[i] != NULL; i++)
-               ;
-       for(i--; i >= 0; i--)
-               devtab[i]->shutdown();
+       for (i = 0; &devtab[i] < __devtabend; i++) ;
+       for (i--; i >= 0; i--) {
+               if (devtab[i].shutdown)
+                       devtab[i].shutdown();
+       }
 }
 
 static void chan_release(struct kref *kref)
 {
        struct chan *c = container_of(kref, struct chan, ref);
        ERRSTACK(1);
+
+       /* We can be called from RCU callbacks, but close methods can block.  In
+        * those cases, and any other context that cannot block, we need to
+        * defer our work to a kernel message. */
+       if (!can_block(this_pcpui_ptr())) {
+               run_as_rkm(chan_release, kref);
+               return;
+       }
        /* this style discards the error from close().  picture it as
         * if (waserror()) { } else { close(); } chanfree_no_matter_what();  */
        if (!waserror()) {
-               devtab[c->type]->close(c);
+               printd("releasing chan %p, type %d\n", c, c->type);
+               /* -1 means there is no dev yet.  wants a noop for close() */
+               if (c->type != -1)
+                       devtab[c->type].close(c);
        }
        /* need to poperror regardless of whether we error'd or not */
        poperror();
@@ -137,18 +181,17 @@ static void chan_release(struct kref *kref)
        chanfree(c);
 }
 
-struct chan*
-newchan(void)
+struct chan *newchan(void)
 {
        struct chan *c;
 
        spin_lock(&(&chanalloc)->lock);
        c = chanalloc.free;
-       if(c != 0)
+       if (c != 0)
                chanalloc.free = c->next;
        spin_unlock(&(&chanalloc)->lock);
 
-       if(c == NULL) {
+       if (c == NULL) {
                c = kzmalloc(sizeof(struct chan), 0);
                spin_lock(&(&chanalloc)->lock);
                c->fid = ++chanalloc.fid;
@@ -159,9 +202,9 @@ newchan(void)
                qlock_init(&c->umqlock);
        }
 
-       /* if you get an error before associating with a dev,
-          close calls rootclose, a nop */
-       c->type = 0;
+       /* if you get an error before associating with a dev, cclose skips
+        * calling the dev's close */
+       c->type = -1;
        c->flag = 0;
        kref_init(&c->ref, chan_release, 1);
        c->dev = 0;
@@ -178,6 +221,8 @@ newchan(void)
        c->mqid.vers = 0;
        c->mqid.type = 0;
        c->name = 0;
+       c->buf = NULL;
+       c->mountpoint = NULL;
        return c;
 }
 
@@ -188,8 +233,7 @@ static void __cname_release(struct kref *kref)
        kfree(n);
 }
 
-struct cname*
-newcname(char *s)
+struct cname *newcname(char *s)
 {
        struct cname *n;
        int i;
@@ -197,32 +241,30 @@ newcname(char *s)
        n = kzmalloc(sizeof(*n), 0);
        i = strlen(s);
        n->len = i;
-       n->alen = i+CNAMESLOP;
+       n->alen = i + CNAMESLOP;
        n->s = kzmalloc(n->alen, 0);
-       memmove(n->s, s, i+1);
+       memmove(n->s, s, i + 1);
        kref_init(&n->ref, __cname_release, 1);
        return n;
 }
 
-void
-cnameclose(struct cname *n)
+void cnameclose(struct cname *n)
 {
-       if(n == NULL)
+       if (n == NULL)
                return;
        kref_put(&n->ref);
 }
 
-struct cname*
-addelem(struct cname *n, char *s)
+struct cname *addelem(struct cname *n, char *s)
 {
        int i, a;
        char *t;
        struct cname *new;
 
-       if(s[0]=='.' && s[1]=='\0')
+       if (s[0] == '.' && s[1] == '\0')
                return n;
 
-       if(kref_refcnt(&n->ref) > 1){
+       if (kref_refcnt(&n->ref) > 1) {
                /* copy on write */
                new = newcname(n->s);
                cnameclose(n);
@@ -230,47 +272,52 @@ addelem(struct cname *n, char *s)
        }
 
        i = strlen(s);
-       if(n->len+1+i+1 > n->alen){
-               a = n->len+1+i+1 + CNAMESLOP;
+       if (n->len + 1 + i + 1 > n->alen) {
+               a = n->len + 1 + i + 1 + CNAMESLOP;
                t = kzmalloc(a, 0);
-               memmove(t, n->s, n->len+1);
+               memmove(t, n->s, n->len + 1);
                kfree(n->s);
                n->s = t;
                n->alen = a;
        }
-       if(n->len>0 && n->s[n->len-1]!='/' && s[0]!='/')        /* don't insert extra slash if one is present */
+       /* don't insert extra slash if one is present */
+       if (n->len > 0 && n->s[n->len - 1] != '/' && s[0] != '/')
                n->s[n->len++] = '/';
-       memmove(n->s+n->len, s, i+1);
+       memmove(n->s + n->len, s, i + 1);
        n->len += i;
-       if(isdotdot(s))
+       if (isdotdot(s))
                cleancname(n);
        return n;
 }
 
-void
-chanfree(struct chan *c)
+void chanfree(struct chan *c)
 {
        c->flag = CFREE;
 
-       if(c->umh != NULL){
+       if (c->umh != NULL) {
                putmhead(c->umh);
                c->umh = NULL;
        }
-       if(c->umc != NULL){
+       if (c->umc != NULL) {
                cclose(c->umc);
                c->umc = NULL;
        }
-       if(c->mux != NULL){
+       if (c->mux != NULL) {
                //
                muxclose(c->mux);
                c->mux = NULL;
        }
-       if(c->mchan != NULL){
+       if (c->mchan != NULL) {
                cclose(c->mchan);
                c->mchan = NULL;
        }
 
        cnameclose(c->name);
+       if (c->buf)
+               kfree(c->buf);
+       c->buf = NULL;
+       c->bufused = 0;
+       c->ateof = 0;
 
        spin_lock(&(&chanalloc)->lock);
        c->next = chanalloc.free;
@@ -278,27 +325,32 @@ chanfree(struct chan *c)
        spin_unlock(&(&chanalloc)->lock);
 }
 
-void
-cclose(struct chan *c)
+void cclose(struct chan *c)
 {
-       if(c == 0)
+       if (c == 0)
                return;
 
-       if(c->flag&CFREE)
-               panic("cclose %lux", getcallerpc(&c));
+       if (c->flag & CFREE)
+               panic("double cclose %p.  maybe kfunc channame and hexdump", c);
 
        kref_put(&c->ref);
 }
 
+/* convenience wrapper for interposition.  if you do use this, don't forget
+ * about the kref_get_not_zero in plan9setup() */
+void chan_incref(struct chan *c)
+{
+       kref_get(&c->ref, 1);
+}
+
 /*
  * Make sure we have the only copy of c.  (Copy on write.)
  */
-struct chan*
-cunique(struct chan *c)
+struct chan *cunique(struct chan *c)
 {
        struct chan *nc;
 
-       if(kref_refcnt(&c->ref) != 1) {
+       if (kref_refcnt(&c->ref) != 1) {
                nc = cclone(c);
                cclose(c);
                c = nc;
@@ -307,36 +359,33 @@ cunique(struct chan *c)
        return c;
 }
 
-int
-eqqid(struct qid a, struct qid b)
+int eqqid(struct qid a, struct qid b)
 {
-       return a.path==b.path && a.vers==b.vers;
+       return a.path == b.path && a.vers == b.vers;
 }
 
-int
-eqchan(struct chan *a, struct chan *b, int pathonly)
+int eqchan(struct chan *a, struct chan *b, int pathonly)
 {
-       if(a->qid.path != b->qid.path)
+       if (a->qid.path != b->qid.path)
                return 0;
-       if(!pathonly && a->qid.vers!=b->qid.vers)
+       if (!pathonly && a->qid.vers != b->qid.vers)
                return 0;
-       if(a->type != b->type)
+       if (a->type != b->type)
                return 0;
-       if(a->dev != b->dev)
+       if (a->dev != b->dev)
                return 0;
        return 1;
 }
 
-int
-eqchantdqid(struct chan *a, int type, int dev, struct qid qid, int pathonly)
+int eqchantdqid(struct chan *a, int type, int dev, struct qid qid, int pathonly)
 {
-       if(a->qid.path != qid.path)
+       if (a->qid.path != qid.path)
                return 0;
-       if(!pathonly && a->qid.vers!=qid.vers)
+       if (!pathonly && a->qid.vers != qid.vers)
                return 0;
-       if(a->type != type)
+       if (a->type != type)
                return 0;
-       if(a->dev != dev)
+       if (a->dev != dev)
                return 0;
        return 1;
 }
@@ -344,7 +393,8 @@ eqchantdqid(struct chan *a, int type, int dev, struct qid qid, int pathonly)
 static void mh_release(struct kref *kref)
 {
        struct mhead *mh = container_of(kref, struct mhead, ref);
-       mh->mount = (struct mount*)0xCafeBeef;
+
+       mh->mount = (struct mount *)0xCafeBeef;
        kfree(mh);
 }
 
@@ -354,8 +404,9 @@ struct mhead *newmhead(struct chan *from)
 
        mh = kzmalloc(sizeof(struct mhead), 0);
        kref_init(&mh->ref, mh_release, 1);
+       rwinit(&mh->lock);
        mh->from = from;
-       kref_get(&from->ref, 1);
+       chan_incref(from);
 
 /*
        n = from->name->len;
@@ -367,8 +418,7 @@ struct mhead *newmhead(struct chan *from)
        return mh;
 }
 
-int
-cmount(struct chan *new, struct chan *old, int flag, char *spec)
+int cmount(struct chan *new, struct chan *old, int flag, char *spec)
 {
        ERRSTACK(1);
        struct pgrp *pg;
@@ -376,16 +426,27 @@ cmount(struct chan *new, struct chan *old, int flag, char *spec)
        struct mhead *m, **l, *mh;
        struct mount *nm, *f, *um, **h;
 
-       if(QTDIR & (old->qid.type^new->qid.type))
-               error(Emount);
+       /* Can't bind pointing to a symlink, since it vastly complicates namec
+        * and walk.  In particular, walk() only follows mounts on the
+        * intermediate path elements.  Grep 'ntry - 1'.  Because of that,
+        * walk() can end on a symlink.  Having domount() follow symlinks is a
+        * pain: undomount. */
+       if (new->qid.type & QTSYMLINK)
+               error(EINVAL, "cannot bind a symlink");
 
-if(old->umh)
-       printd("cmount old extra umh\n");
+       /* Can bind anything onto a symlink's name.  Otherwise, both the old and
+        * the new must agree on whether or not it is a directory. */
+       if (!(old->qid.type & QTSYMLINK) &&
+           (QTDIR & (old->qid.type ^ new->qid.type)))
+               error(EINVAL, ERROR_FIXME);
 
-       order = flag&MORDER;
+       if (old->umh)
+               printd("cmount old extra umh\n");
 
-       if((old->qid.type&QTDIR)==0 && order != MREPL)
-               error(Emount);
+       order = flag & MORDER;
+
+       if ((old->qid.type & QTDIR) == 0 && order != MREPL)
+               error(EINVAL, ERROR_FIXME);
 
        mh = new->umh;
 
@@ -396,30 +457,30 @@ if(old->umh)
         *
         * We need to check mh->mount->next to tell unions apart from
         * simple mount points, so that things like
-        *      mount -c fd /root
-        *      bind -c /root /
+        *  mount -c fd /root
+        *  bind -c /root /
         * work.  The check of mount->mflag catches things like
-        *      mount fd /root
-        *      bind -c /root /
-        * 
+        *  mount fd /root
+        *  bind -c /root /
+        *
         * This is far more complicated than it should be, but I don't
-        * see an easier way at the moment.             -rsc
+        * see an easier way at the moment.     -rsc
         */
-       if((flag&MCREATE) && mh && mh->mount
-       && (mh->mount->next || !(mh->mount->mflag&MCREATE)))
-               error(Emount);
+       if ((flag & MCREATE) && mh && mh->mount
+               && (mh->mount->next || !(mh->mount->mflag & MCREATE)))
+               error(EEXIST, ERROR_FIXME);
 
        pg = current->pgrp;
        wlock(&pg->ns);
 
        l = &MOUNTH(pg, old->qid);
-       for(m = *l; m; m = m->hash) {
-               if(eqchan(m->from, old, 1))
+       for (m = *l; m; m = m->hash) {
+               if (eqchan(m->from, old, 1))
                        break;
                l = &m->hash;
        }
 
-       if(m == NULL) {
+       if (m == NULL) {
                /*
                 *  nothing mounted here yet.  create a mount
                 *  head and add to the hash table.
@@ -431,49 +492,46 @@ if(old->umh)
                 *  if this is a union mount, add the old
                 *  node to the mount chain.
                 */
-               if(order != MREPL)
+               if (order != MREPL)
                        m->mount = newmount(m, old, 0, 0);
        }
        wlock(&m->lock);
-       if(waserror()){
+       if (waserror()) {
                wunlock(&m->lock);
                nexterror();
        }
        wunlock(&pg->ns);
 
        nm = newmount(m, new, flag, spec);
-       if(mh != NULL && mh->mount != NULL) {
+       if (mh != NULL && mh->mount != NULL) {
                /*
                 *  copy a union when binding it onto a directory
                 */
                flg = order;
-               if(order == MREPL)
+               if (order == MREPL)
                        flg = MAFTER;
                h = &nm->next;
                um = mh->mount;
-               for(um = um->next; um; um = um->next) {
+               for (um = um->next; um; um = um->next) {
                        f = newmount(m, um->to, flg, um->spec);
                        *h = f;
                        h = &f->next;
                }
        }
 
-       if(m->mount && order == MREPL) {
+       if (m->mount && order == MREPL) {
                mountfree(m->mount);
                m->mount = 0;
        }
 
-       if(flag & MCREATE)
+       if (flag & MCREATE)
                nm->mflag |= MCREATE;
 
-       if(m->mount && order == MAFTER) {
-               for(f = m->mount; f->next; f = f->next)
-                       ;
+       if (m->mount && order == MAFTER) {
+               for (f = m->mount; f->next; f = f->next) ;
                f->next = nm;
-       }
-       else {
-               for(f = nm; f->next; f = f->next)
-                       ;
+       } else {
+               for (f = nm; f->next; f = f->next) ;
                f->next = m->mount;
                m->mount = nm;
        }
@@ -483,18 +541,17 @@ if(old->umh)
        return nm->mountid;
 }
 
-void
-cunmount(struct chan *mnt, struct chan *mounted)
+void cunmount(struct chan *mnt, struct chan *mounted)
 {
        struct pgrp *pg;
        struct mhead *m, **l;
        struct mount *f, **p;
 
-       if(mnt->umh)    /* should not happen */
+       if (mnt->umh)   /* should not happen */
                printd("cunmount newp extra umh %p has %p\n", mnt, mnt->umh);
 
        /*
-        * It _can_ happen that mounted->umh is non-NULL, 
+        * It _can_ happen that mounted->umh is non-NULL,
         * because mounted is the result of namec(Aopen)
         * (see sysfile.c:/^sysunmount).
         * If we open a union directory, it will have a umh.
@@ -506,19 +563,19 @@ cunmount(struct chan *mnt, struct chan *mounted)
        wlock(&pg->ns);
 
        l = &MOUNTH(pg, mnt->qid);
-       for(m = *l; m; m = m->hash) {
-               if(eqchan(m->from, mnt, 1))
+       for (m = *l; m; m = m->hash) {
+               if (eqchan(m->from, mnt, 1))
                        break;
                l = &m->hash;
        }
 
-       if(m == 0) {
+       if (m == 0) {
                wunlock(&pg->ns);
-               error(Eunmount);
+               error(ENOENT, ERROR_FIXME);
        }
 
        wlock(&m->lock);
-       if(mounted == 0) {
+       if (mounted == 0) {
                *l = m->hash;
                wunlock(&pg->ns);
                mountfree(m->mount);
@@ -530,14 +587,14 @@ cunmount(struct chan *mnt, struct chan *mounted)
        }
 
        p = &m->mount;
-       for(f = *p; f; f = f->next) {
+       for (f = *p; f; f = f->next) {
                /* BUG: Needs to be 2 pass */
-               if(eqchan(f->to, mounted, 1) ||
-                 (f->to->mchan && eqchan(f->to->mchan, mounted, 1))) {
+               if (eqchan(f->to, mounted, 1) ||
+                       (f->to->mchan && eqchan(f->to->mchan, mounted, 1))) {
                        *p = f->next;
                        f->next = 0;
                        mountfree(f);
-                       if(m->mount == NULL) {
+                       if (m->mount == NULL) {
                                *l = m->hash;
                                cclose(m->from);
                                wunlock(&m->lock);
@@ -553,53 +610,83 @@ cunmount(struct chan *mnt, struct chan *mounted)
        }
        wunlock(&m->lock);
        wunlock(&pg->ns);
-       error(Eunion);
+       error(ENOENT, ERROR_FIXME);
 }
 
-struct chan*
-cclone(struct chan *c)
+struct chan *cclone(struct chan *c)
 {
        struct chan *nc;
        struct walkqid *wq;
 
-       wq = devtab[c->type]->walk(c, NULL, NULL, 0);
-       if(wq == NULL)
-               error("clone failed");
+       wq = devtab[c->type].walk(c, NULL, NULL, 0);
+       if (wq == NULL)
+               error(EFAIL, "clone failed");
        nc = wq->clone;
        kfree(wq);
        nc->name = c->name;
-       if(c->name)
+       if (c->name)
                kref_get(&c->name->ref, 1);
        return nc;
 }
 
-int
-findmount(struct chan **cp,
-         struct mhead **mp, int type, int dev, struct qid qid)
+/* Helper: is something mounted on the chan? */
+static bool is_mount_point(struct chan *c)
 {
        struct pgrp *pg;
        struct mhead *m;
+       int type = c->type;
+       int dev = c->dev;
+       struct qid qid = c->qid;
 
+       if (!current)
+               return false;
        pg = current->pgrp;
        rlock(&pg->ns);
-       for(m = MOUNTH(pg, qid); m; m = m->hash){
+       for (m = MOUNTH(pg, qid); m; m = m->hash) {
                rlock(&m->lock);
-if(m->from == NULL){
-       printd("m %p m->from 0\n", m);
-       runlock(&m->lock);
-       continue;
+               if (!m->from) {
+                       runlock(&m->lock);
+                       continue;
+               }
+               if (eqchantdqid(m->from, type, dev, qid, 1)) {
+                       runlock(&m->lock);
+                       runlock(&pg->ns);
+                       return true;
+               }
+               runlock(&m->lock);
+       }
+       runlock(&pg->ns);
+       return false;
 }
-               if(eqchantdqid(m->from, type, dev, qid, 1)) {
+
+int findmount(struct chan **cp, struct mhead **mp, int type, int dev,
+             struct qid qid)
+{
+       struct pgrp *pg;
+       struct mhead *m;
+
+       if (!current)
+               return 0;
+       pg = current->pgrp;
+       rlock(&pg->ns);
+       for (m = MOUNTH(pg, qid); m; m = m->hash) {
+               rlock(&m->lock);
+               if (m->from == NULL) {
+                       printd("m %p m->from 0\n", m);
+                       runlock(&m->lock);
+                       continue;
+               }
+               if (eqchantdqid(m->from, type, dev, qid, 1)) {
                        runlock(&pg->ns);
-                       if(mp != NULL){
+                       if (mp != NULL) {
                                kref_get(&m->ref, 1);
-                               if(*mp != NULL)
+                               if (*mp != NULL)
                                        putmhead(*mp);
                                *mp = m;
                        }
-                       if(*cp != NULL)
+                       if (*cp != NULL)
                                cclose(*cp);
-                       kref_get(&m->mount->to->ref, 1);
+                       chan_incref(m->mount->to);
                        *cp = m->mount->to;
                        runlock(&m->lock);
                        return 1;
@@ -611,14 +698,12 @@ if(m->from == NULL){
        return 0;
 }
 
-int
-domount(struct chan **cp, struct mhead **mp)
+int domount(struct chan **cp, struct mhead **mp)
 {
        return findmount(cp, mp, (*cp)->type, (*cp)->dev, (*cp)->qid);
 }
 
-struct chan*
-undomount(struct chan *c, struct cname *name)
+struct chan *undomount(struct chan *c, struct cname *name)
 {
        ERRSTACK(1);
        struct chan *nc;
@@ -626,30 +711,34 @@ undomount(struct chan *c, struct cname *name)
        struct mount *t;
        struct mhead **h, **he, *f;
 
+       if (!current)
+               return c;
        pg = current->pgrp;
        rlock(&pg->ns);
-       if(waserror()) {
+       if (waserror()) {
                runlock(&pg->ns);
                nexterror();
        }
 
        he = &pg->mnthash[MNTHASH];
-       for(h = pg->mnthash; h < he; h++) {
-               for(f = *h; f; f = f->hash) {
-                       if(strcmp(f->from->name->s, name->s) != 0)
+       for (h = pg->mnthash; h < he; h++) {
+               for (f = *h; f; f = f->hash) {
+                       if (strcmp(f->from->name->s, name->s) != 0)
                                continue;
-                       for(t = f->mount; t; t = t->next) {
-                               if(eqchan(c, t->to, 1)) {
+                       for (t = f->mount; t; t = t->next) {
+                               if (eqchan(c, t->to, 1)) {
                                        /*
-                                        * We want to come out on the left hand side of the mount
-                                        * point using the element of the union that we entered on.
-                                        * To do this, find the element that has a from name of
-                                        * c->name->s.
+                                        * We want to come out on the left hand
+                                        * side of the mount point using the
+                                        * element of the union that we entered
+                                        * on.  To do this, find the element
+                                        * that has a from name of c->name->s.
                                         */
-                                       if(strcmp(t->head->from->name->s, name->s) != 0)
+                                       if (strcmp(t->head->from->name->s,
+                                                  name->s) != 0)
                                                continue;
                                        nc = t->head->from;
-                                       kref_get(&nc->ref, 1);
+                                       chan_incref(nc);
                                        cclose(c);
                                        c = nc;
                                        break;
@@ -666,51 +755,53 @@ undomount(struct chan *c, struct cname *name)
  * Either walks all the way or not at all.  No partial results in *cp.
  * *nerror is the number of names to display in an error message.
  */
-static char Edoesnotexist[] = "does not exist";
-int
-walk(struct chan **cp, char **names, int nnames, int nomount, int *nerror)
+int walk(struct chan **cp, char **names, int nnames, struct walk_helper *wh,
+         int *nerror)
 {
        int dev, dotdot, i, n, nhave, ntry, type;
-       struct chan *c, *nc;
+       struct chan *c, *nc, *lastmountpoint = NULL;
        struct cname *cname;
        struct mount *f;
        struct mhead *mh, *nmh;
        struct walkqid *wq;
 
        c = *cp;
-       kref_get(&c->ref, 1);
+       chan_incref(c);
        cname = c->name;
        kref_get(&cname->ref, 1);
        mh = NULL;
 
        /*
         * While we haven't gotten all the way down the path:
-        *    1. step through a mount po int unused_int, if any
-        *    2. send a walk request for initial dotdot or initial prefix without dotdot
+        *    1. step through a mount point, if any
+        *    2. send a walk request for initial dotdot or initial prefix
+        *    without dotdot
         *    3. move to the first mountpoint along the way.
         *    4. repeat.
         *
-        * An invariant is that each time through the loop, c is on the undomount
-        * side of the mount po int unused_int, and c's name is cname.
+        * An invariant is that each time through the loop, c is on the
+        * undomount side of the mount point, and c's name is cname.
         */
-       for(nhave=0; nhave<nnames; nhave+=n){
-               if((c->qid.type&QTDIR)==0){
-                       if(nerror)
+       for (nhave = 0; nhave < nnames; nhave += n) {
+               /* We only allow symlink when they are first and it's .. (see
+                * below) */
+               if ((c->qid.type & (QTDIR | QTSYMLINK)) == 0) {
+                       if (nerror)
                                *nerror = nhave;
                        cnameclose(cname);
                        cclose(c);
-                       set_errstr(Enotdir);
-                       if(mh != NULL)
+                       set_error(ENOTDIR, ERROR_FIXME);
+                       if (mh != NULL)
                                putmhead(mh);
                        return -1;
                }
                ntry = nnames - nhave;
-               if(ntry > MAXWELEM)
+               if (ntry > MAXWELEM)
                        ntry = MAXWELEM;
                dotdot = 0;
-               for(i=0; i<ntry; i++){
-                       if(isdotdot(names[nhave+i])){
-                               if(i==0) {
+               for (i = 0; i < ntry; i++) {
+                       if (isdotdot(names[nhave + i])) {
+                               if (i == 0) {
                                        dotdot = 1;
                                        ntry = 1;
                                } else
@@ -719,41 +810,52 @@ walk(struct chan **cp, char **names, int nnames, int nomount, int *nerror)
                        }
                }
 
-               if(!dotdot && !nomount)
+               if (!dotdot && wh->can_mount)
                        domount(&c, &mh);
+               /* Bug - the only time we walk from a symlink should be during
+                * walk_symlink, which should have given us a dotdot. */
+               if ((c->qid.type & QTSYMLINK) && !dotdot)
+                       panic("Got a walk from a symlink that wasn't ..!");
 
                type = c->type;
                dev = c->dev;
 
-               if((wq = devtab[type]->walk(c, NULL, names+nhave, ntry)) == NULL){
+               if ((wq = devtab[type].walk(c, NULL, names + nhave, ntry)) ==
+                   NULL) {
                        /* try a union mount, if any */
-                       if(mh && !nomount){
+                       if (mh && wh->can_mount) {
                                /*
                                 * mh->mount == c, so start at mh->mount->next
                                 */
                                rlock(&mh->lock);
-                               for(f = mh->mount->next; f; f = f->next)
-                                       if((wq = devtab[f->to->type]->walk(f->to, NULL, names+nhave, ntry)) != NULL)
+                               for (f = mh->mount->next; f; f = f->next)
+                                       if ((wq =
+                                            devtab[f->to->type].walk(f->to,
+                                                                     NULL,
+                                                                     names +
+                                                                     nhave,
+                                                                     ntry)) !=
+                                           NULL)
                                                break;
                                runlock(&mh->lock);
-                               if(f != NULL){
+                               if (f != NULL) {
                                        type = f->to->type;
                                        dev = f->to->dev;
                                }
                        }
-                       if(wq == NULL){
+                       if (wq == NULL) {
                                cclose(c);
                                cnameclose(cname);
-                               if(nerror)
-                                       *nerror = nhave+1;
-                               if(mh != NULL)
+                               if (nerror)
+                                       *nerror = nhave + 1;
+                               if (mh != NULL)
                                        putmhead(mh);
                                return -1;
                        }
                }
 
                nmh = NULL;
-               if(dotdot) {
+               if (dotdot) {
                        assert(wq->nqid == 1);
                        assert(wq->clone != NULL);
 
@@ -762,39 +864,67 @@ walk(struct chan **cp, char **names, int nnames, int nomount, int *nerror)
                        n = 1;
                } else {
                        nc = NULL;
-                       if(!nomount)
-                               for(i=0; i<wq->nqid && i<ntry-1; i++)
-                                       if(findmount(&nc, &nmh, type, dev, wq->qid[i]))
+                       if (wh->can_mount)
+                               for (i = 0; i < wq->nqid && i < ntry - 1; i++)
+                                       if (findmount(&nc, &nmh, type, dev,
+                                                     wq->qid[i]))
                                                break;
-                       if(nc == NULL){ /* no mount points along path */
-                               if(wq->clone == NULL){
+                       if (nc == NULL) {       /* no mount points along path */
+                               if (wq->clone == NULL) {
                                        cclose(c);
                                        cnameclose(cname);
-                                       if(wq->nqid==0 || (wq->qid[wq->nqid-1].type&QTDIR)){
-                                               if(nerror)
-                                                       *nerror = nhave+wq->nqid+1;
-                                               set_errstr(Edoesnotexist);
-                                       }else{
-                                               if(nerror)
-                                                       *nerror = nhave+wq->nqid;
-                                               set_errstr(Enotdir);
+                                       if (wq->nqid == 0 ||
+                                           (wq->qid[wq->nqid - 1].type &
+                                            QTDIR)) {
+                                               if (nerror)
+                                                       *nerror = nhave +
+                                                               wq->nqid + 1;
+                                               set_error(ENOENT,
+                                                         "walk failed");
+                                       } else {
+                                               if (nerror)
+                                                       *nerror = nhave +
+                                                               wq->nqid;
+                                               set_error(ENOTDIR,
+                                                         "walk failed");
                                        }
                                        kfree(wq);
-                                       if(mh != NULL)
+                                       if (mh != NULL)
                                                putmhead(mh);
                                        return -1;
                                }
                                n = wq->nqid;
                                nc = wq->clone;
-                       }else{          /* stopped early, at a mount point */
-                               if(wq->clone != NULL){
+                       } else {        /* stopped early, at a mount point */
+                               if (wq->clone != NULL) {
                                        cclose(wq->clone);
                                        wq->clone = NULL;
                                }
-                               n = i+1;
+                               lastmountpoint = nc;
+                               n = i + 1;
+                       }
+                       if (nc->qid.type & QTSYMLINK) {
+                               struct chan *old_nc = nc;
+
+                               nc = walk_symlink(old_nc, wh,
+                                                 nnames - nhave - n);
+                               if (!nc) {
+                                       /* walk_symlink() set error.
+                                        * This seems to be the standard
+                                        * walk() error-cleanup. */
+                                       if (nerror)
+                                               *nerror = nhave + n;
+                                       cclose(c);
+                                       cclose(old_nc);
+                                       cnameclose(cname);
+                                       kfree(wq);
+                                       if (mh != NULL)
+                                               putmhead(mh);
+                                       return -1;
+                               }
                        }
-                       for(i=0; i<n; i++)
-                               cname = addelem(cname, names[nhave+i]);
+                       for (i = 0; i < n; i++)
+                               cname = addelem(cname, names[nhave + i]);
                }
                cclose(c);
                c = nc;
@@ -807,7 +937,7 @@ walk(struct chan **cp, char **names, int nnames, int nomount, int *nerror)
 
        c = cunique(c);
 
-       if(c->umh != NULL){     //BUG
+       if (c->umh != NULL) {   //BUG
                printd("walk umh\n");
                putmhead(c->umh);
                c->umh = NULL;
@@ -815,10 +945,11 @@ walk(struct chan **cp, char **names, int nnames, int nomount, int *nerror)
 
        cnameclose(c->name);
        c->name = cname;
+       c->mountpoint = lastmountpoint;
 
        cclose(*cp);
        *cp = c;
-       if(nerror)
+       if (nerror)
                *nerror = 0;
        return 0;
 }
@@ -826,20 +957,19 @@ walk(struct chan **cp, char **names, int nnames, int nomount, int *nerror)
 /*
  * c is a mounted non-creatable directory.  find a creatable one.
  */
-struct chan*
-createdir(struct chan *c, struct mhead *m)
+struct chan *createdir(struct chan *c, struct mhead *m)
 {
        ERRSTACK(1);
        struct chan *nc;
        struct mount *f;
 
        rlock(&m->lock);
-       if(waserror()) {
+       if (waserror()) {
                runlock(&m->lock);
                nexterror();
        }
-       for(f = m->mount; f; f = f->next) {
-               if(f->mflag&MCREATE) {
+       for (f = m->mount; f; f = f->next) {
+               if (f->mflag & MCREATE) {
                        nc = cclone(f->to);
                        runlock(&m->lock);
                        poperror();
@@ -847,7 +977,7 @@ createdir(struct chan *c, struct mhead *m)
                        return nc;
                }
        }
-       error(Enocreate);
+       error(EPERM, ERROR_FIXME);
        poperror();
        return 0;
 }
@@ -855,14 +985,13 @@ createdir(struct chan *c, struct mhead *m)
 /*
  * In place, rewrite name to compress multiple /, eliminate ., and process ..
  */
-void
-cleancname(struct cname *n)
+void cleancname(struct cname *n)
 {
        char *p;
 
-       if(n->s[0] == '#'){
+       if (n->s[0] == '#') {
                p = strchr(n->s, '/');
-               if(p == NULL)
+               if (p == NULL)
                        return;
                cleanname(p);
 
@@ -870,27 +999,26 @@ cleancname(struct cname *n)
                 * The correct name is #i rather than #i/,
                 * but the correct name of #/ is #/.
                 */
-               if(strcmp(p, "/")==0 && n->s[1] != '/')
+               if (strcmp(p, "/") == 0 && n->s[1] != '/')
                        *p = '\0';
-       }else
+       } else
                cleanname(n->s);
        n->len = strlen(n->s);
 }
 
-static void
-growparse(Elemlist *e)
+static void growparse(Elemlist * e)
 {
        char **new;
        int *inew;
        enum { Delta = 8 };
 
-       if(e->ARRAY_SIZEs % Delta == 0){
+       if (e->ARRAY_SIZEs % Delta == 0) {
                new = kzmalloc((e->ARRAY_SIZEs + Delta) * sizeof(char *), 0);
-               memmove(new, e->elems, e->ARRAY_SIZEs*sizeof( char *));
+               memmove(new, e->elems, e->ARRAY_SIZEs * sizeof(char *));
                kfree(e->elems);
                e->elems = new;
                inew = kzmalloc((e->ARRAY_SIZEs + Delta + 1) * sizeof(int), 0);
-               memmove(inew, e->off, e->ARRAY_SIZEs*sizeof(int));
+               memmove(inew, e->off, e->ARRAY_SIZEs * sizeof(int));
                kfree(e->off);
                e->off = inew;
        }
@@ -905,8 +1033,7 @@ growparse(Elemlist *e)
  * reject, e.g., "/adm/users/." when /adm/users is a file
  * rather than a directory.
  */
-static void
-parsename(char *name, Elemlist *e)
+static void parsename(char *name, Elemlist * e)
 {
        char *slash;
 
@@ -916,21 +1043,21 @@ parsename(char *name, Elemlist *e)
        e->elems = NULL;
        e->off = kzmalloc(sizeof(int), 0);
        e->off[0] = skipslash(name) - name;
-       for(;;){
+       for (;;) {
                name = skipslash(name);
-               if(*name=='\0'){
+               if (*name == '\0') {
                        e->mustbedir = 1;
                        break;
                }
                growparse(e);
-               
+
                e->elems[e->ARRAY_SIZEs++] = name;
                /* we may want to do this again some day
-               slash = utfrune(name, '/');
-               */
+                  slash = utfrune(name, '/');
+                */
                slash = strchr(name, '/');
-               if(slash == NULL){
-                       e->off[e->ARRAY_SIZEs] = name+strlen(name) - e->name;
+               if (slash == NULL) {
+                       e->off[e->ARRAY_SIZEs] = name + strlen(name) - e->name;
                        e->mustbedir = 0;
                        break;
                }
@@ -940,14 +1067,13 @@ parsename(char *name, Elemlist *e)
        }
 }
 
-void*
-memrchr(void *va, int c, long n)
+void *memrchr(void *va, int c, long n)
 {
        uint8_t *a, *e;
 
        a = va;
-       for(e=a+n-1; e>a; e--)
-               if(*e == c)
+       for (e = a + n - 1; e > a; e--)
+               if (*e == c)
                        return e;
        return NULL;
 }
@@ -959,7 +1085,7 @@ memrchr(void *va, int c, long n)
  * Opening with amode Aopen, Acreate, or Aremove guarantees
  * that the result will be the only reference to that particular fid.
  * This is necessary since we might pass the result to
- * devtab[]->remove().
+ * devtab[].remove().
  *
  * Opening Atodir, Amount, or Aaccess does not guarantee this.
  *
@@ -967,174 +1093,146 @@ memrchr(void *va, int c, long n)
  * correct Chan* but with an incorrect struct cname attached.
  * Since the functions that open Aaccess (sysstat, syswstat, sys_stat)
  * do not use the struct cname*, this avoids an unnecessary clone.
- */
-struct chan*
-namec(char *aname, int amode, int omode, uint32_t perm)
+ *
+ * The classic namec() is broken into a front end to get the starting point and
+ * a __namec_from, which does the guts of the lookup.  */
+static struct chan *__namec_from(struct chan *c, char *aname, int amode,
+                                 int omode, uint32_t perm,
+                                 struct walk_helper *wh, void *ext)
 {
        ERRSTACK(2);
-       int n, prefix, len, t, nomount, npath;
-       struct chan *c, *cnew;
+       int len, npath;
+       struct chan *cnew, *renamee;
        struct cname *cname;
        Elemlist e;
        struct mhead *m;
        char tmperrbuf[ERRMAX];
-       char *name;
+       int saved_errno;
        // Rune r;
 
-       name = aname;
-       if(name[0] == '\0')
-               error("empty file name");
-       validname(name, 1);
-
-       /*
-        * Find the starting off point (the current slash, the root of
-        * a device tree, or the current dot) as well as the name to
-        * evaluate starting there.
-        */
-       nomount = 0;
-       switch(name[0]){
-       case '/':
-               c = current->slash;
-               if (! c)
-                       panic("no slash!");
-               kref_get(&c->ref, 1);
-               break;
-       
-       case '#':
-               nomount = 1;
-               get_cur_genbuf()[0] = '\0';
-               n = 0;
-               while(*name!='\0' && (*name != '/' || n < 2)){
-                       if(n >= sizeof(get_cur_genbuf())-1)
-                               error(Efilename);
-                       get_cur_genbuf()[n++] = *name++;
-               }
-               get_cur_genbuf()[n] = '\0';
-#if 0
-               n = chartorune(&r, get_cur_genbuf()+1)+1;
-               if(r == 'M')
-                       error(Enoattach);
-#endif
-               if (get_cur_genbuf()[1] == 'M')
-                       error(Enoattach);
-               /*
-                *  the nodevs exceptions are
-                *      |  it only gives access to pipes you create
-                *      e  this process's environment
-                *      s  private file2chan creation space
-                *      D private secure sockets name space
-                *      a private TLS name space
-                */
-               if(current->pgrp->nodevs &&
-                  //              (utfrune("|esDa", r) == NULL
-                  ((strchr("|esDa", get_cur_genbuf()[1]) == NULL)
-                   || (get_cur_genbuf()[1] == 's' // || r == 's'
-                       && get_cur_genbuf()[n]!='\0')))
-                       error(Enoattach);
-               t = devno(/*r*/get_cur_genbuf()[1], 1);
-               if(t == -1)
-                       error(Ebadsharp);
-               c = devtab[t]->attach(get_cur_genbuf()+n);
-               break;
-
-       default:
-               c = current->dot;
-               if (! c)
-                       panic("no dot!");
-               kref_get(&c->ref, 1);
-               break;
-       }
-       prefix = name - aname;
+       static_assert(!(CINTERNAL_FLAGS & CEXTERNAL_FLAGS));
 
        e.name = NULL;
        e.elems = NULL;
        e.off = NULL;
        e.ARRAY_SIZEs = 0;
-       if(waserror()){
+       if (waserror()) {
                cclose(c);
                kfree(e.name);
                kfree(e.elems);
                kfree(e.off);
-//dumpmount();
+               //dumpmount();
                nexterror();
        }
 
        /*
         * Build a list of elements in the path.
         */
-       parsename(name, &e);
+       parsename(aname, &e);
 
-       /*
-        * On create, ....
-        */
-       if(amode == Acreate){
+       if (e.mustbedir)
+               omode &= ~O_NOFOLLOW;
+
+       switch (amode) {
+       case Acreate:
                /* perm must have DMDIR if last element is / or /. */
-               if(e.mustbedir && !(perm&DMDIR)){
+               if (e.mustbedir && !(perm & DMDIR)) {
                        npath = e.ARRAY_SIZEs;
-                       strncpy(tmperrbuf,  "create without DMDIR", sizeof(tmperrbuf));
-                       goto NameError;
+                       error(EINVAL, "create without DMDIR");
                }
-
                /* don't try to walk the last path element just yet. */
-               if(e.ARRAY_SIZEs == 0)
-                       error(Eexist);
+               if (e.ARRAY_SIZEs == 0)
+                       error(EEXIST, ERROR_FIXME);
                e.ARRAY_SIZEs--;
+               /* We're dropping the last element, which O_NOFOLLOW applied to.
+                * Not sure if there are any legit reasons to have O_NOFOLLOW
+                * with create.*/
+               omode &= ~O_NOFOLLOW;
+               break;
+       case Arename:
+               if (e.ARRAY_SIZEs == 0)
+                       error(EINVAL, "rename needs at least one name");
+               e.ARRAY_SIZEs--;
+               omode &= ~O_NOFOLLOW;
+               break;
+       /* the difference for stat and lstat (Aaccess) are handled in sysfile.c
+        */
+       case Abind:
+       case Amount:
+       case Aremove:
+               omode |= O_NOFOLLOW;
+               break;
        }
 
-       if(walk(&c, e.elems, e.ARRAY_SIZEs, nomount, &npath) < 0){
-               if(npath < 0 || npath > e.ARRAY_SIZEs){
+       if (omode & O_NOFOLLOW)
+               wh->no_follow = true;
+
+       if (walk(&c, e.elems, e.ARRAY_SIZEs, wh, &npath) < 0) {
+               if (npath < 0 || npath > e.ARRAY_SIZEs) {
                        printd("namec %s walk error npath=%d\n", aname, npath);
-                       error("walk failed");
+                       error(EFAIL, "walk failed");
                }
-       NameError:
-               error("some kinda name error");
-               /* brho: skipping the namec custom error string business, since it hides
-                * the underlying failure.  implement this if you want the old stuff. */
+               /* Old plan 9 errors would jump here for the magic error
+                * parsing. */
+NameError:
+               if (current_errstr()[0]) {
+                       /* errstr is set, just stick with it and error out */
+                       error_jmp();
+               } else {
+                       error(EFAIL, "Name to chan lookup failed");
+               }
+               /* brho: skipping the namec custom error string business, since
+                * it hides the underlying failure.  implement this if you want
+                * the old stuff. */
 #if 0
-               strncpy(tmperrbuf,  current->errstr, sizeof(tmperrbuf));
-               len = prefix+e.off[npath];
-               if(len < ERRMAX/3 || (name=memrchr(aname, '/', len))==NULL || name==aname)
-                       snprintf(get_cur_genbuf(), sizeof current->genbuf, "%.*s", len, aname);
+               strlcpy(tmperrbuf, current->errstr, sizeof(tmperrbuf));
+               // prefix was name - aname, the start pt
+               len = prefix + e.off[npath];
+               if (len < ERRMAX / 3 || (name = memrchr(aname, '/', len)) ==
+                   NULL || name == aname)
+                       snprintf(get_cur_genbuf(), sizeof current->genbuf,
+                                "%.*s", len, aname);
                else
-                       snprintf(get_cur_genbuf(), sizeof current->genbuf, "...%.*s", (int)(len-(name-aname)), name);
-               snprintf(current->errstr, ERRMAX, "%#q %s", get_cur_genbuf(), tmperrbuf);
+                       snprintf(get_cur_genbuf(), sizeof current->genbuf,
+                                "...%.*s", (int)(len - (name - aname)), name);
+               snprintf(current->errstr, ERRMAX, "%#q %s", get_cur_genbuf(),
+                                tmperrbuf);
 #endif
        }
 
-       if(e.mustbedir && !(c->qid.type&QTDIR)){
+       if (e.mustbedir && !(c->qid.type & QTDIR)) {
                npath = e.ARRAY_SIZEs;
-               strncpy(tmperrbuf,  "not a directory", sizeof(tmperrbuf));
-               goto NameError;
+               error(ENOTDIR, "not a dir, but mustbedir.  trailing slash?");
        }
 
-       if(amode == Aopen && (omode&3) == OEXEC && (c->qid.type&QTDIR)){
+       if ((amode == Aopen) && (omode & O_EXEC) && (c->qid.type & QTDIR)) {
                npath = e.ARRAY_SIZEs;
-               error("cannot exec directory");
+               error(EFAIL, "cannot exec directory");
        }
 
-       switch(amode){
+       switch (amode) {
        case Aaccess:
-               if(!nomount)
+               if (wh->can_mount)
                        domount(&c, NULL);
                break;
 
        case Abind:
                m = NULL;
-               if(!nomount)
+               if (wh->can_mount)
                        domount(&c, &m);
-               if(c->umh != NULL)
+               if (c->umh != NULL)
                        putmhead(c->umh);
                c->umh = m;
                break;
 
        case Aremove:
        case Aopen:
-       Open:
+Open:
                /* save the name; domount might change c */
                cname = c->name;
                kref_get(&cname->ref, 1);
                m = NULL;
-               if(!nomount)
+               if (wh->can_mount)
                        domount(&c, &m);
 
                /* our own copy to open or remove */
@@ -1144,45 +1242,49 @@ namec(char *aname, int amode, int omode, uint32_t perm)
                cnameclose(c->name);
                c->name = cname;
 
-               switch(amode){
+               switch (amode) {
                case Aremove:
                        putmhead(m);
                        break;
 
                case Aopen:
                case Acreate:
-if(c->umh != NULL){
-       printd("cunique umh\n");
-       putmhead(c->umh);
-       c->umh = NULL;
-}
+                       if (c->umh != NULL) {
+                               printd("cunique umh\n");
+                               putmhead(c->umh);
+                               c->umh = NULL;
+                       }
 
-                       /* only save the mount head if it's a multiple element union */
-                       if(m && m->mount && m->mount->next)
+                       /* only save the mount head if it's a multiple element
+                        * union */
+                       if (m && m->mount && m->mount->next)
                                c->umh = m;
                        else
                                putmhead(m);
-
-                       if(omode == OEXEC)
-                               c->flag &= ~CCACHE;
-
-                       c = devtab[c->type]->open(c, omode&~OCEXEC);
-
-                       if(omode & OCEXEC)
-                               c->flag |= CCEXEC;
-                       if(omode & ORCLOSE)
-                               c->flag |= CRCLOSE;
+                       /* here is where convert omode/vfs flags to c->flags.
+                        * careful, O_CLOEXEC and O_REMCLO are in there.  might
+                        * need to change that. */
+                       c->flag |= omode & CEXTERNAL_FLAGS;
+                       c = devtab[c->type].open(c,
+                                                omode & ~O_CLOEXEC);
+                       /* if you get this from a dev, in the dev's open, you
+                        * are probably saving mode directly, without passing it
+                        * through openmode. */
+                       if (c->mode & O_TRUNC)
+                               error(EFAIL,
+                                     "Device %s open failed to clear O_TRUNC",
+                                     devtab[c->type].name);
                        break;
                }
                break;
 
        case Atodir:
                /*
-                * Directories (e.g. for cd) are left before the mount po int unused_int,
+                * Directories (e.g. for cd) are left before the mount point,
                 * so one may mount on / or . and see the effect.
                 */
-               if(!(c->qid.type & QTDIR))
-                       error(Enotdir);
+               if (!(c->qid.type & QTDIR))
+                       error(ENOTDIR, ERROR_FIXME);
                break;
 
        case Amount:
@@ -1193,6 +1295,52 @@ if(c->umh != NULL){
                 */
                break;
 
+       case Arename:
+               /* We already walked to the parent of new_path, which is in c.
+                * We're a lot like create here - need to find mounts, etc.  On
+                * the way out, we putmhead if we have an m, and clean up our
+                * chans.  On success, c becomes cnew (thus close the old c).
+                * On failure, we just close cnew. */
+               if (!(c->qid.type & QTDIR))
+                       error(ENOTDIR, "rename target parent is not a dir");
+               e.ARRAY_SIZEs++;
+               m = NULL;
+               cnew = NULL;
+               if (waserror()) {
+                       /* rename or createdir failed */
+                       cclose(cnew);
+                       if (m)
+                               putmhead(m);
+                       nexterror();    /* safe since we're in a waserror() */
+               }
+               if (wh->can_mount && findmount(&cnew, &m, c->type, c->dev,
+                                              c->qid)) {
+                       cnew = createdir(cnew, m);
+               } else {
+                       cnew = c;
+                       chan_incref(cnew);
+               }
+               cnew = cunique(cnew);
+               cnameclose(cnew->name);
+               cnew->name = c->name;
+               kref_get(&cnew->name->ref, 1);
+               /* At this point, we have our new_path parent chan (cnew) and
+                * the renamee chan */
+               renamee = ext;
+               if (cnew->type != renamee->type)
+                       error(EXDEV, "can't rename across device types");
+
+               devtab[cnew->type].rename(renamee, cnew,
+                                         e.elems[e.ARRAY_SIZEs - 1], 0);
+               poperror();
+
+               if (m)
+                       putmhead(m);
+               cclose(c);
+               c = cnew;
+               c->name = addelem(c->name, e.elems[e.ARRAY_SIZEs - 1]);
+               break;
+
        case Acreate:
                /*
                 * We've already walked all but the last element.
@@ -1200,44 +1348,47 @@ if(c->umh != NULL){
                 * If omode&OEXCL is set, just give up.
                 */
                e.ARRAY_SIZEs++;
-               if(walk(&c, e.elems+e.ARRAY_SIZEs-1, 1, nomount, NULL) == 0){
-                       if(omode&OEXCL)
-                               error(Eexist);
-                       omode |= OTRUNC;
+               if (walk(&c, e.elems + e.ARRAY_SIZEs - 1, 1, wh, NULL) == 0) {
+                       if (omode & O_EXCL)
+                               error(EEXIST, ERROR_FIXME);
+                       omode |= O_TRUNC;
                        goto Open;
                }
 
                /*
                 * The semantics of the create(2) system call are that if the
-                * file exists and can be written, it is to be opened with truncation.
-                * On the other hand, the create(5) message fails if the file exists.
-                * If we get two create(2) calls happening simultaneously, 
-                * they might both get here and send create(5) messages, but only 
-                * one of the messages will succeed.  To provide the expected create(2)
-                * semantics, the call with the failed message needs to try the above
-                * walk again, opening for truncation.  This correctly solves the 
-                * create/create race, in the sense that any observable outcome can
-                * be explained as one happening before the other.
-                * The create/create race is quite common.  For example, it happens
-                * when two rc subshells simultaneously update the same
-                * environment variable.
+                * file exists and can be written, it is to be opened with
+                * truncation.  On the other hand, the create(5) message fails
+                * if the file exists.
+                *
+                * If we get two create(2) calls happening simultaneously, they
+                * might both get here and send create(5) messages, but only one
+                * of the messages will succeed.  To provide the expected
+                * create(2) semantics, the call with the failed message needs
+                * to try the above walk again, opening for truncation.  This
+                * correctly solves the create/create race, in the sense that
+                * any observable outcome can be explained as one happening
+                * before the other.  The create/create race is quite common.
+                * For example, it happens when two rc subshells simultaneously
+                * update the same environment variable.
                 *
                 * The implementation still admits a create/create/remove race:
                 * (A) walk to file, fails
                 * (B) walk to file, fails
-                * (A) create file, succeeds, returns 
+                * (A) create file, succeeds, returns
                 * (B) create file, fails
                 * (A) remove file, succeeds, returns
                 * (B) walk to file, return failure.
                 *
-                * This is hardly as common as the create/create race, and is really
-                * not too much worse than what might happen if (B) got a hold of a
-                * file descriptor and then the file was removed -- either way (B) can't do
-                * anything with the result of the create call.  So we don't care about this race.
+                * This is hardly as common as the create/create race, and is
+                * really not too much worse than what might happen if (B) got a
+                * hold of a file descriptor and then the file was removed --
+                * either way (B) can't do anything with the result of the
+                * create call.  So we don't care about this race.
                 *
-                * Applications that care about more fine-grained decision of the races
-                * can use the OEXCL flag to get at the underlying create(5) semantics;
-                * by default we provide the common case.
+                * Applications that care about more fine-grained decision of
+                * the races can use the OEXCL flag to get at the underlying
+                * create(5) semantics; by default we provide the common case.
                 *
                 * We need to stay behind the mount point in case we
                 * need to do the first walk again (should the create fail).
@@ -1245,60 +1396,66 @@ if(c->umh != NULL){
                 * We also need to cross the mount point and find the directory
                 * in the union in which we should be creating.
                 *
-                * The channel staying behind is c, the one moving forward is cnew.
+                * The channel staying behind is c, the one moving forward is
+                * cnew.
                 */
                m = NULL;
                cnew = NULL;    /* is this assignment necessary? */
                /* discard error */
-               if(!waserror()){        /* try create */
-                       if(!nomount && findmount(&cnew, &m, c->type, c->dev, c->qid))
+               if (!waserror()) {      /* try create */
+                       if (wh->can_mount &&
+                           findmount(&cnew, &m, c->type, c->dev, c->qid))
                                cnew = createdir(cnew, m);
-                       else{
+                       else {
                                cnew = c;
-                               kref_get(&cnew->ref, 1);
+                               chan_incref(cnew);
                        }
 
                        /*
-                        * We need our own copy of the Chan because we're
-                        * about to send a create, which will move it.  Once we have
-                        * our own copy, we can fix the name, which might be wrong
-                        * if findmount gave us a new Chan.
+                        * We need our own copy of the Chan because we're about
+                        * to send a create, which will move it.  Once we have
+                        * our own copy, we can fix the name, which might be
+                        * wrong if findmount gave us a new Chan.
                         */
                        cnew = cunique(cnew);
                        cnameclose(cnew->name);
                        cnew->name = c->name;
                        kref_get(&cnew->name->ref, 1);
 
-                       devtab[cnew->type]->create(cnew, e.elems[e.ARRAY_SIZEs-1], omode&~(OEXCL|OCEXEC), perm);
+                       cnew->flag |= omode & CEXTERNAL_FLAGS;
+                       devtab[cnew->type].create(cnew,
+                                                 e.elems[e.ARRAY_SIZEs - 1],
+                                                 omode & ~(O_EXCL | O_CLOEXEC),
+                                                 perm, ext);
                        poperror();
-                       if(omode & OCEXEC)
-                               cnew->flag |= CCEXEC;
-                       if(omode & ORCLOSE)
-                               cnew->flag |= CRCLOSE;
-                       if(m)
+
+                       if (m)
                                putmhead(m);
                        cclose(c);
                        c = cnew;
-                       c->name = addelem(c->name, e.elems[e.ARRAY_SIZEs-1]);
+                       c->name = addelem(c->name, e.elems[e.ARRAY_SIZEs - 1]);
                        break;
                }
 
                /* create failed */
                cclose(cnew);
-               if(m)
+               if (m)
                        putmhead(m);
-               if(omode & OEXCL)
+               if (omode & O_EXCL)
                        nexterror();    /* safe since we're in a waserror() */
-               poperror();             /* matching the if(!waserror) */
+               poperror();     /* matching the if(!waserror) */
 
-               /* save error, so walk doesn't clobber our existing errstr */         
-               strncpy(tmperrbuf, current_errstr(), MAX_ERRSTR_LEN);                 
+               /* save error, so walk doesn't clobber our existing errstr */
+               strlcpy(tmperrbuf, current_errstr(), sizeof(tmperrbuf));
+               saved_errno = get_errno();
                /* note: we depend that walk does not error */
-               if (walk(&c, e.elems + e.ARRAY_SIZEs - 1, 1, nomount, NULL) < 0) {
-                       error(tmperrbuf);   /* report the error we had originally */      
+               if (walk(&c, e.elems + e.ARRAY_SIZEs - 1, 1, wh, NULL) < 0) {
+                       set_errno(saved_errno);
+                       /* Report the error we had originally */
+                       error(EFAIL, tmperrbuf);
                }
-               strncpy(current_errstr(), tmperrbuf, MAX_ERRSTR_LEN); 
-               omode |= OTRUNC;
+               strlcpy(current_errstr(), tmperrbuf, MAX_ERRSTR_LEN);
+               omode |= O_TRUNC;
                goto Open;
 
        default:
@@ -1307,10 +1464,11 @@ if(c->umh != NULL){
 
        poperror();
 
-       if(e.ARRAY_SIZEs > 0)
-               strncpy(get_cur_genbuf(), e.elems[e.ARRAY_SIZEs-1], GENBUF_SZ);
+       if (e.ARRAY_SIZEs > 0)
+               strlcpy(get_cur_genbuf(), e.elems[e.ARRAY_SIZEs - 1],
+                       GENBUF_SZ);
        else
-               strncpy(get_cur_genbuf(), ".", GENBUF_SZ);
+               strlcpy(get_cur_genbuf(), ".", GENBUF_SZ);
 
        kfree(e.name);
        kfree(e.elems);
@@ -1319,24 +1477,123 @@ if(c->umh != NULL){
        return c;
 }
 
+struct chan *namec(char *name, int amode, int omode, uint32_t perm, void *ext)
+{
+       struct walk_helper wh = {.can_mount = true};
+       struct chan *c;
+       char *devname, *devspec;
+       int n, devtype;
+
+       if (name[0] == '\0')
+               error(EFAIL, "empty file name");
+       validname(name, 1);
+       /*
+        * Find the starting off point (the current slash, the root of
+        * a device tree, or the current dot) as well as the name to
+        * evaluate starting there.
+        */
+       switch (name[0]) {
+       case '/':
+               if (current)
+                       c = current->slash;
+               else
+                       c = kern_slash;
+               chan_incref(c);
+               break;
+
+       case '#':
+               wh.can_mount = false;
+               devname = get_cur_genbuf();
+               devname[0] = '\0';
+               n = 0;
+               name++; /* drop the # */
+               while ((*name != '\0') && (*name != '/')) {
+                       if (n >= GENBUF_SZ - 1)
+                               error(ENAMETOOLONG, ERROR_FIXME);
+                       devname[n++] = *name++;
+               }
+               devname[n] = '\0';
+               /* for a name #foo.spec, devname = foo\0, devspec = spec\0.
+                * genbuf contains foo\0spec\0.  for no spec, devspec = \0 */
+               devspec = strchr(devname, '.');
+               if (devspec) {
+                       *devspec = '\0';
+                       devspec++;
+               } else {
+                       devspec = &devname[n];
+               }
+               /* These devices have special attach functions that treat the
+                * char * as a blob pointer */
+               if (!strcmp(devname, "mnt"))
+                       error(EINVAL, "can't namec-attach #mnt");
+               if (!strcmp(devname, "gtfs"))
+                       error(EINVAL, "can't namec-attach #gtfs");
+               /* TODO: deal with this "nodevs" business. */
+               #if 0
+               /*
+                *  the nodevs exceptions are
+                *  |  it only gives access to pipes you create
+                *  e  this process's environment
+                *  s  private file2chan creation space
+                *  D private secure sockets name space
+                *  a private TLS name space
+                */
+               if (current->pgrp->nodevs &&
+                       //          (utfrune("|esDa", r) == NULL
+                       ((strchr("|esDa", get_cur_genbuf()[1]) == NULL)
+                        || (get_cur_genbuf()[1] == 's' // || r == 's'
+                                && get_cur_genbuf()[n] != '\0')))
+                       error(EINVAL, ERROR_FIXME);
+               #endif
+               devtype = devno(devname, 1);
+               if (devtype == -1)
+                       error(EFAIL, "Unknown #device %s (spec %s)", devname,
+                             devspec);
+               c = devtab[devtype].attach(devspec);
+               break;
+       default:
+               /* this case also covers \0 */
+               c = current->dot;
+               if (!c)
+                       panic("no dot!");
+               chan_incref(c);
+               break;
+       }
+       return __namec_from(c, name, amode, omode, perm, &wh, ext);
+}
+
+struct chan *namec_from(struct chan *c, char *name, int amode, int omode,
+                        uint32_t perm, void *ext)
+{
+       struct walk_helper wh = {.can_mount = true};
+
+       if (name[0] == '\0') {
+               /* Our responsibility to cclose 'c' on our error */
+               cclose(c);
+               error(EFAIL, "empty file name");
+       }
+       validname(name, 1);
+       return __namec_from(c, name, amode, omode, perm, &wh, ext);
+}
+
 /*
  * name is valid. skip leading / and ./ as much as possible
  */
-char*
-skipslash(char *name)
+char *skipslash(char *name)
 {
-       while(name[0]=='/' || (name[0]=='.' && (name[1]==0 || name[1]=='/')))
+       while (name[0] == '/'
+                  || (name[0] == '.' && (name[1] == 0 || name[1] == '/')))
                name++;
        return name;
 }
 
-char isfrog[256]={
-       /*NUL*/ 1, 1, 1, 1, 1, 1, 1, 1,
-       /*BKS*/ 1, 1, 1, 1, 1, 1, 1, 1,
-       /*DLE*/ 1, 1, 1, 1, 1, 1, 1, 1,
-       /*CAN*/ 1, 1, 1, 1, 1, 1, 1, 1,
-       ['/']   1,
-       [0x7f]  1,
+char isfrog[256] = {
+        /*NUL*/ 1, 1, 1, 1, 1, 1, 1, 1,
+        /*BKS*/ 1, 1, 1, 1, 1, 1, 1, 1,
+        /*DLE*/ 1, 1, 1, 1, 1, 1, 1, 1,
+        /*CAN*/ 1, 1, 1, 1, 1, 1, 1, 1,
+       ['/'] 1,
+       [0x7f] 1,
 };
 
 /*
@@ -1349,43 +1606,42 @@ char isfrog[256]={
  * The parameter slashok flags whether a slash character is an error
  * or a valid character.
  */
-void
-validname(char *aname, int slashok)
+void validname(char *aname, int slashok)
 {
        char *ename, *name;
        int c;
 
        name = aname;
-       ename = memchr(name, 0, (1<<16));
+       ename = memchr(name, 0, (1 << 16));
 
-       if(ename==NULL || ename-name>=(1<<16))
-               error("name too long");
+       if (ename == NULL || ename - name >= (1 << 16))
+               error(EINVAL, "Name too long");
 
-       while(*name){
+       while (*name) {
                /* all characters above '~' are ok */
-               c = *( uint8_t *)name;
-#if 0      
-               if(c >= Runeself)
+               c = *(uint8_t *) name;
+#if 0
+               if (c >= Runeself)
                        name += chartorune(&r, name);
 #endif
-               if(c >= 0x7f){
-                       error("Akaros doesn't do UTF-8");
+               if (c >= 0x7f) {
+                       error(EFAIL, "Akaros doesn't do UTF-8");
                } else {
-                       if(isfrog[c])
-                               if(!slashok || c!='/'){
-                                       error("%s: %q", Ebadchar, aname);
-                       }
+                       if (isfrog[c])
+                               if (!slashok || c != '/') {
+                                       error(EINVAL, "%s (%p), at char %c",
+                                             aname, aname, c);
+                               }
                        name++;
                }
        }
 }
 
-void
-isdir(struct chan *c)
+void isdir(struct chan *c)
 {
-       if(c->qid.type & QTDIR)
+       if (c->qid.type & QTDIR)
                return;
-       error(Enotdir);
+       error(ENOTDIR, ERROR_FIXME);
 }
 
 /*
@@ -1402,14 +1658,111 @@ isdir(struct chan *c)
  * The mount list is deleted when we cunmount.
  * The RWlock ensures that nothing is using the mount list at that time.
  *
- * It is okay to replace c->mh with whatever you want as 
+ * It is okay to replace c->mh with whatever you want as
  * long as you are sure you have a unique reference to it.
  *
  * This comment might belong somewhere else.
  */
-void
-putmhead(struct mhead *m)
+void putmhead(struct mhead *m)
 {
        if (m)
                kref_put(&m->ref);
 }
+
+/* Given s, make a copy of a string with padding bytes in front.  Returns a
+ * pointer to the start of the string and the memory to free in str_store.
+ *
+ * Free str_store with kfree. */
+static char *pad_and_strdup(char *s, int padding, char **str_store)
+{
+       char *store = kzmalloc(strlen(s) + 1 + padding, MEM_WAIT);
+
+       strlcpy(store + padding, s, strlen(s) + 1);
+       *str_store = store;
+       return store + padding;
+}
+
+/* Walks a symlink c.  Returns the target chan, which could be the symlink
+ * itself, if we're NO_FOLLOW.  On success, we'll decref the symlink and give
+ * you a ref counted result.
+ *
+ * Returns NULL on error, and does not close the symlink.  Like regular walk, it
+ * is all or nothing. */
+static struct chan *walk_symlink(struct chan *symlink, struct walk_helper *wh,
+                                 unsigned int nr_names_left)
+{
+       struct dir *dir;
+       char *link_name, *link_store;
+       struct chan *from;
+       bool old_nofollow;
+       Elemlist e = {0};
+
+       /* mildly expensive: need to rlock the namespace */
+       if (is_mount_point(symlink))
+               return symlink;
+       if (!nr_names_left && wh->no_follow)
+               return symlink;
+       if (wh->nr_loops >= WALK_MAX_NR_LOOPS) {
+               set_error(ELOOP, "too many nested symlinks in walk");
+               return NULL;
+       }
+       dir = chandirstat(symlink);
+       if (!dir) {
+               /* Should propagate the error from dev.stat() */
+               return NULL;
+       }
+       if (!(dir->mode & DMSYMLINK)) {
+               set_error(ELOOP, "symlink isn't a symlink!");
+               kfree(dir);
+               return NULL;
+       }
+       link_name = pad_and_strdup(dir->ext, 3, &link_store);
+       kfree(dir);
+
+       if (link_name[0] == '/') {
+               if (current)
+                       from = current->slash;
+               else
+                       from = kern_slash;
+       } else {
+               from = symlink;
+               link_name -= 3;
+               strncpy(link_name, "../", 3);
+               if (!from->name)
+                       from->name = newcname("");
+       }
+       /* we close this ref on failure or it gets walked to the result. */
+       chan_incref(from);
+
+       parsename(link_name, &e);
+       kfree(link_store);
+
+       wh->nr_loops++;
+       /* no_follow applies to the outermost walk, i.e. the one that the
+        * original namec performs.  At this point, we've decided that we're
+        * going to try and follow a symlink: even if its no_follow, that only
+        * applies to the last link in the original path.  Our sub-walks are not
+        * no_follow.
+        *
+        * Note the other wh vars need to stay with the walk: nr_loops,
+        * since its our method of detecting symlink loops, and can_mount, which
+        * is a property of the overall namec() call. */
+       old_nofollow = wh->no_follow;
+       wh->no_follow = false;
+       if (walk(&from, e.elems, e.ARRAY_SIZEs, wh, NULL) < 0) {
+               cclose(from);
+               from = NULL;
+       } else {
+               /* We can still have a successful walk and have the new 'from'
+                * be a symlink.  We'd need walk_symlink to return a symlink
+                * chan, which happens if the symlink is a mount point. */
+               cclose(symlink);
+       }
+       wh->no_follow = old_nofollow;
+       wh->nr_loops--;
+
+       kfree(e.name);
+       kfree(e.elems);
+       kfree(e.off);
+       return from;
+}