First Inferno imports
author Ronald G. Minnich <rminnich@google.com>
Thu, 16 Jan 2014 01:30:23 +0000 (17:30 -0800)
committer Ronald G. Minnich <rminnich@google.com>
Thu, 16 Jan 2014 22:43:42 +0000 (14:43 -0800)
This is a first pass, and we're including the Inferno files as-is
so we can retain some ideas about what we change.

Signed-off-by: Ronald G. Minnich <rminnich@google.com>
kern/include/ip.h [new file with mode: 0644]
kern/include/ns.h [new file with mode: 0644]
kern/src/ns/chan.c [new file with mode: 0644]

diff --git a/kern/include/ip.h b/kern/include/ip.h
new file mode 100644 (file)
index 0000000..ef88455
--- /dev/null
@@ -0,0 +1,861 @@
+//INFERNO
+
+typedef struct Conv    Conv;
+typedef struct Fs      Fs;
+typedef union  Hwaddr  Hwaddr;
+typedef struct IP      IP;
+typedef struct IPaux   IPaux;
+typedef struct Ipself  Ipself;
+typedef struct Ipselftab       Ipselftab;
+typedef struct Iplink  Iplink;
+typedef struct Iplifc  Iplifc;
+typedef struct Ipmulti Ipmulti;
+typedef struct IProuter IProuter;
+typedef struct Ipifc   Ipifc;
+typedef struct Iphash  Iphash;
+typedef struct Ipht    Ipht;
+typedef struct Netlog  Netlog;
+typedef struct Ifclog  Ifclog;
+typedef struct Medium  Medium;
+typedef struct Proto   Proto;
+typedef struct Arpent  Arpent;
+typedef struct Arp Arp;
+typedef struct Route   Route;
+
+typedef struct Routerparams    Routerparams;
+typedef struct         Hostparams      Hostparams;
+typedef struct         V6router        V6router;
+typedef struct V6params        V6params;
+
+#pragma incomplete Arp
+#pragma        incomplete Ifclog
+#pragma incomplete Ipself
+#pragma incomplete Ipselftab
+#pragma incomplete IP
+#pragma incomplete Netlog
+
+enum
+{
+       Addrlen=        64,
+       Maxproto=       20,             /* Fs.p[] has Maxproto+1 slots */
+       Nhash=          64,
+       Maxincall=      5,
+       Nchans=         256,
+       MAClen=         16,             /* longest mac address, in bytes (sizes Ipifc.mac, Arpent.mac) */
+
+       MAXTTL=         255,
+       DFLTTOS=        0,
+
+       IPaddrlen=      16,             /* bytes in every IP address array in this header */
+       IPv4addrlen=    4,              /* bytes in a bare v4 address (V4route.gate) */
+       IPv4off=        12,             /* == IPaddrlen-IPv4addrlen; presumably where the v4 bytes sit in a 16-byte address -- confirm */
+       IPllen=         4,              /* ulongs per V6route address; NOTE(review): spans IPaddrlen bytes only if ulong is 32 bits */
+
+       /* ip versions */
+       V4=             4,
+       V6=             6,
+       IP_VER4=        0x40,           /* == V4<<4 */
+       IP_VER6=        0x60,           /* == V6<<4 */
+
+       /* 2^Lroot trees in the root table */
+       Lroot=          10,             /* sizes Fs.v4root[] and Fs.v6root[] */
+
+       Maxpath =       64,
+};
+
+enum   /* NOTE(review): presumably the values stored in Conv.state -- confirm against the users */
+{
+       Idle=           0,
+       Announcing=     1,
+       Announced=      2,
+       Connecting=     3,
+       Connected=      4,
+};
+
+/*
+ *  one per conversation directory
+ */
+struct Conv
+{
+       QLock;                          /* unnamed member: a Plan 9 C extension, not standard C11 */
+
+       int     x;                      /* conversation index */
+       Proto*  p;                      /* presumably the protocol this conversation belongs to -- confirm */
+
+       int     restricted;             /* remote port is restricted */
+       uint    ttl;                    /* max time to live */
+       uint    tos;                    /* type of service */
+       int     ignoreadvice;           /* don't terminate connection on icmp errors */
+
+       uchar   ipversion;              /* presumably V4 or V6 -- confirm */
+       uchar   laddr[IPaddrlen];       /* local IP address */
+       uchar   raddr[IPaddrlen];       /* remote IP address */
+       ushort  lport;                  /* local port number */
+       ushort  rport;                  /* remote port number */
+
+       char    *owner;                 /* protections */
+       int     perm;
+       int     inuse;                  /* opens of listen/data/ctl */
+       int     length;
+       int     state;                  /* presumably Idle..Connected from the enum above -- confirm */
+
+       /* udp specific */
+       int     headers;                /* data src/dst headers in udp */
+       int     reliable;               /* true if reliable udp */
+
+       Conv*   incall;                 /* calls waiting to be listened for */
+       Conv*   next;
+
+       Queue*  rq;                     /* queued data waiting to be read */
+       Queue*  wq;                     /* queued data waiting to be written */
+       Queue*  eq;                     /* returned error packets */
+       Queue*  sq;                     /* snooping queue */
+       Ref     snoopers;               /* number of processes with snoop open */
+
+       Rendez  cr;
+       char    cerr[ERRMAX];
+
+       QLock   listenq;
+       Rendez  listenr;
+
+       Ipmulti *multi;                 /* multicast bindings for this interface */
+
+       void*   ptcl;                   /* protocol specific stuff */
+
+       Route   *r;                     /* last route used */
+       ulong   rgen;                   /* routetable generation for *r */
+};
+
+struct Medium          /* per-medium operations; ethermedium, nullmedium, pktmedium and tripmedium are declared extern later in this header */
+{
+       char    *name;          /* presumably the key ipfindmedium() matches on -- confirm */
+       int     hsize;          /* medium header size */
+       int     mintu;          /* default min mtu */
+       int     maxtu;          /* default max mtu */
+       int     maclen;         /* mac address length  */
+       void    (*bind)(Ipifc*, int, char**);
+       void    (*unbind)(Ipifc*);
+       void    (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip);
+
+       /* for arming interfaces to receive multicast */
+       void    (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+       void    (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+       /* process packets written to 'data' */
+       void    (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
+
+       /* routes for router boards */
+       void    (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+       void    (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
+       void    (*flushroutes)(Ipifc *ifc);
+
+       /* for routing multicast groups */
+       void    (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+       void    (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+       /* address resolution */
+       void    (*ares)(Fs*, int, uchar*, uchar*, int, int);    /* resolve */
+       void    (*areg)(Ipifc*, uchar*);                        /* register */
+
+       /* v6 address generation */
+       void    (*pref2addr)(uchar *pref, uchar *ea);
+
+       int     unbindonclose;  /* if non-zero, unbind on last close */
+};
+
+/* logical interface associated with a physical one */
+struct Iplifc
+{
+       uchar   local[IPaddrlen];
+       uchar   mask[IPaddrlen];
+       uchar   remote[IPaddrlen];
+       uchar   net[IPaddrlen];
+       uchar   tentative;      /* =1 => v6 dup disc on, =0 => confirmed unique */
+       uchar   onlink;         /* =1 => onlink, =0 offlink. */
+       uchar   autoflag;       /* v6 autonomous flag */
+       long    validlt;        /* v6 valid lifetime */
+       long    preflt;         /* v6 preferred lifetime */
+       long    origint;        /* time when addr was added */
+       Iplink  *link;          /* addresses linked to this lifc */
+       Iplifc  *next;          /* presumably the next lifc on the same Ipifc (list head: Ipifc.lifc) -- confirm */
+};
+
+/* binding twixt Ipself and Iplifc */
+struct Iplink
+{
+       Ipself  *self;          /* the Ipself end of the binding */
+       Iplifc  *lifc;          /* the Iplifc end of the binding */
+       Iplink  *selflink;      /* next link for this local address */
+       Iplink  *lifclink;      /* next link for this ifc */
+       ulong   expire;
+       Iplink  *next;          /* free list */
+       int     ref;
+};
+
+/* rfc 2461, pp.40--43. */
+
+/* default values, one per stack */
+struct Routerparams {
+       int     mflag;          /* presumably RFC 2461 AdvManagedFlag -- confirm */
+       int     oflag;          /* presumably AdvOtherConfigFlag -- confirm */
+       int     maxraint;       /* presumably MaxRtrAdvInterval -- confirm */
+       int     minraint;       /* presumably MinRtrAdvInterval -- confirm */
+       int     linkmtu;        /* presumably AdvLinkMTU -- confirm */
+       int     reachtime;      /* presumably AdvReachableTime -- confirm */
+       int     rxmitra;        /* presumably AdvRetransTimer -- confirm */
+       int     ttl;            /* presumably AdvCurHopLimit -- confirm */
+       int     routerlt;       /* presumably AdvDefaultLifetime -- confirm */
+};
+
+struct Hostparams {
+       int     rxmithost;
+};
+
+struct Ipifc
+{
+       RWlock;                 /* unnamed member: a Plan 9 C extension, not standard C11 */
+       
+       Conv    *conv;          /* link to its conversation structure */
+       char    dev[64];        /* device we're attached to */
+       Medium  *m;             /* Media pointer */
+       int     maxtu;          /* Maximum transfer unit */
+       int     mintu;          /* Minimum transfer unit */
+       int     mbps;           /* megabits per second */
+       void    *arg;           /* medium specific */
+       int     reassemble;     /* reassemble IP packets before forwarding */
+
+       /* these are used so that we can unbind on the fly */
+       Lock    idlock;
+       uchar   ifcid;          /* incremented each 'bind/unbind/add/remove' */
+       int     ref;            /* number of proc's using this ipifc */
+       Rendez  wait;           /* where unbinder waits for ref == 0 */
+       int     unbinding;
+
+       uchar   mac[MAClen];    /* MAC address */
+
+       Iplifc  *lifc;          /* logical interfaces on this physical one */
+
+       ulong   in, out;        /* message statistics */
+       ulong   inerr, outerr;  /* ... */
+
+       uchar   sendra6;        /* == 1 => send router advs on this ifc */
+       uchar   recvra6;        /* == 1 => recv router advs on this ifc */
+       Routerparams rp;        /* router parameters as in RFC 2461, pp.40--43.
+                                       used only if node is router */
+};
+
+/*
+ *  one per multicast-lifc pair used by a Conv
+ */
+struct Ipmulti
+{
+       uchar   ma[IPaddrlen];  /* presumably the multicast address -- confirm */
+       uchar   ia[IPaddrlen];  /* presumably the interface address -- confirm */
+       Ipmulti *next;
+};
+
+/*
+ *  hash table for 2 ip addresses + 2 ports
+ */
+enum
+{
+       Nipht=          521,    /* convenient prime; number of entries in Ipht.tab[] */
+
+       IPmatchexact=   0,      /* match on 4 tuple */
+       IPmatchany,             /* *!* */
+       IPmatchport,            /* *!port */
+       IPmatchaddr,            /* addr!* */
+       IPmatchpa,              /* addr!port */
+};
+struct Iphash
+{
+       Iphash  *next;
+       Conv    *c;
+       int     match;          /* presumably one of the IPmatch* values -- confirm */
+};
+struct Ipht
+{
+       Lock;                   /* unnamed member: a Plan 9 C extension, not standard C11 */
+       Iphash  *tab[Nipht];
+};
+void iphtadd(Ipht*, Conv*);
+void iphtrem(Ipht*, Conv*);
+Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
+
+/*
+ *  one per multiplexed protocol
+ */
+struct Proto
+{
+       QLock;                          /* unnamed member: a Plan 9 C extension, not standard C11 */
+       char*           name;           /* protocol name */
+       int             x;              /* protocol index */
+       int             ipproto;        /* ip protocol type */
+
+       char*           (*connect)(Conv*, char**, int);         /* same signature as Fsstdconnect */
+       char*           (*announce)(Conv*, char**, int);        /* same signature as Fsstdannounce */
+       char*           (*bind)(Conv*, char**, int);            /* same signature as Fsstdbind */
+       int             (*state)(Conv*, char*, int);
+       void            (*create)(Conv*);
+       void            (*close)(Conv*);
+       void            (*rcv)(Proto*, Ipifc*, Block*);
+       char*           (*ctl)(Conv*, char**, int);
+       void            (*advise)(Proto*, Block*, char*);
+       int             (*stats)(Proto*, char*, int);
+       int             (*local)(Conv*, char*, int);
+       int             (*remote)(Conv*, char*, int);
+       int             (*inuse)(Conv*);
+       int             (*gc)(Proto*);  /* returns true if any conversations are freed */
+
+       Fs              *f;             /* file system this proto is part of */
+       Conv            **conv;         /* array of conversations */
+       int             ptclsize;       /* size of per protocol ctl block */
+       int             nc;             /* number of conversations */
+       int             ac;
+       Qid             qid;            /* qid for protocol directory */
+       ushort          nextport;
+       ushort          nextrport;
+
+       void            *priv;
+};
+
+/*
+ *  Stream for sending packets to user level
+ */
+struct IProuter {
+       QLock;
+       int     opens;
+       Queue   *q;
+};
+
+/*
+ *  one per IP protocol stack
+ */
+struct Fs
+{
+       RWlock;                         /* unnamed member: a Plan 9 C extension, not standard C11 */
+       int     dev;
+
+       int     np;                     /* presumably the number of p[] slots in use -- confirm */
+       Proto*  p[Maxproto+1];          /* list of supported protocols */
+       Proto*  t2p[256];               /* vector of all protocols; presumably indexed by Proto.ipproto -- confirm */
+       Proto*  ipifc;                  /* kludge for ipifcremroute & ipifcaddroute */
+       Proto*  ipmux;                  /* kludge for finding an ip multiplexor */
+
+       IP      *ip;
+       Ipselftab       *self;
+       Arp     *arp;
+       V6params        *v6p;
+       IProuter iprouter;              /* embedded by value, unlike the pointers around it */
+
+       Route   *v4root[1<<Lroot];      /* v4 routing forest */
+       Route   *v6root[1<<Lroot];      /* v6 routing forest */
+       Route   *queue;                 /* used as temp when reinjecting routes */
+
+       Netlog  *alog;
+       Ifclog  *ilog;
+
+       char    ndb[1024];              /* an ndb entry for this interface */
+       int     ndbvers;
+       long    ndbmtime;
+};
+
+/* one per default router known to host */
+struct V6router {
+       uchar   inuse;
+       Ipifc   *ifc;
+       int     ifcid;          /* NOTE(review): int here, but Ipifc.ifcid is a uchar */
+       uchar   routeraddr[IPaddrlen];
+       long    ltorigin;
+       Routerparams    rp;
+};
+
+struct V6params
+{
+       Routerparams    rp;             /* v6 params, one copy per node now */
+       Hostparams      hp;
+       V6router        v6rlist[3];     /* max 3 default routers, currently */
+       int             cdrouter;       /* uses only v6rlist[cdrouter] if   */ 
+                                       /* cdrouter >= 0. */
+};
+
+
+int    Fsconnected(Conv*, char*);
+Conv*  Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar);
+int    Fspcolstats(char*, int);
+int    Fsproto(Fs*, Proto*);
+int    Fsbuiltinproto(Fs*, uchar);
+Conv*  Fsprotoclone(Proto*, char*);
+Proto* Fsrcvpcol(Fs*, uchar);
+Proto* Fsrcvpcolx(Fs*, uchar);
+char*  Fsstdconnect(Conv*, char**, int);
+char*  Fsstdannounce(Conv*, char**, int);
+char*  Fsstdbind(Conv*, char**, int);
+ulong  scalednconv(void);
+
+/* 
+ *  logging
+ */
+enum   /* one bit per log source; bit 0 is unused. Presumably the int mask passed to netlog() -- confirm */
+{
+       Logip=          1<<1,
+       Logtcp=         1<<2,
+       Logfs=          1<<3,
+       Logil=          1<<4,
+       Logicmp=        1<<5,
+       Logudp=         1<<6,
+       Logcompress=    1<<7,
+       Logilmsg=       1<<8,
+       Loggre=         1<<9,
+       Logppp=         1<<10,
+       Logtcprxmt=     1<<11,
+       Logigmp=        1<<12,
+       Logudpmsg=      1<<13,
+       Logipmsg=       1<<14,
+       Logrudp=        1<<15,
+       Logrudpmsg=     1<<16,
+       Logesp=         1<<17,
+       Logtcpwin=      1<<18,
+};
+
+void   netloginit(Fs*);
+void   netlogopen(Fs*);
+void   netlogclose(Fs*);
+void   netlogctl(Fs*, char*, int);
+long   netlogread(Fs*, void*, ulong, long);
+void   netlog(Fs*, int, char*, ...);
+void   ifcloginit(Fs*);
+long   ifclogread(Fs*, Chan *,void*, ulong, long);
+void   ifclog(Fs*, uchar *, int);
+void   ifclogopen(Fs*, Chan*);
+void   ifclogclose(Fs*, Chan*);
+
+/*
+ *  iproute.c
+ */
+typedef        struct RouteTree RouteTree;
+typedef struct Routewalk Routewalk;
+typedef struct V4route V4route;
+typedef struct V6route V6route;
+
+enum   /* seven flag bits; presumably stored in the uchar RouteTree.type -- confirm */
+{
+
+       /* type bits */
+       Rv4=            (1<<0),         /* this is a version 4 route */
+       Rifc=           (1<<1),         /* this route is a directly connected interface */
+       Rptpt=          (1<<2),         /* this route is a pt to pt interface */
+       Runi=           (1<<3),         /* a unicast self address */
+       Rbcast=         (1<<4),         /* a broadcast self address */
+       Rmulti=         (1<<5),         /* a multicast self address */
+       Rproxy=         (1<<6),         /* this route should be proxied */
+};
+
+struct Routewalk
+{
+       int     o;
+       int     h;
+       char*   p;
+       char*   e;
+       void*   state;
+       void    (*walk)(Route*, Routewalk*);
+};
+
+struct RouteTree
+{
+       Route*  right;
+       Route*  left;
+       Route*  mid;
+       uchar   depth;
+       uchar   type;           /* presumably the R* type bits -- confirm */
+       uchar   ifcid;          /* must match ifc->ifcid */
+       Ipifc   *ifc;
+       char    tag[4];
+       int     ref;
+};
+
+struct V4route
+{
+       ulong   address;
+       ulong   endaddress;
+       uchar   gate[IPv4addrlen];
+};
+
+struct V6route
+{
+       ulong   address[IPllen];        /* NOTE(review): IPllen ulongs == IPaddrlen bytes only when ulong is 32 bits */
+       ulong   endaddress[IPllen];
+       uchar   gate[IPaddrlen];
+};
+
+struct Route
+{
+       RouteTree;              /* unnamed member: a Plan 9 C extension, not standard C11 */
+
+       union {                 /* v4 or v6 payload; presumably selected by the Rv4 type bit -- confirm */
+               V6route v6;
+               V4route v4;
+       };
+};
+extern void    v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void    v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void    v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern void    v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern Route*  v4lookup(Fs *f, uchar *a, Conv *c);
+extern Route*  v6lookup(Fs *f, uchar *a, Conv *c);
+extern long    routeread(Fs *f, char*, ulong, int);
+extern long    routewrite(Fs *f, Chan*, char*, int);
+extern void    routetype(int, char*);
+extern void    ipwalkroutes(Fs*, Routewalk*);
+extern void    convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+
+/*
+ *  devip.c
+ */
+
+/*
+ *  Hanging off every ip channel's ->aux is the following structure.
+ *  It maintains the state used by devip and iproute.
+ */
+struct IPaux
+{
+       char    *owner;         /* the user that did the attach */
+       char    tag[4];
+};
+
+extern IPaux*  newipaux(char*, char*);
+
+/*
+ *  arp.c
+ */
+struct Arpent
+{
+       uchar   ip[IPaddrlen];
+       uchar   mac[MAClen];
+       Medium  *type;                  /* media type */
+       Arpent* hash;
+       Block*  hold;
+       Block*  last;
+       uint    ctime;                  /* time entry was created or refreshed */
+       uint    utime;                  /* time entry was last used */
+       uchar   state;
+       Arpent  *nextrxt;               /* re-transmit chain */
+       uint    rtime;                  /* time for next retransmission */
+       uchar   rxtsrem;
+       Ipifc   *ifc;
+       uchar   ifcid;                  /* must match ifc->ifcid */
+};
+
+extern void    arpinit(Fs*);
+extern int     arpread(Arp*, char*, ulong, int);
+extern int     arpwrite(Fs*, char*, int);
+extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
+extern void    arprelease(Arp*, Arpent *a);
+extern Block*  arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
+extern void    arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+
+/*
+ * ipaux.c
+ */
+
+extern int     myetheraddr(uchar*, char*);
+extern ulong   parseip(uchar*, char*);
+extern ulong   parseipmask(uchar*, char*);
+extern char*   v4parseip(uchar*, char*);
+extern void    maskip(uchar *from, uchar *mask, uchar *to);
+extern int     parsemac(uchar *to, char *from, int len);
+extern uchar*  defmask(uchar*);
+extern int     isv4(uchar*);
+extern void    v4tov6(uchar *v6, uchar *v4);
+extern int     v6tov4(uchar *v4, uchar *v6);
+extern int     eipfmt(Fmt*);
+
+#define        ipmove(x, y) memmove(x, y, IPaddrlen)   /* copy one IPaddrlen-byte address from y to x */
+#define        ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) ) /* nonzero iff the addresses differ; last byte is tested first; x and y are evaluated twice */
+
+extern uchar IPv4bcast[IPaddrlen];
+extern uchar IPv4bcastobs[IPaddrlen];
+extern uchar IPv4allsys[IPaddrlen];
+extern uchar IPv4allrouter[IPaddrlen];
+extern uchar IPnoaddr[IPaddrlen];
+extern uchar v4prefix[IPaddrlen];
+extern uchar IPallbits[IPaddrlen];
+
+#define        NOW     TK2MS(MACHP(0)->ticks)
+
+/*
+ *  media
+ */
+extern Medium  ethermedium;
+extern Medium  nullmedium;
+extern Medium  pktmedium;
+extern Medium  tripmedium;
+
+/*
+ *  ipifc.c
+ */
+extern Medium* ipfindmedium(char *name);
+extern void    addipmedium(Medium *med);
+extern int     ipforme(Fs*, uchar *addr);
+extern int     iptentative(Fs*, uchar *addr);
+extern int     ipisbm(uchar *);
+extern int     ipismulticast(uchar *);         /* NOTE(review): declared again further down this list */
+extern Ipifc*  findipifc(Fs*, uchar *remote, int type);
+extern void    findprimaryip(Fs*, uchar*);
+extern void    findlocalip(Fs*, uchar *local, uchar *remote);
+extern int     ipv4local(Ipifc *ifc, uchar *addr);
+extern int     ipv6local(Ipifc *ifc, uchar *addr);
+extern int     ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip);
+extern int     ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
+extern int     ipismulticast(uchar *ip);       /* NOTE(review): redundant; same prototype is already declared above */
+extern int     ipisbooting(void);
+extern int     ipifccheckin(Ipifc *ifc, Medium *med);
+extern void    ipifccheckout(Ipifc *ifc);
+extern int     ipifcgrab(Ipifc *ifc);
+extern void    ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
+extern void    ipifcremroute(Fs*, int, uchar*, uchar*);
+extern void    ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
+extern void    ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
+extern char*   ipifcrem(Ipifc *ifc, char **argv, int argc);
+extern char*   ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
+extern long    ipselftabread(Fs*, char *a, ulong offset, int n);
+extern char*   ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
+extern void    ipsendra6(Fs *f, int on);
+
+/*
+ *  ip.c
+ */
+extern void    iprouting(Fs*, int);
+extern void    icmpnoconv(Fs*, Block*);
+extern void    icmpcantfrag(Fs*, Block*, int);
+extern void    icmpttlexceeded(Fs*, uchar*, Block*);
+extern ushort  ipcsum(uchar*);
+extern void    ipiput4(Fs*, Ipifc*, Block*);
+extern void    ipiput6(Fs*, Ipifc*, Block*);
+extern int     ipoput4(Fs*, Block*, int, int, int, Conv*);
+extern int     ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int     ipstats(Fs*, char*, int);
+extern ushort  ptclbsum(uchar*, int);
+extern ushort  ptclcsum(Block*, int, int);
+extern void    ip_init(Fs*);
+extern void    update_mtucache(uchar*, ulong);
+extern ulong   restrict_mtu(uchar*, ulong);
+
+/*
+ * bootp.c
+ */
+char*  (*bootp)(Ipifc*);               /* NOTE(review): no extern (unlike igmpreportfn), so every includer gets a tentative definition of this pointer */
+int    (*bootpread)(char*, ulong, int);        /* NOTE(review): same -- relies on the linker merging the duplicate definitions */
+
+/*
+ *  iprouter.c
+ */
+void   useriprouter(Fs*, Ipifc*, Block*);
+void   iprouteropen(Fs*);
+void   iprouterclose(Fs*);
+long   iprouterread(Fs*, void*, int);
+
+/*
+ *  resolving inferno/plan9 differences
+ */
+Chan*          commonfdtochan(int, int, int, int);
+char*          commonuser(void);
+char*          commonerror(void);
+
+/*
+ * chandial.c
+ */
+extern Chan*   chandial(char*, char*, char*, Chan**);
+
+/*
+ *  global to all of the stack
+ */
+extern void    (*igmpreportfn)(Ipifc*, uchar*);
+
+/* IPV6 */
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))     /* the selected argument is evaluated twice; keep side effects out */
+
+/* rfc 3513 defines the address prefixes */
+#define isv6mcast(addr)          ((addr)[0] == 0xff)      /* ff00::/8 */
+#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)       /* fe80::/10 */
+#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)       /* fec0::/10 */
+#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)     /* 2000::/3 */
+
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)        /* np needs a ploadlen member, e.g. Ip6hdr */
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0) /* compares the first 13 bytes; 13 == SOLN_PREF_LEN */
+
+/* from RFC 2460 */
+
+typedef struct Ip6hdr     Ip6hdr;
+typedef struct Opthdr     Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6    Fraghdr6;
+
+struct Ip6hdr {        // members total 4+2+1+1+16+16 = 40 bytes == IPV6HDR_LEN
+       uchar vcf[4];           // version:4, traffic class:8, flow label:20
+       uchar ploadlen[2];      // payload length: packet length - 40
+       uchar proto;            // next header type
+       uchar ttl;              // hop limit
+       uchar src[IPaddrlen];
+       uchar dst[IPaddrlen];
+};
+
+struct Opthdr {
+       uchar nexthdr;
+       uchar len;
+};
+
+struct Routinghdr {
+       uchar nexthdr;
+       uchar len;
+       uchar rtetype;
+       uchar segrem;
+};
+
+struct Fraghdr6 {
+       uchar nexthdr;
+       uchar res;
+       uchar offsetRM[2];      // Offset, Res, M flag
+       uchar id[4];
+};
+
+
+enum {                 /* Header Types */
+       HBH             = 0,    //?
+       ICMP            = 1,
+       IGMP            = 2,
+       GGP             = 3,
+       IPINIP          = 4,
+       ST              = 5,
+       TCP             = 6,
+       UDP             = 17,
+       ISO_TP4         = 29,
+       RH              = 43,   // presumably the routing header (cf. struct Routinghdr) -- confirm
+       FH              = 44,   // presumably the fragment header (cf. struct Fraghdr6) -- confirm
+       IDRP            = 45,
+       RSVP            = 46,
+       AH              = 51,
+       ESP             = 52,   // NOTE(review): IANA assigns 50 to ESP (52 is I-NLSP) -- confirm before relying on this value
+       ICMPv6          = 58,
+       NNH             = 59,
+       DOH             = 60,
+       ISO_IP          = 80,
+       IGRP            = 88,
+       OSPF            = 89,
+
+       Maxhdrtype      = 256,  // one past the largest value a uchar proto field can hold
+};
+
+
+enum {
+       //      multicast flgs and scop
+
+       well_known_flg                          = 0,
+       transient_flg                           = 1,
+
+       node_local_scop                         = 1,
+       link_local_scop                         = 2,
+       site_local_scop                         = 5,
+       org_local_scop                          = 8,
+       global_scop                             = 14,
+
+       //      various prefix lengths
+
+       SOLN_PREF_LEN                           = 13,
+
+       //      icmpv6 unreach codes
+       icmp6_no_route                          = 0,
+       icmp6_ad_prohib                         = 1,
+       icmp6_unassigned                        = 2,
+       icmp6_adr_unreach                       = 3,
+       icmp6_port_unreach                      = 4,
+       icmp6_unkn_code                         = 5,
+
+       //      various flags & constants
+
+       v6MINTU                                 = 1280,
+       HOP_LIMIT                               = 255,
+       ETHERHDR_LEN                            = 14,
+       IPV6HDR_LEN                             = 40,
+       IPV4HDR_LEN                             = 20,
+
+       //      option types
+
+       SRC_LLADDRESS                           = 1,
+       TARGET_LLADDRESS                        = 2,
+       PREFIX_INFO                             = 3,
+       REDIR_HEADER                            = 4,
+       MTU_OPTION                              = 5,
+
+       SRC_UNSPEC                              = 0,
+       SRC_UNI                                 = 1,
+       TARG_UNI                                = 2,
+       TARG_MULTI                              = 3,
+
+       t_unitent                               = 1,
+       t_uniproxy                              = 2,
+       t_unirany                               = 3,
+
+       //      Router constants (all times in milliseconds)
+
+       MAX_INITIAL_RTR_ADVERT_INTERVAL         = 16000,
+       MAX_INITIAL_RTR_ADVERTISEMENTS          = 3,
+       MAX_FINAL_RTR_ADVERTISEMENTS            = 3,
+       MIN_DELAY_BETWEEN_RAS                   = 3000,
+       MAX_RA_DELAY_TIME                       = 500,
+
+       //      Host constants
+
+       MAX_RTR_SOLICITATION_DELAY              = 1000,
+       RTR_SOLICITATION_INTERVAL               = 4000,
+       MAX_RTR_SOLICITATIONS                   = 3,
+
+       //      Node constants
+
+       MAX_MULTICAST_SOLICIT                   = 3,
+       MAX_UNICAST_SOLICIT                     = 3,
+       MAX_ANYCAST_DELAY_TIME                  = 1000,
+       MAX_NEIGHBOR_ADVERTISEMENT              = 3,
+       REACHABLE_TIME                          = 30000,
+       RETRANS_TIMER                           = 1000,
+       DELAY_FIRST_PROBE_TIME                  = 5000,
+
+};
+
+extern void ipv62smcast(uchar *, uchar *);
+extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
+
+extern uchar v6allnodesN[IPaddrlen];
+extern uchar v6allnodesL[IPaddrlen];
+extern uchar v6allroutersN[IPaddrlen];
+extern uchar v6allroutersL[IPaddrlen];
+extern uchar v6allnodesNmask[IPaddrlen];
+extern uchar v6allnodesLmask[IPaddrlen];
+extern uchar v6allroutersS[IPaddrlen];
+extern uchar v6solicitednode[IPaddrlen];
+extern uchar v6solicitednodemask[IPaddrlen];
+extern uchar v6Unspecified[IPaddrlen];
+extern uchar v6loopback[IPaddrlen];
+extern uchar v6loopbackmask[IPaddrlen];
+extern uchar v6linklocal[IPaddrlen];
+extern uchar v6linklocalmask[IPaddrlen];
+extern uchar v6sitelocal[IPaddrlen];
+extern uchar v6sitelocalmask[IPaddrlen];
+extern uchar v6glunicast[IPaddrlen];
+extern uchar v6multicast[IPaddrlen];
+extern uchar v6multicastmask[IPaddrlen];
+
+extern int v6llpreflen;
+extern int v6slpreflen;
+extern int v6lbpreflen;
+extern int v6mcpreflen;
+extern int v6snpreflen;
+extern int v6aNpreflen;
+extern int v6aLpreflen;
+
+extern int ReTransTimer;
diff --git a/kern/include/ns.h b/kern/include/ns.h
new file mode 100644 (file)
index 0000000..545778d
--- /dev/null
@@ -0,0 +1,812 @@
+//INFERNO
+#pragma src "/usr/inferno/lib9"
+#pragma        lib     "libc.a"
+
+/* name of the 9P protocol version */
+#define        VERSION9P       "9P2000"
+
+/* capacity of the Fcall wname[] (Twalk) and wqid[] (Rwalk) arrays */
+#define        MAXWELEM        16
+
+/*
+ * In-memory form of one 9P message.
+ * type, fid and tag head the structure; the remaining fields belong to
+ * particular message types, named in the comment beside each field.
+ * The union/struct grouping is commented out, so every field occupies
+ * its own storage rather than overlapping the others.
+ */
+typedef
+struct Fcall
+{
+       uchar   type;
+       u32int  fid;
+       ushort  tag;
+       /* union { */
+               /* struct { */
+                       u32int  msize;          /* Tversion, Rversion */
+                       char    *version;       /* Tversion, Rversion */
+               /* }; */
+               /* struct { */
+                       ushort  oldtag;         /* Tflush */
+               /* }; */
+               /* struct { */
+                       char    *ename;         /* Rerror */
+               /* }; */
+               /* struct { */
+                       Qid     qid;            /* Rattach, Ropen, Rcreate */
+                       u32int  iounit;         /* Ropen, Rcreate */
+               /* }; */
+               /* struct { */
+                       Qid     aqid;           /* Rauth */
+               /* }; */
+               /* struct { */
+                       u32int  afid;           /* Tauth, Tattach */
+                       char    *uname;         /* Tauth, Tattach */
+                       char    *aname;         /* Tauth, Tattach */
+               /* }; */
+               /* struct { */
+                       u32int  perm;           /* Tcreate */
+                       char    *name;          /* Tcreate */
+                       uchar   mode;           /* Tcreate, Topen */
+               /* }; */
+               /* struct { */
+                       u32int  newfid;         /* Twalk */
+                       ushort  nwname;         /* Twalk */
+                       char    *wname[MAXWELEM];       /* Twalk */
+               /* }; */
+               /* struct { */
+                       ushort  nwqid;          /* Rwalk */
+                       Qid     wqid[MAXWELEM];         /* Rwalk */
+               /* }; */
+               /* struct { */
+                       vlong   offset;         /* Tread, Twrite */
+                       u32int  count;          /* Tread, Twrite, Rread */
+                       char    *data;          /* Twrite, Rread */
+               /* }; */
+               /* struct { */
+                       ushort  nstat;          /* Twstat, Rstat */
+                       uchar   *stat;          /* Twstat, Rstat */
+               /* }; */
+       /* }; */
+} Fcall;
+
+
+/*
+ * Little-endian accessors for packed integers; p is expected to point
+ * at unsigned bytes (uchar*).
+ *
+ * GBITn(p) reads an n-bit value starting at p[0].  GBIT32 and GBIT64
+ * widen every byte to an unsigned type before shifting, so a byte
+ * >= 0x80 in the top position is never shifted into the sign bit of an
+ * int, and no negative vlong is ever shifted.  Result types are as
+ * before: GBIT32 yields u32int, GBIT64 yields vlong.  GBIT64 relies on
+ * uvlong, the unsigned companion of vlong.
+ *
+ * PBITn(p,v) stores the low n bits of v starting at p[0].  Each one is
+ * a single parenthesized comma expression, so `if(c) PBIT16(p, v);`
+ * guards every byte store, not just the first.
+ *
+ * All of them evaluate their arguments more than once: do not pass
+ * expressions with side effects.
+ */
+#define        GBIT8(p)        ((p)[0])
+#define        GBIT16(p)       ((p)[0]|((p)[1]<<8))
+#define        GBIT32(p)       ((u32int)(p)[0]|((u32int)(p)[1]<<8)|((u32int)(p)[2]<<16)|((u32int)(p)[3]<<24))
+#define        GBIT64(p)       ((vlong)((uvlong)GBIT32(p) |\
+                               ((uvlong)GBIT32((p)+4) << 32)))
+
+#define        PBIT8(p,v)      ((p)[0]=(v))
+#define        PBIT16(p,v)     ((p)[0]=(v),(p)[1]=(v)>>8)
+#define        PBIT32(p,v)     ((p)[0]=(v),(p)[1]=(v)>>8,(p)[2]=(v)>>16,(p)[3]=(v)>>24)
+#define        PBIT64(p,v)     ((p)[0]=(v),(p)[1]=(v)>>8,(p)[2]=(v)>>16,(p)[3]=(v)>>24,\
+                       (p)[4]=(v)>>32,(p)[5]=(v)>>40,(p)[6]=(v)>>48,(p)[7]=(v)>>56)
+
+/* sizes in bytes of the packed integer fields handled by GBITn/PBITn */
+#define        BIT8SZ          1
+#define        BIT16SZ         2
+#define        BIT32SZ         4
+#define        BIT64SZ         8
+/* packed size of a Qid: one 8-bit, one 32-bit and one 64-bit field */
+#define        QIDSZ   (BIT8SZ+BIT32SZ+BIT64SZ)
+
+/* STATFIXLEN includes leading 16-bit count */
+/* The count, however, excludes itself; total size is BIT16SZ+count */
+#define STATFIXLEN     (BIT16SZ+QIDSZ+5*BIT16SZ+4*BIT32SZ+1*BIT64SZ)   /* amount of fixed length data in a stat buffer */
+
+/* all-ones values of the tag and fid types, reserved as "none" markers */
+#define        NOTAG           (ushort)~0U     /* Dummy tag */
+#define        NOFID           (u32int)~0U     /* Dummy fid */
+#define        IOHDRSZ         24      /* ample room for Twrite/Rread header (iounit) */
+
+/*
+ * 9P message type codes (Fcall.type).
+ * Each T code is even and its R counterpart is the next odd value,
+ * from Tversion (100) up to Rwstat (127).  Tmax (128) is one past the
+ * last valid code.
+ */
+enum
+{
+       Tversion =      100,
+       Rversion,
+       Tauth = 102,
+       Rauth,
+       Tattach =       104,
+       Rattach,
+       Terror =        106,    /* illegal */
+       Rerror,
+       Tflush =        108,
+       Rflush,
+       Twalk =         110,
+       Rwalk,
+       Topen =         112,
+       Ropen,
+       Tcreate =       114,
+       Rcreate,
+       Tread =         116,
+       Rread,
+       Twrite =        118,
+       Rwrite,
+       Tclunk =        120,
+       Rclunk,
+       Tremove =       122,
+       Rremove,
+       Tstat =         124,
+       Rstat,
+       Twstat =        126,
+       Rwstat,
+       Tmax,
+};
+
+/*
+ * Routines that convert between byte buffers and Fcall/Dir structures,
+ * plus their size helpers.  Only prototypes are visible here.
+ * NOTE(review): by the usual naming, M is the packed (machine-independent)
+ * form, S an Fcall and D a Dir -- confirm against the implementations.
+ */
+uint   convM2S(uchar*, uint, Fcall*);
+uint   convS2M(Fcall*, uchar*, uint);
+uint   sizeS2M(Fcall*);
+
+int    statcheck(uchar *abuf, uint nbuf);
+uint   convM2D(uchar*, uint, Dir*, char*);
+uint   convD2M(Dir*, uchar*, uint);
+uint   sizeD2M(Dir*);
+
+/* print-format routines; each takes the Fmt state as its only argument */
+int    fcallfmt(Fmt*);
+int    dirfmt(Fmt*);
+int    dirmodefmt(Fmt*);
+
+int    read9pmsg(int, void*, uint);
+
+/* argument types the compiler's format checker should expect for these verbs */
+#pragma        varargck        type    "F"     Fcall*
+#pragma        varargck        type    "M"     ulong
+#pragma        varargck        type    "D"     Dir*
+
+/*
+ * Forward typedefs, so the structures below (and code that includes
+ * this header) can refer to each type by its bare name.
+ */
+typedef struct Alarms  Alarms;
+typedef struct Block   Block;
+typedef struct Bkpt Bkpt;
+typedef struct BkptCond BkptCond;
+typedef struct Chan    Chan;
+typedef struct Cmdbuf  Cmdbuf;
+typedef struct Cmdtab  Cmdtab;
+typedef struct Cname   Cname;
+typedef struct Crypt   Crypt;
+typedef struct Dev     Dev;
+typedef struct DevConf DevConf;
+typedef struct Dirtab  Dirtab;
+typedef struct Edf     Edf;
+typedef struct Egrp    Egrp;
+typedef struct Evalue  Evalue;
+typedef struct Fgrp    Fgrp;
+typedef struct List    List;
+typedef struct Log     Log;
+typedef struct Logflag Logflag;
+typedef struct Mntcache Mntcache;
+typedef struct Mntparam Mntparam;
+typedef struct Mount   Mount;
+typedef struct Mntrpc  Mntrpc;
+typedef struct Mntwalk Mntwalk;
+typedef struct Mnt     Mnt;
+typedef struct Mhead   Mhead;
+typedef struct Osenv   Osenv;
+typedef struct Pgrp    Pgrp;
+typedef struct Proc    Proc;
+typedef struct QLock   QLock;
+typedef struct Queue   Queue;
+typedef struct Ref     Ref;
+typedef struct Rendez  Rendez;
+typedef struct Rept    Rept;
+typedef struct Rootdata        Rootdata;
+typedef struct RWlock  RWlock;
+typedef struct Signerkey Signerkey;
+typedef struct Skeyset Skeyset;
+typedef struct Talarm  Talarm;
+typedef struct Timer   Timer;
+typedef struct Timers  Timers;
+typedef struct Uart    Uart;
+typedef struct Walkqid Walkqid;
+/* function type (not a pointer type) of a directory-entry generator */
+typedef int    Devgen(Chan*, char*, Dirtab*, int, int, Dir*);
+
+#pragma incomplete DevConf
+#pragma incomplete Edf
+#pragma incomplete Mntcache
+#pragma incomplete Mntrpc
+#pragma incomplete Queue
+#pragma incomplete Timers
+
+#include "fcall.h"
+#include <pool.h>
+
+/*
+ * Reference count with its own lock.  incref() and decref() in chan.c
+ * take l around every update of ref.
+ */
+struct Ref
+{
+       Lock    l;
+       long    ref;
+};
+
+/*
+ * Rendezvous point.  The Lock is an unnamed member, so its fields are
+ * reached directly through the Rendez.
+ */
+struct Rendez
+{
+       Lock;
+       Proc    *p;     /* NOTE(review): presumably the process sleeping here -- confirm */
+};
+
+/*
+ * NOTE(review): nothing in the visible code uses Rept.  The field
+ * meanings below are only what the declarations and the one original
+ * comment show.
+ */
+struct Rept
+{
+       Lock    l;
+       Rendez  r;
+       void    *o;                     /* opaque argument; same type the three callbacks take */
+       int     t;
+       int     (*active)(void*);
+       int     (*ck)(void*, int);
+       void    (*f)(void*);    /* called with VM acquire()'d */
+};
+
+/*
+ * Per-process environment: error strings, resource groups and identity.
+ * A Proc carries both a pointer to one (env) and an embedded default
+ * (defenv).
+ */
+struct Osenv
+{
+       char    *syserrstr;     /* last error from a system call, errbuf0 or 1 */
+       char    *errstr;        /* reason we're unwinding the error stack, errbuf1 or 0 */
+       char    errbuf0[ERRMAX];
+       char    errbuf1[ERRMAX];
+       Pgrp*   pgrp;           /* Ref to namespace, working dir and root */
+       Fgrp*   fgrp;           /* Ref to file descriptors */
+       Egrp*   egrp;   /* Environment vars */
+       Skeyset*        sigs;           /* Signed module keys */
+       Rendez* rend;           /* Synchro point */
+       Queue*  waitq;          /* Info about dead children */
+       Queue*  childq;         /* Info about children for debuggers */
+       void*   debug;          /* Debugging master */
+       int     uid;            /* Numeric user id for system */
+       int     gid;            /* Numeric group id for system */
+       char*   user;           /* Inferno user name */
+       FPenv   fpu;            /* Floating point thread state */
+};
+
+/* NOTE(review): presumably the "not pinned" value for Pgrp.pin -- confirm at its uses */
+enum
+{
+       Nopin = -1
+};
+
+/*
+ * Queueing lock: `use` protects the structure itself, and head/tail
+ * delimit the list of processes waiting for it.
+ */
+struct QLock
+{
+       Lock    use;                    /* to access Qlock structure */
+       Proc    *head;                  /* next process waiting for object */
+       Proc    *tail;                  /* last process waiting for object */
+       int     locked;                 /* flag */
+};
+
+/*
+ * Reader/writer lock built from an unnamed Lock, two QLocks and a
+ * count of current readers.
+ */
+struct RWlock
+{
+       Lock;                           /* Lock modify lock */
+       QLock   x;                      /* Mutual exclusion lock */
+       QLock   k;                      /* Lock for waiting writers */
+       int     readers;                /* Count of readers in lock */
+};
+
+/* Lock-protected list of Procs; the global `talarm` declared below has this type */
+struct Talarm
+{
+       Lock;
+       Proc*   list;
+};
+
+/* QLock-protected list of Procs, starting at head */
+struct Alarms
+{
+       QLock;
+       Proc*   head;
+};
+
+/*
+ * NOTE(review): not used in the visible code.  By its name this is
+ * presumably the per-entry data of the root device -- confirm in devroot.
+ */
+struct Rootdata
+{
+       int     dotdot;
+       void    *ptr;
+       int     size;
+       int     *sizep;
+};
+
+/*
+ * Access types in namec & channel flags
+ *
+ * The A values count up from 0 (Aaccess) to 6 (Aremove).
+ * The C values are bit flags; chan.c tests and stores them in Chan.flag.
+ */
+enum
+{
+       Aaccess,                        /* as in stat, wstat */
+       Abind,                  /* for left-hand-side of bind */
+       Atodir,                         /* as in chdir */
+       Aopen,                          /* for i/o */
+       Amount,                         /* to be mounted or mounted upon */
+       Acreate,                        /* is to be created */
+       Aremove,                        /* will be removed by caller */
+
+       COPEN   = 0x0001,               /* for i/o */
+       CMSG    = 0x0002,               /* the message channel for a mount */
+       CCEXEC  = 0x0008,               /* close on exec */
+       CFREE   = 0x0010,               /* not in use */
+       CRCLOSE = 0x0020,               /* remove on close */
+       CCACHE  = 0x0080,               /* client cache */
+};
+
+/* bit flags; NOTE(review): presumably the values of Block.flag below -- confirm */
+enum
+{
+       BINTR           =       (1<<0),
+       BFREE           =       (1<<1),
+       Bipck   =       (1<<2),         /* ip checksum */
+       Budpck  =       (1<<3),         /* udp checksum */
+       Btcpck  =       (1<<4),         /* tcp checksum */
+       Bpktck  =       (1<<5),         /* packet checksum */
+};
+
+/*
+ * A data buffer: base..lim bounds the storage, and rp..wp the bytes
+ * written but not yet consumed.
+ */
+struct Block
+{
+       Block*  next;
+       Block*  list;
+       uchar*  rp;                     /* first unconsumed byte */
+       uchar*  wp;                     /* first empty byte */
+       uchar*  lim;                    /* 1 past the end of the buffer */
+       uchar*  base;                   /* start of the buffer */
+       void    (*free)(Block*);
+       ushort  flag;
+       ushort  checksum;               /* IP checksum of complete packet (minus media header) */
+};
+/* BLEN: number of unconsumed bytes; BALLOC: total size of the buffer */
+#define BLEN(s)        ((s)->wp - (s)->rp)
+#define BALLOC(s) ((s)->lim - (s)->base)
+
+/*
+ * A channel.
+ * Lock and Ref are unnamed members, so code writes c->ref and passes a
+ * Chan* straight to incref/decref (see newchan, cclose, newmhead in
+ * chan.c).  newchan() links every Chan it allocates on chanalloc.list
+ * through `link`; chanfree() and newchan() chain free ones through `next`.
+ */
+struct Chan
+{
+       Lock;
+       Ref;
+       Chan*   next;                   /* allocation */
+       Chan*   link;
+       vlong   offset;                 /* in file */
+       ushort  type;                   /* index into devtab[] (cclose calls devtab[c->type]->close) */
+       ulong   dev;
+       ushort  mode;                   /* read/write */
+       ushort  flag;                   /* C* bits, e.g. CFREE */
+       Qid     qid;
+       int     fid;                    /* for devmnt */
+       ulong   iounit; /* chunk size for i/o; 0==default */
+       Mhead*  umh;                    /* mount point that derived Chan; used in unionread */
+       Chan*   umc;                    /* channel in union; held for union read */
+       QLock   umqlock;                /* serialize unionreads */
+       int     uri;                    /* union read index */
+       int     dri;                    /* devdirread index */
+       ulong   mountid;
+       Mntcache *mcp;                  /* Mount cache pointer */
+       Mnt             *mux;           /* Mnt for clients using me for messages */
+       union {
+               void*   aux;
+               char    tag[4];         /* for iproute */
+       };
+       Chan*   mchan;                  /* channel to mounted server */
+       Qid     mqid;                   /* qid of root of mount point */
+       Cname   *name;
+};
+
+/*
+ * Reference-counted name string attached to a Chan.
+ * Created by newcname() and extended by addelem() in chan.c.
+ */
+struct Cname
+{
+       Ref;
+       int     alen;                   /* allocated length */
+       int     len;                    /* strlen(s) */
+       char    *s;
+};
+
+/*
+ * Device driver interface: one entry per driver in devtab[].
+ * chan.c calls reset, init and shutdown on every entry, and calls
+ * close through devtab[c->type].
+ */
+struct Dev
+{
+       int     dc;
+       char*   name;
+
+       void    (*reset)(void);
+       void    (*init)(void);
+       void    (*shutdown)(void);
+       Chan*   (*attach)(char*);
+       Walkqid*        (*walk)(Chan*, Chan*, char**, int);
+       int     (*stat)(Chan*, uchar*, int);
+       Chan*   (*open)(Chan*, int);
+       void    (*create)(Chan*, char*, int, ulong);
+       void    (*close)(Chan*);
+       long    (*read)(Chan*, void*, long, vlong);
+       Block*  (*bread)(Chan*, long, ulong);
+       long    (*write)(Chan*, void*, long, vlong);
+       long    (*bwrite)(Chan*, Block*, ulong);
+       void    (*remove)(Chan*);
+       int     (*wstat)(Chan*, uchar*, int);
+       void    (*power)(int);  /* power mgt: power(1) → on, power (0) → off */
+       int     (*config)(int, char*, DevConf*);
+};
+
+/* One directory-table entry; a Devgen is handed a pointer to these */
+struct Dirtab
+{
+       char    name[KNAMELEN];
+       Qid     qid;
+       vlong   length;
+       long    perm;
+};
+
+/*
+ * Result type of Dev.walk.
+ * NOTE(review): qid[1] looks like the usual variable-length tail holding
+ * nqid entries -- confirm at the allocation sites before changing its size.
+ */
+struct Walkqid
+{
+       Chan    *clone;
+       int     nqid;
+       Qid     qid[1];
+};
+
+/* NSCACHE is 2^NSLOG (128); none of these is used in the visible code */
+enum
+{
+       NSMAX   =       1000,
+       NSLOG   =       7,
+       NSCACHE =       (1<<NSLOG),
+};
+
+/* Cursor over the mount table: a mount id plus the current Mhead and Mount */
+struct Mntwalk                         /* state for /proc/#/ns */
+{
+       int             cddone;
+       ulong   id;
+       Mhead*  mh;
+       Mount*  cm;
+};
+
+/* One mounted channel; Mhead.mount points at a chain of these */
+struct Mount
+{
+       ulong   mountid;
+       Mount*  next;
+       Mhead*  head;
+       Mount*  copy;
+       Mount*  order;
+       Chan*   to;                     /* channel replacing channel */
+       int     mflag;
+       char    *spec;
+};
+
+/*
+ * Head of the mounts on one channel.  newmhead() in chan.c creates it
+ * with ref 1 and takes a reference on `from`.
+ */
+struct Mhead
+{
+       Ref;
+       RWlock  lock;
+       Chan*   from;                   /* channel mounted upon */
+       Mount*  mount;                  /* what's mounted upon it */
+       Mhead*  hash;                   /* Hash chain */
+};
+
+/* State of one mounted file-service connection (reached through Chan.mux) */
+struct Mnt
+{
+       Lock;
+       /* references are counted using c->ref; channels on this mount point incref(c->mchan) == Mnt.c */
+       Chan    *c;             /* Channel to file service */
+       Proc    *rip;           /* Reader in progress */
+       Mntrpc  *queue;         /* Queue of pending requests on this channel */
+       ulong   id;             /* Multiplexer id for channel check */
+       Mnt     *list;          /* Free list */
+       int     flags;          /* cache */
+       int     msize;          /* data + IOHDRSZ */
+       char    *version;                       /* 9P version */
+       Queue   *q;             /* input queue */
+};
+
+/* Hash-table sizes (each a power of two, 32) and per-process limits */
+enum
+{
+       RENDLOG =       5,
+       RENDHASH =      1<<RENDLOG,             /* Hash to lookup rendezvous tags */
+       MNTLOG  =       5,
+       MNTHASH =       1<<MNTLOG,              /* Hash to walk mount table */
+       DELTAFD=                20,             /* allocation quantum for process file descriptors */
+       MAXNFD =                4000,           /* max per process file descriptors */
+       MAXKEY =                8,      /* keys for signed modules */
+};
+/* bucket of p->mnthash[] selected by the low MNTLOG bits of qid.path */
+#define MOUNTH(p,qid)  ((p)->mnthash[(qid).path&((1<<MNTLOG)-1)])
+
+/* NOTE(review): presumably the argument bundle handed to the mount driver -- confirm in devmnt */
+struct Mntparam {
+       Chan*   chan;
+       Chan*   authchan;
+       char*   spec;
+       int     flags;
+};
+
+/*
+ * Process group: the name space (mount hash table, root and current
+ * directory) shared through Osenv.pgrp.
+ */
+struct Pgrp
+{
+       Ref;                            /* also used as a lock when mounting */
+       ulong   pgrpid;
+       QLock   debug;                  /* single access via devproc.c */
+       RWlock  ns;                     /* Namespace n read/one write lock */
+       QLock   nsh;
+       Mhead*  mnthash[MNTHASH];       /* indexed through MOUNTH() */
+       int     progmode;
+       Chan*   dot;
+       Chan*   slash;
+       int     nodevs;
+       int     pin;
+};
+
+/* File descriptor group: a table of Chan pointers, shared through Osenv.fgrp */
+struct Fgrp
+{
+       Lock;
+       Ref;
+       Chan**  fd;
+       int     nfd;                    /* number of fd slots */
+       int     maxfd;                  /* highest fd in use */
+       int     minfd;                  /* lower bound on free fd */
+};
+
+/* One environment variable, linked into its Egrp through next */
+struct Evalue
+{
+       char    *var;
+       char    *val;
+       int     len;
+       Qid     qid;
+       Evalue  *next;
+};
+
+/* Environment group: a reference-counted, QLock-protected list of Evalues */
+struct Egrp
+{
+       Ref;
+       QLock;
+       Evalue  *entries;
+       ulong   path;   /* qid.path of next Evalue to be allocated */
+       ulong   vers;   /* of Egrp */
+};
+
+/* A reference-counted signer key; pk is opaque and comes with its own pkfree hook */
+struct Signerkey
+{
+       Ref;
+       char*   owner;
+       ushort  footprint;
+       ulong   expires;
+       void*   alg;
+       void*   pk;
+       void    (*pkfree)(void*);
+};
+
+/* Set of up to MAXKEY signer keys (Osenv.sigs) */
+struct Skeyset
+{
+       Ref;
+       QLock;
+       ulong   flags;
+       char*   devs;
+       int     nkey;
+       Signerkey       *keys[MAXKEY];
+};
+
+/*
+ * fasttick timer interrupts
+ */
+enum {
+       /* Mode */
+       Trelative,      /* timer programmed in ns from now */
+       Tabsolute,      /* timer programmed in ns since epoch */
+       Tperiodic,      /* periodic timer, period in ns */
+};
+
+/*
+ * One timer.  The first four fields are the public interface; the
+ * fields after the "Internal" marker are not.
+ */
+struct Timer
+{
+       /* Public interface */
+       int     tmode;          /* See above */
+       vlong   tns;            /* meaning defined by mode */
+       void    (*tf)(Ureg*, Timer*);
+       void    *ta;
+       /* Internal */
+       Lock;
+       Timers  *tt;            /* Timers queue this timer runs on */
+       vlong   twhen;          /* ns represented in fastticks */
+       Timer   *tnext;
+};
+
+/*
+ * Several unrelated constant groups share this one enum.  Each group
+ * restarts its numbering, so names from different groups have equal
+ * values (Dead == Unknown == PriLock == 0): do not compare across groups.
+ */
+enum
+{
+       Dead = 0,               /* Process states */
+       Moribund,
+       Ready,
+       Scheding,
+       Running,
+       Queueing,
+       Wakeme,
+       Broken,
+       Stopped,
+       Rendezvous,
+       Waitrelease,
+
+       Proc_stopme = 1,        /* devproc requests */
+       Proc_exitme,
+       Proc_traceme,
+       Proc_exitbig,
+
+       NERR            = 30,   /* size of Proc.errlab[] */
+
+       Unknown         = 0,    /* NOTE(review): this group's use is not visible here */
+       IdleGC,
+       Interp,
+       BusyGC,
+
+       PriLock         = 0,    /* Holding Spin lock */
+       PriEdf, /* active edf processes */
+       PriRelease,     /* released edf processes */
+       PriRealtime,            /* Video telephony */
+       PriHicodec,             /* MPEG codec */
+       PriLocodec,             /* Audio codec */
+       PriHi,                  /* Important task */
+       PriNormal,
+       PriLo,
+       PriBackground,
+       PriExtra,       /* edf processes we don't care about */
+       Nrq                     /* one past the last priority (11) */
+};
+
+/*
+ * Per-process kernel state.
+ * The first two fields are marked "known to l.s": assembly code depends
+ * on their position, so do not reorder them.
+ */
+struct Proc
+{
+       Label           sched;          /* known to l.s */
+       char*           kstack;         /* known to l.s */
+       Mach*           mach;           /* machine running this proc */
+       char            text[KNAMELEN];
+       Proc*           rnext;          /* next process in run queue */
+       Proc*           qnext;          /* next process on queue for a QLock */
+       QLock*          qlock;          /* addrof qlock being queued for DEBUG */
+       int             state;
+       int             type;
+       void*           prog;           /* Dummy Prog for interp release */
+       void*           iprog;
+       Osenv*          env;
+       Osenv           defenv;
+       int             swipend;        /* software interrupt pending for Prog */
+       Lock            sysio;          /* note handler lock */
+       char*           psstate;        /* What /proc/#/status reports */
+       ulong           pid;
+       int             fpstate;
+       int             procctl;        /* Control for /proc debugging */
+       ulong           pc;             /* DEBUG only */
+       Lock    rlock;  /* sync between sleep/swiproc for r */
+       Rendez*         r;              /* rendezvous point slept on */
+       Rendez          sleep;          /* place for syssleep/debug */
+       int             killed;         /* by swiproc */
+       int             kp;             /* true if a kernel process */
+       ulong           alarm;          /* Time of call */
+       int             pri;            /* scheduler priority */
+       ulong           twhen;
+       Rendez*         trend;
+       Proc*           tlink;
+       int             (*tfn)(void*);
+       void            (*kpfun)(void*);
+       void*           arg;
+       FPU             fpsave;
+       int             scallnr;
+       int             nerrlab;
+       Label           errlab[NERR];
+       char    genbuf[128];    /* buffer used e.g. for last name element from namec */
+       Mach*           mp;             /* machine this process last ran on */
+       Mach*           wired;
+       ulong           movetime;       /* next time process should switch processors */
+       ulong           delaysched;
+       int                     preempted;      /* process yielding in interrupt */
+       ulong           qpc;            /* last call that blocked in qlock */
+       void*           dbgreg;         /* User registers for devproc */
+       int             dbgstop;                /* don't run this kproc */
+       Edf*    edf;    /* if non-null, real-time proc, edf contains scheduling params */
+};
+
+enum
+{
+       /* kproc flags */
+       KPDUPPG         = (1<<0),
+       KPDUPFDG        = (1<<1),
+       KPDUPENVG       = (1<<2),
+       KPDUP = KPDUPPG | KPDUPFDG | KPDUPENVG  /* all three bits */
+};
+
+/* NOTE(review): use not visible here; the names suggest breakpoint scheduling modes */
+enum {
+       BrkSched,
+       BrkNoSched,
+};
+
+/* One condition in the list hanging off Bkpt.conditions */
+struct BkptCond
+{
+       uchar op;
+       ulong val;
+       BkptCond *next;
+};
+
+/* A breakpoint: an address, its conditions and a handler that receives the Bkpt */
+struct Bkpt
+{
+       int id;
+       ulong addr;
+       BkptCond *conditions;
+       Instr instr;
+       void (*handler)(Bkpt*);
+       void *aux;
+       Bkpt *next;
+       Bkpt *link;
+};
+
+/* miscellaneous sizes */
+enum
+{
+       PRINTSIZE =     256,
+       NUMSIZE =       12,             /* size of formatted number */
+       MB =            (1024*1024),
+       READSTR =       1000,           /* temporary buffer size for device reads */
+};
+
+/* Kernel-wide globals, defined elsewhere */
+extern Conf    conf;
+extern char*   conffile;
+extern int     consoleprint;
+extern Dev*    devtab[];       /* nil-terminated: chan.c walks it until devtab[i] == nil */
+extern char*   eve;
+extern int     hwcurs;
+extern FPU     initfp;
+extern  Queue  *kbdq;
+extern  Queue  *kscanq;
+extern  Ref    noteidalloc;
+extern  Queue  *printq;
+extern uint    qiomaxatomic;
+extern char*   statename[];
+extern char*   sysname;
+extern Talarm  talarm;
+
+/*
+ *  action log
+ *
+ *  The Lock is an unnamed member; readq and readr are separate
+ *  synchronization objects.
+ */
+struct Log {
+       Lock;
+       int     opens;
+       char*   buf;
+       char    *end;
+       char    *rptr;
+       int     len;
+       int     nlog;
+       int     minread;
+
+       int     logmask;        /* mask of things to debug */
+
+       QLock   readq;
+       Rendez  readr;
+};
+
+/* Pairs a flag name with its bit mask (compare Log.logmask) */
+struct Logflag {
+       char*   name;
+       int     mask;
+};
+
+/* A command split into fields: f[0..nf-1]; NOTE(review): presumably pointing into buf -- confirm */
+struct Cmdbuf
+{
+       char    *buf;
+       char    **f;
+       int     nf;
+};
+
+/* One entry of a command table */
+struct Cmdtab
+{
+       int     index;  /* used by client to switch on result */
+       char    *cmd;   /* command name */
+       int     narg;   /* expected #args; 0 ==> variadic */
+};
+
+enum
+{
+       MAXPOOL         = 8,
+};
+
+/* allocation pools, defined elsewhere (Pool comes from <pool.h>) */
+extern Pool*   mainmem;
+extern Pool*   heapmem;
+extern Pool*   imagmem;
+
+/* queue state bits,  Qmsg, Qcoalesce, and Qkick can be set in qopen */
+enum
+{
+       /* Queue.state */
+       Qstarve         = (1<<0),       /* consumer starved */
+       Qmsg            = (1<<1),       /* message stream */
+       Qclosed         = (1<<2),       /* queue has been closed/hungup */
+       Qflow           = (1<<3),       /* producer flow controlled */
+       Qcoalesce       = (1<<4),       /* coalesce packets on read */
+       Qkick           = (1<<5),       /* always call the kick routine after qwrite */
+};
+
+/* NOTE(review): presumably the index given to a Devgen to ask for the parent directory ("..") -- confirm */
+#define DEVDOTDOT -1
+
+#pragma        varargck        argpos  print   1
+#pragma        varargck        argpos  snprint 3
+#pragma        varargck        argpos  seprint 3
+#pragma        varargck        argpos  sprint  2
+#pragma        varargck        argpos  fprint  2
+#pragma        varargck        argpos  iprint  1
+#pragma        varargck        argpos  panic   1
+#pragma        varargck        argpos  kwerrstr        1
+#pragma        varargck        argpos  kprint  1
+
+#pragma        varargck        type    "lld"   vlong
+#pragma        varargck        type    "llx"   vlong
+#pragma        varargck        type    "lld"   uvlong
+#pragma        varargck        type    "llx"   uvlong
+#pragma        varargck        type    "lx"    void*
+#pragma        varargck        type    "ld"    long
+#pragma        varargck        type    "lx"    long
+#pragma        varargck        type    "ld"    ulong
+#pragma        varargck        type    "lx"    ulong
+#pragma        varargck        type    "d"     int
+#pragma        varargck        type    "x"     int
+#pragma        varargck        type    "c"     int
+#pragma        varargck        type    "C"     int
+#pragma        varargck        type    "d"     uint
+#pragma        varargck        type    "x"     uint
+#pragma        varargck        type    "c"     uint
+#pragma        varargck        type    "C"     uint
+#pragma        varargck        type    "f"     double
+#pragma        varargck        type    "e"     double
+#pragma        varargck        type    "g"     double
+#pragma        varargck        type    "s"     char*
+#pragma        varargck        type    "S"     Rune*
+#pragma        varargck        type    "r"     void
+#pragma        varargck        type    "%"     void
+#pragma        varargck        type    "I"     uchar*
+#pragma        varargck        type    "V"     uchar*
+#pragma        varargck        type    "E"     uchar*
+#pragma        varargck        type    "M"     uchar*
+#pragma        varargck        type    "p"     void*
+#pragma        varargck        type    "q"     char*
diff --git a/kern/src/ns/chan.c b/kern/src/ns/chan.c
new file mode 100644 (file)
index 0000000..c3a56b0
--- /dev/null
@@ -0,0 +1,1431 @@
+#include       "u.h"
+#include       "../port/lib.h"
+#include       "mem.h"
+#include       "dat.h"
+#include       "fns.h"
+#include       "../port/error.h"
+
+/*
+ * Debugging aid: return a printable name for c.
+ * The result is c->name->s, or a fixed placeholder naming the first
+ * nil link found along c, c->name, c->name->s.
+ */
+char*
+channame(Chan *c)              /* DEBUGGING */
+{
+       Cname *cn;
+
+       if(c == nil)
+               return "<nil chan>";
+       cn = c->name;
+       if(cn == nil)
+               return "<nil name>";
+       return cn->s != nil ? cn->s : "<nil name.s>";
+}
+
+/* spare bytes added whenever newcname() or addelem() sizes a Cname buffer */
+enum
+{
+       CNAMESLOP       = 20
+};
+
+/*
+ * Chan allocator state, guarded by its unnamed Lock.
+ * newchan() takes Chans from `free` and chanfree() returns them there.
+ */
+struct
+{
+       Lock;
+       int     fid;            /* last fid handed out; newchan() pre-increments it */
+       Chan    *free;          /* free list, chained through Chan.next */
+       Chan    *list;          /* every Chan ever allocated, chained through Chan.link */
+}chanalloc;
+
+typedef struct Elemlist Elemlist;
+
+/*
+ * A path name broken into elements.
+ * NOTE(review): the code that fills this in lies beyond the visible part
+ * of the file; apart from the one original comment, field meanings are
+ * inferred from their names only.
+ */
+struct Elemlist
+{
+       char    *name;  /* copy of name, so '/' can be overwritten */
+       int     nelems;
+       char    **elems;
+       int     *off;
+       int     mustbedir;
+};
+
+/* true if c ends a path element: the terminating NUL or a slash */
+#define SEP(c) ((c) == 0 || (c) == '/')
+/* defined later in this file; addelem() calls it after appending ".." */
+void cleancname(Cname*);
+
+/*
+ * Return 1 if p is exactly the string "..", 0 otherwise.
+ * Bytes are examined left to right, so nothing past p's terminator is read.
+ */
+int
+isdotdot(char *p)
+{
+       if(p[0] != '.')
+               return 0;
+       if(p[1] != '.')
+               return 0;
+       return p[2] == '\0';
+}
+
+/*
+ * Add one reference to r under r->l and return the new count.
+ */
+int
+incref(Ref *r)
+{
+       int count;
+
+       lock(&r->l);
+       r->ref++;
+       count = r->ref;
+       unlock(&r->l);
+       return count;
+}
+
+/*
+ * Drop one reference from r under r->l and return the new count.
+ * A count below zero means a reference was released twice; that is
+ * reported with panic(), outside the lock, with the caller's pc.
+ */
+int
+decref(Ref *r)
+{
+       int left;
+
+       lock(&r->l);
+       r->ref--;
+       left = r->ref;
+       unlock(&r->l);
+
+       if(left < 0)
+               panic("decref, pc=0x%lux", getcallerpc(&r));
+       return left;
+}
+
+/*
+ * Copy t into the ns-byte buffer s.
+ * Rather than strncpy, which zeros the rest of the buffer, kstrcpy
+ * truncates if necessary, always zero terminates (whenever ns > 0),
+ * does not zero fill, and puts ... at the end of the string if it's
+ * too long.  Usually used to save a string in up->genbuf;
+ *
+ *     s       destination buffer
+ *     t       zero-terminated source string
+ *     ns      size of s in bytes; nothing is written when ns <= 0
+ *
+ * With fewer than 4 bytes of room there is no space for "...": the
+ * string is cut at ns-1 bytes (possibly inside a UTF-8 sequence) and
+ * terminated.
+ */
+void
+kstrcpy(char *s, char *t, int ns)
+{
+       int nt;
+
+       if(ns <= 0)
+               return;         /* no room even for the terminator */
+       nt = strlen(t);
+       if(nt+1 <= ns){
+               memmove(s, t, nt+1);
+               return;
+       }
+       /* too long */
+       if(ns < 4){
+               /* but very short!  strncpy here would leave s unterminated */
+               memmove(s, t, ns-1);
+               s[ns-1] = '\0';
+               return;
+       }
+       /* truncate with ... at character boundary (very rare case) */
+       memmove(s, t, ns-4);
+       ns -= 4;
+       s[ns] = '\0';
+       /* look for first byte of UTF-8 sequence by skipping continuation bytes */
+       while(ns>0 && (s[--ns]&0xC0)==0x80)
+               ;
+       strcpy(s+ns, "...");
+}
+
+/*
+ * Return 1 if s is nil or has no characters, 0 otherwise.
+ */
+int
+emptystr(char *s)
+{
+       return s == nil || s[0] == '\0';
+}
+
+/*
+ * Replace *p with a freshly allocated copy of s.
+ * The new pointer is stored in *p before the previous string is freed,
+ * so *p never refers to freed memory.
+ */
+void
+kstrdup(char **p, char *s)
+{
+       int len;
+       char *copy, *old;
+
+       len = strlen(s)+1;
+       if(!up){
+               /* no user process to wait for memory on: failure means something's very wrong */
+               copy = malloc(len);
+               if(copy == nil)
+                       panic("kstrdup: no memory");
+       }else{
+               /* for a user, we can wait for memory */
+               copy = smalloc(len);
+               setmalloctag(copy, getcallerpc(&p));
+       }
+       memmove(copy, s, len);
+
+       old = *p;
+       *p = copy;
+       free(old);
+}
+
+/*
+ * Call the reset routine of every device in devtab[], in table order.
+ */
+void
+chandevreset(void)
+{
+       Dev **dp;
+
+       for(dp = devtab; *dp != nil; dp++)
+               (*dp)->reset();
+}
+
+/*
+ * Call the init routine of every device in devtab[], in table order.
+ */
+void
+chandevinit(void)
+{
+       Dev **dp;
+
+       for(dp = devtab; *dp != nil; dp++)
+               (*dp)->init();
+}
+
+/*
+ * Call the shutdown routine of every device in devtab[], last entry first.
+ */
+void
+chandevshutdown(void)
+{
+       int n;
+
+       /* count the entries up to the nil terminator */
+       n = 0;
+       while(devtab[n] != nil)
+               n++;
+
+       /* shutdown in reverse order */
+       while(n-- > 0)
+               devtab[n]->shutdown();
+}
+
+/*
+ * Return a Chan holding one reference and with its bookkeeping fields
+ * cleared.  A Chan from the chanalloc free list is reused when there is
+ * one.  Otherwise a new one is allocated, given the next fid and linked
+ * onto chanalloc.list.
+ */
+Chan*
+newchan(void)
+{
+       Chan *c;
+
+       /* try the free list first */
+       lock(&chanalloc);
+       c = chanalloc.free;
+       if(c != nil)
+               chanalloc.free = c->next;
+       unlock(&chanalloc);
+
+       if(c == nil){
+               /* nothing to recycle; the allocation happens outside the lock */
+               c = smalloc(sizeof(Chan));
+               lock(&chanalloc);
+               chanalloc.fid++;
+               c->fid = chanalloc.fid;
+               c->link = chanalloc.list;
+               chanalloc.list = c;
+               unlock(&chanalloc);
+       }
+
+       c->ref = 1;
+
+       /* if you get an error before associating with a dev,
+          close calls rootclose, a nop */
+       c->type = 0;
+       c->dev = 0;
+       c->flag = 0;
+
+       /* i/o position and directory-read state */
+       c->offset = 0;
+       c->iounit = 0;
+       c->uri = 0;
+       c->dri = 0;
+
+       /* nothing attached yet */
+       c->umh = nil;
+       c->aux = nil;
+       c->mchan = nil;
+       c->mcp = nil;
+       c->mux = nil;
+       c->name = nil;
+
+       c->mqid.path = 0;
+       c->mqid.vers = 0;
+       c->mqid.type = 0;
+       return c;
+}
+
+/* number of live Cnames: newcname() increments it, cnameclose() decrements it */
+static Ref ncname;
+
+/*
+ * Allocate a Cname holding a copy of s, with CNAMESLOP spare bytes
+ * after the string and a reference count of 1.
+ */
+Cname*
+newcname(char *s)
+{
+       Cname *cn;
+       int slen;
+
+       cn = smalloc(sizeof(Cname));
+       slen = strlen(s);
+       cn->ref = 1;
+       cn->len = slen;
+       cn->alen = slen+CNAMESLOP;
+       cn->s = smalloc(cn->alen);
+       memmove(cn->s, s, slen+1);      /* +1 copies the terminator */
+       incref(&ncname);
+       return cn;
+}
+
+/*
+ * Drop one reference to n; nil is ignored.
+ * When the last reference goes, the string and the Cname are freed
+ * and the live count ncname is decremented.
+ */
+void
+cnameclose(Cname *n)
+{
+       if(n == nil || decref(n) != 0)
+               return;
+
+       decref(&ncname);
+       free(n->s);
+       free(n);
+}
+
+/*
+ * Append path element s to n and return the resulting Cname.
+ * "." leaves n untouched.  If n is shared (ref > 1) the caller's
+ * reference is released and a private copy is extended instead, so
+ * callers must use the returned pointer.  After appending ".." the
+ * name is passed to cleancname().
+ */
+Cname*
+addelem(Cname *n, char *s)
+{
+       int slen, need;
+       char *buf;
+       Cname *copy;
+
+       if(s[0]=='.' && s[1]=='\0')
+               return n;
+
+       if(n->ref > 1){
+               /* copy on write */
+               copy = newcname(n->s);
+               cnameclose(n);
+               n = copy;
+       }
+
+       slen = strlen(s);
+       need = n->len+1+slen+1;         /* current text, '/', s, terminator */
+       if(need > n->alen){
+               /* grow, leaving CNAMESLOP bytes spare for later elements */
+               need += CNAMESLOP;
+               buf = smalloc(need);
+               memmove(buf, n->s, n->len+1);
+               free(n->s);
+               n->s = buf;
+               n->alen = need;
+       }
+
+       /* don't insert extra slash if one is present */
+       if(n->len>0 && n->s[n->len-1]!='/' && s[0]!='/'){
+               n->s[n->len] = '/';
+               n->len++;
+       }
+       memmove(n->s+n->len, s, slen+1);
+       n->len += slen;
+
+       if(isdotdot(s))
+               cleancname(n);
+       return n;
+}
+
+/*
+ * Release everything c still holds and put it on the chanalloc free list.
+ * The flag word is overwritten with CFREE first; cclose() panics if it
+ * is later handed a Chan carrying that flag.
+ */
+void
+chanfree(Chan *c)
+{
+       c->flag = CFREE;
+
+       /* drop the union-read mount head and channel, if any */
+       if(c->umh != nil){
+               putmhead(c->umh);
+               c->umh = nil;
+       }
+       if(c->umc != nil){
+               cclose(c->umc);
+               c->umc = nil;
+       }
+       /* drop the mount multiplexer and the channel to the mounted server */
+       if(c->mux != nil){
+               muxclose(c->mux);
+               c->mux = nil;
+       }
+       if(c->mchan != nil){
+               cclose(c->mchan);
+               c->mchan = nil;
+       }
+
+       /* c->name is left as it was; newchan() clears it on reuse */
+       cnameclose(c->name);
+
+       /* push onto the free list for newchan() to recycle */
+       lock(&chanalloc);
+       c->next = chanalloc.free;
+       chanalloc.free = c;
+       unlock(&chanalloc);
+}
+
+/*
+ * Release one reference to c; a nil c is ignored.
+ * Closing a Chan whose flag has CFREE set panics, printing the caller's pc.
+ * When the last reference goes, the device's close routine is run
+ * under waserror, so chanfree is reached even if close raises an error.
+ */
+void
+cclose(Chan *c)
+{
+       if(c == nil)
+               return;
+       if((c->flag & CFREE) != 0)
+               panic("cclose %lux", getcallerpc(&c));
+       if(decref(c) != 0)
+               return;
+
+       /* last reference */
+       if(!waserror()){
+               devtab[c->type]->close(c);
+               poperror();
+       }
+       chanfree(c);
+}
+
+/*
+ * Copy on write: hand back a Chan for which c->ref is 1.
+ * If c already has exactly one reference it is returned as is;
+ * otherwise c is cloned, the caller's reference to c is released,
+ * and the clone is returned.
+ */
+Chan*
+cunique(Chan *c)
+{
+       Chan *copy;
+
+       if(c->ref == 1)
+               return c;
+       copy = cclone(c);
+       cclose(c);
+       return copy;
+}
+
+/*
+ * Return nonzero if the two Qids have the same path and the same vers.
+ * The type field is not compared.
+ */
+int
+eqqid(Qid a, Qid b)
+{
+       if(a.path != b.path)
+               return 0;
+       return a.vers == b.vers;
+}
+
+/*
+ * Return 1 if a and b have the same type, dev and qid.path, else 0.
+ * Unless pathonly is set, qid.vers must match as well.
+ */
+int
+eqchan(Chan *a, Chan *b, int pathonly)
+{
+       if(a->type != b->type || a->dev != b->dev)
+               return 0;
+       if(a->qid.path != b->qid.path)
+               return 0;
+       return pathonly || a->qid.vers == b->qid.vers;
+}
+
+/*
+ * Like eqchan, but the second channel is given by its parts:
+ * return 1 if a has the given type, dev and qid.path, else 0.
+ * Unless pathonly is set, qid.vers must match as well.
+ */
+int
+eqchantdqid(Chan *a, int type, int dev, Qid qid, int pathonly)
+{
+       if(a->type != type || a->dev != dev)
+               return 0;
+       if(a->qid.path != qid.path)
+               return 0;
+       return pathonly || a->qid.vers == qid.vers;
+}
+
+/*
+ * Allocate a mount head for the mount point from.
+ * The head starts with one reference and takes its own
+ * reference to from.
+ */
+Mhead*
+newmhead(Chan *from)
+{
+       Mhead *head;
+
+       head = smalloc(sizeof(Mhead));
+       head->ref = 1;
+       incref(from);
+       head->from = from;
+
+/*
+       disabled: copy the mount point's name into the head
+
+       n = from->name->len;
+       if(n >= sizeof(head->fromname))
+               n = sizeof(head->fromname)-1;
+       memmove(head->fromname, from->name->s, n);
+       head->fromname[n] = 0;
+*/
+       return head;
+}
+
+int    /*
+        * cmount: add new to what is mounted on old in the calling
+        * process's name space (up->env->pgrp) and return the mountid
+        * of the Mount created for new.
+        * flag&MORDER picks the position: MREPL throws away whatever was
+        * mounted on old, MAFTER appends to the existing list, and any
+        * other order puts new at the front.  MCREATE in flag is recorded
+        * in the new entry's mflag.  spec is handed to newmount.
+        * If new carries a mount head of its own (new->umh), the entries
+        * after its first are copied onto old as well.
+        * Raises Emount when exactly one of old and new is a directory,
+        * when old is not a directory and the order is not MREPL, and
+        * in the MCREATE case described in the comment below.
+        */
+cmount(Chan *new, Chan *old, int flag, char *spec)
+{
+       Pgrp *pg;
+       int order, flg;
+       Mhead *m, **l, *mh;
+       Mount *nm, *f, *um, **h;
+
+       if(QTDIR & (old->qid.type^new->qid.type))
+               error(Emount);
+
+if(old->umh)
+       print("cmount old extra umh\n");
+
+       order = flag&MORDER;
+
+       if((old->qid.type&QTDIR)==0 && order != MREPL)
+               error(Emount);
+
+       mh = new->umh;  /* mount head attached to new, if any */
+
+       /*
+        * Not allowed to bind when the old directory
+        * is itself a union.  (Maybe it should be allowed, but I don't see
+        * what the semantics would be.)
+        *
+        * We need to check mh->mount->next to tell unions apart from
+        * simple mount points, so that things like
+        *      mount -c fd /root
+        *      bind -c /root /
+        * work.  The check of mount->mflag catches things like
+        *      mount fd /root
+        *      bind -c /root /
+        *
+        * This is far more complicated than it should be, but I don't
+        * see an easier way at the moment.             -rsc
+        */
+       if((flag&MCREATE) && mh && mh->mount
+       && (mh->mount->next || !(mh->mount->mflag&MCREATE)))
+               error(Emount);
+
+       pg = up->env->pgrp;
+       wlock(&pg->ns);
+
+       l = &MOUNTH(pg, old->qid);      /* hash chain that holds old's mount head, if there is one */
+       for(m = *l; m; m = m->hash) {
+               if(eqchan(m->from, old, 1))
+                       break;
+               l = &m->hash;
+       }
+
+       if(m == nil) {
+               /*
+                *  nothing mounted here yet.  create a mount
+                *  head and add to the hash table.
+                */
+               m = newmhead(old);
+               *l = m;
+
+               /*
+                *  if this is a union mount, add the old
+                *  node to the mount chain.
+                */
+               if(order != MREPL)
+                       m->mount = newmount(m, old, 0, 0);
+       }
+       wlock(&m->lock);
+       if(waserror()){
+               wunlock(&m->lock);
+               nexterror();
+       }
+       wunlock(&pg->ns);       /* m->lock is held from here on; the name space lock is dropped */
+
+       nm = newmount(m, new, flag, spec);
+       if(mh != nil && mh->mount != nil) {
+               /*
+                *  copy a union when binding it onto a directory
+                */
+               flg = order;
+               if(order == MREPL)
+                       flg = MAFTER;
+               h = &nm->next;
+               um = mh->mount;
+               for(um = um->next; um; um = um->next) { /* starts at the second entry of new's union */
+                       f = newmount(m, um->to, flg, um->spec);
+                       *h = f;
+                       h = &f->next;
+               }
+       }
+
+       if(m->mount && order == MREPL) {
+               mountfree(m->mount);
+               m->mount = 0;
+       }
+
+       if(flag & MCREATE)
+               nm->mflag |= MCREATE;
+
+       if(m->mount && order == MAFTER) {
+               for(f = m->mount; f->next; f = f->next) /* find the tail of the existing list */
+                       ;
+               f->next = nm;
+       }
+       else {
+               for(f = nm; f->next; f = f->next)       /* find the tail of the new chain */
+                       ;
+               f->next = m->mount;
+               m->mount = nm;
+       }
+
+       wunlock(&m->lock);
+       poperror();
+       return nm->mountid;
+}
+
+void   /*
+        * cunmount: undo a mount on mnt in the calling process's name space.
+        * With mounted nil (0), everything mounted on mnt is freed and the
+        * mount head is taken out of the hash table.  Otherwise only the
+        * first entry whose channel (or that channel's mchan) matches
+        * mounted by path, type and dev is removed; if that leaves the
+        * list empty the mount head is removed too.
+        * Raises Eunmount if nothing is mounted on mnt, and Eunion if
+        * mounted is not found in mnt's list.
+        */
+cunmount(Chan *mnt, Chan *mounted)
+{
+       Pgrp *pg;
+       Mhead *m, **l;
+       Mount *f, **p;
+
+       if(mnt->umh)    /* should not happen */
+               print("cunmount newp extra umh %p has %p\n", mnt, mnt->umh);
+
+       /*
+        * It _can_ happen that mounted->umh is non-nil,
+        * because mounted is the result of namec(Aopen)
+        * (see sysfile.c:/^sysunmount).
+        * If we open a union directory, it will have a umh.
+        * Although surprising, this is okay, since the
+        * cclose will take care of freeing the umh.
+        */
+
+       pg = up->env->pgrp;
+       wlock(&pg->ns);
+
+       l = &MOUNTH(pg, mnt->qid);
+       for(m = *l; m; m = m->hash) {
+               if(eqchan(m->from, mnt, 1))
+                       break;
+               l = &m->hash;
+       }
+
+       if(m == 0) {
+               wunlock(&pg->ns);
+               error(Eunmount);
+       }
+
+       wlock(&m->lock);
+       if(mounted == 0) {
+               *l = m->hash;   /* unlink the head from its hash chain */
+               wunlock(&pg->ns);
+               mountfree(m->mount);
+               m->mount = nil;
+               cclose(m->from);
+               wunlock(&m->lock);
+               putmhead(m);
+               return;
+       }
+
+       p = &m->mount;
+       for(f = *p; f; f = f->next) {
+               /* BUG: Needs to be 2 pass */
+               if(eqchan(f->to, mounted, 1) ||
+                 (f->to->mchan && eqchan(f->to->mchan, mounted, 1))) {
+                       *p = f->next;
+                       f->next = 0;    /* detach f so mountfree is handed f alone */
+                       mountfree(f);
+                       if(m->mount == nil) {
+                               *l = m->hash;
+                               cclose(m->from);
+                               wunlock(&m->lock);
+                               wunlock(&pg->ns);
+                               putmhead(m);
+                               return;
+                       }
+                       wunlock(&m->lock);
+                       wunlock(&pg->ns);
+                       return;
+               }
+               p = &f->next;
+       }
+       wunlock(&m->lock);
+       wunlock(&pg->ns);
+       error(Eunion);
+}
+
+/*
+ * Clone c by asking its device for a walk of zero elements.
+ * The clone is given c's Cname, with one more reference taken on it
+ * when it is not nil.  Raises "clone failed" if the device walk
+ * returns nil.
+ */
+Chan*
+cclone(Chan *c)
+{
+       Chan *copy;
+       Walkqid *w;
+
+       w = devtab[c->type]->walk(c, nil, nil, 0);
+       if(w == nil)
+               error("clone failed");
+       copy = w->clone;
+       free(w);
+       if(c->name != nil)
+               incref(c->name);
+       copy->name = c->name;
+       return copy;
+}
+
+int    /*
+        * findmount: look in the calling process's mount table for a mount
+        * head whose from channel has the given type, dev and qid.path.
+        * On a match, *cp is replaced by a new reference to the first
+        * channel mounted there (a non-nil old *cp is closed) and, when mp
+        * is not nil, *mp is replaced by a new reference to the mount head
+        * (a non-nil old *mp is released with putmhead); returns 1.
+        * Returns 0, leaving *cp and *mp alone, when nothing matches.
+        */
+findmount(Chan **cp, Mhead **mp, int type, int dev, Qid qid)
+{
+       Pgrp *pg;
+       Mhead *m;
+
+       pg = up->env->pgrp;
+       rlock(&pg->ns);
+       for(m = MOUNTH(pg, qid); m; m = m->hash){
+               rlock(&m->lock);
+if(m->from == nil){
+       print("m %p m->from 0\n", m);
+       runlock(&m->lock);
+       continue;
+}
+               if(eqchantdqid(m->from, type, dev, qid, 1)) {
+                       runlock(&pg->ns);       /* m->lock is still read-held while m is used below */
+                       if(mp != nil){
+                               incref(m);
+                               if(*mp != nil)
+                                       putmhead(*mp);
+                               *mp = m;
+                       }
+                       if(*cp != nil)
+                               cclose(*cp);
+                       incref(m->mount->to);
+                       *cp = m->mount->to;
+                       runlock(&m->lock);
+                       return 1;
+               }
+               runlock(&m->lock);
+       }
+
+       runlock(&pg->ns);
+       return 0;
+}
+
+/*
+ * Run findmount on *cp itself, using *cp's own type, dev and qid
+ * as the key.  cp and mp are passed through unchanged and
+ * findmount's result is returned.
+ */
+int
+domount(Chan **cp, Mhead **mp)
+{
+       Chan *c;
+
+       c = *cp;
+       return findmount(cp, mp, c->type, c->dev, c->qid);
+}
+
+Chan*  /*
+        * undomount: called by walk after a .. step, with the walked-to
+        * channel c and the resulting path name.
+        * Every mount head in the calling process's mount table whose
+        * from channel is named name->s is examined; if c matches one of
+        * the channels mounted there (by path, type and dev), c is closed
+        * and replaced by a new reference to the head's from channel.
+        * Returns c, possibly replaced.  The name space is read-locked for
+        * the whole scan and unlocked again if an error is raised.
+        */
+undomount(Chan *c, Cname *name)
+{
+       Chan *nc;
+       Pgrp *pg;
+       Mount *t;
+       Mhead **h, **he, *f;
+
+       pg = up->env->pgrp;
+       rlock(&pg->ns);
+       if(waserror()) {
+               runlock(&pg->ns);
+               nexterror();
+       }
+
+       he = &pg->mnthash[MNTHASH];
+       for(h = pg->mnthash; h < he; h++) {
+               for(f = *h; f; f = f->hash) {
+                       if(strcmp(f->from->name->s, name->s) != 0)
+                               continue;
+                       for(t = f->mount; t; t = t->next) {
+                               if(eqchan(c, t->to, 1)) {
+                                       /*
+                                        * We want to come out on the left hand side of the mount
+                                        * point using the element of the union that we entered on.
+                                        * To do this, find the element that has a from name of
+                                        * c->name->s.
+                                        */
+                                       if(strcmp(t->head->from->name->s, name->s) != 0)
+                                               continue;
+                                       nc = t->head->from;
+                                       incref(nc);
+                                       cclose(c);
+                                       c = nc;
+                                       break;  /* leaves only the loop over t; the scan of heads goes on */
+                               }
+                       }
+               }
+       }
+       poperror();
+       runlock(&pg->ns);
+       return c;
+}
+
+/*
+ * Either walks all the way or not at all.  No partial results in *cp.
+ * *nerror is the number of names to display in an error message.
+ *
+ * Walk from *cp through the nnames elements of names, crossing mount
+ * points unless nomount is set.
+ * On success, returns 0: the old *cp is closed and *cp becomes a Chan
+ * with a single reference whose name is the old name extended by the
+ * elements walked; *nerror (if nerror is not nil) is set to 0.
+ * On failure, returns -1 and leaves *cp alone.  The not-a-directory and
+ * does-not-exist cases copy a message into up->env->errstr here; when
+ * a device walk returns nil, errstr is not written by this function.
+ */
+static char Edoesnotexist[] = "does not exist";
+int
+walk(Chan **cp, char **names, int nnames, int nomount, int *nerror)
+{
+       int dev, dotdot, i, n, nhave, ntry, type;
+       Chan *c, *nc;
+       Cname *cname;
+       Mount *f;
+       Mhead *mh, *nmh;
+       Walkqid *wq;
+
+       c = *cp;
+       incref(c);      /* working reference; the caller's *cp is closed only on success */
+       cname = c->name;
+       incref(cname);
+       mh = nil;
+
+       /*
+        * While we haven't gotten all the way down the path:
+        *    1. step through a mount point, if any
+        *    2. send a walk request for initial dotdot or initial prefix without dotdot
+        *    3. move to the first mountpoint along the way.
+        *    4. repeat.
+        *
+        * An invariant is that each time through the loop, c is on the undomount
+        * side of the mount point, and c's name is cname.
+        */
+       for(nhave=0; nhave<nnames; nhave+=n){
+               if((c->qid.type&QTDIR)==0){
+                       if(nerror)
+                               *nerror = nhave;
+                       cnameclose(cname);
+                       cclose(c);
+                       strcpy(up->env->errstr, Enotdir);
+                       if(mh != nil)
+                               putmhead(mh);
+                       return -1;
+               }
+               ntry = nnames - nhave;
+               if(ntry > MAXWELEM)
+                       ntry = MAXWELEM;
+               dotdot = 0;
+               for(i=0; i<ntry; i++){
+                       if(isdotdot(names[nhave+i])){
+                               if(i==0) {
+                                       dotdot = 1;
+                                       ntry = 1;       /* a leading .. is walked on its own */
+                               } else
+                                       ntry = i;       /* stop just before the first .. */
+                               break;
+                       }
+               }
+
+               if(!dotdot && !nomount)
+                       domount(&c, &mh);
+
+               type = c->type;
+               dev = c->dev;
+
+               if((wq = devtab[type]->walk(c, nil, names+nhave, ntry)) == nil){
+                       /* try a union mount, if any */
+                       if(mh && !nomount){
+                               /*
+                                * mh->mount == c, so start at mh->mount->next
+                                */
+                               rlock(&mh->lock);
+                               for(f = mh->mount->next; f; f = f->next)
+                                       if((wq = devtab[f->to->type]->walk(f->to, nil, names+nhave, ntry)) != nil)
+                                               break;
+                               runlock(&mh->lock);
+                               if(f != nil){
+                                       type = f->to->type;
+                                       dev = f->to->dev;
+                               }
+                       }
+                       if(wq == nil){
+                               cclose(c);
+                               cnameclose(cname);
+                               if(nerror)
+                                       *nerror = nhave+1;
+                               if(mh != nil)
+                                       putmhead(mh);
+                               return -1;
+                       }
+               }
+
+               nmh = nil;
+               if(dotdot) {
+                       assert(wq->nqid == 1);
+                       assert(wq->clone != nil);
+
+                       cname = addelem(cname, "..");
+                       nc = undomount(wq->clone, cname);
+                       n = 1;
+               } else {
+                       nc = nil;
+                       if(!nomount)
+                               for(i=0; i<wq->nqid && i<ntry-1; i++)   /* the last element tried is not looked up here */
+                                       if(findmount(&nc, &nmh, type, dev, wq->qid[i]))
+                                               break;
+                       if(nc == nil){  /* no mount points along path */
+                               if(wq->clone == nil){
+                                       cclose(c);
+                                       cnameclose(cname);
+                                       if(wq->nqid==0 || (wq->qid[wq->nqid-1].type&QTDIR)){
+                                               if(nerror)
+                                                       *nerror = nhave+wq->nqid+1;
+                                               strcpy(up->env->errstr, Edoesnotexist);
+                                       }else{
+                                               if(nerror)
+                                                       *nerror = nhave+wq->nqid;
+                                               strcpy(up->env->errstr, Enotdir);
+                                       }
+                                       free(wq);
+                                       if(mh != nil)
+                                               putmhead(mh);
+                                       return -1;
+                               }
+                               n = wq->nqid;
+                               nc = wq->clone;
+                       }else{          /* stopped early, at a mount point */
+                               if(wq->clone != nil){
+                                       cclose(wq->clone);
+                                       wq->clone = nil;
+                               }
+                               n = i+1;
+                       }
+                       for(i=0; i<n; i++)
+                               cname = addelem(cname, names[nhave+i]);
+               }
+               cclose(c);
+               c = nc;
+               putmhead(mh);   /* NOTE(review): mh may be nil here; other call sites in this function guard with mh != nil */
+               mh = nmh;
+               free(wq);
+       }
+
+       putmhead(mh);
+
+       c = cunique(c);
+
+       if(c->umh != nil){      //BUG
+               print("walk umh\n");
+               putmhead(c->umh);
+               c->umh = nil;
+       }
+
+       cnameclose(c->name);
+       c->name = cname;
+
+       cclose(*cp);
+       *cp = c;
+       if(nerror)
+               *nerror = 0;
+       return 0;
+}
+
+/*
+ * c is a mounted non-creatable directory.  Find a creatable one:
+ * search the mounts under m for the first whose mflag has MCREATE,
+ * return a clone of its channel, and close c.
+ * Raises Enocreate when no mount has MCREATE; c is not closed then.
+ * m->lock is read-held during the search and released on every path.
+ */
+Chan*
+createdir(Chan *c, Mhead *m)
+{
+       Chan *clone;
+       Mount *mnt;
+
+       rlock(&m->lock);
+       if(waserror()) {
+               runlock(&m->lock);
+               nexterror();
+       }
+       for(mnt = m->mount; mnt != nil; mnt = mnt->next)
+               if(mnt->mflag & MCREATE)
+                       break;
+       if(mnt == nil){
+               error(Enocreate);
+               return 0;
+       }
+       clone = cclone(mnt->to);
+       runlock(&m->lock);
+       poperror();
+       cclose(c);
+       return clone;
+}
+
+void
+saveregisters(void)
+{      /* no-op: the body is empty; namec calls this just before the device open */
+}
+
+/*
+ * In place, rewrite name to compress multiple /, eliminate ., and process ..
+ * (the rewriting itself is done by cleanname); n->len is then recomputed.
+ * For a name starting with #, only the part from the first / onward
+ * is cleaned, and a name without any / is left as it is.
+ */
+void
+cleancname(Cname *n)
+{
+       char *slash;
+
+       if(n->s[0] != '#'){
+               cleanname(n->s);
+               n->len = strlen(n->s);
+               return;
+       }
+
+       slash = strchr(n->s, '/');
+       if(slash == nil)
+               return;
+       cleanname(slash);
+
+       /*
+        * The correct name is #i rather than #i/,
+        * but the correct name of #/ is #/.
+        */
+       if(n->s[1] != '/' && strcmp(slash, "/") == 0)
+               *slash = '\0';
+       n->len = strlen(n->s);
+}
+
+/*
+ * Make room in e for one more path element.
+ * Both arrays grow Delta entries at a time, whenever nelems is a
+ * multiple of Delta.  elems holds nelems pointers.  off holds one
+ * more entry than that: off[0] plus an end offset per element, all
+ * filled in by parsename.  So nelems+1 offsets are carried over to
+ * the new array; copying only nelems would drop the newest offset,
+ * which namec reads when it builds an error message.
+ */
+static void
+growparse(Elemlist *e)
+{
+       char **new;
+       int *inew;
+       enum { Delta = 8 };
+
+       if(e->nelems % Delta == 0){
+               new = smalloc((e->nelems+Delta) * sizeof(char*));
+               memmove(new, e->elems, e->nelems*sizeof(char*));
+               free(e->elems);
+               e->elems = new;
+               inew = smalloc((e->nelems+Delta+1) * sizeof(int));
+               /* off[0..nelems] are live; keep every one of them */
+               if(e->off != nil)
+                       memmove(inew, e->off, (e->nelems+1)*sizeof(int));
+               free(e->off);
+               e->off = inew;
+       }
+}
+
+/*
+ * The name is known to be valid.
+ * Copy the name so slashes can be overwritten: the copy is kept in
+ * e->name and each e->elems[i] points into it.
+ * An empty string will set nelems=0.
+ * The offset of the first element within the copy is stored in
+ * e->off[0], and the offset just past element i in e->off[i+1].
+ * A path ending in / or /. or /.//./ etc. will have
+ * e.mustbedir = 1, so that we correctly
+ * reject, e.g., "/adm/users/." when /adm/users is a file
+ * rather than a directory.
+ */
+static void
+parsename(char *name, Elemlist *e)
+{
+       char *p, *end;
+
+       kstrdup(&e->name, name);
+       p = e->name;
+       e->nelems = 0;
+       e->elems = nil;
+       e->off = smalloc(sizeof(int));
+       e->off[0] = skipslash(p) - p;
+
+       while(*(p = skipslash(p)) != '\0'){
+               growparse(e);
+               e->elems[e->nelems] = p;
+               e->nelems++;
+               end = utfrune(p, '/');
+               if(end == nil){
+                       /* final element, nothing after it */
+                       e->off[e->nelems] = p+strlen(p) - e->name;
+                       e->mustbedir = 0;
+                       return;
+               }
+               e->off[e->nelems] = end - e->name;
+               *end = '\0';
+               p = end+1;
+       }
+
+       /* the name ran out after slashes or dots, or was empty to begin with */
+       e->mustbedir = 1;
+}
+
+/*
+ * Return a pointer to the last byte equal to c among the first n
+ * bytes at va, or nil if there is none or n <= 0.
+ * The first byte is examined too: namec, the caller in this file,
+ * already treats a hit at the very start of its buffer like a miss.
+ */
+void*
+memrchr(void *va, int c, long n)
+{
+       uchar *a, *e;
+
+       if(n <= 0)
+               return nil;
+       a = va;
+       /*
+        * Step backwards from one past the end.  Decrementing before
+        * the test covers a[0] and never forms a pointer below a.
+        */
+       for(e=a+n; e>a; )
+               if(*--e == c)
+                       return e;
+       return nil;
+}
+
+/*
+ * Turn a name into a channel.
+ * &name[0] is known to be a valid address.  It may be a kernel address.
+ *
+ * Opening with amode Aopen, Acreate, or Aremove guarantees
+ * that the result will be the only reference to that particular fid.
+ * This is necessary since we might pass the result to
+ * devtab[]->remove().
+ *
+ * Opening Atodir, Amount, or Aaccess does not guarantee this.
+ *
+ * Opening Aaccess can, under certain conditions, return a
+ * correct Chan* but with an incorrect Cname attached.
+ * Since the functions that open Aaccess (sysstat, syswstat, sys_stat)
+ * do not use the Cname*, this avoids an unnecessary clone.
+ *
+ * aname: the path.  A leading / starts at the process group's slash,
+ *     a leading # attaches to a device, anything else starts at dot.
+ * amode: one of Aaccess, Abind, Aremove, Aopen, Atodir, Amount,
+ *     Acreate; any other value panics.
+ * omode: open mode for Aopen and Acreate.  OCEXEC is masked off before
+ *     the device open, OEXCL and OCEXEC before the device create;
+ *     OCEXEC and ORCLOSE are recorded as CCEXEC and CRCLOSE in the flag.
+ * perm: permissions handed to the device create; for a name ending
+ *     in / or /. it must include DMDIR.
+ *
+ * Returns the channel.  On return up->genbuf holds the final path
+ * element, or "." when the name had no elements.
+ * Failures are raised with error()/nexterror(); for walk failures the
+ * message in up->env->errstr is prefixed with the quoted part of aname
+ * that was reached.
+ */
+Chan*
+namec(char *aname, int amode, int omode, ulong perm)
+{
+       int n, prefix, len, t, nomount, npath;
+       Chan *c, *cnew;
+       Cname *cname;
+       Elemlist e;
+       Rune r;
+       Mhead *m;
+       char *createerr, tmperrbuf[ERRMAX];
+       char *name;
+
+       name = aname;
+       if(name[0] == '\0')
+               error("empty file name");
+       validname(name, 1);
+
+       /*
+        * Find the starting off point (the current slash, the root of
+        * a device tree, or the current dot) as well as the name to
+        * evaluate starting there.
+        */
+       nomount = 0;
+       switch(name[0]){
+       case '/':
+               c = up->env->pgrp->slash;
+               incref(c);
+               break;
+       
+       case '#':
+               nomount = 1;    /* walk and the domount calls below skip mount lookups for # names */
+               up->genbuf[0] = '\0';
+               n = 0;
+               while(*name!='\0' && (*name != '/' || n < 2)){
+                       if(n >= sizeof(up->genbuf)-1)
+                               error(Efilename);
+                       up->genbuf[n++] = *name++;
+               }
+               up->genbuf[n] = '\0';
+               n = chartorune(&r, up->genbuf+1)+1;     /* r: the device character after #; n: index of the attach spec */
+               if(r == 'M')
+                       error(Enoattach);
+               /*
+                *  the nodevs exceptions are
+                *      |  it only gives access to pipes you create
+                *      e  this process's environment
+                *      s  private file2chan creation space
+                *      D private secure sockets name space
+                *      a private TLS name space
+                */
+               if(up->env->pgrp->nodevs &&
+                  (utfrune("|esDa", r) == nil || r == 's' && up->genbuf[n]!='\0'))
+                       error(Enoattach);
+               t = devno(r, 1);
+               if(t == -1)
+                       error(Ebadsharp);
+               c = devtab[t]->attach(up->genbuf+n);
+               break;
+
+       default:
+               c = up->env->pgrp->dot;
+               incref(c);
+               break;
+       }
+       prefix = name - aname;  /* bytes of aname consumed above; name only advances in the # case */
+
+       e.name = nil;
+       e.elems = nil;
+       e.off = nil;
+       e.nelems = 0;
+       if(waserror()){
+               cclose(c);
+               free(e.name);
+               free(e.elems);
+               free(e.off);
+//dumpmount();
+               nexterror();
+       }
+
+       /*
+        * Build a list of elements in the path.
+        */
+       parsename(name, &e);
+
+       /*
+        * On create, ....
+        */
+       if(amode == Acreate){
+               /* perm must have DMDIR if last element is / or /. */
+               if(e.mustbedir && !(perm&DMDIR)){
+                       npath = e.nelems;
+                       strcpy(tmperrbuf, "create without DMDIR");
+                       goto NameError;
+               }
+
+               /* don't try to walk the last path element just yet. */
+               if(e.nelems == 0)
+                       error(Eexist);
+               e.nelems--;
+       }
+
+       if(walk(&c, e.elems, e.nelems, nomount, &npath) < 0){
+               if(npath < 0 || npath > e.nelems){
+                       print("namec %s walk error npath=%d\n", aname, npath);
+                       nexterror();
+               }
+               strcpy(tmperrbuf, up->env->errstr);
+       NameError:
+               len = prefix+e.off[npath];      /* how much of aname to quote in the message */
+               if(len < ERRMAX/3 || (name=memrchr(aname, '/', len))==nil || name==aname)
+                       snprint(up->genbuf, sizeof up->genbuf, "%.*s", len, aname);
+               else
+                       snprint(up->genbuf, sizeof up->genbuf, "...%.*s", (int)(len-(name-aname)), name);
+               snprint(up->env->errstr, ERRMAX, "%#q %s", up->genbuf, tmperrbuf);
+               nexterror();
+       }
+
+       if(e.mustbedir && !(c->qid.type&QTDIR)){
+               npath = e.nelems;
+               strcpy(tmperrbuf, "not a directory");
+               goto NameError;
+       }
+
+       if(amode == Aopen && (omode&3) == OEXEC && (c->qid.type&QTDIR)){
+               npath = e.nelems;
+               error("cannot exec directory");
+       }
+
+       switch(amode){
+       case Aaccess:
+               if(!nomount)
+                       domount(&c, nil);
+               break;
+
+       case Abind:
+               m = nil;
+               if(!nomount)
+                       domount(&c, &m);
+               if(c->umh != nil)
+                       putmhead(c->umh);
+               c->umh = m;     /* cmount reads this as new->umh */
+               break;
+
+       case Aremove:
+       case Aopen:
+       Open:
+               /* save the name; domount might change c */
+               cname = c->name;
+               incref(cname);
+               m = nil;
+               if(!nomount)
+                       domount(&c, &m);
+
+               /* our own copy to open or remove */
+               c = cunique(c);
+
+               /* now it's our copy anyway, we can put the name back */
+               cnameclose(c->name);
+               c->name = cname;
+
+               switch(amode){
+               case Aremove:
+                       putmhead(m);
+                       break;
+
+               case Aopen:
+               case Acreate:   /* reached through goto Open from the Acreate case below */
+if(c->umh != nil){
+       print("cunique umh\n");
+       putmhead(c->umh);
+       c->umh = nil;
+}
+
+                       /* only save the mount head if it's a multiple element union */
+                       if(m && m->mount && m->mount->next)
+                               c->umh = m;
+                       else
+                               putmhead(m);
+
+                       /* save registers else error() in open has wrong value of c saved */
+                       saveregisters();
+
+                       if(omode == OEXEC)
+                               c->flag &= ~CCACHE;
+
+                       c = devtab[c->type]->open(c, omode&~OCEXEC);
+
+                       if(omode & OCEXEC)
+                               c->flag |= CCEXEC;
+                       if(omode & ORCLOSE)
+                               c->flag |= CRCLOSE;
+                       break;
+               }
+               break;
+
+       case Atodir:
+               /*
+                * Directories (e.g. for cd) are left before the mount point,
+                * so one may mount on / or . and see the effect.
+                */
+               if(!(c->qid.type & QTDIR))
+                       error(Enotdir);
+               break;
+
+       case Amount:
+               /*
+                * When mounting on an already mounted upon directory,
+                * one wants subsequent mounts to be attached to the
+                * original directory, not the replacement.  Don't domount.
+                */
+               break;
+
+       case Acreate:
+               /*
+                * We've already walked all but the last element.
+                * If the last exists, try to open it OTRUNC.
+                * If omode&OEXCL is set, just give up.
+                */
+               e.nelems++;
+               if(walk(&c, e.elems+e.nelems-1, 1, nomount, nil) == 0){
+                       if(omode&OEXCL)
+                               error(Eexist);
+                       omode |= OTRUNC;
+                       goto Open;
+               }
+
+               /*
+                * The semantics of the create(2) system call are that if the
+                * file exists and can be written, it is to be opened with truncation.
+                * On the other hand, the create(5) message fails if the file exists.
+                * If we get two create(2) calls happening simultaneously,
+                * they might both get here and send create(5) messages, but only
+                * one of the messages will succeed.  To provide the expected create(2)
+                * semantics, the call with the failed message needs to try the above
+                * walk again, opening for truncation.  This correctly solves the
+                * create/create race, in the sense that any observable outcome can
+                * be explained as one happening before the other.
+                * The create/create race is quite common.  For example, it happens
+                * when two rc subshells simultaneously update the same
+                * environment variable.
+                *
+                * The implementation still admits a create/create/remove race:
+                * (A) walk to file, fails
+                * (B) walk to file, fails
+                * (A) create file, succeeds, returns
+                * (B) create file, fails
+                * (A) remove file, succeeds, returns
+                * (B) walk to file, return failure.
+                *
+                * This is hardly as common as the create/create race, and is really
+                * not too much worse than what might happen if (B) got a hold of a
+                * file descriptor and then the file was removed -- either way (B) can't do
+                * anything with the result of the create call.  So we don't care about this race.
+                *
+                * Applications that care about more fine-grained decision of the races
+                * can use the OEXCL flag to get at the underlying create(5) semantics;
+                * by default we provide the common case.
+                *
+                * We need to stay behind the mount point in case we
+                * need to do the first walk again (should the create fail).
+                *
+                * We also need to cross the mount point and find the directory
+                * in the union in which we should be creating.
+                *
+                * The channel staying behind is c, the one moving forward is cnew.
+                */
+               m = nil;
+               cnew = nil;     /* is this assignment necessary? */
+               if(!waserror()){        /* try create */
+                       if(!nomount && findmount(&cnew, &m, c->type, c->dev, c->qid))
+                               cnew = createdir(cnew, m);
+                       else{
+                               cnew = c;
+                               incref(cnew);
+                       }
+
+                       /*
+                        * We need our own copy of the Chan because we're
+                        * about to send a create, which will move it.  Once we have
+                        * our own copy, we can fix the name, which might be wrong
+                        * if findmount gave us a new Chan.
+                        */
+                       cnew = cunique(cnew);
+                       cnameclose(cnew->name);
+                       cnew->name = c->name;
+                       incref(cnew->name);
+
+                       devtab[cnew->type]->create(cnew, e.elems[e.nelems-1], omode&~(OEXCL|OCEXEC), perm);
+                       poperror();
+                       if(omode & OCEXEC)
+                               cnew->flag |= CCEXEC;
+                       if(omode & ORCLOSE)
+                               cnew->flag |= CRCLOSE;
+                       if(m)
+                               putmhead(m);
+                       cclose(c);
+                       c = cnew;
+                       c->name = addelem(c->name, e.elems[e.nelems-1]);
+                       break;
+               }
+
+               /* create failed */
+               cclose(cnew);
+               if(m)
+                       putmhead(m);
+               if(omode & OEXCL)
+                       nexterror();
+               /* save error */
+               createerr = up->env->errstr;
+               up->env->errstr = tmperrbuf;    /* the retry walk writes its message here, not over the create error */
+               /* note: we depend that walk does not error */
+               if(walk(&c, e.elems+e.nelems-1, 1, nomount, nil) < 0){
+                       up->env->errstr = createerr;
+                       error(createerr);       /* report true error */
+               }
+               up->env->errstr = createerr;
+               omode |= OTRUNC;
+               goto Open;
+
+       default:
+               panic("unknown namec access %d\n", amode);
+       }
+
+       poperror();
+
+       /* place final element in genbuf for e.g. exec */
+       if(e.nelems > 0)
+               kstrcpy(up->genbuf, e.elems[e.nelems-1], sizeof up->genbuf);
+       else
+               kstrcpy(up->genbuf, ".", sizeof up->genbuf);
+       free(e.name);
+       free(e.elems);
+       free(e.off);
+
+       return c;
+}
+
+/*
+ * Step over the uninteresting prefix of a path: any run of '/'
+ * characters and "." elements (a '.' that is immediately followed
+ * by '/' or by the end of the string).  The name is taken to be
+ * valid already.  Returns a pointer into the same string.
+ */
+char*
+skipslash(char *name)
+{
+       char *p;
+
+       for(p = name; ; p++){
+               if(p[0] == '/')
+                       continue;
+               /* only look at p[1] once p[0] is known to be '.' */
+               if(p[0] == '.' && (p[1] == 0 || p[1] == '/'))
+                       continue;
+               break;
+       }
+       return p;
+}
+
+/*
+ * isfrog[c] is nonzero for bytes that are rejected in names:
+ * the control characters 0x00-0x1f, '/', and DEL (0x7f).
+ * Every other entry is implicitly zero.  validname() consults
+ * the table for bytes below Runeself and can let '/' through
+ * when its caller allows slashes.
+ *
+ * The designated initializers use the standard C99 "[index] = value"
+ * form rather than the Plan 9 form without '='.
+ */
+char isfrog[256]={
+       /*NUL*/ 1, 1, 1, 1, 1, 1, 1, 1,
+       /*BS */ 1, 1, 1, 1, 1, 1, 1, 1,
+       /*DLE*/ 1, 1, 1, 1, 1, 1, 1, 1,
+       /*CAN*/ 1, 1, 1, 1, 1, 1, 1, 1,
+       ['/']   = 1,
+       [0x7f]  = 1,
+};
+
+/*
+ * Validate a name.  The name must
+ *  a) have its terminating NUL within the first 2^16 bytes, so
+ *     that it can fit in a 9P string field, and
+ *  b) contain no frogs (see isfrog).
+ * Bytes at or above Runeself are stepped over with chartorune and
+ * are not looked up in isfrog.
+ * slashok says whether a '/' is acceptable (nonzero) or is to be
+ * treated as a frog (zero).
+ * The original note states that the first byte is known to be
+ * addressable by the requester; the scan here is a plain memchr
+ * and does no separate memory validation.
+ * Failures are reported through error(): either "name too long",
+ * or Ebadchar followed by the quoted name, built in up->genbuf.
+ */
+void
+validname(char *aname, int slashok)
+{
+       char *end, *p;
+       int ch;
+       Rune r;
+
+       end = memchr(aname, 0, (1<<16));
+       if(end == nil || (end - aname) >= (1<<16))
+               error("name too long");
+
+       for(p = aname; *p != 0; ){
+               ch = *(uchar*)p;
+               if(ch >= Runeself){
+                       /* all characters above '~' are ok; skip the whole rune */
+                       p += chartorune(&r, p);
+                       continue;
+               }
+               /* a frog is fatal unless it is a '/' and slashes are allowed */
+               if(isfrog[ch] && !(slashok && ch == '/')){
+                       snprint(up->genbuf, sizeof(up->genbuf), "%s: %q", Ebadchar, aname);
+                       error(up->genbuf);
+               }
+               p++;
+       }
+}
+
+/*
+ * Insist that c refers to a directory: if the QTDIR bit is not
+ * set in c->qid.type, report Enotdir through error().
+ */
+void
+isdir(Chan *c)
+{
+       if(!(c->qid.type & QTDIR))
+               error(Enotdir);
+}
+
+/*
+ * This is necessary because there are many
+ * pointers to the top of a given mount list:
+ *
+ *     - the mhead in the namespace hash table
+ *     - the mhead in chans returned from findmount:
+ *       used in namec and then by unionread.
+ *     - the mhead in chans returned from createdir:
+ *       used in the open/create race protect, which is gone.
+ *
+ * The RWlock in the Mhead protects the mount list it contains.
+ * The mount list is deleted when we cunmount.
+ * The RWlock ensures that nothing is using the mount list at that time.
+ *
+ * It is okay to replace c->mh with whatever you want as 
+ * long as you are sure you have a unique reference to it.
+ *
+ * This comment might belong somewhere else.
+ */
+void
+putmhead(Mhead *m)
+{
+       /* a nil Mhead is accepted and ignored */
+       if(!m)
+               return;
+       /* other references remain: nothing more to do */
+       if(decref(m) != 0)
+               return;
+       /*
+        * decref reported zero, so release the Mhead.  The mount
+        * pointer is overwritten with a recognizable bogus value
+        * first -- presumably so a stale user of the freed Mhead
+        * is easy to spot; confirm before relying on it.
+        */
+       m->mount = (Mount*)0xCafeBeef;
+       free(m);
+}