Add the inferno networking code, minus il
author Ronald G. Minnich <rminnich@google.com>
Thu, 16 Jan 2014 19:31:39 +0000 (11:31 -0800)
committer Ronald G. Minnich <rminnich@google.com>
Thu, 16 Jan 2014 22:45:37 +0000 (14:45 -0800)
Signed-off-by: Ronald G. Minnich <rminnich@google.com>
36 files changed:
kern/net/arp.c [new file with mode: 0644]
kern/net/bootp.c [new file with mode: 0644]
kern/net/compress.c [new file with mode: 0644]
kern/net/devip.c [new file with mode: 0644]
kern/net/dhcp.c [new file with mode: 0644]
kern/net/eipconvtest.c [new file with mode: 0644]
kern/net/esp.c [new file with mode: 0644]
kern/net/ethermedium.c [new file with mode: 0644]
kern/net/gre.c [new file with mode: 0644]
kern/net/icmp.c [new file with mode: 0644]
kern/net/icmp6.c [new file with mode: 0644]
kern/net/igmp.c [new file with mode: 0644]
kern/net/ihbootp.c [new file with mode: 0644]
kern/net/ip.c [new file with mode: 0644]
kern/net/ip.h [new file with mode: 0644]
kern/net/ipaux.c [new file with mode: 0644]
kern/net/ipifc.c [new file with mode: 0644]
kern/net/ipmux.c [new file with mode: 0644]
kern/net/iproute.c [new file with mode: 0644]
kern/net/iprouter.c [new file with mode: 0644]
kern/net/ipv6.c [new file with mode: 0644]
kern/net/ipv6.h [new file with mode: 0644]
kern/net/kernel.h [new file with mode: 0644]
kern/net/loopbackmedium.c [new file with mode: 0644]
kern/net/netdevmedium.c [new file with mode: 0644]
kern/net/netlog.c [new file with mode: 0644]
kern/net/nullmedium.c [new file with mode: 0644]
kern/net/pktmedium.c [new file with mode: 0644]
kern/net/plan9.c [new file with mode: 0644]
kern/net/ppp.c [new file with mode: 0644]
kern/net/ppp.h [new file with mode: 0644]
kern/net/pppmedium.c [new file with mode: 0644]
kern/net/ptclbsum.c [new file with mode: 0644]
kern/net/rudp.c [new file with mode: 0644]
kern/net/tcp.c [new file with mode: 0644]
kern/net/udp.c [new file with mode: 0644]

diff --git a/kern/net/arp.c b/kern/net/arp.c
new file mode 100644 (file)
index 0000000..11f4fb1
--- /dev/null
@@ -0,0 +1,681 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ *  address resolution tables
+ */
+
+enum
+{
+       NHASH           = (1<<6),       /* hash buckets per Arp; haship() reduces modulo this */
+       NCACHE          = 256,          /* entries in each Arp's cache[] */
+
+       /* Arpent state values; also used to index arpstate[] (0 is unused) */
+       AOK             = 1,
+       AWAIT           = 2,
+};
+
+/* printable names of the Arpent states, indexed by the state value */
+char *arpstate[] =
+{
+       [0]     = "UNUSED",
+       [AOK]   = "OK",
+       [AWAIT] = "WAIT",
+};
+
+/*
+ *  one per Fs
+ */
+struct Arp
+{
+       QLock;                          /* unnamed member: the Arp itself is passed to qlock()/qunlock() */
+       Fs      *f;                     /* owning Fs, set by arpinit() */
+       Arpent  *hash[NHASH];           /* bucket heads, chained through Arpent.hash */
+       Arpent  cache[NCACHE];          /* entry storage; newarp6() recycles the one with the smallest utime */
+       Arpent  *rxmt;                  /* re-transmit chain, linked through Arpent.nextrxt */
+       Proc    *rxmitp;        /* neib sol re-transmit proc */
+       Rendez  rxmtq;                  /* rxmitproc() sleeps here */
+       Block   *dropf, *dropl;         /* first/last packet queued for rxmitsols() to report as unreachable */
+};
+
+char *Ebadarp = "bad arp";
+
+/* hash bucket for an IPaddrlen-byte address: its last byte modulo NHASH */
+#define haship(s) ((s)[IPaddrlen-1]%NHASH)
+
+/*
+ * This is the definition of ReTransTimer, so it carries no `extern':
+ * an initialised extern declaration is still a definition, but
+ * compilers warn about the combination.
+ */
+int    ReTransTimer = RETRANS_TIMER;
+static void    rxmitproc(void *v);
+
+/*
+ *  allocate the Arp state for f and start its re-transmit kproc.
+ */
+void
+arpinit(Fs *f)
+{
+       Arp *arp;
+
+       arp = smalloc(sizeof(Arp));
+       arp->f = f;
+       arp->rxmt = nil;
+       arp->dropf = nil;
+       arp->dropl = nil;
+
+       /* publish the table before the kproc that works on it is started */
+       f->arp = arp;
+       kproc("rxmitproc", rxmitproc, arp, 0);
+}
+
+/*
+ *  create a new arp entry for an ip address.
+ *
+ *  The entry of arp->cache with the smallest utime is recycled:
+ *  packets it was holding are freed (old address v4) or moved to the
+ *  arp->dropf/dropl queue for rxmitsols() (old address v6), the entry
+ *  is moved to the hash chain of ip and its timers are reset.  When
+ *  addrxt is non-zero and ip is not multicast the entry is also put at
+ *  the tail of the re-transmit chain.  a->state is left for the caller.
+ *
+ *  No lock is taken here; the callers in this file (arpget, arpenter)
+ *  hold the arp qlock around the call.
+ */
+static Arpent*
+newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+{
+       uint t;
+       Block *next, *xp;
+       Arpent *a, *e, *f, **l;
+       Medium *m = ifc->m;
+       int empty;
+
+       /* find oldest entry */
+       e = &arp->cache[NCACHE];
+       a = arp->cache;
+       t = a->utime;
+       for(f = a; f < e; f++){
+               if(f->utime < t){
+                       t = f->utime;
+                       a = f;
+               }
+       }
+
+       /* dump waiting packets */
+       xp = a->hold;
+       a->hold = nil;
+
+       if(isv4(a->ip)){
+               while(xp){
+                       next = xp->list;
+                       freeblist(xp);
+                       xp = next;
+               }
+       }
+       else {  // queue icmp unreachable for rxmitproc later on, w/o arp lock
+               if(xp){
+                       if(arp->dropl == nil)
+                               arp->dropf = xp;
+                       else
+                               arp->dropl->list = xp;
+
+                       /* walk to the last packet so dropl stays the tail */
+                       for(next = xp->list; next; next = next->list)
+                               xp = next;
+                       arp->dropl = xp;
+                       wakeup(&arp->rxmtq);
+               }
+       }
+
+       /* take out of current chain */
+       l = &arp->hash[haship(a->ip)];
+       for(f = *l; f; f = f->hash){
+               if(f == a){
+                       *l = a->hash;
+                       break;
+               }
+               l = &f->hash;
+       }
+
+       /* insert into new chain */
+       l = &arp->hash[haship(ip)];
+       a->hash = *l;
+       *l = a;
+
+       memmove(a->ip, ip, sizeof(a->ip));
+       a->utime = NOW;
+       a->ctime = 0;
+       a->type = m;
+
+       a->rtime = NOW + ReTransTimer;
+       a->rxtsrem = MAX_MULTICAST_SOLICIT;
+       a->ifc = ifc;
+       a->ifcid = ifc->ifcid;
+
+       /*
+        * Always take the recycled entry out of the re-transmit chain.
+        * It may still be queued there from its previous use; clearing
+        * a->nextrxt while it is linked would cut off every entry behind
+        * it and leave a stale entry for rxmitsols() to solicit.
+        */
+       l = &arp->rxmt;
+       empty = (*l==nil);
+       for(f = *l; f; f = f->nextrxt){
+               if(f == a){
+                       *l = a->nextrxt;
+                       break;
+               }
+               l = &f->nextrxt;
+       }
+       a->nextrxt = nil;
+
+       /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
+       if(!ipismulticast(a->ip) && addrxt){
+               for(l = &arp->rxmt; *l; l = &(*l)->nextrxt)
+                       ;
+               *l = a;
+               /* the chain was empty on entry: rxmitproc may be asleep */
+               if(empty)
+                       wakeup(&arp->rxmtq);
+       }
+
+       return a;
+}
+
+/*
+ *  Return a to the unused state and unlink it from its hash chain
+ *  and from the re-transmit chain.  Packets still referenced by
+ *  a->hold are not freed here.
+ *
+ *  called with arp qlocked
+ */
+void
+cleanarpent(Arp *arp, Arpent *a)
+{
+       Arpent **link;
+
+       a->utime = 0;
+       a->ctime = 0;
+       a->type = 0;
+       a->state = 0;
+
+       /* unlink from the hash chain */
+       for(link = &arp->hash[haship(a->ip)]; *link != nil; link = &(*link)->hash){
+               if(*link == a){
+                       *link = a->hash;
+                       break;
+               }
+       }
+
+       /* unlink from the re-transmit chain */
+       for(link = &arp->rxmt; *link != nil; link = &(*link)->nextrxt){
+               if(*link == a){
+                       *link = a->nextrxt;
+                       break;
+               }
+       }
+
+       a->nextrxt = nil;
+       a->hash = nil;
+       a->hold = nil;
+       a->last = nil;
+       a->ifc = nil;
+}
+
+/*
+ *  Look up ip on ifc's medium; a V4 address is first widened with
+ *  v4tov6().
+ *
+ *  Resolved entry: its media address is copied to mac, arp is
+ *  unlocked and nil is returned.
+ *
+ *  Otherwise the entry (created on demand) is in state AWAIT: bp, when
+ *  not nil, is appended to the packets held by the entry, and the
+ *  entry is returned WITH arp STILL QLOCKED.  The caller releases the
+ *  lock through arprelease() or arpresolve().
+ */
+Arpent*
+arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac)
+{
+       Arpent *ent;
+       Medium *medium;
+       uchar widened[IPaddrlen];
+
+       medium = ifc->m;
+       if(version == V4){
+               v4tov6(widened, ip);
+               ip = widened;
+       }
+
+       qlock(arp);
+       for(ent = arp->hash[haship(ip)]; ent != nil; ent = ent->hash)
+               if(memcmp(ip, ent->ip, sizeof(ent->ip)) == 0 && medium == ent->type)
+                       break;
+
+       if(ent == nil){
+               ent = newarp6(arp, ip, ifc, (version != V4));
+               ent->state = AWAIT;
+       }
+       ent->utime = NOW;
+
+       if(ent->state != AWAIT){
+               memmove(mac, ent->mac, ent->type->maclen);
+
+               /* remove old entries */
+               if(NOW - ent->ctime > 15*60*1000)
+                       cleanarpent(arp, ent);
+
+               qunlock(arp);
+               return nil;
+       }
+
+       /* still resolving: park the packet at the tail of the hold list */
+       if(bp != nil){
+               if(ent->hold == nil)
+                       ent->hold = bp;
+               else
+                       ent->last->list = bp;
+               ent->last = bp;
+               bp->list = nil;
+       }
+       return ent;             /* return with arp qlocked */
+}
+
+/*
+ * Drop the arp lock that arpget() left held when it returned an
+ * entry.  The entry argument is not used.
+ *
+ * called with arp locked
+ */
+void
+arprelease(Arp *arp, Arpent *a)
+{
+       /* named because C before C23 requires parameter names in a definition */
+       (void)a;
+       qunlock(arp);
+}
+
+/*
+ * Record mac as the resolved media address of a, mark the entry AOK
+ * and hand back the packets that were waiting on it (the caller
+ * sends them).  A non-v4 entry is first taken off the re-transmit
+ * chain.
+ *
+ * called with arp locked; returns with arp unlocked
+ */
+Block*
+arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
+{
+       Block *held;
+       Arpent **link;
+
+       if(!isv4(a->ip)){
+               for(link = &arp->rxmt; *link != nil; link = &(*link)->nextrxt){
+                       if(*link == a){
+                               *link = a->nextrxt;
+                               break;
+                       }
+               }
+       }
+
+       memmove(a->mac, mac, type->maclen);
+       a->type = type;
+       a->state = AOK;
+       a->utime = NOW;
+
+       held = a->hold;
+       a->hold = nil;
+       qunlock(arp);
+
+       return held;
+}
+
+/*
+ * Record that ip (version V4 or V6) has media address mac.
+ *
+ * n is the length of mac; anything but 6 is silently ignored.  The
+ * interface and medium are taken from the route found for ip; with no
+ * route the call does nothing.
+ *
+ * If an AWAIT or AOK entry for ip on that medium exists, it is
+ * updated and the packets it was holding are written out through the
+ * medium's bwrite after the arp lock is dropped.  Otherwise a new AOK
+ * entry is created only when refresh is 0.
+ */
+void
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+{
+       Arp *arp;
+       Route *r;
+       Arpent *a, *f, **l;
+       Ipifc *ifc;
+       Medium *type;
+       Block *bp, *next;
+       uchar v6ip[IPaddrlen];
+
+       arp = fs->arp;
+
+       if(n != 6){
+//             print("arp: len = %d\n", n);
+               return;
+       }
+
+       /* find the route; from here on ip is in the IPaddrlen-byte form */
+       switch(version){
+       case V4:
+               r = v4lookup(fs, ip, nil);
+               v4tov6(v6ip, ip);
+               ip = v6ip;
+               break;
+       case V6:
+               r = v6lookup(fs, ip, nil);
+               break;
+       default:
+               panic("arpenter: version %d", version);
+               return; /* to suppress warnings */
+       }
+
+       if(r == nil){
+//             print("arp: no route for entry\n");
+               return;
+       }
+
+       ifc = r->ifc;
+       type = ifc->m;
+
+       qlock(arp);
+       for(a = arp->hash[haship(ip)]; a; a = a->hash){
+               if(a->type != type || (a->state != AWAIT && a->state != AOK))
+                       continue;
+
+               if(ipcmp(a->ip, ip) == 0){
+                       a->state = AOK;
+                       memmove(a->mac, mac, type->maclen);
+
+                       if(version == V6){
+                               /* take out of re-transmit chain */
+                               l = &arp->rxmt;
+                               for(f = *l; f; f = f->nextrxt){
+                                       if(f == a){
+                                               *l = a->nextrxt;
+                                               break;
+                                       }
+                                       l = &f->nextrxt;
+                               }
+                       }
+
+                       a->ifc = ifc;
+                       a->ifcid = ifc->ifcid;
+                       bp = a->hold;
+                       a->hold = nil;
+                       /* bwrite below is given the address at offset IPv4off for V4 */
+                       if(version == V4)
+                               ip += IPv4off;
+                       a->utime = NOW;
+                       a->ctime = a->utime;
+                       qunlock(arp);
+
+                       /* send the held packets, now without the arp lock */
+                       while(bp){
+                               next = bp->list;
+                               /* ifc was already dereferenced above (ifc->m), so this test always holds here */
+                               if(ifc != nil){
+                                       /* handler is installed before rlock(); on error it releases the read lock and re-raises */
+                                       if(waserror()){
+                                               runlock(ifc);
+                                               nexterror();
+                                       }
+                                       rlock(ifc);
+                                       if(ifc->m != nil)
+                                               ifc->m->bwrite(ifc, bp, version, ip);
+                                       else
+                                               freeb(bp);
+                                       runlock(ifc);
+                                       poperror();
+                               } else
+                                       freeb(bp);
+                               bp = next;
+                       }
+                       return;
+               }
+       }
+
+       /* no existing entry: add one unless this was only a refresh */
+       if(refresh == 0){
+               a = newarp6(arp, ip, ifc, 0);
+               a->state = AOK;
+               a->type = type;
+               a->ctime = NOW;
+               memmove(a->mac, mac, type->maclen);
+       }
+
+       qunlock(arp);
+}
+
+/*
+ * Handle a control message written to the arp file.  At most
+ * sizeof(buf)-1 bytes of s are looked at and one trailing newline is
+ * dropped.  Commands:
+ *
+ *     flush                   empty the whole table
+ *     add ip mac              add an entry; medium taken from the route to ip
+ *     add medium ip mac       add an entry on the named medium
+ *     del ip                  remove the entry for ip
+ *
+ * Returns the (possibly truncated) len; raises an error for an empty,
+ * unknown or malformed command.
+ */
+int
+arpwrite(Fs *fs, char *s, int len)
+{
+       int n;
+       Route *r;
+       Arp *arp;
+       Block *bp;
+       Arpent *a, *fl, **l;
+       Medium *m;
+       char *f[4], buf[256];
+       uchar ip[IPaddrlen], mac[MAClen];
+
+       arp = fs->arp;
+
+       if(len == 0)
+               error(Ebadarp);
+       if(len >= sizeof(buf))
+               len = sizeof(buf)-1;
+       strncpy(buf, s, len);
+       buf[len] = 0;
+       if(len > 0 && buf[len-1] == '\n')
+               buf[len-1] = 0;
+
+       n = getfields(buf, f, 4, 1, " ");
+       /* a blank line yields no fields and leaves f[0] unset */
+       if(n < 1)
+               error(Ebadarp);
+
+       if(strcmp(f[0], "flush") == 0){
+               qlock(arp);
+               for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
+                       memset(a->ip, 0, sizeof(a->ip));
+                       memset(a->mac, 0, sizeof(a->mac));
+                       a->hash = nil;
+                       a->state = 0;
+                       a->utime = 0;
+                       while(a->hold != nil){
+                               bp = a->hold->list;
+                               freeblist(a->hold);
+                               a->hold = bp;
+                       }
+               }
+               memset(arp->hash, 0, sizeof(arp->hash));
+               arp->rxmt = nil;
+               /* free the packets still queued for rxmitsols() instead of dropping the list */
+               while(arp->dropf != nil){
+                       bp = arp->dropf->list;
+                       freeblist(arp->dropf);
+                       arp->dropf = bp;
+               }
+               arp->dropl = nil;
+               qunlock(arp);
+       } else if(strcmp(f[0], "add") == 0){
+               switch(n){
+               default:
+                       error(Ebadarg);
+               case 3:
+                       parseip(ip, f[1]);
+                       if(isv4(ip))
+                               r = v4lookup(fs, ip+IPv4off, nil);
+                       else
+                               r = v6lookup(fs, ip, nil);
+                       if(r == nil)
+                               error("Destination unreachable");
+                       m = r->ifc->m;
+                       n = parsemac(mac, f[2], m->maclen);
+                       break;
+               case 4:
+                       m = ipfindmedium(f[1]);
+                       if(m == nil)
+                               error(Ebadarp);
+                       parseip(ip, f[2]);
+                       n = parsemac(mac, f[3], m->maclen);
+                       break;
+               }
+
+               if(m->ares == nil)
+                       error(Ebadarp);
+
+               /* ip is in the IPaddrlen-byte form here, hence V6 */
+               m->ares(fs, V6, ip, mac, n, 0);
+       } else if(strcmp(f[0], "del") == 0){
+               if(n != 2)
+                       error(Ebadarg);
+
+               parseip(ip, f[1]);
+               qlock(arp);
+
+               /* find the entry and unlink it from its hash chain */
+               l = &arp->hash[haship(ip)];
+               for(a = *l; a; a = a->hash){
+                       if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
+                               *l = a->hash;
+                               break;
+                       }
+                       l = &a->hash;
+               }
+
+               if(a){
+                       /* take out of re-transmit chain */
+                       l = &arp->rxmt;
+                       for(fl = *l; fl; fl = fl->nextrxt){
+                               if(fl == a){
+                                       *l = a->nextrxt;
+                                       break;
+                               }
+                               l = &fl->nextrxt;
+                       }
+
+                       /* free held packets, as flush does, rather than leaking them */
+                       while(a->hold != nil){
+                               bp = a->hold->list;
+                               freeblist(a->hold);
+                               a->hold = bp;
+                       }
+
+                       /* mark unused so arpread() skips it and newarp6() reuses it first */
+                       a->state = 0;
+                       a->utime = 0;
+                       a->nextrxt = nil;
+                       a->hash = nil;
+                       a->last = nil;
+                       a->ifc = nil;
+                       memset(a->ip, 0, sizeof(a->ip));
+                       memset(a->mac, 0, sizeof(a->mac));
+               }
+               qunlock(arp);
+       } else
+               error(Ebadarp);
+
+       return len;
+}
+
+enum
+{
+       /* bytes per line of arpread() output; the widths in aformat add up to 6+1+8+1+40+1+32+1 */
+       Alinelen=       90,
+};
+
+/* one line per entry: medium name, state name, ip address, mac in hex */
+char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
+
+/*
+ * Format the n bytes at mac into p, one "%2.2ux" conversion per byte.
+ * No bound is applied: the caller supplies a buffer big enough for
+ * all n conversions.  Nothing is written when n <= 0.
+ */
+static void
+convmac(char *p, uchar *mac, int n)
+{
+       int i;
+
+       for(i = 0; i < n; i++)
+               p += sprint(p, "%2.2ux", mac[i]);
+}
+
+/*
+ * Format the in-use arp entries into p, one Alinelen-byte line each.
+ *
+ * offset and len are byte counts and are converted to whole lines;
+ * an offset that is not a multiple of Alinelen returns 0.  Returns
+ * the number of bytes written.
+ *
+ * The whole scan runs under the arp lock.  The state test used to be
+ * made before the lock was taken, so an entry could be cleaned (type
+ * set to 0) or recycled between the test and the a->type dereference.
+ */
+int
+arpread(Arp *arp, char *p, ulong offset, int len)
+{
+       Arpent *a;
+       int n;
+       char mac[2*MAClen+1];
+
+       if(offset % Alinelen)
+               return 0;
+
+       offset = offset/Alinelen;
+       len = len/Alinelen;
+
+       n = 0;
+       qlock(arp);
+       for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
+               if(a->state == 0)
+                       continue;
+               /* skip the in-use entries that precede the requested offset */
+               if(offset > 0){
+                       offset--;
+                       continue;
+               }
+               len--;
+               convmac(mac, a->mac, a->type->maclen);
+               n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+       }
+       qunlock(arp);
+
+       return n;
+}
+
+/*
+ * One pass of the neighbour-solicitation re-transmit work for arp.
+ *
+ * Entries at the head of the re-transmit chain that have no
+ * re-transmissions left, whose interface cannot be read-locked, or
+ * whose interface id has changed are cleaned and their held packets
+ * moved to the drop queue.  For the first usable entry a solicitation
+ * is sent with icmpns() (arp unlocked meanwhile) and the entry is
+ * moved to the tail of the chain.  Finally every packet on the drop
+ * queue is passed to icmphostunr().
+ *
+ * Returns the time until the head of the chain is due (rtime - NOW),
+ * or 0 when the chain was empty on entry or is empty after the
+ * re-queue.
+ *
+ * NOTE(review): at dodrops ifc is nil when the scan was skipped, and
+ * otherwise is the interface of the last entry looked at, not of the
+ * dropped packets -- confirm icmphostunr() copes with both.
+ */
+extern int
+rxmitsols(Arp *arp)
+{
+       uint sflag;
+       Block *next, *xp;
+       Arpent *a, *b, **l;
+       Fs *f;
+       uchar ipsrc[IPaddrlen];
+       Ipifc *ifc = nil;
+       long nrxt;
+
+       qlock(arp);
+       f = arp->f;
+
+       a = arp->rxmt;
+       if(a==nil){
+               nrxt = 0;
+               goto dodrops;           //return nrxt;
+       }
+       nrxt = a->rtime - NOW;
+       if(nrxt > 3*ReTransTimer/4)
+               goto dodrops;           //return nrxt;
+
+       for(; a; a = b){
+               /* cleanarpent() below clears a->nextrxt, so fetch the successor first */
+               b = a->nextrxt;
+               ifc = a->ifc;
+               assert(ifc != nil);
+               if(a->rxtsrem > 0 && canrlock(ifc)){
+                       if(a->ifcid == ifc->ifcid)
+                               break;          /* usable; ifc stays read-locked */
+                       /* interface was re-bound: give back the lock just taken */
+                       runlock(ifc);
+               }
+
+               xp = a->hold;
+               a->hold = nil;
+
+               if(xp){
+                       if(arp->dropl == nil)
+                               arp->dropf = xp;
+                       else
+                               arp->dropl->list = xp;
+
+                       /* keep dropl at the tail, as newarp6() does, so later appends lose nothing */
+                       for(next = xp->list; next; next = next->list)
+                               xp = next;
+                       arp->dropl = xp;
+               }
+
+               cleanarpent(arp, a);
+       }
+       if(a == nil)
+               goto dodrops;
+
+
+       qunlock(arp);   /* for icmpns */
+       if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
+               icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+
+       runlock(ifc);
+       qlock(arp);
+
+       /* put to the end of re-transmit chain */
+       l = &arp->rxmt;
+       for(b = *l; b; b = b->nextrxt){
+               if(b == a){
+                       *l = a->nextrxt;
+                       break;
+               }
+               l = &b->nextrxt;
+       }
+       for(b = *l; b; b = b->nextrxt){
+               l = &b->nextrxt;
+       }
+       *l = a;
+       a->rxtsrem--;
+       a->nextrxt = nil;
+       a->rtime = NOW + ReTransTimer;
+
+       a = arp->rxmt;
+       if(a==nil)
+               nrxt = 0;
+       else
+               nrxt = a->rtime - NOW;
+
+dodrops:
+       /* detach the drop queue under the lock, report outside it */
+       xp = arp->dropf;
+       arp->dropf = nil;
+       arp->dropl = nil;
+       qunlock(arp);
+
+       for(; xp; xp = next){
+               next = xp->list;
+               icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+       }
+
+       return nrxt;
+
+}
+
+/*
+ * sleep() condition for rxmitproc: non-zero when there is something
+ * on the re-transmit chain or on the drop queue.
+ */
+static int
+rxready(void *v)
+{
+       Arp *arp = v;
+
+       return arp->rxmt != nil || arp->dropf != nil;
+}
+
+/*
+ * Body of the "rxmitproc" kproc started by arpinit(): calls
+ * rxmitsols() forever, sleeping between passes according to its
+ * return value.  On error it clears arp->rxmitp and exits.
+ */
+static void
+rxmitproc(void *v)
+{
+       Arp *arp;
+       long delay;
+
+       arp = v;
+       arp->rxmitp = up;
+       if(waserror()){
+               arp->rxmitp = 0;
+               pexit("hangup", 1);
+       }
+       for(;;){
+               delay = rxmitsols(arp);
+               if(delay == 0){
+                       /* wait until rxready() reports work */
+                       sleep(&arp->rxmtq, rxready, v);
+                       continue;
+               }
+               /* delays of at most ReTransTimer/4 are not slept: go round again at once */
+               if(delay > ReTransTimer/4)
+                       tsleep(&arp->rxmtq, return0, 0, delay);
+       }
+}
+
diff --git a/kern/net/bootp.c b/kern/net/bootp.c
new file mode 100644 (file)
index 0000000..b7d3fcd
--- /dev/null
@@ -0,0 +1,231 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+
+/*
+ * Addresses taken from the accepted bootp reply and reported by
+ * rbootpread() under these names.  0 means "not set": rcvbootp()
+ * only fills in a value that is still 0.
+ */
+static ulong   fsip;
+static ulong   auip;
+static ulong   gwip;
+static ulong   ipmask;
+static ulong   ipaddr;
+
+/* values of Bootp.op; rbootp() sends Bootrequest */
+enum
+{
+       Bootrequest = 1,
+       Bootreply   = 2,
+};
+
+/*
+ * A bootp datagram as exchanged over the udp data file: the address
+ * header udp adds (rbootp() writes "headers" and "oldheaders" to the
+ * ctl file) followed by the bootp packet itself.
+ */
+typedef struct Bootp
+{
+       /* udp.c oldheader */
+       uchar   raddr[IPaddrlen];
+       uchar   laddr[IPaddrlen];
+       uchar   rport[2];
+       uchar   lport[2];
+       /* bootp itself */
+       uchar   op;             /* opcode */
+       uchar   htype;          /* hardware type */
+       uchar   hlen;           /* hardware address len */
+       uchar   hops;           /* hops */
+       uchar   xid[4];         /* a random number */
+       uchar   secs[2];        /* elapsed since client started booting */
+       uchar   pad[2];
+       uchar   ciaddr[4];      /* client IP address (client tells server) */
+       uchar   yiaddr[4];      /* client IP address (server tells client) */
+       uchar   siaddr[4];      /* server IP address */
+       uchar   giaddr[4];      /* gateway IP address */
+       uchar   chaddr[16];     /* client hardware address */
+       uchar   sname[64];      /* server host name (optional) */
+       uchar   file[128];      /* boot file name */
+       uchar   vend[128];      /* vendor-specific goo */
+} Bootp;
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d"
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+ */
+
+static Bootp   req;            /* request built by rbootp(); rcvbootp() matches replies on req.chaddr */
+static Proc*   rcvprocp;       /* the rcvbootp kproc while it runs, else nil */
+static int     recv;           /* set to 1 by rcvbootp() just before it exits */
+static int     done;           /* set to 1 by rbootp() to end rcvbootp()'s loop */
+static Rendez  bootpr;         /* rbootp() sleeps here between requests; rcvbootp() wakes it */
+static char    rcvbuf[512+2*IPaddrlen+2*2];    /* buffer rcvbootp() reads each datagram into */
+
+/*
+ * Receiver kproc started by rbootp(); a carries the data file
+ * descriptor.  Reads datagrams until done is set, a read fails, or a
+ * reply is accepted.  A reply is accepted when it is for our hardware
+ * address (ethernet, 6 bytes), its vendor area starts with "p9  " and
+ * what follows splits into four space-separated fields: ip mask, fs,
+ * au and gateway address, in that order.  Only values that are still
+ * 0 are filled in.  On exit recv is set and rbootp() is woken.
+ */
+static void
+rcvbootp(void *a)
+{
+       int n, fd;
+       Bootp *rp;
+       char *field[4];
+       uchar ip[IPaddrlen];
+
+       if(waserror())
+               pexit("", 0);
+       rcvprocp = up;  /* store for postnote below */
+       fd = (int)a;
+       while(done == 0) {
+               /*
+                * Read one byte less than the buffer holds and terminate
+                * the data: getfields() scans for a NUL and would run off
+                * the end of rcvbuf on a datagram that fills it.
+                */
+               n = kread(fd, rcvbuf, sizeof(rcvbuf)-1);
+               if(n <= 0)
+                       break;
+               rcvbuf[n] = 0;
+               rp = (Bootp*)rcvbuf;
+               /* currently ignore udp's header */
+               /* check the "p9  " tag first so foreign vendor areas are not split */
+               if(memcmp(req.chaddr, rp->chaddr, 6) == 0
+               && rp->htype == 1 && rp->hlen == 6
+               && strncmp((char*)rp->vend, "p9  ", 4) == 0
+               && getfields((char*)rp->vend+4, field, 4, 1, " ") == 4){
+                       if(ipaddr == 0)
+                               ipaddr = nhgetl(rp->yiaddr);
+                       if(ipmask == 0)
+                               ipmask = parseip(ip, field[0]);
+                       if(fsip == 0)
+                               fsip = parseip(ip, field[1]);
+                       if(auip == 0)
+                               auip = parseip(ip, field[2]);
+                       if(gwip == 0)
+                               gwip = parseip(ip, field[3]);
+                       break;
+               }
+       }
+       poperror();
+       rcvprocp = nil;
+
+       recv = 1;
+       wakeup(&bootpr);
+       pexit("", 0);
+}
+
+/*
+ * Configure ifc with bootp.
+ *
+ * Adds a temporary 0.0.0.0 address, announces udp!*!68, starts the
+ * rcvbootp kproc and broadcasts a request once a second until a reply
+ * has been received or more than 10 tries have been made.  The
+ * temporary address is then removed, the learned address and mask
+ * are added to ifc and, if a gateway was learned, a default route is
+ * added.
+ *
+ * Returns nil, or a message when the udp set-up fails.  A timeout is
+ * only printed, not returned; the address and mask are added to ifc
+ * with whatever values they then hold.
+ */
+static char*
+rbootp(Ipifc *ifc)
+{
+       int cfd, dfd, tries, n;
+       char ia[5+3*24], im[16], *av[3];
+       uchar nipaddr[4], ngwip[4], nipmask[4];
+       char dir[Maxpath];
+
+       /* give av[0] a defined value instead of passing an indeterminate pointer */
+       av[0] = "add";
+       av[1] = "0.0.0.0";
+       av[2] = "0.0.0.0";
+       ipifcadd(ifc, av, 3, 0, nil);
+
+       cfd = kannounce("udp!*!68", dir);
+       if(cfd < 0)
+               return "bootp announce failed";
+       strcat(dir, "/data");
+       if(kwrite(cfd, "headers", 7) < 0){
+               kclose(cfd);
+               return "bootp ctl headers failed";
+       }
+       kwrite(cfd, "oldheaders", 10);
+       dfd = kopen(dir, ORDWR);
+       if(dfd < 0){
+               kclose(cfd);
+               return "bootp open data failed";
+       }
+       kclose(cfd);
+
+
+       /* create request */
+       memset(&req, 0, sizeof(req));
+       ipmove(req.raddr, IPv4bcast);
+       hnputs(req.rport, 67);
+       req.op = Bootrequest;
+       req.htype = 1;                  /* ethernet (all we know) */
+       req.hlen = 6;                   /* ethernet (all we know) */
+
+       /* Hardware MAC address */
+       memmove(req.chaddr, ifc->mac, 6);
+       /* Fill in the local IP address if we know it */
+       ipv4local(ifc, req.ciaddr);
+       memset(req.file, 0, sizeof(req.file));
+       strcpy((char*)req.vend, "p9  ");
+
+       done = 0;
+       recv = 0;
+
+       kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
+
+       /*
+        * broadcast bootp's till we get a reply,
+        * or fixed number of tries
+        */
+       tries = 0;
+       while(recv == 0) {
+               if(kwrite(dfd, &req, sizeof(req)) < 0)
+                       print("bootp: write: %s\n", commonerror());
+
+               tsleep(&bootpr, return0, 0, 1000);
+               if(++tries > 10) {
+                       print("bootp: timed out\n");
+                       break;
+               }
+       }
+       kclose(dfd);
+       done = 1;
+       if(rcvprocp != nil){
+               postnote(rcvprocp, 1, "timeout", 0);
+               rcvprocp = nil;
+       }
+
+       av[0] = "remove";
+       av[1] = "0.0.0.0";
+       av[2] = "0.0.0.0";
+       ipifcrem(ifc, av, 3);
+
+       /* bounded formatting, like the route string below */
+       hnputl(nipaddr, ipaddr);
+       snprint(ia, sizeof(ia), "%V", nipaddr);
+       hnputl(nipmask, ipmask);
+       snprint(im, sizeof(im), "%V", nipmask);
+       av[0] = "add";
+       av[1] = ia;
+       av[2] = im;
+       ipifcadd(ifc, av, 3, 0, nil);
+
+       if(gwip != 0) {
+               hnputl(ngwip, gwip);
+               n = snprint(ia, sizeof(ia), "add 0.0.0.0 0.0.0.0 %V", ngwip);
+               routewrite(ifc->conv->p->f, nil, ia, n);
+       }
+       return nil;
+}
+
+/*
+ * Read handler for the bootp results: formats the five learned
+ * addresses as "name address\n" lines into a scratch buffer and
+ * returns the slice selected by offset/len through readstr().
+ */
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+       static struct {
+               char    *fmt;
+               ulong   *val;
+       } line[] = {
+               { "fsip %15V\n",        &fsip },
+               { "auip %15V\n",        &auip },
+               { "gwip %15V\n",        &gwip },
+               { "ipmask %15V\n",      &ipmask },
+               { "ipaddr %15V\n",      &ipaddr },
+       };
+       int i, n;
+       char *buf;
+       uchar a[4];
+
+       buf = smalloc(READSTR);
+       if(waserror()){
+               free(buf);
+               nexterror();
+       }
+
+       n = 0;
+       for(i = 0; i < sizeof(line)/sizeof(line[0]); i++){
+               hnputl(a, *line[i].val);
+               n += snprint(buf + n, READSTR-n, line[i].fmt, a);
+       }
+
+       len = readstr(offset, bp, len, buf);
+       poperror();
+       free(buf);
+       return len;
+}
+
+/* the two entry points above, exported as function pointers */
+char*  (*bootp)(Ipifc*) = rbootp;
+int    (*bootpread)(char*, ulong, int) = rbootpread;
diff --git a/kern/net/compress.c b/kern/net/compress.c
new file mode 100644 (file)
index 0000000..0a7bd7a
--- /dev/null
@@ -0,0 +1,520 @@
+#include       "u.h"
+#include       "../port/lib.h"
+#include       "mem.h"
+#include       "dat.h"
+#include       "fns.h"
+#include       "../port/error.h"
+
+#include       "ip.h"
+#include       "ppp.h"
+
+typedef struct Iphdr   Iphdr;  /* IP header overlay */
+typedef struct Tcphdr  Tcphdr; /* TCP header overlay */
+typedef struct Ilhdr   Ilhdr;  /* IL header overlay; not referenced by the functions in this file */
+typedef struct Hdr     Hdr;    /* saved header for one connection slot */
+typedef struct Tcpc    Tcpc;   /* complete compressor state */
+
+struct Iphdr   /* overlay for an IP header; cast directly onto packet bytes */
+{
+       uchar   vihl;           /* Version and header length */
+       uchar   tos;            /* Type of service */
+       uchar   length[2];      /* packet length */
+       uchar   id[2];          /* Identification */
+       uchar   frag[2];        /* Fragment information */
+       uchar   ttl;            /* Time to live */
+       uchar   proto;          /* Protocol */
+       uchar   cksum[2];       /* Header checksum */
+       ulong   src;            /* Ip source (byte ordering unimportant) */
+       ulong   dst;            /* Ip destination (byte ordering unimportant); NOTE(review): layout and the sizeof(Iphdr) tests in tcpcompress assume ulong is 4 bytes - confirm on 64-bit targets */
+};
+
+struct Tcphdr  /* overlay for a TCP header without options */
+{
+       ulong   ports;          /* defined as a ulong to make comparisons easier; NOTE(review): assumes ulong is 4 bytes (source and destination port together) - confirm */
+       uchar   seq[4];
+       uchar   ack[4];
+       uchar   flag[2];        /* flag[0] high nibble is the header length in words, flag[1] holds the URG..FIN bits */
+       uchar   win[2];
+       uchar   cksum[2];
+       uchar   urg[2];
+};
+
+struct Ilhdr   /* IL header overlay; declared here but unused by the code in this file */
+{
+       uchar   sum[2]; /* Checksum including header */
+       uchar   len[2]; /* Packet length */
+       uchar   type;           /* Packet type */
+       uchar   spec;           /* Special */
+       uchar   src[2]; /* Src port */
+       uchar   dst[2]; /* Dst port */
+       uchar   id[4];  /* Sequence id */
+       uchar   ack[4]; /* Acked sequence */
+};
+
+enum   /* TCP flag bits (tested against Tcphdr.flag[1]) and IP constants */
+{
+       URG             = 0x20,         /* Data marked urgent */
+       ACK             = 0x10,         /* Acknowledge is valid */
+       PSH             = 0x08,         /* Whole data pipe is pushed */
+       RST             = 0x04,         /* Reset connection */
+       SYN             = 0x02,         /* Pkt. is synchronise */
+       FIN             = 0x01,         /* Start close down */
+
+       IP_DF           = 0x4000,       /* Don't fragment */
+
+       IP_TCPPROTO     = 6,            /* IP protocol number handled by compress() */
+       IP_ILPROTO      = 40,
+       IL_IPHDR        = 20,
+};
+
+struct Hdr     /* last header seen on one connection slot */
+{
+       uchar   buf[128];       /* copy of the IP+TCP header bytes */
+       Iphdr   *ip;            /* points at buf (set by compress_init) */
+       Tcphdr  *tcp;           /* points into buf at the TCP header (buf + IP header length) */
+       int     len;            /* number of valid header bytes in buf */
+};
+
+struct Tcpc    /* compressor state for one link, both directions */
+{
+       uchar   lastrecv;       /* index into r[] of the state used by the last received packet */
+       uchar   lastxmit;       /* index into t[] of the state used by the last transmitted packet */
+       uchar   basexmit;       /* where tcpcompress starts its slot search; stepped back when a slot is reused */
+       uchar   err;            /* set on a bad packet; cleared when a packet carries an explicit state index */
+       uchar   compressid;     /* from compress_negotiate; when 0 tcpcompress always sends the connection id */
+       Hdr     t[MAX_STATES];  /* transmit-side states */
+       Hdr     r[MAX_STATES];  /* receive-side states */
+};
+
+enum
+{      /* flag bits for what changed in a packet */
+       NEW_U=(1<<0),   /* tcp only */
+       NEW_W=(1<<1),   /* tcp only */
+       NEW_A=(1<<2),   /* il tcp */
+       NEW_S=(1<<3),   /* tcp only */
+       NEW_P=(1<<4),   /* tcp only */
+       NEW_I=(1<<5),   /* il tcp */
+       NEW_C=(1<<6),   /* il tcp */
+       NEW_T=(1<<7),   /* il only */
+       TCP_PUSH_BIT    = 0x10, /* same bit value as NEW_P; the name used by the tcp code in this file */
+};
+
+/* reserved, special-case values of above for tcp */
+#define SPECIAL_I (NEW_S|NEW_W|NEW_U)          /* echoed interactive traffic */
+#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U)    /* unidirectional data */
+#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)        /* bits tcpuncompress switches on to recognise the two cases above */
+
+/*
+ * Store the delta n at p in the variable-length form used by the
+ * compressed header: values 1..255 take a single byte; everything
+ * else (including 0) is written as a zero byte followed by the low
+ * 16 bits of n, high byte first.  Returns the number of bytes
+ * written (1 or 3).
+ */
+int
+encode(void *p, ulong n)
+{
+       uchar *out = p;
+
+       if(n != 0 && n < 256){
+               out[0] = n;
+               return 1;
+       }
+       out[0] = 0;
+       out[1] = n >> 8;
+       out[2] = n;
+       return 3;
+}
+
+#define DECODEL(f) { /* add the encoded delta at cp to the 4-byte network-order field f and advance cp */ \
+       if (*cp == 0) {\
+               hnputl(f, nhgetl(f) + ((cp[1] << 8) | cp[2])); \
+               cp += 3; \
+       } else { \
+               hnputl(f, nhgetl(f) + (ulong)*cp++); \
+       } \
+} /* expands to a bare block: tcpuncompress uses it without ';' and directly before 'else' */
+#define DECODES(f) { /* same as DECODEL for a 2-byte network-order field */ \
+       if (*cp == 0) {\
+               hnputs(f, nhgets(f) + ((cp[1] << 8) | cp[2])); \
+               cp += 3; \
+       } else { \
+               hnputs(f, nhgets(f) + (ulong)*cp++); \
+       } \
+}
+
+/*
+ * Transmit side of Van Jacobson TCP/IP header compression.
+ *
+ * b->rp is treated as an IP header followed by a TCP header.  The
+ * packet is matched against the saved transmit states in comp->t.
+ * When only the expected fields changed, the header is overwritten
+ * in place by: change mask, optional connection id, TCP checksum and
+ * the encoded deltas; b->rp is advanced to the start of that shorter
+ * header.
+ *
+ * Returns
+ *     Pip      connection-control segment (SYN/FIN/RST set, or ACK
+ *              clear); packet and state are untouched
+ *     Pvjctcp  header was compressed
+ *     Pvjutcp  packet goes out uncompressed; the slot's saved header
+ *              is refreshed and the packet's IP protocol byte is
+ *              replaced by the slot index so the receiver knows which
+ *              state to load (tcpuncompress restores the byte)
+ */
+ushort
+tcpcompress(Tcpc *comp, Block *b, Fs *)
+{
+       Iphdr   *ip;            /* current packet */
+       Tcphdr  *tcp;           /* current pkt */
+       ulong   iplen, tcplen, hlen;    /* header length in bytes */
+       ulong   deltaS, deltaA; /* general purpose temporaries */
+       ulong   changes;        /* change mask */
+       uchar   new_seq[16];    /* changes from last to current */
+       uchar   *cp;
+       Hdr     *h;             /* last packet */
+       int     i, j;
+
+       /*
+        * Bail if this is not a compressible TCP/IP packet
+        */
+       ip = (Iphdr*)b->rp;
+       iplen = (ip->vihl & 0xf) << 2;
+       tcp = (Tcphdr*)(b->rp + iplen);
+       tcplen = (tcp->flag[0] & 0xf0) >> 2;
+       hlen = iplen + tcplen;
+       if((tcp->flag[1] & (SYN|FIN|RST|ACK)) != ACK)
+               return Pip;     /* connection control */
+
+       /*
+        * Packet is compressible, look for a connection.
+        * Try the slot used last time first, then scan all slots.
+        */
+       changes = 0;
+       cp = new_seq;
+       j = comp->lastxmit;
+       h = &comp->t[j];
+       if(ip->src != h->ip->src || ip->dst != h->ip->dst
+       || tcp->ports != h->tcp->ports) {
+               for(i = 0; i < MAX_STATES; ++i) {
+                       j = (comp->basexmit + i) % MAX_STATES;
+                       h = &comp->t[j];
+                       if(ip->src == h->ip->src && ip->dst == h->ip->dst
+                       && tcp->ports == h->tcp->ports)
+                               goto found;
+               }
+
+               /* no connection, reuse the oldest */
+               if(i == MAX_STATES) {
+                       j = comp->basexmit;
+                       j = (j + MAX_STATES - 1) % MAX_STATES;
+                       comp->basexmit = j;
+                       h = &comp->t[j];
+                       goto raise;
+               }
+       }
+found:
+
+       /*
+        * Make sure that only what we expect to change changed.
+        */
+       if(ip->vihl  != h->ip->vihl || ip->tos   != h->ip->tos ||
+          ip->ttl   != h->ip->ttl  || ip->proto != h->ip->proto)
+               goto raise;     /* headers changed */
+       if(iplen != sizeof(Iphdr) && memcmp(ip+1, h->ip+1, iplen - sizeof(Iphdr)))
+               goto raise;     /* ip options changed */
+       if(tcplen != sizeof(Tcphdr) && memcmp(tcp+1, h->tcp+1, tcplen - sizeof(Tcphdr)))
+               goto raise;     /* tcp options changed */
+
+       /*
+        * Encode the fields that differ from the saved header.  The
+        * order (urg, win, ack, seq) is the order tcpuncompress reads.
+        */
+       if(tcp->flag[1] & URG) {
+               cp += encode(cp, nhgets(tcp->urg));
+               changes |= NEW_U;
+       } else if(memcmp(tcp->urg, h->tcp->urg, sizeof(tcp->urg)) != 0)
+               goto raise;
+       if(deltaS = nhgets(tcp->win) - nhgets(h->tcp->win)) {
+               cp += encode(cp, deltaS);
+               changes |= NEW_W;
+       }
+       if(deltaA = nhgetl(tcp->ack) - nhgetl(h->tcp->ack)) {
+               if(deltaA > 0xffff)
+                       goto raise;     /* too large (or negative) to encode */
+               cp += encode(cp, deltaA);
+               changes |= NEW_A;
+       }
+       if(deltaS = nhgetl(tcp->seq) - nhgetl(h->tcp->seq)) {
+               if (deltaS > 0xffff)
+                       goto raise;     /* too large (or negative) to encode */
+               cp += encode(cp, deltaS);
+               changes |= NEW_S;
+       }
+
+       /*
+        * Look for the special-case encodings.
+        */
+       switch(changes) {
+       case 0:
+               /*
+                * Nothing changed. If this packet contains data and the last
+                * one didn't, this is probably a data packet following an
+                * ack (normal on an interactive connection) and we send it
+                * compressed. Otherwise it's probably a retransmit,
+                * retransmitted ack or window probe.  Send it uncompressed
+                * in case the other side missed the compressed version.
+                */
+               if(nhgets(ip->length) == nhgets(h->ip->length) ||
+                  nhgets(h->ip->length) != hlen)
+                       goto raise;
+               break;
+       case SPECIAL_I:
+       case SPECIAL_D:
+               /*
+                * Actual changes match one of our special case encodings --
+                * send packet uncompressed.
+                */
+               goto raise;
+       case NEW_S | NEW_A:
+               if (deltaS == deltaA &&
+                       deltaS == nhgets(h->ip->length) - hlen) {
+                       /* special case for echoed terminal traffic */
+                       changes = SPECIAL_I;
+                       cp = new_seq;
+               }
+               break;
+       case NEW_S:
+               if (deltaS == nhgets(h->ip->length) - hlen) {
+                       /* special case for data xfer */
+                       changes = SPECIAL_D;
+                       cp = new_seq;
+               }
+               break;
+       }
+       /* an IP id that advanced by exactly one is implied, not sent */
+       deltaS = nhgets(ip->id) - nhgets(h->ip->id);
+       if(deltaS != 1) {
+               cp += encode(cp, deltaS);
+               changes |= NEW_I;
+       }
+       if (tcp->flag[1] & PSH)
+               changes |= TCP_PUSH_BIT;
+       /*
+        * Grab the cksum before we overwrite it below. Then update our
+        * state with this packet's header.
+        */
+       deltaA = nhgets(tcp->cksum);
+       memmove(h->buf, b->rp, hlen);
+       h->len = hlen;
+       h->tcp = (Tcphdr*)(h->buf + iplen);
+
+       /*
+        * We want to use the original packet as our compressed packet. (cp -
+        * new_seq) is the number of bytes we need for compressed sequence
+        * numbers. In addition we need one byte for the change mask, one
+        * for the connection id and two for the tcp checksum. So, (cp -
+        * new_seq) + 4 bytes of header are needed. hlen is how many bytes
+        * of the original packet to toss so subtract the two to get the new
+        * packet size. The temporaries are gross -egs.
+        */
+       deltaS = cp - new_seq;
+       cp = b->rp;
+       if(comp->lastxmit != j || comp->compressid == 0) {
+               /* different connection (or ids not negotiated away): send the id */
+               comp->lastxmit = j;
+               hlen -= deltaS + 4;
+               cp += hlen;
+               *cp++ = (changes | NEW_C);
+               *cp++ = j;
+       } else {
+               hlen -= deltaS + 3;
+               cp += hlen;
+               *cp++ = changes;
+       }
+       b->rp += hlen;
+       hnputs(cp, deltaA);
+       cp += 2;
+       memmove(cp, new_seq, deltaS);
+       return Pvjctcp;
+
+raise:
+       /*
+        * Update connection state & send uncompressed packet.
+        * The state index travels in the protocol byte of the packet
+        * itself.  It must be stored after the header has been saved, so
+        * the saved copy keeps the real protocol number and still matches
+        * the next packet of this connection.
+        */
+       memmove(h->buf, b->rp, hlen);
+       h->tcp = (Tcphdr*)(h->buf + iplen);
+       h->len = hlen;
+       ip->proto = j;
+       comp->lastxmit = j;
+       return Pvjutcp;
+}
+
+Block*
+tcpuncompress(Tcpc *comp, Block *b, ushort type, Fs *f)
+{      /*
+        * Receive side of the header compression.  For type Pvjutcp the
+        * packet carries a full header whose protocol byte holds a state
+        * index: the header is saved in comp->r[index] and the packet is
+        * returned with the protocol byte restored.  For any other type
+        * the packet is taken as a compressed header, which is expanded
+        * from the saved state and prepended to the data.  Returns the
+        * (possibly reallocated) block, or nil after freeing b when the
+        * packet has to be dropped.
+        * NOTE(review): no check is visible here that the block is long
+        * enough for the bytes read through cp - confirm callers ensure it.
+        */
+       uchar   *cp, changes;
+       int     i;
+       int     iplen, len;
+       Iphdr   *ip;
+       Tcphdr  *tcp;
+       Hdr     *h;
+
+       if(type == Pvjutcp) {
+               /*
+                *  Locate the saved state for this connection. If the state
+                *  index is legal, clear the 'discard' flag.
+                */
+               ip = (Iphdr*)b->rp;
+               if(ip->proto >= MAX_STATES)
+                       goto raise;
+               iplen = (ip->vihl & 0xf) << 2;
+               tcp = (Tcphdr*)(b->rp + iplen);
+               comp->lastrecv = ip->proto;
+               len = iplen + ((tcp->flag[0] & 0xf0) >> 2);     /* IP header plus TCP header, options included */
+               comp->err = 0;
+netlog(f, Logcompress, "uncompressed %d\n", comp->lastrecv);
+               /*
+                * Restore the IP protocol field then save a copy of this
+                * packet header. The checksum is zeroed in the copy so we
+                * don't have to zero it each time we process a compressed
+                * packet.
+                */
+               ip->proto = IP_TCPPROTO;
+               h = &comp->r[comp->lastrecv];
+               memmove(h->buf, b->rp, len);
+               h->tcp = (Tcphdr*)(h->buf + iplen);
+               h->len = len;
+               h->ip->cksum[0] = h->ip->cksum[1] = 0;
+               return b;
+       }
+
+       cp = b->rp;
+       changes = *cp++;        /* first byte of a compressed packet is the change mask */
+       if(changes & NEW_C) {
+               /*
+                * Make sure the state index is in range, then grab the
+                * state. If we have a good state index, clear the 'discard'
+                * flag.
+                */
+               if(*cp >= MAX_STATES)
+                       goto raise;
+               comp->err = 0;
+               comp->lastrecv = *cp++;
+netlog(f, Logcompress, "newc %d\n", comp->lastrecv);
+       } else {
+               /*
+                * This packet has no state index. If we've had a
+                * line error since the last time we got an explicit state
+                * index, we have to toss the packet.
+                */
+               if(comp->err != 0){
+                       freeblist(b);
+                       return nil;
+               }
+netlog(f, Logcompress, "oldc %d\n", comp->lastrecv);
+       }
+
+       /*
+        * Find the state then fill in the TCP checksum and PUSH bit.
+        */
+       h = &comp->r[comp->lastrecv];
+       ip = h->ip;     /* from here on ip and tcp point into the saved state, not the packet */
+       tcp = h->tcp;
+       len = h->len;
+       memmove(tcp->cksum, cp, sizeof tcp->cksum);
+       cp += 2;
+       if(changes & TCP_PUSH_BIT)
+               tcp->flag[1] |= PSH;
+       else
+               tcp->flag[1] &= ~PSH;
+       /*
+        * Fix up the state's ack, seq, urg and win fields based on the
+        * changemask.
+        */
+       switch (changes & SPECIALS_MASK) {
+       case SPECIAL_I:
+               i = nhgets(ip->length) - len;   /* data bytes carried by the previous packet */
+               hnputl(tcp->ack, nhgetl(tcp->ack) + i);
+               hnputl(tcp->seq, nhgetl(tcp->seq) + i);
+               break;
+
+       case SPECIAL_D:
+               hnputl(tcp->seq, nhgetl(tcp->seq) + nhgets(ip->length) - len);
+               break;
+
+       default:
+               if(changes & NEW_U) {
+                       tcp->flag[1] |= URG;
+                       if(*cp == 0){
+                               hnputs(tcp->urg, nhgets(cp+1));
+                               cp += 3;
+                       }else
+                               hnputs(tcp->urg, *cp++);        /* urg is sent as a value, not a delta */
+               } else
+                       tcp->flag[1] &= ~URG;
+               if(changes & NEW_W)
+                       DECODES(tcp->win)
+               if(changes & NEW_A)
+                       DECODEL(tcp->ack)
+               if(changes & NEW_S)
+                       DECODEL(tcp->seq)
+               break;
+       }
+
+       /* Update the IP ID */
+       if(changes & NEW_I)
+               DECODES(ip->id)
+       else
+               hnputs(ip->id, nhgets(ip->id) + 1);
+
+       /*
+        *  At this point, cp points to the first byte of data in the packet.
+        *  Back up cp by the TCP/IP header length to make room for the
+        *  reconstructed header.
+        *  We assume the packet we were handed has enough space to prepend
+        *  up to 128 bytes of header.
+        */
+       b->rp = cp;
+       if(b->rp - b->base < len){
+               b = padblock(b, len);   /* not enough headroom in this block: get some */
+               b = pullupblock(b, blocklen(b));
+       } else
+               b->rp -= len;
+       hnputs(ip->length, BLEN(b));    /* total length goes into the saved header before it is copied out */
+       memmove(b->rp, ip, len);
+       
+       /* recompute the ip header checksum */
+       ip = (Iphdr*)b->rp;
+       hnputs(ip->cksum, ipcsum(b->rp));
+       return b;
+
+raise:
+       netlog(f, Logcompress, "Bad Packet!\n");
+       comp->err = 1;  /* makes later packets without an explicit state index get dropped */
+       freeblist(b);
+       return nil;
+}
+
+/*
+ * Point an empty connection slot at its own buffer.  The slot
+ * describes a minimal header: a 20-byte IP header immediately
+ * followed by a 20-byte TCP header, all zero.
+ */
+static void
+inithdr(Hdr *h)
+{
+       enum {
+               Iphlen  = 20,   /* IP header without options */
+               Tcphlen = 20,   /* TCP header without options */
+       };
+
+       h->ip = (Iphdr*)h->buf;
+       h->tcp = (Tcphdr*)(h->buf + Iphlen);
+       h->len = Iphlen + Tcphlen;
+}
+
+/*
+ * Reset (or create) the compressor state.  If c is nil a new Tcpc is
+ * allocated; nil is returned if that allocation fails.  In both cases
+ * the structure is zeroed and every transmit and receive slot is made
+ * to point into its own header buffer.  Returns c.
+ *
+ * The TCP pointer used to start 10 bytes into the buffer with a
+ * length of 20, which put it inside the IP header; it now starts
+ * after the 20-byte IP header.
+ */
+Tcpc*
+compress_init(Tcpc *c)
+{
+       int i;
+
+       if(c == nil){
+               c = malloc(sizeof(Tcpc));
+               if(c == nil)
+                       return nil;
+       }
+       memset(c, 0, sizeof(*c));
+       for(i = 0; i < MAX_STATES; i++){
+               inithdr(&c->t[i]);
+               inithdr(&c->r[i]);
+       }
+
+       return c;
+}
+
+/*
+ * Entry point for outgoing packets.  Packets with any of the low 14
+ * bits of the fragment field set, and packets that are not TCP, are
+ * left alone and reported as Pip; TCP packets are passed to
+ * tcpcompress and its result is returned.
+ */
+ushort
+compress(Tcpc *tcp, Block *b, Fs *f)
+{
+       Iphdr *hdr;
+
+       hdr = (Iphdr*)b->rp;
+
+       /* fragments cannot be compressed */
+       if((nhgets(hdr->frag) & 0x3fff) != 0)
+               return Pip;
+
+       if(hdr->proto == IP_TCPPROTO)
+               return tcpcompress(tcp, b, f);
+       return Pip;
+}
+
+/*
+ * Accept the peer's compression option.  data[0] must equal
+ * MAX_STATES - 1, otherwise -1 is returned and nothing changes.
+ * On success data[1] is recorded as compressid and 0 is returned.
+ */
+int
+compress_negotiate(Tcpc *tcp, uchar *data)
+{
+       if(data[0] == MAX_STATES - 1){
+               tcp->compressid = data[1];
+               return 0;
+       }
+       return -1;
+}
diff --git a/kern/net/devip.c b/kern/net/devip.c
new file mode 100644 (file)
index 0000000..bb37d34
--- /dev/null
@@ -0,0 +1,1417 @@
+#include       "u.h"
+#include       "../port/lib.h"
+#include       "mem.h"
+#include       "dat.h"
+#include       "fns.h"
+#include       "../port/error.h"
+#include       "../ip/ip.h"
+
+enum   /* file types stored in the low bits of a qid path, and the layout of that path */
+{
+       Qtopdir=        1,              /* top level directory */
+       Qtopbase,                       /* first file type listed in the top level directory */
+       Qarp=           Qtopbase,
+       Qbootp,
+       Qndb,
+       Qiproute,
+       Qiprouter,
+       Qipselftab,
+       Qlog,
+
+       Qprotodir,                      /* directory for a protocol */
+       Qprotobase,                     /* first file type listed in a protocol directory */
+       Qclone=         Qprotobase,
+       Qstats,
+
+       Qconvdir,                       /* directory for a conversation */
+       Qconvbase,                      /* first file type listed in a conversation directory */
+       Qctl=           Qconvbase,
+       Qdata,
+       Qerr,
+       Qlisten,
+       Qlocal,
+       Qremote,
+       Qstatus,
+       Qsnoop,
+
+       Logtype=        5,              /* bits of qid path holding the file type */
+       Masktype=       (1<<Logtype)-1,
+       Logconv=        12,             /* bits holding the conversation index */
+       Maskconv=       (1<<Logconv)-1,
+       Shiftconv=      Logtype,
+       Logproto=       8,              /* bits holding the protocol index */
+       Maskproto=      (1<<Logproto)-1,
+       Shiftproto=     Logtype + Logconv,
+
+       Nfs=            32,             /* size of the ipfs[] table */
+};
+#define TYPE(x)        ( ((ulong)(x).path) & Masktype )        /* file type (one of the Q* values) of qid x */
+#define CONV(x)        ( (((ulong)(x).path) >> Shiftconv) & Maskconv ) /* conversation index of qid x */
+#define PROTO(x)       ( (((ulong)(x).path) >> Shiftproto) & Maskproto )       /* protocol index of qid x */
+#define QID(p, c, y)   ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )        /* build a path from protocol p, conversation c and type y */
+
+static char network[] = "network";     /* owner name used for the shared files and restored on a conversation by closeconv */
+
+QLock  fslock;         /* held by ipgetfs while it looks up or creates an ipfs[] entry */
+Fs     *ipfs[Nfs];     /* attached fs's */
+Queue  *qlog;
+
+extern void nullmediumlink(void);
+extern void pktmediumlink(void);
+static long ndbwrite(Fs*, char*, ulong, int);
+static void    closeconv(Conv*);
+
+/*
+ * Fill in dp for file type i inside the conversation directory that
+ * c's qid names.  A conversation that has no owner yet is given to
+ * eve.  Returns 1 when dp was filled in and -1 when i is not a
+ * conversation file (snoop only exists for the "ipifc" protocol).
+ */
+static int
+ip3gen(Chan *c, int i, Dir *dp)
+{
+       Qid q;
+       Conv *cv;
+       char *name;
+       int length, perm;
+
+       cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+       if(cv->owner == nil)
+               kstrdup(&cv->owner, eve);
+       mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
+
+       /* local, remote and status are read-only and report no length */
+       length = 0;
+       perm = 0444;
+       switch(i) {
+       case Qctl:
+               name = "ctl";
+               perm = cv->perm;
+               break;
+       case Qdata:
+               name = "data";
+               length = qlen(cv->rq);
+               perm = cv->perm;
+               break;
+       case Qerr:
+               name = "err";
+               length = qlen(cv->eq);
+               perm = cv->perm;
+               break;
+       case Qlisten:
+               name = "listen";
+               perm = cv->perm;
+               break;
+       case Qlocal:
+               name = "local";
+               break;
+       case Qremote:
+               name = "remote";
+               break;
+       case Qstatus:
+               name = "status";
+               break;
+       case Qsnoop:
+               if(strcmp(cv->p->name, "ipifc") != 0)
+                       return -1;
+               name = "snoop";
+               length = qlen(cv->sq);
+               perm = 0400;
+               break;
+       default:
+               return -1;
+       }
+       devdir(c, q, name, length, cv->owner, perm, dp);
+       return 1;
+}
+
+/*
+ * Fill in dp for one of the two per-protocol files, clone (0666) or
+ * stats (0444).  Returns 1 on success, -1 for any other file type.
+ */
+static int
+ip2gen(Chan *c, int i, Dir *dp)
+{
+       Qid q;
+       char *name;
+       int perm;
+
+       if(i == Qclone){
+               name = "clone";
+               perm = 0666;
+       }else if(i == Qstats){
+               name = "stats";
+               perm = 0444;
+       }else
+               return -1;
+
+       mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
+       devdir(c, q, name, 0, network, perm, dp);
+       return 1;
+}
+
+/*
+ * Fill in dp for file type i of the top level directory.
+ * Returns 1 when dp was filled in, 0 for the bootp file when no
+ * bootp implementation is linked in, and -1 for an unknown type.
+ * The ndb file additionally reports its current length and version,
+ * and its own modification time when that is later than kerndate.
+ */
+static int
+ip1gen(Chan *c, int i, Dir *dp)
+{
+       extern ulong    kerndate;
+       Fs *f = ipfs[c->dev];
+       Qid q;
+       char *name;
+       int perm = 0666;
+       int length = 0;
+
+       mkqid(&q, QID(0, 0, i), 0, QTFILE);
+       switch(i) {
+       case Qarp:
+               name = "arp";
+               break;
+       case Qbootp:
+               if(bootp == nil)
+                       return 0;
+               name = "bootp";
+               break;
+       case Qndb:
+               name = "ndb";
+               length = strlen(f->ndb);
+               q.vers = f->ndbvers;
+               break;
+       case Qiproute:
+               name = "iproute";
+               break;
+       case Qiprouter:
+               name = "iprouter";
+               break;
+       case Qipselftab:
+               name = "ipselftab";
+               perm = 0444;
+               break;
+       case Qlog:
+               name = "log";
+               break;
+       default:
+               return -1;
+       }
+       devdir(c, q, name, length, network, perm, dp);
+       if(i == Qndb && f->ndbmtime > kerndate)
+               dp->mtime = f->ndbmtime;
+       return 1;
+}
+
+static int
+ipgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp)
+{      /*
+        * Directory generator handed to devwalk, devstat and devdirread.
+        * For a directory qid, s selects the s'th entry (or DEVDOTDOT for
+        * the parent); for a file qid the entry for that file itself is
+        * produced.  Returns what the ip1gen/ip2gen/ip3gen helpers return:
+        * 1 when dp was filled in, 0 for an entry to skip, -1 past the end.
+        */
+       Qid q;
+       Conv *cv;
+       Fs *f;
+
+       f = ipfs[c->dev];
+
+       switch(TYPE(c->qid)) {
+       case Qtopdir:
+               if(s == DEVDOTDOT){
+                       mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+                       sprint(up->genbuf, "#I%lud", c->dev);
+                       devdir(c, q, up->genbuf, 0, network, 0555, dp);
+                       return 1;
+               }
+               if(s < f->np) { /* the first f->np entries are the protocol directories */
+                       if(f->p[s]->connect == nil)
+                               return 0;       /* protocol with no user interface */
+                       mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+                       devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+                       return 1;
+               }
+               s -= f->np;     /* the remaining entries are the top level files */
+               return ip1gen(c, s+Qtopbase, dp);
+       case Qarp:
+       case Qbootp:
+       case Qndb:
+       case Qlog:
+       case Qiproute:
+       case Qiprouter:
+       case Qipselftab:
+               return ip1gen(c, TYPE(c->qid), dp);
+       case Qprotodir:
+               if(s == DEVDOTDOT){
+                       mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+                       sprint(up->genbuf, "#I%lud", c->dev);
+                       devdir(c, q, up->genbuf, 0, network, 0555, dp);
+                       return 1;
+               }
+               if(s < f->p[PROTO(c->qid)]->ac) {       /* the first ac entries are the numbered conversation directories */
+                       cv = f->p[PROTO(c->qid)]->conv[s];
+                       sprint(up->genbuf, "%d", s);
+                       mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
+                       devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
+                       return 1;
+               }
+               s -= f->p[PROTO(c->qid)]->ac;   /* then clone and stats */
+               return ip2gen(c, s+Qprotobase, dp);
+       case Qclone:
+       case Qstats:
+               return ip2gen(c, TYPE(c->qid), dp);
+       case Qconvdir:
+               if(s == DEVDOTDOT){
+                       s = PROTO(c->qid);
+                       mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+                       devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+                       return 1;
+               }
+               return ip3gen(c, s+Qconvbase, dp);
+       case Qctl:
+       case Qdata:
+       case Qerr:
+       case Qlisten:
+       case Qlocal:
+       case Qremote:
+       case Qstatus:
+       case Qsnoop:
+               return ip3gen(c, TYPE(c->qid), dp);
+       }
+       return -1;
+}
+
+/*
+ * One-time device setup: link in the null and pkt media and install
+ * eipfmt as the formatter for the i, I, E, V and M print verbs.
+ */
+static void
+ipreset(void)
+{
+       static char verbs[] = "iIEVM";
+       char *v;
+
+       nullmediumlink();
+       pktmediumlink();
+
+       for(v = verbs; *v != '\0'; v++)
+               fmtinstall(*v, eipfmt);
+}
+
+/*
+ * Return the Fs for instance number dev, creating and initialising
+ * it (ip, arp, netlog and every protocol in ipprotoinit[]) on first
+ * use.  Creation is serialised by fslock.
+ * Returns nil when dev is outside 0..Nfs-1; negative numbers are now
+ * rejected as well, since they would index before the start of ipfs[].
+ */
+static Fs*
+ipgetfs(int dev)
+{
+       extern void (*ipprotoinit[])(Fs*);
+       Fs *f;
+       int i;
+
+       if(dev < 0 || dev >= Nfs)
+               return nil;
+
+       qlock(&fslock);
+       if(ipfs[dev] == nil){
+               f = smalloc(sizeof(Fs));
+               ip_init(f);
+               arpinit(f);
+               netloginit(f);
+               for(i = 0; ipprotoinit[i]; i++)
+                       ipprotoinit[i](f);
+               f->dev = dev;
+               /* publish only after the Fs is fully set up */
+               ipfs[dev] = f;
+       }
+       qunlock(&fslock);
+
+       return ipfs[dev];
+}
+
+/*
+ * Allocate the per-channel IPaux.  The owner string is duplicated;
+ * the tag field is blank-filled and then overwritten with as much of
+ * tag as fits (it is not NUL terminated).
+ */
+IPaux*
+newipaux(char *owner, char *tag)
+{
+       IPaux *aux;
+       int taglen;
+
+       aux = smalloc(sizeof(*aux));
+       kstrdup(&aux->owner, owner);
+
+       memset(aux->tag, ' ', sizeof(aux->tag));
+       taglen = strlen(tag);
+       if(taglen > sizeof(aux->tag))
+               taglen = sizeof(aux->tag);
+       memmove(aux->tag, tag, taglen);
+
+       return aux;
+}
+
+#define ATTACHER(c) (((IPaux*)((c)->aux))->owner)      /* owner name recorded in channel c's IPaux */
+
+/*
+ * Attach to IP stack instance spec (a decimal number).  The instance
+ * is created on first attach.  Raises "bad specification" when the
+ * number is outside 0..Nfs-1; negative numbers are rejected too,
+ * because dev is later used to index ipfs[].
+ * Returns a channel for the top level directory whose aux records the
+ * attaching user.
+ */
+static Chan*
+ipattach(char* spec)
+{
+       Chan *c;
+       int dev;
+
+       dev = atoi(spec);
+       if(dev < 0 || dev >= Nfs)
+               error("bad specification");
+
+       ipgetfs(dev);
+       c = devattach('I', spec);
+       mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
+       c->dev = dev;
+
+       c->aux = newipaux(commonuser(), "none");
+
+       return c;
+}
+
+/*
+ * Walk using the generic devwalk with ipgen.  When the walk produced
+ * a cloned channel, give it its own IPaux copied from c's.
+ */
+static Walkqid*
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
+{
+       IPaux *from = c->aux;
+       Walkqid *wq;
+
+       wq = devwalk(c, nc, name, nname, nil, 0, ipgen);
+       if(wq == nil || wq->clone == nil)
+               return wq;
+       wq->clone->aux = newipaux(from->owner, from->tag);
+       return wq;
+}
+
+/*
+ * Stat is handled entirely by devstat, with ipgen generating the entry.
+ */
+static int
+ipstat(Chan* c, uchar* db, int n)
+{
+       int nstat;
+
+       nstat = devstat(c, db, n, nil, 0, ipgen);
+       return nstat;
+}
+
+/*
+ * Sleep condition for listeners: true once the conversation passed
+ * in arg has a call queued on incall.
+ */
+static int
+incoming(void* arg)
+{
+       return ((Conv*)arg)->incall != nil;
+}
+
+static int m2p[] = {   /* open mode -> permission bits required (4 read, 2 write); indexed by omode&3 in ipopen */
+       [OREAD]         4,
+       [OWRITE]        2,
+       [ORDWR]         6       /* NOTE(review): if these three are 0..2 there is no entry for omode&3 == 3 - confirm that value cannot reach ipopen */
+};
+
+static Chan*
+ipopen(Chan* c, int omode)
+{      /*
+        * Open the file named by c's qid.  Most files only get a mode
+        * check; the interesting cases are clone (allocates a new
+        * conversation and turns c into its ctl file), ctl/data/err
+        * (permission check plus a reference on the conversation) and
+        * listen (sleeps until a call arrives, then turns c into the new
+        * conversation's ctl file).  Errors are raised with error().
+        */
+       Conv *cv, *nc;
+       Proto *p;
+       int perm;
+       Fs *f;
+
+       perm = m2p[omode&3];    /* permission bits this open needs */
+
+       f = ipfs[c->dev];
+
+       switch(TYPE(c->qid)) {
+       default:
+               break;
+       case Qndb:
+               if(omode & (OWRITE|OTRUNC) && !iseve())
+                       error(Eperm);
+               if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
+                       f->ndb[0] = 0;  /* truncating write open empties the database text */
+               break;
+       case Qlog:
+               netlogopen(f);
+               break;
+       case Qiprouter:
+               iprouteropen(f);
+               break;
+       case Qiproute:
+               break;
+       case Qtopdir:
+       case Qprotodir:
+       case Qconvdir:
+       case Qstatus:
+       case Qremote:
+       case Qlocal:
+       case Qstats:
+       case Qbootp:
+       case Qipselftab:
+               if(omode != OREAD)      /* read-only files and directories */
+                       error(Eperm);
+               break;
+       case Qsnoop:
+               if(omode != OREAD)
+                       error(Eperm);
+               p = f->p[PROTO(c->qid)];
+               cv = p->conv[CONV(c->qid)];
+               if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
+                       error(Eperm);
+               incref(&cv->snoopers);  /* dropped again in ipclose */
+               break;
+       case Qclone:
+               p = f->p[PROTO(c->qid)];
+               qlock(p);
+               if(waserror()){
+                       qunlock(p);
+                       nexterror();
+               }
+               cv = Fsprotoclone(p, ATTACHER(c));
+               qunlock(p);
+               poperror();
+               if(cv == nil) {
+                       error(Enodev);
+                       break;
+               }
+               mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);      /* the channel now refers to the new conversation's ctl */
+               break;
+       case Qdata:
+       case Qctl:
+       case Qerr:
+               p = f->p[PROTO(c->qid)];
+               qlock(p);
+               cv = p->conv[CONV(c->qid)];
+               qlock(cv);
+               if(waserror()) {
+                       qunlock(cv);
+                       qunlock(p);
+                       nexterror();
+               }
+               if((perm & (cv->perm>>6)) != perm) {    /* owner bits do not cover the request */
+                       if(strcmp(ATTACHER(c), cv->owner) != 0)
+                               error(Eperm);
+                       if((perm & cv->perm) != perm)
+                               error(Eperm); 
+
+               }
+               cv->inuse++;
+               if(cv->inuse == 1){     /* first opener takes ownership */
+                       kstrdup(&cv->owner, ATTACHER(c));
+                       cv->perm = 0660;
+               }
+               qunlock(cv);
+               qunlock(p);
+               poperror();
+               break;
+       case Qlisten:
+               cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+               if((perm & (cv->perm>>6)) != perm) {
+                       if(strcmp(ATTACHER(c), cv->owner) != 0)
+                               error(Eperm);
+                       if((perm & cv->perm) != perm)
+                               error(Eperm); 
+
+               }
+
+               if(cv->state != Announced)
+                       error("not announced");
+
+               if(waserror()){
+                       closeconv(cv);  /* undo the reference taken just below */
+                       nexterror();
+               }
+               qlock(cv);
+               cv->inuse++;    /* hold the conversation while waiting */
+               qunlock(cv);
+
+               nc = nil;
+               while(nc == nil) {
+                       /* give up if we got a hangup */
+                       if(qisclosed(cv->rq))
+                               error("listen hungup");
+
+                       qlock(&cv->listenq);
+                       if(waserror()) {
+                               qunlock(&cv->listenq);
+                               nexterror();
+                       }
+
+                       /* wait for a connect */
+                       sleep(&cv->listenr, incoming, cv);
+
+                       qlock(cv);
+                       nc = cv->incall;
+                       if(nc != nil){
+                               cv->incall = nc->next;  /* take the first queued call */
+                               mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
+                               kstrdup(&cv->owner, ATTACHER(c));
+                       }
+                       qunlock(cv);
+
+                       qunlock(&cv->listenq);
+                       poperror();
+               }
+               closeconv(cv);  /* release the reference taken before the wait */
+               poperror();
+               break;
+       }
+       c->mode = openmode(omode);
+       c->flag |= COPEN;
+       c->offset = 0;
+       return c;
+}
+
+/*
+ * wstat is only allowed on a conversation's ctl and data files, and
+ * only for eve or the conversation's owner.  A non-empty uid in the
+ * new Dir replaces the owner; a mode other than ~0 replaces the
+ * permission bits (low nine bits only).  Returns the number of bytes
+ * consumed from dp.
+ */
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+       Dir *dir;
+       Conv *conv;
+       int type;
+
+       type = TYPE(c->qid);
+       if(type != Qctl && type != Qdata)
+               error(Eperm);
+
+       /* room for the Dir plus the strings convM2D unpacks after it */
+       dir = smalloc(sizeof(*dir)+n);
+       if(waserror()){
+               free(dir);
+               nexterror();
+       }
+       n = convM2D(dp, n, dir, (char*)&dir[1]);
+       if(n == 0)
+               error(Eshortstat);
+
+       conv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+       if(!iseve() && strcmp(ATTACHER(c), conv->owner) != 0)
+               error(Eperm);
+       if(!emptystr(dir->uid))
+               kstrdup(&conv->owner, dir->uid);
+       if(dir->mode != ~0UL)
+               conv->perm = dir->mode & 0777;
+
+       poperror();
+       free(dir);
+       return n;
+}
+
+/*
+ *  Drop one reference to a conversation.  When the last reference goes,
+ *  close every call still queued on it, give it back to the network
+ *  owner with default permissions, leave its multicast groups, let the
+ *  protocol close it and mark it Idle.
+ */
+static void
+closeconv(Conv *cv)
+{
+       Conv *pending;
+       Ipmulti *grp;
+
+       qlock(cv);
+
+       cv->inuse--;
+       if(cv->inuse > 0) {
+               /* somebody else still has it open */
+               qunlock(cv);
+               return;
+       }
+
+       /* nobody will ever listen again, so close the queued incoming calls */
+       while((pending = cv->incall) != nil){
+               cv->incall = pending->next;
+               closeconv(pending);
+       }
+       cv->incall = nil;
+
+       kstrdup(&cv->owner, network);
+       cv->perm = 0660;
+
+       /* ipifcremmulti is called until cv->multi is empty */
+       for(grp = cv->multi; grp != nil; grp = cv->multi)
+               ipifcremmulti(cv, grp->ma, grp->ia);
+
+       cv->r = nil;
+       cv->rgen = 0;
+       cv->p->close(cv);
+       cv->state = Idle;
+       qunlock(cv);
+}
+
+/*
+ *  Close a channel on the ip device.  If the channel was actually opened,
+ *  undo what opening that kind of file did; in every case release the
+ *  per-channel IPaux and the owner string it holds.
+ */
+static void
+ipclose(Chan* c)
+{
+       Fs *f;
+       IPaux *aux;
+
+       f = ipfs[c->dev];
+       if(c->flag & COPEN){
+               switch(TYPE(c->qid)) {
+               case Qlog:
+                       netlogclose(f);
+                       break;
+               case Qiprouter:
+                       iprouterclose(f);
+                       break;
+               case Qdata:
+               case Qctl:
+               case Qerr:
+                       /* these files hold a reference on the conversation */
+                       closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
+                       break;
+               case Qsnoop:
+                       decref(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       aux = c->aux;
+       free(aux->owner);
+       free(aux);
+}
+
+enum
+{
+       Statelen=       32*1024,        /* size of the scratch buffer ipread allocates for remote/local/status/stats text */
+};
+
+/*
+ *  Read from a file of the ip device.
+ *  Directories are generated by ipgen; the per-stack files (arp, bootp,
+ *  ndb, iproute, iprouter, ipselftab, log) are handed to their own
+ *  readers; data, err and snoop read from the conversation's queues.
+ *  remote, local, status and stats format text into a Statelen-sized
+ *  scratch buffer and return the slice of it selected by off and n.
+ *  Any other file type raises Eperm.
+ */
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
+{
+       Fs *f;
+       Proto *pr;
+       Conv *cv;
+       char *text, *dst;
+       long count;
+       ulong offset = off;
+
+       f = ipfs[ch->dev];
+       dst = a;
+
+       switch(TYPE(ch->qid)) {
+       default:
+               error(Eperm);
+       case Qtopdir:
+       case Qprotodir:
+       case Qconvdir:
+               return devdirread(ch, a, n, 0, 0, ipgen);
+       case Qarp:
+               return arpread(f->arp, a, offset, n);
+       case Qbootp:
+               return bootpread(a, offset, n);
+       case Qndb:
+               return readstr(offset, a, n, f->ndb);
+       case Qiproute:
+               return routeread(f, a, offset, n);
+       case Qiprouter:
+               return iprouterread(f, a, n);
+       case Qipselftab:
+               return ipselftabread(f, a, offset, n);
+       case Qlog:
+               return netlogread(f, a, offset, n);
+       case Qctl:
+               /* reading ctl yields the conversation number */
+               sprint(up->genbuf, "%lud", CONV(ch->qid));
+               return readstr(offset, dst, n, up->genbuf);
+       case Qdata:
+               cv = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+               return qread(cv->rq, a, n);
+       case Qerr:
+               cv = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+               return qread(cv->eq, a, n);
+       case Qsnoop:
+               cv = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+               return qread(cv->sq, a, n);
+
+       /* the remaining cases fill text and share the tail below */
+       case Qremote:
+               text = smalloc(Statelen);
+               pr = f->p[PROTO(ch->qid)];
+               cv = pr->conv[CONV(ch->qid)];
+               if(pr->remote != nil)
+                       (*pr->remote)(cv, text, Statelen-2);
+               else
+                       sprint(text, "%I!%d\n", cv->raddr, cv->rport);
+               break;
+       case Qlocal:
+               text = smalloc(Statelen);
+               pr = f->p[PROTO(ch->qid)];
+               cv = pr->conv[CONV(ch->qid)];
+               if(pr->local != nil)
+                       (*pr->local)(cv, text, Statelen-2);
+               else
+                       sprint(text, "%I!%d\n", cv->laddr, cv->lport);
+               break;
+       case Qstatus:
+               text = smalloc(Statelen);
+               pr = f->p[PROTO(ch->qid)];
+               cv = pr->conv[CONV(ch->qid)];
+               (*pr->state)(cv, text, Statelen-2);
+               break;
+       case Qstats:
+               pr = f->p[PROTO(ch->qid)];
+               if(pr->stats == nil)
+                       error("stats not implemented");
+               text = smalloc(Statelen);
+               (*pr->stats)(pr, text, Statelen);
+               break;
+       }
+
+       count = readstr(offset, dst, n, text);
+       free(text);
+       return count;
+}
+
+/*
+ *  Block read: a data file hands back a Block straight from the
+ *  conversation's read queue; everything else goes through devbread.
+ */
+static Block*
+ipbread(Chan* ch, long n, ulong offset)
+{
+       Fs *f;
+       Proto *pr;
+       Conv *cv;
+
+       if(TYPE(ch->qid) != Qdata)
+               return devbread(ch, n, offset);
+
+       f = ipfs[ch->dev];
+       pr = f->p[PROTO(ch->qid)];
+       cv = pr->conv[CONV(ch->qid)];
+       return qbread(cv->rq, n);
+}
+
+/*
+ *  choose the conversation's local address: findlocalip picks, for the
+ *  already-set remote address, the address of the closest interface
+ */
+static void
+setladdr(Conv* c)
+{
+       Fs *f;
+
+       f = c->p->f;
+       findlocalip(f, c->laddr, c->raddr);
+}
+
+/*
+ *  give the conversation local port lport, provided no other Connected
+ *  or Announced conversation of the protocol already uses the same
+ *  raddr,rport,laddr,lport quad.
+ *  Returns nil on success, else an error string (c->lport is untouched).
+ */
+static char*
+setluniqueport(Conv* c, int lport)
+{
+       Proto *p;
+       Conv *other;
+       char *err;
+       int i;
+
+       p = c->p;
+       err = nil;
+
+       qlock(p);
+       /* the conv table is filled from the front: the first nil ends it */
+       for(i = 0; i < p->nc && (other = p->conv[i]) != nil; i++){
+               if(other == c)
+                       continue;
+               if(other->state != Connected && other->state != Announced)
+                       continue;
+               if(other->lport != lport || other->rport != c->rport)
+                       continue;
+               if(ipcmp(other->raddr, c->raddr) != 0)
+                       continue;
+               if(ipcmp(other->laddr, c->laddr) != 0)
+                       continue;
+               err = "address in use";
+               break;
+       }
+       if(err == nil)
+               c->lport = lport;
+       qunlock(p);
+       return err;
+}
+
+/*
+ *  pick an unused local port for the conversation and set it
+ *
+ *  The protocol keeps two rolling counters: nextrport for restricted
+ *  conversations and nextport for everything else.  The chosen counter
+ *  is advanced past the port that was handed out.
+ */
+static void
+setlport(Conv* c)
+{
+       Proto *p;
+       ushort *next;
+       int i, taken;
+
+       p = c->p;
+       next = c->restricted ? &p->nextrport : &p->nextport;
+
+       qlock(p);
+       for(;;){
+               if(c->restricted){
+                       /* restricted ports wrap back to 600 on reaching 1024 */
+                       if(*next >= 1024)
+                               *next = 600;
+               }else{
+                       /*
+                        * nextport starts at 0 (see Fsproto) and may wrap;
+                        * whenever it is below 5000 restart from a random
+                        * value under 1<<15 that is at least 5000.
+                        */
+                       while(*next < 5000)
+                               *next = nrand(1<<15);
+               }
+
+               /* is any conversation of this protocol already on that port? */
+               taken = 0;
+               for(i = 0; i < p->nc; i++){
+                       if(p->conv[i] == nil)
+                               break;
+                       if(p->conv[i]->lport == *next){
+                               taken = 1;
+                               break;
+                       }
+               }
+               if(!taken)
+                       break;
+               (*next)++;
+       }
+       c->lport = *next;
+       (*next)++;
+       qunlock(p);
+}
+
+/*
+ *  set a local address and port from a string of the form
+ *     [address!]port[!r]
+ *
+ *  str is modified in place (the first '!' is overwritten with a NUL).
+ *  With no address part the local address is IPnoaddr when announcing,
+ *  otherwise it is chosen by setladdr.  An address of "*" also means
+ *  IPnoaddr; any other address must be one of ours.
+ *
+ *  A port of "*" while announcing asks for every connection and is only
+ *  allowed for eve (raises Eperm otherwise).  A port that is zero,
+ *  negative or not a number lets setlport pick one; a port above 65535
+ *  is rejected.
+ *
+ *  Returns nil on success or an error string.
+ */
+static char*
+setladdrport(Conv* c, char* str, int announcing)
+{
+       char *p;
+       int lport;
+       uchar addr[IPaddrlen];
+
+       /*
+        *  ignore restricted part if it exists.  it's
+        *  meaningless on local ports.
+        */
+       p = strchr(str, '!');
+       if(p != nil){
+               *p++ = 0;
+               if(strcmp(p, "r") == 0)
+                       p = nil;
+       }
+
+       c->lport = 0;
+       if(p == nil){
+               /* only a port was given */
+               if(announcing)
+                       ipmove(c->laddr, IPnoaddr);
+               else
+                       setladdr(c);
+               p = str;
+       } else {
+               if(strcmp(str, "*") == 0)
+                       ipmove(c->laddr, IPnoaddr);
+               else {
+                       parseip(addr, str);
+                       if(ipforme(c->p->f, addr))
+                               ipmove(c->laddr, addr);
+                       else
+                               return "not a local IP address";
+               }
+       }
+
+       /* one process can get all connections */
+       if(announcing && strcmp(p, "*") == 0){
+               if(!iseve())
+                       error(Eperm);
+               return setluniqueport(c, 0);
+       }
+
+       /*
+        *  Keep the parsed value in an int: in a ushort the <= 0 test
+        *  can never see a negative number ("-1" used to bind port 65535)
+        *  and anything above 65535 was silently truncated.
+        */
+       lport = atoi(p);
+       if(lport <= 0){
+               setlport(c);
+               return nil;
+       }
+       if(lport > 0xFFFF)
+               return "bad local port";
+       return setluniqueport(c, lport);
+}
+
+/*
+ *  set the remote address and port from a string of the form
+ *     address!port[!r]
+ *  str is modified in place.  A trailing "!r" marks the conversation as
+ *  restricted.  Returns nil, or an error string if there is no '!'.
+ */
+static char*
+setraddrport(Conv* c, char* str)
+{
+       char *sep, *rest;
+
+       sep = strchr(str, '!');
+       if(sep == nil)
+               return "malformed address";
+       *sep = 0;
+       rest = sep + 1;
+
+       parseip(c->raddr, str);
+       c->rport = atoi(rest);
+
+       /* anything after the port? */
+       rest = strchr(rest, '!');
+       if(rest != nil && strstr(rest, "!r") != nil)
+               c->restricted = 1;
+       return nil;
+}
+
+/*
+ *  called by protocol connect routine to set addresses
+ *
+ *  argv[1] is the remote address!port; the optional argv[2] is the local
+ *  [address!]port.  Without argv[2] both the local address and the local
+ *  port are chosen automatically.  The IP version of the conversation is
+ *  then derived from the two addresses.
+ *  Returns nil on success or an error string.
+ */
+char*
+Fsstdconnect(Conv *c, char *argv[], int argc)
+{
+       char *err;
+       int v4;
+
+       if(argc != 2 && argc != 3)
+               return "bad args to connect";
+
+       err = setraddrport(c, argv[1]);
+       if(err != nil)
+               return err;
+
+       if(argc == 2){
+               setladdr(c);
+               setlport(c);
+       } else {
+               err = setladdrport(c, argv[2], 0);
+               if(err != nil)
+                       return err;
+       }
+
+       /* v4 if both ends carry the v4 prefix, or if there is no remote address */
+       v4 = memcmp(c->raddr, v4prefix, IPv4off) == 0
+               && memcmp(c->laddr, v4prefix, IPv4off) == 0;
+       if(!v4)
+               v4 = ipcmp(c->raddr, IPnoaddr) == 0;
+       if(v4)
+               c->ipversion = V4;
+       else
+               c->ipversion = V6;
+
+       return nil;
+}
+/*
+ *  sleep condition used while a connect is in progress:
+ *  true once the conversation has reached the Connected state
+ */
+static int
+connected(void* a)
+{
+       Conv *cv;
+
+       cv = a;
+       return cv->state == Connected;
+}
+/*
+ *  handle a "connect" control message: start the connection through the
+ *  protocol's connect routine and sleep until the conversation is
+ *  Connected.
+ *
+ *  Called with c locked; the lock is dropped while sleeping and is held
+ *  again on return and on every error path.
+ *  Raises Econinuse if the conversation is not in state 0, an error if
+ *  the protocol has no connect routine or the routine returns a message,
+ *  and c->cerr if something was recorded there while we slept.
+ */
+static void
+connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+       char *p;
+
+       /* NOTE(review): 0 is presumably Idle - confirm against the state enum */
+       if(c->state != 0)
+               error(Econinuse);
+
+       /*
+        *  Refuse before touching the conversation: checking this after
+        *  setting the state left c stuck in Connecting, so every later
+        *  connect or announce on it failed with Econinuse.
+        */
+       if(x->connect == nil)
+               error("connect not supported");
+
+       c->state = Connecting;
+       c->cerr[0] = '\0';
+       p = x->connect(c, cb->f, cb->nf);
+       if(p != nil)
+               error(p);
+
+       /* sleep unlocked; retake the lock whether we wake or are interrupted */
+       qunlock(c);
+       if(waserror()){
+               qlock(c);
+               nexterror();
+       }
+       sleep(&c->cr, connected, c);
+       qlock(c);
+       poperror();
+
+       if(c->cerr[0] != '\0')
+               error(c->cerr);
+}
+
+/*
+ *  called by protocol announce routine to set addresses
+ *
+ *  The remote address and port are cleared, then argv[1] is parsed as
+ *  the local [address!]port.  Returns nil or an error string.
+ */
+char*
+Fsstdannounce(Conv* c, char* argv[], int argc)
+{
+       memset(c->raddr, 0, sizeof(c->raddr));
+       c->rport = 0;
+
+       if(argc != 2)
+               return "bad args to announce";
+       return setladdrport(c, argv[1], 1);
+}
+
+/*
+ *  sleep condition used while an announce is in progress:
+ *  true once the conversation has reached the Announced state
+ */
+static int
+announced(void* a)
+{
+       Conv *cv;
+
+       cv = a;
+       return cv->state == Announced;
+}
+/*
+ *  handle an "announce" control message: start the announcement through
+ *  the protocol's announce routine and sleep until the conversation is
+ *  Announced.
+ *
+ *  Called with c locked; the lock is dropped while sleeping and is held
+ *  again on return and on every error path.
+ *  Raises Econinuse if the conversation is not in state 0, an error if
+ *  the protocol has no announce routine or the routine returns a
+ *  message, and c->cerr if something was recorded there while we slept.
+ */
+static void
+announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+       char *p;
+
+       /* NOTE(review): 0 is presumably Idle - confirm against the state enum */
+       if(c->state != 0)
+               error(Econinuse);
+
+       /*
+        *  Refuse before touching the conversation: checking this after
+        *  setting the state left c stuck in Announcing, so every later
+        *  connect or announce on it failed with Econinuse.
+        */
+       if(x->announce == nil)
+               error("announce not supported");
+
+       c->state = Announcing;
+       c->cerr[0] = '\0';
+       p = x->announce(c, cb->f, cb->nf);
+       if(p != nil)
+               error(p);
+
+       /* sleep unlocked; retake the lock whether we wake or are interrupted */
+       qunlock(c);
+       if(waserror()){
+               qlock(c);
+               nexterror();
+       }
+       sleep(&c->cr, announced, c);
+       qlock(c);
+       poperror();
+
+       if(c->cerr[0] != '\0')
+               error(c->cerr);
+}
+
+/*
+ *  called by protocol bind routine to set addresses
+ *
+ *  argv[1] is parsed as the local [address!]port, without announcing.
+ *  Returns nil or an error string.
+ */
+char*
+Fsstdbind(Conv* c, char* argv[], int argc)
+{
+       if(argc != 2)
+               return "bad args to bind";
+       return setladdrport(c, argv[1], 0);
+}
+
+/*
+ *  handle a "bind" control message with the protocol's own bind routine,
+ *  or with Fsstdbind when the protocol has none; a returned message is
+ *  raised as an error
+ */
+static void
+bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+       char *err;
+
+       if(x->bind != nil)
+               err = x->bind(c, cb->f, cb->nf);
+       else
+               err = Fsstdbind(c, cb->f, cb->nf);
+
+       if(err != nil)
+               error(err);
+}
+
+/*
+ *  handle a "tos" control message: set the conversation's type of
+ *  service from the first argument, or to 0 when none is given
+ */
+static void
+tosctlmsg(Conv *c, Cmdbuf *cb)
+{
+       if(cb->nf >= 2){
+               c->tos = atoi(cb->f[1]);
+               return;
+       }
+       c->tos = 0;
+}
+
+/*
+ *  handle a "ttl" control message: set the conversation's time to live
+ *  from the first argument, or to MAXTTL when none is given
+ */
+static void
+ttlctlmsg(Conv *c, Cmdbuf *cb)
+{
+       if(cb->nf >= 2){
+               c->ttl = atoi(cb->f[1]);
+               return;
+       }
+       c->ttl = MAXTTL;
+}
+
+/*
+ *  Write to a file of the ip device.
+ *  data goes to the conversation's write queue; arp, iproute, log and
+ *  ndb are handed to their own writers; ctl is parsed as a command and
+ *  run with the conversation locked.  Commands this routine does not
+ *  know are passed to the protocol's ctl routine when it has one.
+ *  Any other file type raises Eperm.  Returns n unless a delegated
+ *  writer returns its own count.
+ */
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
+{
+       Fs *f;
+       Proto *pr;
+       Conv *cv;
+       Cmdbuf *cb;
+       char *data, *cmd, *err;
+       uchar ifaddr[IPaddrlen], grp[IPaddrlen], *mcast;
+
+       data = v;
+       f = ipfs[ch->dev];
+
+       switch(TYPE(ch->qid)){
+       default:
+               error(Eperm);
+       case Qdata:
+               pr = f->p[PROTO(ch->qid)];
+               cv = pr->conv[CONV(ch->qid)];
+
+               if(cv->wq == nil)
+                       error(Eperm);
+
+               qwrite(cv->wq, data, n);
+               return n;
+       case Qarp:
+               return arpwrite(f, data, n);
+       case Qiproute:
+               return routewrite(f, ch, data, n);
+       case Qlog:
+               netlogctl(f, data, n);
+               return n;
+       case Qndb:
+               return ndbwrite(f, data, off, n);
+       case Qctl:
+               pr = f->p[PROTO(ch->qid)];
+               cv = pr->conv[CONV(ch->qid)];
+               cb = parsecmd(data, n);
+
+               /* the conversation stays locked for the whole command */
+               qlock(cv);
+               if(waserror()) {
+                       qunlock(cv);
+                       free(cb);
+                       nexterror();
+               }
+               if(cb->nf < 1)
+                       error("short control request");
+               cmd = cb->f[0];
+
+               if(strcmp(cmd, "connect") == 0)
+                       connectctlmsg(pr, cv, cb);
+               else if(strcmp(cmd, "announce") == 0)
+                       announcectlmsg(pr, cv, cb);
+               else if(strcmp(cmd, "bind") == 0)
+                       bindctlmsg(pr, cv, cb);
+               else if(strcmp(cmd, "ttl") == 0)
+                       ttlctlmsg(cv, cb);
+               else if(strcmp(cmd, "tos") == 0)
+                       tosctlmsg(cv, cb);
+               else if(strcmp(cmd, "ignoreadvice") == 0)
+                       cv->ignoreadvice = 1;
+               else if(strcmp(cmd, "addmulti") == 0){
+                       if(cb->nf < 2)
+                               error("addmulti needs interface address");
+                       /* the group is the remote address unless given explicitly */
+                       mcast = cv->raddr;
+                       if(cb->nf != 2){
+                               parseip(grp, cb->f[2]);
+                               mcast = grp;
+                       }
+                       if(!ipismulticast(mcast))
+                               error("addmulti for a non multicast address");
+                       parseip(ifaddr, cb->f[1]);
+                       ipifcaddmulti(cv, mcast, ifaddr);
+               } else if(strcmp(cmd, "remmulti") == 0){
+                       if(cb->nf < 2)
+                               error("remmulti needs interface address");
+                       if(!ipismulticast(cv->raddr))
+                               error("remmulti for a non multicast address");
+                       parseip(ifaddr, cb->f[1]);
+                       ipifcremmulti(cv, cv->raddr, ifaddr);
+               } else if(pr->ctl != nil) {
+                       err = pr->ctl(cv, cb->f, cb->nf);
+                       if(err != nil)
+                               error(err);
+               } else
+                       error("unknown control request");
+
+               qunlock(cv);
+               free(cb);
+               poperror();
+       }
+       return n;
+}
+
+/*
+ *  Block write: a data file queues the Block (first flattened into a
+ *  single block if it is a chain) on the conversation's write queue and
+ *  returns its length; everything else goes through devbwrite.
+ *  Raises Eperm if the conversation has no write queue.
+ */
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
+{
+       Fs *f;
+       Proto *pr;
+       Conv *cv;
+       int len;
+
+       if(TYPE(ch->qid) != Qdata)
+               return devbwrite(ch, bp, offset);
+
+       f = ipfs[ch->dev];
+       pr = f->p[PROTO(ch->qid)];
+       cv = pr->conv[CONV(ch->qid)];
+
+       if(cv->wq == nil)
+               error(Eperm);
+
+       if(bp->next != nil)
+               bp = concatblock(bp);
+       /* take the length first: the block is handed to the queue below */
+       len = BLEN(bp);
+       qbwrite(cv->wq, bp);
+       return len;
+}
+
+Dev ipdevtab = {       /* device-table entry for the ip device; fields are positional, so the order below must match Dev */
+       'I',            /* NOTE(review): presumably the device character - confirm against the Dev declaration */
+       "ip",           /* NOTE(review): presumably the device name */
+
+       ipreset,
+       devinit,        /* dev* entries are not defined in the visible part of this file; presumably the generic defaults */
+       devshutdown,
+       ipattach,
+       ipwalk,
+       ipstat,
+       ipopen,
+       devcreate,
+       ipclose,
+       ipread,
+       ipbread,
+       ipwrite,
+       ipbwrite,
+       devremove,
+       ipwstat,
+};
+
+/*
+ *  Register protocol p with stack f.
+ *  The protocol takes the next free index in f->p, gets a conversation
+ *  table with room for p->nc entries plus one, and has its port
+ *  counters initialised.  A protocol with a positive ipproto is also
+ *  entered in the f->t2p table.
+ *  Returns 0 on success, -1 if the protocol table is full or the
+ *  ipproto number is already taken.  Panics if the conversation table
+ *  cannot be allocated.
+ */
+int
+Fsproto(Fs *f, Proto *p)
+{
+       int slot;
+
+       if(f->np >= Maxproto)
+               return -1;
+
+       p->f = f;
+
+       if(p->ipproto > 0){
+               if(f->t2p[p->ipproto] != nil)
+                       return -1;
+               f->t2p[p->ipproto] = p;
+       }
+
+       slot = f->np;
+
+       p->qid.type = QTDIR;
+       p->qid.path = QID(slot, 0, Qprotodir);
+       p->conv = malloc((p->nc+1) * sizeof(Conv*));
+       if(p->conv == nil)
+               panic("Fsproto");
+
+       p->x = slot;
+       p->nextport = 0;        /* below 5000, so setlport starts from a random port */
+       p->nextrport = 600;     /* first restricted port */
+
+       f->p[slot] = p;
+       f->np = slot + 1;
+
+       return 0;
+}
+
+/*
+ *  return true if a protocol is registered in f
+ *  for IP protocol number proto, i.e. it is built in
+ */
+int
+Fsbuiltinproto(Fs* f, uchar proto)
+{
+       Proto *p;
+
+       p = f->t2p[proto];
+       return p != nil;
+}
+
+/*
+ *  called with protocol locked
+ *
+ *  Find or create a conversation in p and hand it to user.
+ *  The slots of p->conv are scanned in order: the first nil slot gets a
+ *  freshly allocated Conv; before that, the first existing Conv that can
+ *  be locked without blocking and is unused by both processes and the
+ *  protocol is recycled.  If every slot is busy, the protocol's gc hook
+ *  (when present) is asked to reclaim something and the scan restarts.
+ *
+ *  Returns the Conv unlocked, with inuse set to 1, owner set to user and
+ *  its addresses, ports, ttl, tos reset and queues reopened, or nil when
+ *  nothing is available.  Raises Enomem if an allocation fails.
+ *
+ *  NOTE(review): a new Conv is locked and used straight after malloc, so
+ *  this appears to rely on malloc returning zeroed memory - confirm.
+ */
+Conv*
+Fsprotoclone(Proto *p, char *user)
+{
+       Conv *c, **pp, **ep;
+
+retry:
+       c = nil;
+       ep = &p->conv[p->nc];   /* one past the last usable slot */
+       for(pp = p->conv; pp < ep; pp++) {
+               c = *pp;
+               if(c == nil){   /* empty slot: build a brand-new Conv here */
+                       c = malloc(sizeof(Conv));
+                       if(c == nil)
+                               error(Enomem);
+                       qlock(c);       /* stays held until the common exit below */
+                       c->p = p;
+                       c->x = pp - p->conv;    /* index of this slot */
+                       if(p->ptclsize != 0){
+                               c->ptcl = malloc(p->ptclsize);
+                               if(c->ptcl == nil) {
+                                       free(c);        /* not yet published in *pp, so nobody else can see it */
+                                       error(Enomem);
+                               }
+                       }
+                       *pp = c;
+                       p->ac++;
+                       c->eq = qopen(1024, Qmsg, 0, 0);
+                       (*p->create)(c);
+                       break;
+               }
+               if(canqlock(c)){        /* never block on a Conv somebody else holds */
+                       /*
+                        *  make sure both processes and protocol
+                        *  are done with this Conv
+                        */
+                       if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0))
+                               break;  /* leaves the loop with c locked */
+
+                       qunlock(c);
+               }
+       }
+       if(pp >= ep) {  /* scanned every slot without finding one */
+               if(p->gc != nil && (*p->gc)(p))
+                       goto retry;
+               return nil;
+       }
+
+       c->inuse = 1;   /* c is locked here, whichever way the loop was left */
+       kstrdup(&c->owner, user);
+       c->perm = 0660;
+       c->state = Idle;
+       ipmove(c->laddr, IPnoaddr);
+       ipmove(c->raddr, IPnoaddr);
+       c->r = nil;
+       c->rgen = 0;
+       c->lport = 0;
+       c->rport = 0;
+       c->restricted = 0;
+       c->ttl = MAXTTL;
+       c->tos = DFLTTOS;
+       qreopen(c->rq);
+       qreopen(c->wq);
+       qreopen(c->eq);
+
+       qunlock(c);
+       return c;
+}
+
+/*
+ *  Report that a pending announce or connect on c has finished.
+ *  A non-empty msg is recorded in c->cerr.  The state moves from
+ *  Announcing to Announced or from Connecting to Connected (any other
+ *  state is left alone) and whoever sleeps on c->cr is woken.
+ *  Always returns 0.
+ */
+int
+Fsconnected(Conv* c, char* msg)
+{
+       if(msg != nil && *msg != '\0')
+               kstrcpy(c->cerr, msg, sizeof(c->cerr));
+
+       if(c->state == Announcing)
+               c->state = Announced;
+       else if(c->state == Connecting)
+               c->state = Connected;
+
+       wakeup(&c->cr);
+       return 0;
+}
+
+/*
+ *  Protocol that should receive packets with IP protocol number proto:
+ *  f->ipmux when one is set, otherwise the f->t2p entry (which may be nil).
+ */
+Proto*
+Fsrcvpcol(Fs* f, uchar proto)
+{
+       if(f->ipmux != nil)
+               return f->ipmux;
+       return f->t2p[proto];
+}
+
+/*
+ *  Like Fsrcvpcol, but always looks proto up in f->t2p, ignoring f->ipmux.
+ */
+Proto*
+Fsrcvpcolx(Fs *f, uchar proto)
+{
+       Proto *p;
+
+       p = f->t2p[proto];
+       return p;
+}
+
+/*
+ *  called with protocol locked
+ *
+ *  Queue a new incoming call on conversation c.
+ *  A conversation is obtained from Fsprotoclone, filled in with the
+ *  given addresses, ports and IP version, marked Connected and appended
+ *  to c's list of incoming calls; whoever sleeps on c->listenr is then
+ *  woken.  Returns the new conversation, or nil if Maxincall calls are
+ *  already queued or no conversation is available.
+ */
+Conv*
+Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version)
+{
+       Conv *nc;
+       Conv **tail;
+       int queued;
+
+       nc = nil;
+
+       qlock(c);
+
+       /* walk to the end of the list, counting the calls already waiting */
+       tail = &c->incall;
+       for(queued = 0; *tail != nil; queued++)
+               tail = &(*tail)->next;
+
+       /* find a free conversation */
+       if(queued < Maxincall)
+               nc = Fsprotoclone(c->p, network);
+
+       if(nc != nil){
+               ipmove(nc->raddr, raddr);
+               nc->rport = rport;
+               ipmove(nc->laddr, laddr);
+               nc->lport = lport;
+               nc->next = nil;
+               *tail = nc;
+               nc->state = Connected;
+               nc->ipversion = version;
+       }
+
+       qunlock(c);
+
+       if(nc != nil)
+               wakeup(&c->listenr);
+
+       return nc;
+}
+
+/*
+ *  Overwrite part of the stack's ndb text: copy the n bytes at a to
+ *  f->ndb starting at byte offset off and terminate the text right
+ *  after them, then bump the version and modification time.
+ *  Raises Eio if n is negative, if off lies beyond the end of the
+ *  current text, or if the write would not fit in f->ndb.
+ *  Returns n.
+ */
+static long
+ndbwrite(Fs *f, char *a, ulong off, int n)
+{
+       /*
+        *  A negative n would be converted to a huge unsigned value:
+        *  off+n could then wrap back into range and pass the bounds
+        *  check below while memmove got the huge length.
+        */
+       if(n < 0)
+               error(Eio);
+       if(off > strlen(f->ndb))
+               error(Eio);
+       if(off+n >= sizeof(f->ndb)-1)
+               error(Eio);
+       memmove(f->ndb+off, a, n);
+       f->ndb[off+n] = 0;
+       f->ndbvers++;
+       f->ndbmtime = seconds();
+       return n;
+}
+
+/*
+ *  Scale a conversation count with memory size: Nchans, or four times
+ *  that when conf.npage*BY2PG is at least 128*MB.
+ */
+ulong
+scalednconv(void)
+{
+       ulong nconv;
+
+       nconv = Nchans;
+       if(conf.npage*BY2PG >= 128*MB)
+               nconv = Nchans*4;
+       return nconv;
+}
diff --git a/kern/net/dhcp.c b/kern/net/dhcp.c
new file mode 100644 (file)
index 0000000..639e51b
--- /dev/null
@@ -0,0 +1,447 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+#include "ppp.h"
+
+Ipaddr pppdns[2];
+
+static ulong   fsip;           /* file server address: first address of vendor option 11 */
+static ulong   auip;           /* authentication server address: first address of vendor option 8 */
+static ulong   gwip;           /* gateway address: first address of vendor option 3 */
+static ulong   ipmask;         /* subnet mask: vendor option 1 */
+static ulong   ipaddr;         /* our address: yiaddr of the most recent reply matching our hardware address */
+static ulong   dns1ip;         /* first DNS server: vendor option 6 */
+static ulong   dns2ip;         /* second DNS server, when option 6 carries more than one address */
+
+int            dhcpmsgtype;    /* DHCP message type (option 53) of the latest reply rcvbootp accepted; only ever raised there */
+int            debug=0;        /* non-zero enables the diagnostic prints in this file */
+enum
+{
+       Bootrequest = 1,        /* value placed in Bootp.op for outgoing requests */
+       Bootreply   = 2,
+};
+
+typedef struct Bootp   /* a bootp/dhcp packet as read from and written to the udp data file, udp header first */
+{
+       /* udp.c oldheader */
+       uchar   raddr[IPaddrlen];
+       uchar   laddr[IPaddrlen];
+       uchar   rport[2];
+       uchar   lport[2];
+       /* bootp itself */
+       uchar   op;                     /* opcode */
+       uchar   htype;          /* hardware type */
+       uchar   hlen;                   /* hardware address len */
+       uchar   hops;           /* hops */
+       uchar   xid[4];         /* a random number */
+       uchar   secs[2];                /* elapsed since client started booting */
+       uchar   flags[2];               /* flags */
+       uchar   ciaddr[4];              /* client IP address (client tells server) */
+       uchar   yiaddr[4];              /* client IP address (server tells client) */
+       uchar   siaddr[4];              /* server IP address */
+       uchar   giaddr[4];              /* gateway IP address */
+       uchar   chaddr[16];     /* client hardware address */
+       uchar   sname[64];      /* server host name (optional) */
+       uchar   file[128];              /* boot file name */
+       uchar   vend[128];      /* vendor-specific goo 340 */
+} Bootp;
+
+static Bootp   req;            /* the request packet; rcvbootp matches replies against req.chaddr */
+static Proc*   rcvprocp;       /* set by rcvbootp on entry, cleared again on its normal exit */
+static int     recv;           /* set to 1 by rcvbootp when a reply raises dhcpmsgtype */
+static int     done;           /* rcvbootp keeps reading while this is 0 */
+static Rendez  bootpr;         /* rcvbootp wakes this when a reply raises dhcpmsgtype */
+static char    rcvbuf[512+2*IPaddrlen+2*2];      /* 576 - NOTE(review): if IPaddrlen is 16 this is 548 bytes; confirm which is meant */
+static uchar sid[4];           /* server identifier: vendor option 54 */
+static ulong iplease;          /* lease time: vendor option 51 */
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d
+ * dns1ip      d.d.d.d
+ * dns2ip      d.d.d.d"
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+ *
+ * Last change:  SUN  13 Sep 2001    4:36 pm
+ */
+
+/*
+ * Assemble four bytes in network (big-endian) order into a ulong.
+ * Each byte is widened before it is shifted: shifting a uchar left by 24
+ * is done in int, so a top byte of 0x80 or more would land in the sign
+ * bit and be sign-extended when stored in a ulong wider than 32 bits.
+ */
+static ulong
+vendlong(uchar *p)
+{
+       return ((ulong)p[0]<<24) | ((ulong)p[1]<<16) | ((ulong)p[2]<<8) | (ulong)p[3];
+}
+
+/*
+ * Parse the vendor specific fields according to RFC 1084.
+ * We are overloading the "cookie server" to be the Inferno
+ * authentication server and the "resource location server"
+ * to be the Inferno file server.
+ *
+ * If the vendor specific field is formatted properly, it
+ * will begin with the four bytes 99.130.83.99 and end with
+ * an 0xFF byte.
+ *
+ * Recognised options are stored in the file-level variables ipmask,
+ * gwip, dns1ip, dns2ip, auip, fsip, iplease and sid; options that are
+ * not recognised are skipped.  Parsing stops at an option code of 0
+ * or 0xFF.
+ *
+ * Returns the DHCP message type (option 53), 0 if the field carries
+ * none, or -1 if the magic cookie is wrong or a recognised option has
+ * an unexpected length.
+ *
+ * NOTE(review): the option lengths are trusted and no buffer length is
+ * available here, so a malformed field can walk past the end of the
+ * caller's buffer - the caller would have to pass a length to fix that.
+ */
+static int
+parsevend(uchar* pvend)
+{
+       uchar *vend=pvend;
+       int dhcpmsg=0;
+       int i;
+
+       /* The field must start with 99.130.83.99 to be compliant */
+       if ((vend[0] != 99) || (vend[1] != 130) || (vend[2] != 83) || (vend[3] != 99)){
+               print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
+               return -1;
+       }
+
+       /* Skip over the magic cookie */
+       vend += 4;
+
+       /* each option is: code byte, length byte, then that many data bytes */
+       while ((vend[0] != 0) && (vend[0] != 0xFF)) {
+               if(debug){
+                       print(">>>Opt[%d] [%d]", vend[0], vend[1]);
+                       for(i=0; i<vend[1]; i++)
+                               print(" %2.2x", vend[i+2]);
+                       print("\n");
+               }
+
+               switch (vend[0]) {
+               case 1: /* Subnet mask field */
+                       /* There must be only one subnet mask */
+                       if (vend[1] != 4)
+                               return -1;
+                       ipmask = vendlong(vend+2);
+                       break;
+
+               case 3: /* Gateway/router field */
+                       /* We are only concerned with first address */
+                       if (vend[1] == 0 || vend[1]%4 != 0)
+                               return -1;
+                       gwip = vendlong(vend+2);
+                       break;
+
+               case 6: /* domain name server */
+                       if (vend[1] == 0 || vend[1]%4 != 0)
+                               return -1;
+                       dns1ip = vendlong(vend+2);
+                       if(vend[1] > 4)
+                               dns2ip = vendlong(vend+6);
+                       break;
+
+               case 8: /* "Cookie server" (auth server) field */
+                       /* We are only concerned with first address */
+                       if (vend[1] == 0 || vend[1]%4 != 0)
+                               return -1;
+                       auip = vendlong(vend+2);
+                       break;
+
+               case 11:        /* "Resource loc server" (file server) field */
+                       /* We are only concerned with first address */
+                       if (vend[1] == 0 || vend[1]%4 != 0)
+                               return -1;
+                       fsip = vendlong(vend+2);
+                       break;
+
+               case 51:        /* ip lease time */
+                       if(vend[1] != 4)
+                               return -1;
+                       iplease = vendlong(vend+2);
+                       break;
+
+               case 53:        /* DHCP message type */
+                       if(vend[1] != 1)
+                               return -1;
+                       dhcpmsg = vend[2];
+                       break;
+
+               case 54:        /* server identifier */
+                       if(vend[1] != 4)
+                               return -1;
+                       memmove(sid, vend+2, 4);
+                       break;
+
+               default:        /* anything else is skipped */
+                       break;
+               }
+
+               /* Skip over the field */
+               vend += vend[1] + 2;
+       }
+       if(debug)
+               print(">>>Opt[%d] [%d]\n", vend[0], vend[1]);
+       return dhcpmsg;
+}
+
+/*
+ * Walk the options of a vendor field.
+ * The prints that used to display the magic cookie and each option are
+ * disabled, so at present this only steps from option to option until
+ * it meets an option code of 0 or 0xFF.
+ */
+static void
+dispvend(uchar* pvend)
+{
+       uchar *opt;
+
+       /* start just past the 4-byte magic cookie; each option is code, length, data */
+       for(opt = pvend + 4; opt[0] != 0 && opt[0] != 0xFF; opt += opt[1] + 2)
+               ;
+}
+
+/*
+ * Kernel process that receives bootp/dhcp replies.
+ * a carries the file descriptor to read from.  Packets are read into
+ * rcvbuf until done is set, a read fails or returns nothing, or a
+ * final reply arrives.  A reply counts only if it is for our hardware
+ * address (req.chaddr) with ethernet type and length; its yiaddr is
+ * stored in ipaddr and its vendor field is parsed.  When the message
+ * type found is higher than dhcpmsgtype, that becomes the new
+ * dhcpmsgtype, recv is set and the sleeper on bootpr is woken.
+ * The process exits through pexit in every case.
+ */
+static void
+rcvbootp(void *a)
+{
+       int n, fd, dhcp;
+       Bootp *rp;
+
+       if(waserror())
+               pexit("", 0);
+       rcvprocp = up;  /* NOTE(review): presumably so another process can post a note to us - not visible here */
+       fd = (int)a;
+       while(done == 0) {
+               if(debug)
+                       print("rcvbootp:looping\n");
+
+               /*
+                * rcvbuf is static: clear it so that a short datagram is
+                * never parsed together with bytes left by an earlier one.
+                */
+               memset(rcvbuf, 0, sizeof(rcvbuf));
+               n = kread(fd, rcvbuf, sizeof(rcvbuf));
+               if(n <= 0)
+                       break;
+               rp = (Bootp*)rcvbuf;
+               if (memcmp(req.chaddr, rp->chaddr, 6) == 0 && rp->htype == 1 && rp->hlen == 6) {
+                       /*
+                        * widen each byte before shifting: uchar<<24 is done
+                        * in int and would sign-extend into a wider ulong
+                        */
+                       ipaddr = ((ulong)rp->yiaddr[0]<<24) | ((ulong)rp->yiaddr[1]<<16)
+                               | ((ulong)rp->yiaddr[2]<<8) | (ulong)rp->yiaddr[3];
+                       if(debug)
+                               print("ipaddr = %2.2x %2.2x %2.2x %2.2x \n", rp->yiaddr[0], rp->yiaddr[1], rp->yiaddr[2], rp->yiaddr[3]);
+                       dhcp = parsevend(rp->vend);
+
+                       if(dhcpmsgtype < dhcp){
+                               dhcpmsgtype=dhcp;
+                               recv = 1;
+                               wakeup(&bootpr);
+                               /* 0: no DHCP message type option; 5 and 6 are DHCPACK and DHCPNAK in RFC 2132 */
+                               if(dhcp==0 || dhcp ==5 || dhcp == 6 )
+                                       break;
+                       }
+               }
+       }
+       poperror();
+       rcvprocp = nil;
+
+       if(debug)
+               print("rcvbootp exit\n");
+       pexit("", 0);
+}
+
+/*
+ * Configure ifc from a DHCP exchange on UDP port 68.
+ *
+ * A temporary 0.0.0.0 address is added to ifc, a udp!*!68 conversation
+ * is announced and opened, and a rcvbootp kproc is started on its data
+ * file.  A DHCPDISCOVER is broadcast (up to 4 times, 1s apart) and, once
+ * an offer (message type 2) is seen, a DHCPREQUEST is sent and a
+ * DHCPACK (type 5) is awaited for 2s.  Afterwards the temporary address
+ * is removed, ipaddr/ipmask are installed on ifc and, if gwip is set,
+ * a default route is added.
+ *
+ * Returns nil, or a static error string if the announce, the "headers"
+ * ctl write or the data open fails.
+ *
+ * NOTE(review): the plain-bootp early return below leaves dfd open and
+ * skips the interface set-up at the bottom -- confirm that is intended.
+ * NOTE(review): the address set-up at the bottom also runs when no
+ * DHCPACK was received (the outer loop runs once) -- confirm.
+ */
+static char*
+rbootp(Ipifc *ifc)
+{
+       int cfd, dfd, tries, n;
+       char ia[5+3*16], im[16], *av[3];
+       uchar nipaddr[4], ngwip[4], nipmask[4];
+       char dir[Maxpath];
+       static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
+       uchar *vend;
+
+       /*
+        * broadcast bootp's till we get a reply,
+        * or fixed number of tries
+        */
+       if(debug)
+           print("dhcp: bootp() called\n");
+       tries = 0;
+       /* av[0] is left unset; only av[1] (address) and av[2] (mask) are filled in */
+       av[1] = "0.0.0.0";
+       av[2] = "0.0.0.0";
+       ipifcadd(ifc, av, 3, 0, nil);
+
+       cfd = kannounce("udp!*!68", dir);
+       if(cfd < 0)
+               return "dhcp announce failed";
+       strcat(dir, "/data");
+       if(kwrite(cfd, "headers", 7) < 0){
+               kclose(cfd);
+               return "dhcp ctl headers failed";
+       }
+       /* result of this ctl write is deliberately not checked */
+       kwrite(cfd, "oldheaders", 10);
+       dfd = kopen(dir, ORDWR);
+       if(dfd < 0){
+               kclose(cfd);
+               return "dhcp open data failed";
+       }
+       kclose(cfd);
+       
+       while(tries<1){
+               tries++;
+               memset(sid, 0, 4);
+               iplease=0;
+               dhcpmsgtype=-2;         /* -2 means "nothing received yet" */
+/* DHCPDISCOVER*/
+               done = 0;
+               recv = 0;
+               kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
+               /* Prepare DHCPDISCOVER */      
+               memset(&req, 0, sizeof(req));
+               ipmove(req.raddr, IPv4bcast);
+               hnputs(req.rport, 67);
+               req.op = Bootrequest;
+               req.htype = 1;                  /* ethernet (all we know) */
+               req.hlen = 6;                   /* ethernet (all we know) */
+               
+               memmove(req.chaddr, ifc->mac, 6);       /* Hardware MAC address */
+               //ipv4local(ifc, req.ciaddr);                           /* Fill in the local IP address if we know it */
+               memset(req.file, 0, sizeof(req.file));
+               vend=req.vend;
+               memmove(vend, vend_rfc1048, 4); vend+=4;
+               *vend++=53; *vend++=1;*vend++=1;                /* dhcp msg type==1, dhcpdiscover */
+               
+               *vend++=61;*vend++=7;*vend++=1;         /* client identifier: type 1 + 6 byte mac */
+               memmove(vend, ifc->mac, 6);vend+=6;
+               *vend=0xff;                             /* end of options */
+
+               if(debug)
+                       dispvend(req.vend); 
+               for(n=0;n<4;n++){
+                       if(kwrite(dfd, &req, sizeof(req))<0)    /* SEND DHCPDISCOVER */
+                               print("DHCPDISCOVER: %r");
+               
+                       tsleep(&bootpr, return0, 0, 1000);      /* wait DHCPOFFER */
+                       if(debug)
+                               print("[DHCP] DISCOVER: msgtype = %d\n", dhcpmsgtype);
+
+                       if(dhcpmsgtype==2)              /* DHCPOFFER */
+                               break;
+                       else if(dhcpmsgtype==0) /* bootp */
+                               return nil;     /* NOTE(review): dfd is not closed on this path */
+                       else if(dhcpmsgtype== -2)       /* time out */
+                               continue;
+                       else
+                               break;
+                       
+               }
+               if(dhcpmsgtype!=2)
+                       continue;
+
+/* DHCPREQUEST */      
+               memset(req.vend, 0, sizeof(req.vend));
+               vend=req.vend;
+               memmove(vend, vend_rfc1048, 4);vend+=4; 
+
+               *vend++=53; *vend++=1;*vend++=3;                /* dhcp msg type==3, dhcprequest */
+
+               *vend++=50;     *vend++=4;                              /* requested ip address */
+               *vend++=(ipaddr >> 24)&0xff;
+               *vend++=(ipaddr >> 16)&0xff;
+               *vend++=(ipaddr >> 8) & 0xff;
+               *vend++=ipaddr & 0xff;
+
+               *vend++=51;*vend++=4;                                   /* lease time */
+               *vend++=(iplease>>24)&0xff; *vend++=(iplease>>16)&0xff; *vend++=(iplease>>8)&0xff; *vend++=iplease&0xff;
+
+               *vend++=54; *vend++=4;                                  /* server identifier */
+               memmove(vend, sid, 4);  vend+=4;
+       
+               *vend++=61;*vend++=07;*vend++=01;               /* client identifier */
+               memmove(vend, ifc->mac, 6);vend+=6;
+               *vend=0xff;
+               if(debug) 
+                       dispvend(req.vend); 
+               if(kwrite(dfd, &req, sizeof(req))<0){
+                       print("DHCPREQUEST: %r");
+                       continue;
+               }
+               tsleep(&bootpr, return0, 0, 2000);
+               if(dhcpmsgtype==5)              /* wait for DHCPACK */
+                       break;
+               else
+                       continue;
+               /* CHECK ARP */
+               /* DHCPDECLINE */
+       }
+       kclose(dfd);
+       done = 1;
+       /* stop the receiver kproc if it is still registered */
+       if(rcvprocp != nil){
+               postnote(rcvprocp, 1, "timeout", 0);
+               rcvprocp = nil;
+       }
+
+       /* drop the temporary 0.0.0.0 address added at the top */
+       av[1] = "0.0.0.0";
+       av[2] = "0.0.0.0";
+       ipifcrem(ifc, av, 3);
+
+       /* install the address and mask held in ipaddr/ipmask */
+       hnputl(nipaddr, ipaddr);
+       sprint(ia, "%V", nipaddr);
+       hnputl(nipmask, ipmask);
+       sprint(im, "%V", nipmask);
+       av[1] = ia;
+       av[2] = im;
+       ipifcadd(ifc, av, 3, 0, nil);
+
+       /* default route via the gateway, when one is known */
+       if(gwip != 0) {
+               hnputl(ngwip, gwip);
+               n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
+               routewrite(ifc->conv->p->f, nil, ia, n);
+       }
+       return nil;
+}
+
+/*
+ * Render the current boot/DHCP parameters (fsip, auip, gwip, ipmask,
+ * ipaddr, lease and dns servers) as text and copy the part selected by
+ * offset/len into bp.  Returns whatever readstr returns.
+ */
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+       char *text;
+       uchar v4[4];
+       int used, k, nread;
+
+       if(debug)
+               print("dhcp: bootpread() \n");
+
+       text = smalloc(READSTR);
+       if(waserror()){
+               /* release the scratch buffer before propagating the error */
+               free(text);
+               nexterror();
+       }
+
+       used = 0;
+
+       hnputl(v4, fsip);
+       used += snprint(text+used, READSTR-used, "fsip %15V\n", v4);
+
+       hnputl(v4, auip);
+       used += snprint(text+used, READSTR-used, "auip %15V\n", v4);
+
+       hnputl(v4, gwip);
+       used += snprint(text+used, READSTR-used, "gwip %15V\n", v4);
+
+       hnputl(v4, ipmask);
+       used += snprint(text+used, READSTR-used, "ipmask %15V\n", v4);
+
+       hnputl(v4, ipaddr);
+       used += snprint(text+used, READSTR-used, "ipaddr %15V\n", v4);
+
+       used += snprint(text+used, READSTR-used, "expired %lud\n", iplease);
+
+       /* one "dns" line: dns2ip first, then dns1ip, then any ppp supplied servers */
+       used += snprint(text+used, READSTR-used, "dns");
+       if(dns2ip){
+               hnputl(v4, dns2ip);
+               used += snprint(text+used, READSTR-used, " %15V", v4);
+       }
+       if(dns1ip){
+               hnputl(v4, dns1ip);
+               used += snprint(text+used, READSTR-used, " %15V", v4);
+       }
+       k = 0;
+       while(k < 2){
+               if(ipcmp(pppdns[k], IPnoaddr) != 0 && ipcmp(pppdns[k], v4prefix) != 0)
+                       used += snprint(text+used, READSTR-used, " %15I", pppdns[k]);
+               k++;
+       }
+       snprint(text+used, READSTR-used, "\n");
+
+       nread = readstr(offset, bp, len, text);
+       poperror();
+       free(text);
+       return nread;
+}
+
+/* exported hooks, bound here to the static implementations in this file */
+char*  (*bootp)(Ipifc*) = rbootp;
+int    (*bootpread)(char*, ulong, int) = rbootpread;
diff --git a/kern/net/eipconvtest.c b/kern/net/eipconvtest.c
new file mode 100644 (file)
index 0000000..06b0f9b
--- /dev/null
@@ -0,0 +1,152 @@
+#include <u.h>
+#include <libc.h>
+
+enum
+{
+       Isprefix= 16,   /* flag bit in prefixvals[]: byte is a valid mask byte */
+};
+
+/*
+ * Indexed by a mask byte.  Entries for bytes made of leading one bits
+ * hold the number of one bits (0-8) or'ed with Isprefix; every other
+ * entry is 0.
+ */
+uchar prefixvals[256] =
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+/* first 12 bytes are what eipconv compares against to detect a v4 address in 16 bytes */
+uchar v4prefix[16] = {
+       0, 0, 0, 0,
+       0, 0, 0, 0,
+       0, 0, 0xff, 0xff,
+       0, 0, 0, 0
+};
+
+/*
+ * Store the low 32 bits of v at p, most significant byte first.
+ */
+void
+hnputl(void *p, ulong v)
+{
+       uchar *dst;
+       int shift;
+
+       dst = p;
+       for(shift = 24; shift >= 0; shift -= 8)
+               *dst++ = v>>shift;
+}
+
+/*
+ * Format routine for the address verbs, selected by f->chr:
+ *     E       6 byte ethernet address as 12 hex digits
+ *     I       16 byte ip address; dotted quad if it starts with v4prefix,
+ *             otherwise colon separated hex with one run elided as "::"
+ *     i       v6 address given as 4 ulongs, printed like I
+ *     V       4 byte v4 address as a dotted quad
+ *     M       16 byte mask; "/n" when it is a prefix mask, else printed like I
+ * The text is built in buf and handed to strconv.
+ * Returns sizeof(uchar*).
+ */
+int
+eipconv(va_list *arg, Fconv *f)
+{
+       char buf[8*5];
+       static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
+       static char *ifmt = "%d.%d.%d.%d";
+       uchar *p, ip[16];
+       ulong *lp;
+       ushort s;
+       int i, j, n, eln, eli;
+
+       switch(f->chr) {
+       case 'E':               /* Ethernet address */
+               p = va_arg(*arg, uchar*);
+               sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
+               break;
+       case 'I':               /* Ip address */
+               p = va_arg(*arg, uchar*);
+common:                        /* also entered from 'i' and 'M' with p already set */
+               if(memcmp(p, v4prefix, 12) == 0)
+                       sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
+               else {
+                       /* find longest elision */
+                       /* eli = byte offset of the longest run of zero 16-bit groups, eln = its length in bytes */
+                       eln = eli = -1;
+                       for(i = 0; i < 16; i += 2){
+                               for(j = i; j < 16; j += 2)
+                                       if(p[j] != 0 || p[j+1] != 0)
+                                               break;
+                               if(j > i && j - i > eln){
+                                       eli = i;
+                                       eln = j - i;
+                               }
+                       }
+
+                       /* print with possible elision */
+                       n = 0;
+                       for(i = 0; i < 16; i += 2){
+                               if(i == eli){
+                                       n += sprint(buf+n, "::");
+                                       i += eln;       /* skip the elided groups */
+                                       if(i >= 16)
+                                               break;
+                               } else if(i != 0)
+                                       n += sprint(buf+n, ":");
+                               s = (p[i]<<8) + p[i+1];
+                               n += sprint(buf+n, "%ux", s);
+                       }
+               }
+               break;
+       case 'i':               /* v6 address as 4 longs */
+               lp = va_arg(*arg, ulong*);
+               for(i = 0; i < 4; i++)
+                       hnputl(ip+4*i, *lp++);
+               p = ip;
+               goto common;
+       case 'V':               /* v4 ip address */
+               p = va_arg(*arg, uchar*);
+               sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
+               break;
+       case 'M':               /* ip mask */
+               p = va_arg(*arg, uchar*);
+
+               /* look for a prefix mask */
+               for(i = 0; i < 16; i++)
+                       if(p[i] != 0xff)
+                               break;
+               if(i < 16){
+                       /* first non-0xff byte must be a run of leading ones ... */
+                       if((prefixvals[p[i]] & Isprefix) == 0)
+                               goto common;
+                       /* ... and everything after it must be zero */
+                       for(j = i+1; j < 16; j++)
+                               if(p[j] != 0)
+                                       goto common;
+                       n = 8*i + (prefixvals[p[i]] & ~Isprefix);
+               } else
+                       n = 8*16;
+
+               /* got one, use /xx format */
+               sprint(buf, "/%d", n);
+               break;
+       default:
+               strcpy(buf, "(eipconv)");
+       }
+       strconv(buf, f);
+       return sizeof(uchar*);
+}
+
+/*
+ * Inputs printed by main with both %I and %M: a v4prefix address,
+ * several masks with leading one bits, the all-zero value and two
+ * addresses with scattered non-zero bytes.
+ */
+uchar testvec[11][16] =
+{
+ { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
+};
+
+/*
+ * Install eipconv for the I and M verbs and print every test vector
+ * in both formats.
+ */
+void
+main(void)
+{
+       uchar (*vec)[16];
+
+       fmtinstall('I', eipconv);
+       fmtinstall('M', eipconv);
+       for(vec = testvec; vec < testvec+11; vec++)
+               print("%I\n%M\n", *vec, *vec);
+       exits(0);
+}
diff --git a/kern/net/esp.c b/kern/net/esp.c
new file mode 100644 (file)
index 0000000..9c9f33f
--- /dev/null
@@ -0,0 +1,866 @@
+#include       "u.h"
+#include       "../port/lib.h"
+#include       "mem.h"
+#include       "dat.h"
+#include       "fns.h"
+#include       "../port/error.h"
+
+#include       "ip.h"
+
+#include       "libsec.h"
+
+typedef struct Esphdr Esphdr;
+typedef struct Esptail Esptail;
+typedef struct Userhdr Userhdr;
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+typedef struct Esprc4 Esprc4;
+
+#define DPRINT if(0)print
+
+/* protocol number and fixed header/trailer sizes, in bytes */
+enum
+{
+       IP_ESPPROTO     = 50,
+       EsphdrSize      = 28,   // includes IP header
+       IphdrSize       = 20,   // options have been stripped
+       EsptailSize     = 2,    // does not include pad or auth data
+       UserhdrSize     = 4,    // user visible header size - if enabled
+};
+
+/*
+ * Packet header as handled by this file: the ip header fields followed
+ * by the two esp header fields (spi and sequence number).
+ */
+struct Esphdr
+{
+       /* ip header */
+       uchar   vihl;           /* Version and header length */
+       uchar   tos;            /* Type of service */
+       uchar   length[2];      /* packet length */
+       uchar   id[2];          /* Identification */
+       uchar   frag[2];        /* Fragment information */
+       uchar   Unused; 
+       uchar   espproto;       /* Protocol */
+       uchar   espplen[2];     /* Header plus data length */
+       uchar   espsrc[4];      /* Ip source */
+       uchar   espdst[4];      /* Ip destination */
+
+       /* esp header */
+       uchar   espspi[4];      /* Security parameter index */
+       uchar   espseq[4];      /* Sequence number */
+};
+
+/* trailer that follows the payload and padding */
+struct Esptail
+{
+       uchar   pad;            /* number of pad bytes preceding this trailer */
+       uchar   nexthdr;        /* next protocol */
+};
+
+/* header as seen by the user (only exchanged when Espcb.header is set) */
+struct Userhdr
+{
+       uchar   nexthdr;        // next protocol
+       uchar   unused[3];
+};
+
+/* per protocol counters, reported by espstats */
+struct Esppriv
+{
+       ulong   in;
+       ulong   inerrors;
+};
+
+/*
+ *  protocol specific part of Conv
+ */
+struct Espcb
+{
+       int     incoming;       // set by connect to "*": conversation receives, spi is ours
+       int     header;         // exchange a Userhdr with the user ("header"/"noheader" ctl)
+       ulong   spi;
+       ulong   seq;            // last seq sent
+       ulong   window;         // for replay attacks
+       char    *espalg;
+       void    *espstate;      // other state for esp
+       int     espivlen;       // in bytes
+       int     espblklen;
+       int     (*cipher)(Espcb*, uchar *buf, int len);  // returns 0 on failure
+       char    *ahalg;
+       void    *ahstate;       // other state for ah
+       int     ahlen;          // auth data length in bytes
+       int     ahblklen;
+       int     (*auth)(Espcb*, uchar *buf, int len, uchar *hash);      // returns 0 on mismatch
+};
+
+/* one entry of the espalg[]/ahalg[] tables searched by setalg */
+struct Algorithm
+{
+       char    *name;
+       int     keylen;         // in bits
+       void    (*init)(Espcb*, char* name, uchar *key, int keylen);    // keylen is passed in bits
+};
+
+
+/* limits, in bytes of key stream, used by rc4cipher */
+enum {
+       RC4forward      = 10*1024*1024, // maximum skip forward
+       RC4back = 100*1024,             // maximum look back
+};
+
+/*
+ * rc4 state: the current key stream position plus an older snapshot
+ * that rc4cipher uses for packets arriving behind the current position.
+ */
+struct Esprc4
+{
+       ulong cseq;     // current byte sequence number
+       RC4state current;
+
+       int ovalid;     // old is valid
+       ulong lgseq; // last good sequence
+       ulong oseq;     // old byte sequence number
+       RC4state old;
+};
+
+static Conv* convlookup(Proto *esp, ulong spi);
+static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
+static void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+static void espkick(void *x);
+
+/* ciphers selectable with the "esp" ctl: name, key length in bits, init routine */
+static Algorithm espalg[] =
+{
+       "null",                 0,      nullespinit,
+       "des_56_cbc",           64,     desespinit,
+       "rc4_128",              128,    rc4espinit,
+       nil,                    0,      nil,
+};
+
+/* authenticators selectable with the "ah" ctl: name, key length in bits, init routine */
+static Algorithm ahalg[] =
+{
+       "null",                 0,      nullahinit,
+       "hmac_sha1_96",         128,    shaahinit,
+       "hmac_md5_96",          128,    md5ahinit,
+       nil,                    0,      nil,
+};
+
+/*
+ * Connect conversation c to "address!spi" (argv[1]).
+ * With spi "*" an unused spi is picked at random, the conversation is
+ * marked incoming and its write queue is hung up; with a decimal spi
+ * the conversation is outgoing and its read queue is hung up.
+ * Either way the null cipher and null authenticator are installed.
+ * Returns nil or an error string; the same value is passed to Fsconnected.
+ */
+static char*
+espconnect(Conv *c, char **argv, int argc)
+{
+       char *p, *pp;
+       char *e = nil;
+       ulong spi;
+       Espcb *ecb = (Espcb*)c->ptcl;
+
+       switch(argc) {
+       default:
+               e = "bad args to connect";
+               break;
+       case 2:
+               p = strchr(argv[1], '!');
+               if(p == nil){
+                       e = "malformed address";
+                       break;
+               }
+               *p++ = 0;       /* split argv[1] into address and spi */
+               parseip(c->raddr, argv[1]);
+               findlocalip(c->p->f, c->laddr, c->raddr);
+               ecb->incoming = 0;
+               ecb->seq = 0;
+               if(strcmp(p, "*") == 0) {
+                       /* pick an spi no incoming conversation is using */
+                       qlock(c->p);
+                       for(;;) {
+                               spi = nrand(1<<16) + 256;
+                               if(convlookup(c->p, spi) == nil)
+                                       break;
+                       }
+                       qunlock(c->p);
+                       ecb->spi = spi;
+                       ecb->incoming = 1;
+                       qhangup(c->wq, nil);
+               } else {
+                       spi = strtoul(p, &pp, 10);
+                       if(pp == p) {
+                               e = "malformed address";
+                               break;
+                       }
+                       ecb->spi = spi;
+                       qhangup(c->rq, nil);
+               }
+               /* start with no encryption and no authentication */
+               nullespinit(ecb, "null", nil, 0);
+               nullahinit(ecb, "null", nil, 0);
+       }
+       Fsconnected(c, e);
+
+       return e;
+}
+
+
+/*
+ * Write "Open\n" or "Closed\n" into state depending on c->inuse.
+ */
+static int
+espstate(Conv *c, char *state, int n)
+{
+       char *s;
+
+       if(c->inuse)
+               s = "Open\n";
+       else
+               s = "Closed\n";
+       return snprint(state, n, "%s", s);
+}
+
+/*
+ * Allocate the queues of a new conversation: a 64k message read queue
+ * and a 64k write queue whose kick routine is espkick.
+ */
+static void
+espcreate(Conv *c)
+{
+       c->rq = qopen(64*1024, Qmsg, 0, 0);
+       c->wq = qopen(64*1024, Qkick, espkick, c);
+}
+
+/*
+ * Tear down conversation c: close its queues, forget both addresses,
+ * release the cipher/auth state and clear the control block.
+ */
+static void
+espclose(Conv *c)
+{
+       Espcb *cb;
+
+       qclose(c->rq);
+       qclose(c->wq);
+       qclose(c->eq);
+
+       ipmove(c->laddr, IPnoaddr);
+       ipmove(c->raddr, IPnoaddr);
+
+       cb = (Espcb*)c->ptcl;
+       free(cb->espstate);
+       free(cb->ahstate);
+       memset(cb, 0, sizeof(*cb));
+}
+
+/*
+ * Kick routine of the write queue: take one block from c->wq, wrap it
+ * in an esp packet (header, iv space, padding, trailer, auth data),
+ * run the conversation's cipher and authenticator over it and hand it
+ * to ipoput4.
+ */
+static void
+espkick(void *x)
+{
+       Conv *c = x;
+       Esphdr *eh;
+       Esptail *et;
+       Userhdr *uh;
+       Espcb *ecb;
+       Block *bp;
+       int nexthdr;
+       int payload;
+       int pad;
+       int align;
+       uchar *auth;
+
+       bp = qget(c->wq);
+       if(bp == nil)
+               return;
+
+       qlock(c);
+       ecb = c->ptcl;
+
+       if(ecb->header) {
+               /* make sure the message has a User header */
+               bp = pullupblock(bp, UserhdrSize);
+               if(bp == nil) {
+                       qunlock(c);
+                       return;
+               }
+               /* take nexthdr from the user header, then strip it */
+               uh = (Userhdr*)bp->rp;
+               nexthdr = uh->nexthdr;
+               bp->rp += UserhdrSize;
+       } else {
+               nexthdr = 0;  // what should this be?
+       }
+
+       /* payload counts the iv as well as the user data */
+       payload = BLEN(bp) + ecb->espivlen;
+
+       /* Make space to fit ip header */
+       bp = padblock(bp, EsphdrSize + ecb->espivlen);
+
+       /* pad so that payload+pad+trailer is a multiple of max(4, cipher block length) */
+       align = 4;
+       if(ecb->espblklen > align)
+               align = ecb->espblklen;
+       if(align % ecb->ahblklen != 0)
+               panic("espkick: ahblklen is important after all");
+       pad = (align-1) - (payload + EsptailSize-1)%align;
+
+       /*
+        * Make space for tail
+        * this is done by calling padblock with a negative size
+        * Padblock does not change bp->wp!
+        */
+       bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
+       bp->wp += pad+EsptailSize+ecb->ahlen;
+
+       eh = (Esphdr *)(bp->rp);
+       et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+
+       // fill in tail
+       et->pad = pad;
+       et->nexthdr = nexthdr;
+
+       /* cipher covers iv, data, pad and trailer; the auth data follows it */
+       ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
+       auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+
+       // fill in head
+       eh->vihl = IP_VER4;
+       hnputl(eh->espspi, ecb->spi);
+       hnputl(eh->espseq, ++ecb->seq);
+       v6tov4(eh->espsrc, c->laddr);
+       v6tov4(eh->espdst, c->raddr);
+       eh->espproto = IP_ESPPROTO;
+       eh->frag[0] = 0;
+       eh->frag[1] = 0;
+
+       /* authenticate everything after the ip header, up to the auth data */
+       ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+
+       qunlock(c);
+       //print("esp: pass down: %uld\n", BLEN(bp));
+       ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+}
+
+/*
+ * Receive routine: find the incoming conversation for the packet's
+ * spi, check its auth data, decrypt it, strip header, iv, padding and
+ * trailer, optionally prepend a Userhdr, and pass the result up c->rq.
+ * Packets that fail any check are logged and freed.
+ *
+ * NOTE(review): several netlog formats print the ulong spi with %d.
+ */
+void
+espiput(Proto *esp, Ipifc*, Block *bp)
+{
+       Esphdr *eh;
+       Esptail *et;
+       Userhdr *uh;
+       Conv *c;
+       Espcb *ecb;
+       uchar raddr[IPaddrlen], laddr[IPaddrlen];
+       Fs *f;
+       uchar *auth;
+       ulong spi;
+       int payload, nexthdr;
+
+       f = esp->f;
+
+       bp = pullupblock(bp, EsphdrSize+EsptailSize);
+       if(bp == nil) {
+               netlog(f, Logesp, "esp: short packet\n");
+               return;
+       }
+
+       eh = (Esphdr*)(bp->rp);
+       spi = nhgetl(eh->espspi);
+       v4tov6(raddr, eh->espsrc);
+       v4tov6(laddr, eh->espdst);
+
+       qlock(esp);
+       /* Look for a conversation structure for this port */
+       c = convlookup(esp, spi);
+       if(c == nil) {
+               qunlock(esp);
+               netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+                       laddr, spi);
+               icmpnoconv(f, bp);
+               freeblist(bp);
+               return;
+       }
+
+       /* take the conversation lock before letting go of the protocol lock */
+       qlock(c);
+       qunlock(esp);
+
+       ecb = c->ptcl;
+       // too hard to do decryption/authentication on block lists
+       if(bp->next)
+               bp = concatblock(bp);
+
+       if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+               qunlock(c);
+               netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+                       laddr, spi);
+               freeb(bp);
+               return;
+       }
+
+       /* auth data is the last ahlen bytes; it covers everything from the spi up to there */
+       eh = (Esphdr*)(bp->rp);
+       auth = bp->wp - ecb->ahlen;
+       if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+               qunlock(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+               netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+                       laddr, spi);
+               freeb(bp);
+               return;
+       }
+
+       /* what is left between header and auth data: iv, data, pad, trailer */
+       payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
+       if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+               qunlock(c);
+               netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
+                       laddr, spi, payload, BLEN(bp));
+               freeb(bp);
+               return;
+       }
+       if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+               qunlock(c);
+print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
+               netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
+                       laddr, spi);
+               freeb(bp);
+               return;
+       }
+
+       /* locate the trailer, then drop trailer, padding and iv from the count */
+       payload -= EsptailSize;
+       et = (Esptail*)(bp->rp + EsphdrSize + payload);
+       payload -= et->pad + ecb->espivlen;
+       nexthdr = et->nexthdr;
+       if(payload <= 0) {
+               qunlock(c);
+               netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
+                       laddr, spi);
+               freeb(bp);
+               return;
+       }
+
+       // trim packet
+       bp->rp += EsphdrSize + ecb->espivlen;
+       bp->wp = bp->rp + payload;
+       if(ecb->header) {
+               // assume UserhdrSize < EsphdrSize
+               /* reuse the space just skipped for the user header */
+               bp->rp -= UserhdrSize;
+               uh = (Userhdr*)bp->rp;
+               memset(uh, 0, UserhdrSize);
+               uh->nexthdr = nexthdr;
+       }
+
+       if(qfull(c->rq)){
+               netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+                       laddr, spi);
+               freeblist(bp);
+       }else {
+//print("esp: pass up: %uld\n", BLEN(bp));
+               qpass(c->rq, bp);
+       }
+
+       qunlock(c);
+}
+
+/*
+ * Control requests on an esp conversation:
+ *     esp alg key     select the cipher
+ *     ah alg key      select the authenticator
+ *     header          exchange a user level header with the user
+ *     noheader        do not
+ * Returns nil or an error string.
+ */
+char*
+espctl(Conv *c, char **f, int n)
+{
+       Espcb *cb;
+       char *verb;
+
+       cb = c->ptcl;
+       verb = f[0];
+
+       if(strcmp(verb, "esp") == 0)
+               return setalg(cb, f, n, espalg);
+       if(strcmp(verb, "ah") == 0)
+               return setalg(cb, f, n, ahalg);
+       if(strcmp(verb, "header") == 0){
+               cb->header = 1;
+               return nil;
+       }
+       if(strcmp(verb, "noheader") == 0){
+               cb->header = 0;
+               return nil;
+       }
+       return "unknown control request";
+}
+
+/*
+ * Advise routine: bp holds the header of a packet some error was
+ * reported about.  If an incoming conversation exists for its spi,
+ * hang up both of its queues with msg.  bp is always freed.
+ */
+void
+espadvise(Proto *esp, Block *bp, char *msg)
+{
+       Esphdr *h;
+       Conv *c;
+       ulong spi;
+
+       h = (Esphdr*)(bp->rp);
+
+       /*
+        * espspi is a 4 byte field (espiput reads it with nhgetl);
+        * reading only 2 bytes would look up the wrong spi.
+        */
+       spi = nhgetl(h->espspi);
+       qlock(esp);
+       c = convlookup(esp, spi);
+       if(c != nil) {
+               qhangup(c->rq, msg);
+               qhangup(c->wq, msg);
+       }
+       qunlock(esp);
+       freeblist(bp);
+}
+
+/*
+ * Print the protocol's two counters (in, inerrors) into buf.
+ */
+int
+espstats(Proto *esp, char *buf, int len)
+{
+       Esppriv *priv = esp->priv;
+
+       return snprint(buf, len, "%lud %lud\n", priv->in, priv->inerrors);
+}
+
+/*
+ * Print the local address of c; the spi is appended for incoming
+ * conversations.
+ */
+static int
+esplocal(Conv *c, char *buf, int len)
+{
+       Espcb *cb;
+       int nw;
+
+       cb = c->ptcl;
+       qlock(c);
+       if(!cb->incoming)
+               nw = snprint(buf, len, "%I\n", c->laddr);
+       else
+               nw = snprint(buf, len, "%I!%uld\n", c->laddr, cb->spi);
+       qunlock(c);
+       return nw;
+}
+
+/*
+ * Print the remote address of c; the spi is appended for outgoing
+ * conversations.
+ */
+static int
+espremote(Conv *c, char *buf, int len)
+{
+       Espcb *cb;
+       int nw;
+
+       cb = c->ptcl;
+       qlock(c);
+       if(!cb->incoming)
+               nw = snprint(buf, len, "%I!%uld\n", c->raddr, cb->spi);
+       else
+               nw = snprint(buf, len, "%I\n", c->raddr);
+       qunlock(c);
+       return nw;
+}
+
+/*
+ * Return the incoming conversation of esp whose spi matches, or nil.
+ * The conv array is walked up to its nil terminator.
+ */
+static Conv*
+convlookup(Proto *esp, ulong spi)
+{
+       Conv **slot;
+       Espcb *cb;
+
+       slot = esp->conv;
+       while(*slot != nil){
+               cb = (*slot)->ptcl;
+               if(cb->incoming && cb->spi == spi)
+                       return *slot;
+               slot++;
+       }
+       return nil;
+}
+
+/*
+ * Handle an "esp" or "ah" ctl request: f[1] names an entry of alg[],
+ * f[2] is the key as a string of hex digits.  The digits are read from
+ * the end of the string, so the last digit becomes the low nibble of
+ * key[0]; missing digits leave zero bits, surplus digits are ignored.
+ * f[2] is converted in place to nibble values.
+ * Returns nil on success or an error string.
+ */
+static char *
+setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
+{
+       uchar *key;
+       int i, nbyte, nchar;
+       int c;
+
+       if(n < 2)
+               return "bad format";
+       for(; alg->name; alg++)
+               if(strcmp(f[1], alg->name) == 0)
+                       break;
+       if(alg->name == nil)
+               return "unknown algorithm";
+
+       if(n != 3)
+               return "bad format";
+       nbyte = (alg->keylen + 7) >> 3;
+       nchar = strlen(f[2]);
+       /* turn every hex digit into its value */
+       for(i=0; i<nchar; i++) {
+               c = f[2][i];
+               if(c >= '0' && c <= '9')
+                       f[2][i] -= '0';
+               else if(c >= 'a' && c <= 'f')
+                       f[2][i] -= 'a'-10;
+               else if(c >= 'A' && c <= 'F')
+                       f[2][i] -= 'A'-10;
+               else
+                       return "bad character in key";
+       }
+       key = smalloc(nbyte);
+       memset(key, 0, nbyte);  /* nibbles are or'ed in below */
+       /*
+        * i counts nibbles, two per key byte.  The bound used to be
+        * i*2<nbyte, which filled only a quarter of the key.
+        */
+       for(i=0; i<nchar && (i>>1)<nbyte; i++) {
+               c = f[2][nchar-i-1];
+               if(i&1)
+                       c <<= 4;
+               key[i>>1] |= c;
+       }
+
+       alg->init(ecb, alg->name, key, alg->keylen);
+       free(key);
+       return nil;
+}
+
+/* cipher for the "null" algorithm: leaves the buffer untouched and reports success */
+static int
+nullcipher(Espcb*, uchar*, int)
+{
+       return 1;
+}
+
+/*
+ * Select the null cipher: block length 1, no iv.  The key arguments
+ * are ignored.
+ */
+static void
+nullespinit(Espcb *ecb, char *name, uchar*, int)
+{
+       ecb->cipher = nullcipher;
+       ecb->espivlen = 0;
+       ecb->espblklen = 1;
+       ecb->espalg = name;
+}
+
+/* authenticator for the "null" algorithm: accepts everything, writes no auth data */
+static int
+nullauth(Espcb*, uchar*, int, uchar*)
+{
+       return 1;
+}
+
+/*
+ * Select the null authenticator: block length 1, no auth data.  The
+ * key arguments are ignored.
+ */
+static void
+nullahinit(Espcb *ecb, char *name, uchar*, int)
+{
+       ecb->auth = nullauth;
+       ecb->ahlen = 0;
+       ecb->ahblklen = 1;
+       ecb->ahalg = name;
+}
+
+/*
+ * Compute the HMAC-SHA1 of t[0..tlen) under key[0..klen) into hash.
+ * Keys longer than the 64 byte block are first replaced by their sha1
+ * digest, as RFC 2104 specifies; this also keeps the key from running
+ * past the end of ipad/opad.
+ */
+void
+seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+       uchar ipad[65], opad[65];
+       int i;
+       DigestState *digest;
+       uchar innerhash[SHA1dlen];
+       uchar keyhash[SHA1dlen];
+
+       if(klen > 64){
+               sha1(key, klen, keyhash, nil);
+               key = keyhash;
+               klen = SHA1dlen;
+       }
+
+       for(i=0; i<64; i++){
+               ipad[i] = 0x36;
+               opad[i] = 0x5c;
+       }
+       ipad[64] = opad[64] = 0;
+       for(i=0; i<klen; i++){
+               ipad[i] ^= key[i];
+               opad[i] ^= key[i];
+       }
+       /* inner hash: H(ipad || text) */
+       digest = sha1(ipad, 64, nil, nil);
+       sha1(t, tlen, innerhash, digest);
+       /* outer hash: H(opad || inner) */
+       digest = sha1(opad, 64, nil, nil);
+       sha1(innerhash, SHA1dlen, hash, digest);
+}
+
+/*
+ * Compute the HMAC-SHA1 of t with the 16 byte key in ecb->ahstate,
+ * compare its first ahlen bytes with auth, then overwrite auth with
+ * them.  Returns 1 if auth matched before being overwritten, else 0.
+ */
+static int
+shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+       uchar digest[SHA1dlen];
+       int ok;
+
+       memset(digest, 0, sizeof digest);
+       seanq_hmac_sha1(digest, t, tlen, (uchar*)ecb->ahstate, 16);
+       ok = 1;
+       if(memcmp(auth, digest, ecb->ahlen) != 0)
+               ok = 0;
+       memmove(auth, digest, ecb->ahlen);
+       return ok;
+}
+
+/*
+ * Select hmac_sha1_96: 12 bytes of auth data, keyed with a copy of the
+ * 128 bit key.  klen is in bits and must be 128.
+ */
+static void
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+       if(klen != 128)
+               panic("shaahinit: bad keylen");
+       /*
+        * bits to bytes.  This used to shift by 8, which gave 0 bytes:
+        * no key was stored, yet shaauth reads 16 bytes of ahstate.
+        */
+       klen >>= 3;
+
+       ecb->ahalg = name;
+       ecb->ahblklen = 1;
+       ecb->ahlen = 12;
+       ecb->auth = shaauth;
+       ecb->ahstate = smalloc(klen);
+       memmove(ecb->ahstate, key, klen);
+}
+
+/*
+ * Compute the HMAC-MD5 of t[0..tlen) under key[0..klen) into hash.
+ * Keys longer than the 64 byte block are first replaced by their md5
+ * digest, as RFC 2104 specifies; this also keeps the key from running
+ * past the end of ipad/opad.
+ */
+void
+seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+       uchar ipad[65], opad[65];
+       int i;
+       DigestState *digest;
+       uchar innerhash[MD5dlen];
+       uchar keyhash[MD5dlen];
+
+       if(klen > 64){
+               md5(key, klen, keyhash, nil);
+               key = keyhash;
+               klen = MD5dlen;
+       }
+
+       for(i=0; i<64; i++){
+               ipad[i] = 0x36;
+               opad[i] = 0x5c;
+       }
+       ipad[64] = opad[64] = 0;
+       for(i=0; i<klen; i++){
+               ipad[i] ^= key[i];
+               opad[i] ^= key[i];
+       }
+       /* inner hash: H(ipad || text) */
+       digest = md5(ipad, 64, nil, nil);
+       md5(t, tlen, innerhash, digest);
+       /* outer hash: H(opad || inner) */
+       digest = md5(opad, 64, nil, nil);
+       md5(innerhash, MD5dlen, hash, digest);
+}
+
+/*
+ * Compute the HMAC-MD5 of t with the 16 byte key in ecb->ahstate,
+ * compare its first ahlen bytes with auth, then overwrite auth with
+ * them.  Returns 1 if auth matched before being overwritten, else 0.
+ */
+static int
+md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+       uchar digest[MD5dlen];
+       int ok;
+
+       memset(digest, 0, sizeof digest);
+       seanq_hmac_md5(digest, t, tlen, (uchar*)ecb->ahstate, 16);
+       ok = 1;
+       if(memcmp(auth, digest, ecb->ahlen) != 0)
+               ok = 0;
+       memmove(auth, digest, ecb->ahlen);
+       return ok;
+}
+
+/*
+ * Select hmac_md5_96: 12 bytes of auth data, keyed with a copy of the
+ * 128 bit key.  klen is in bits and must be 128.
+ */
+static void
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+       int nbytes;
+
+       if(klen != 128)
+               panic("md5ahinit: bad keylen");
+       nbytes = klen >> 3;     // bits to bytes
+
+       ecb->ahalg = name;
+       ecb->ahblklen = 1;
+       ecb->ahlen = 12;
+       ecb->auth = md5auth;
+
+       ecb->ahstate = smalloc(nbytes);
+       memmove(ecb->ahstate, key, nbytes);
+}
+
+/*
+ * DES cipher routine for "des_56_cbc".  p[0..n) starts with an 8 byte
+ * iv followed by 8 byte blocks.
+ * Incoming: the iv is read from the packet and each block is run
+ * through block_cipher and xor'ed with the previous block's original
+ * contents.
+ * Outgoing: the saved ivec is written to the front, each block is
+ * xor'ed with ivec before block_cipher, and the result becomes the
+ * next ivec (carried over to the next packet).
+ * Always returns 1.
+ * NOTE(review): presumably the last block_cipher argument selects
+ * decrypt (1) / encrypt (0) -- confirm against its definition.
+ */
+static int
+descipher(Espcb *ecb, uchar *p, int n)
+{
+       uchar tmp[8];
+       uchar *pp, *tp, *ip, *eip, *ep;
+       DESstate *ds = ecb->espstate;
+
+       ep = p + n;
+       if(ecb->incoming) {
+               memmove(ds->ivec, p, 8);
+               p += 8;
+               while(p < ep){
+                       /* keep the block as received: it chains into the next one */
+                       memmove(tmp, p, 8);
+                       block_cipher(ds->expanded, p, 1);
+                       tp = tmp;
+                       ip = ds->ivec;
+                       for(eip = ip+8; ip < eip; ){
+                               *p++ ^= *ip;
+                               *ip++ = *tp++;
+                       }
+               }
+       } else {
+               memmove(p, ds->ivec, 8);
+               for(p += 8; p < ep; p += 8){
+                       pp = p;
+                       ip = ds->ivec;
+                       for(eip = ip+8; ip < eip; )
+                               *pp++ ^= *ip++;
+                       block_cipher(ds->expanded, p, 0);
+                       memmove(ds->ivec, p, 8);
+               }
+       }
+       return 1;
+}
+       
+/*
+ * Select the DES cipher: 8 byte blocks, 8 byte iv.  At most the first
+ * 8 bytes of k are used as key (n is its length in bits); the initial
+ * iv is drawn from nrand.
+ */
+static void
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+       uchar iv[8], deskey[8];
+       int j, nbytes;
+
+       // bits to bytes, capped at the size of a des key
+       nbytes = (n+7)>>3;
+       if(nbytes > 8)
+               nbytes = 8;
+       memset(deskey, 0, sizeof(deskey));
+       memmove(deskey, k, nbytes);
+
+       j = 0;
+       while(j < 8)
+               iv[j++] = nrand(256);
+
+       ecb->espalg = name;
+       ecb->espblklen = 8;
+       ecb->espivlen = 8;
+       ecb->cipher = descipher;
+       ecb->espstate = smalloc(sizeof(DESstate));
+       setupDESstate(ecb->espstate, deskey, iv);
+}
+
+/*
+ * RC4 cipher routine for "rc4_128".  The first 4 bytes of p carry the
+ * byte sequence number of the key stream position the rest of the
+ * buffer was encrypted at.
+ * Outgoing: store cseq, encrypt the rest, advance cseq.
+ * Incoming: compare the packet's position with cseq:
+ *     equal   decrypt with the current state
+ *     ahead   skip the current state forward (at most RC4forward),
+ *             saving the pre-skip state in old if none is saved
+ *     behind  decrypt with a copy of the old state, if it reaches
+ *             back that far (at most RC4back)
+ * Returns 1 on success; 0 (with up->errstr set for the skip/look-back
+ * failures) if the packet cannot be decrypted or n < 4.
+ */
+static int
+rc4cipher(Espcb *ecb, uchar *p, int n)
+{
+       Esprc4 *esprc4;
+       RC4state tmpstate;
+       ulong seq;
+       long d, dd;
+
+       if(n < 4)
+               return 0;
+
+       esprc4 = ecb->espstate;
+       if(ecb->incoming) {
+               seq = nhgetl(p);
+               p += 4;
+               n -= 4;
+               /* signed distance between the packet's position and ours */
+               d = seq-esprc4->cseq;
+               if(d == 0) {
+                       rc4(&esprc4->current, p, n);
+                       esprc4->cseq += n;
+                       /* retire the old state once we are RC4back past the last gap */
+                       if(esprc4->ovalid) {
+                               dd = esprc4->cseq - esprc4->lgseq;
+                               if(dd > RC4back)
+                                       esprc4->ovalid = 0;
+                       }
+               } else if(d > 0) {
+print("missing packet: %uld %ld\n", seq, d);
+                       // this link is hosed
+                       if(d > RC4forward) {
+                               strcpy(up->errstr, "rc4cipher: skipped too much");
+                               return 0;
+                       }
+                       esprc4->lgseq = seq;
+                       if(!esprc4->ovalid) {
+                               /* remember where we were so late packets can still be decrypted */
+                               esprc4->ovalid = 1;
+                               esprc4->oseq = esprc4->cseq;
+                               memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
+                       }
+                       rc4skip(&esprc4->current, d);
+                       rc4(&esprc4->current, p, n);
+                       esprc4->cseq = seq+n;
+               } else {
+print("reordered packet: %uld %ld\n", seq, d);
+                       dd = seq - esprc4->oseq;
+                       if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+                               strcpy(up->errstr, "rc4cipher: too far back");
+                               return 0;
+                       }
+                       /* work on a copy so the saved state stays where it is */
+                       memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
+                       rc4skip(&tmpstate, dd);
+                       rc4(&tmpstate, p, n);
+                       return 1;
+               }
+
+               // move old state up
+               if(esprc4->ovalid) {
+                       dd = esprc4->cseq - RC4back - esprc4->oseq;
+                       if(dd > 0) {
+                               rc4skip(&esprc4->old, dd);
+                               esprc4->oseq += dd;
+                       }
+               }
+       } else {
+               hnputl(p, esprc4->cseq);
+               p += 4;
+               n -= 4;
+               rc4(&esprc4->current, p, n);
+               esprc4->cseq += n;
+       }
+       return 1;
+}
+
+/*
+ * Select the RC4 cipher: block length 4, with a 4 byte "iv" that
+ * rc4cipher uses for the byte sequence number.  n is the key length
+ * in bits.
+ */
+static void
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+       Esprc4 *st;
+       int nbytes;
+
+       nbytes = (n+7)>>3;      // bits to bytes
+
+       st = smalloc(sizeof(Esprc4));
+       memset(st, 0, sizeof(Esprc4));
+       setupRC4state(&st->current, k, nbytes);
+
+       ecb->espalg = name;
+       ecb->espblklen = 4;
+       ecb->espivlen = 4;
+       ecb->cipher = rc4cipher;
+       ecb->espstate = st;
+}
+       
+/*
+ * Build the esp Proto (callbacks, protocol number, per conversation
+ * control block size) and register it with fs.
+ */
+void
+espinit(Fs *fs)
+{
+       Proto *p;
+
+       p = smalloc(sizeof(Proto));
+       p->priv = smalloc(sizeof(Esppriv));
+
+       /* identity and sizes */
+       p->name = "esp";
+       p->ipproto = IP_ESPPROTO;
+       p->nc = Nchans;
+       p->ptclsize = sizeof(Espcb);
+
+       /* conversation life cycle */
+       p->create = espcreate;
+       p->connect = espconnect;
+       p->announce = nil;
+       p->close = espclose;
+       p->ctl = espctl;
+
+       /* packet input and error reports */
+       p->rcv = espiput;
+       p->advise = espadvise;
+
+       /* status files */
+       p->state = espstate;
+       p->stats = espstats;
+       p->local = esplocal;
+       p->remote = espremote;
+
+       Fsproto(fs, p);
+}
diff --git a/kern/net/ethermedium.c b/kern/net/ethermedium.c
new file mode 100644 (file)
index 0000000..1877817
--- /dev/null
@@ -0,0 +1,792 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+#include "kernel.h"
+
+/* header placed at the front of every frame by etherbwrite */
+typedef struct Etherhdr
+{
+       uchar   d[6];           /* destination mac address */
+       uchar   s[6];           /* source mac address */
+       uchar   t[2];           /* ether type, high byte first */
+} Etherhdr;
+
+/* ip address made entirely of 0xff bytes; recvarp refuses to cache it */
+static uchar ipbroadcast[IPaddrlen] = {
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+
+/* ethernet address made entirely of 0xff bytes; also refused by recvarp */
+static uchar etherbroadcast[] = {
+       0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+
+/* Medium operations */
+static void    etherbind(Ipifc *ifc, int argc, char **argv);
+static void    etherunbind(Ipifc *ifc);
+static void    etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void    etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void    etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void    sendgarp(Ipifc *ifc, uchar *ip);
+static void    etherpref2addr(uchar *pref, uchar *ea);
+
+/* kernel processes started by etherbind */
+static void    etherread4(void *a);
+static void    etherread6(void *a);
+static void    recvarpproc(void *v);
+
+/* address resolution helpers */
+static void    sendarp(Ipifc *ifc, Arpent *a);
+static void    resolveaddr6(Ipifc *ifc, Arpent *a);
+static Block*  multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac);
+static int     multicastea(uchar *ea, uchar *ip);
+
+/* the "ether" medium: 14 byte header, frames of 60 to 1514 bytes */
+Medium ethermedium =
+{
+       .name = "ether",
+       .hsize = 14,
+       .mintu = 60,
+       .maxtu = 1514,
+       .maclen = 6,
+       .bind = etherbind,
+       .unbind = etherunbind,
+       .bwrite = etherbwrite,
+       .addmulti = etheraddmulti,
+       .remmulti = etherremmulti,
+       .ares = arpenter,
+       .areg = sendgarp,
+       .pref2addr = etherpref2addr,
+};
+
+/* the "gbe" medium: same operations as "ether" but frames up to 9014 bytes */
+Medium gbemedium =
+{
+       .name = "gbe",
+       .hsize = 14,
+       .mintu = 60,
+       .maxtu = 9014,
+       .maclen = 6,
+       .bind = etherbind,
+       .unbind = etherunbind,
+       .bwrite = etherbwrite,
+       .addmulti = etheraddmulti,
+       .remmulti = etherremmulti,
+       .ares = arpenter,
+       .areg = sendgarp,
+       .pref2addr = etherpref2addr,
+};
+
+/*
+ *  per interface state, allocated by etherbind, kept in
+ *  ifc->arg and freed by etherunbind.
+ */
+typedef struct Etherrock
+{
+       Fs      *f;             /* ip stack this interface belongs to */
+       Proc    *arpp;          /* recvarpproc, zero once it has exited */
+       Proc    *read4p;        /* etherread4, zero once it has exited */
+       Proc    *read6p;        /* etherread6, zero once it has exited */
+       Chan    *mchan4;        /* v4 data channel */
+       Chan    *achan;         /* arp channel */
+       Chan    *cchan4;        /* v4 control channel */
+       Chan    *mchan6;        /* v6 data channel */
+       Chan    *cchan6;        /* v6 control channel */
+} Etherrock;
+
+/*
+ *  ether types and arp opcodes used when building
+ *  and decoding arp packets
+ */
+enum
+{
+       /* ether types */
+       ETARP           = 0x0806,
+       ETIP4           = 0x0800,
+       ETIP6           = 0x86DD,
+
+       /* values of the arp op field */
+       ARPREQUEST      = 1,
+       ARPREPLY        = 2,
+};
+
+/* an arp packet, ethernet header included, as read and written on achan */
+typedef struct Etherarp
+{
+       /* ethernet header */
+       uchar   d[6];           /* destination mac */
+       uchar   s[6];           /* source mac */
+       uchar   type[2];        /* ether type, set to ETARP on output */
+
+       /* arp body */
+       uchar   hrd[2];         /* hardware type, set to 1 on output */
+       uchar   pro[2];         /* protocol type, set to ETIP4 on output */
+       uchar   hln;            /* length of a hardware address */
+       uchar   pln;            /* length of a protocol address */
+       uchar   op[2];          /* ARPREQUEST or ARPREPLY */
+       uchar   sha[6];         /* sender hardware address */
+       uchar   spa[4];         /* sender protocol (v4) address */
+       uchar   tha[6];         /* target hardware address */
+       uchar   tpa[4];         /* target protocol (v4) address */
+} Etherarp;
+
+static char *nbmsg = "nonblocking";    /* written to the v4 and v6 control channels by etherbind */
+
+/*
+ *  called to bind an IP ifc to an ethernet device
+ *  called with ifc wlock'd
+ *
+ *  argv[2] names the device to dial, so at least three
+ *  arguments are required.  On success the five channels are
+ *  stored in a new Etherrock hung off ifc->arg and the three
+ *  reader processes are started.  On error every channel opened
+ *  so far is closed and the error is passed on.
+ */
+static void
+etherbind(Ipifc *ifc, int argc, char **argv)
+{
+       Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
+       char addr[Maxpath];     //char addr[2*KNAMELEN];
+       char dir[Maxpath];      //char dir[2*KNAMELEN];
+       char *buf;
+       int fd, cfd, n;
+       char *ptr;
+       Etherrock *er;
+
+       /* argv[2] is used below; make sure it was actually supplied */
+       if(argc < 3)
+               error(Ebadarg);
+
+       mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
+       buf = nil;
+       if(waserror()){
+               /* undo whatever part of the setup succeeded */
+               if(mchan4 != nil)
+                       cclose(mchan4);
+               if(cchan4 != nil)
+                       cclose(cchan4);
+               if(achan != nil)
+                       cclose(achan);
+               if(mchan6 != nil)
+                       cclose(mchan6);
+               if(cchan6 != nil)
+                       cclose(cchan6);
+               if(buf != nil)
+                       free(buf);
+               nexterror(); 
+       }
+
+       /*
+        *  open ip conversation
+        *
+        *  the dial will fail if the type is already open on
+        *  this device.
+        */
+       snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
+       fd = kdial(addr, nil, dir, &cfd);
+       if(fd < 0)
+               errorf("dial 0x800 failed: %s", up->env->errstr);
+       mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
+       cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
+       kclose(fd);
+       kclose(cfd);
+
+       /*
+        *  make it non-blocking
+        */
+       devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+
+       /*
+        *  get mac address and speed
+        */
+       snprint(addr, sizeof(addr), "%s/stats", dir);
+       fd = kopen(addr, OREAD);
+       if(fd < 0)
+               errorf("can't open ether stats: %s", up->env->errstr);
+
+       /* read at most 511 bytes so there is room for the terminator */
+       buf = smalloc(512);
+       n = kread(fd, buf, 511);
+       kclose(fd);
+       if(n <= 0)
+               error(Eio);
+       buf[n] = 0;
+
+       /* the mac address is mandatory */
+       ptr = strstr(buf, "addr: ");
+       if(!ptr)
+               error(Eio);
+       ptr += 6;
+       parsemac(ifc->mac, ptr, 6);
+
+       /* the speed is optional; fall back to 100 when it is absent */
+       ptr = strstr(buf, "mbps: ");
+       if(ptr){
+               ptr += 6;
+               ifc->mbps = atoi(ptr);
+       } else
+               ifc->mbps = 100;
+
+       /*
+        *  open arp conversation
+        */
+       snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
+       fd = kdial(addr, nil, nil, nil);
+       if(fd < 0)
+               errorf("dial 0x806 failed: %s", up->env->errstr);
+       achan = commonfdtochan(fd, ORDWR, 0, 1);
+       kclose(fd);
+
+       /*
+        *  open ipv6 conversation
+        *
+        *  the dial will fail if the type is already open on
+        *  this device.
+        */
+       snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
+       fd = kdial(addr, nil, dir, &cfd);
+       if(fd < 0)
+               errorf("dial 0x86DD failed: %s", up->env->errstr);
+       mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
+       cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
+       kclose(fd);
+       kclose(cfd);
+
+       /*
+        *  make it non-blocking
+        */
+       devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+
+       /* everything is open: record it under the rock */
+       er = smalloc(sizeof(*er));
+       er->mchan4 = mchan4;
+       er->cchan4 = cchan4;
+       er->achan = achan;
+       er->mchan6 = mchan6;
+       er->cchan6 = cchan6;
+       er->f = ifc->conv->p->f;
+       ifc->arg = er;
+
+       free(buf);
+       poperror();
+
+       /* each process records itself in the rock when it starts running */
+       kproc("etherread4", etherread4, ifc, 0);
+       kproc("recvarpproc", recvarpproc, ifc, 0);
+       kproc("etherread6", etherread6, ifc, 0);
+}
+
+/*
+ *  tear down what etherbind set up.
+ *  called with ifc wlock'd
+ */
+static void
+etherunbind(Ipifc *ifc)
+{
+       Etherrock *rock;
+
+       rock = ifc->arg;
+
+       /* tell every helper process that registered itself to go away */
+       if(rock->read4p)
+               postnote(rock->read4p, 1, "unbind", 0);
+       if(rock->read6p)
+               postnote(rock->read6p, 1, "unbind", 0);
+       if(rock->arpp)
+               postnote(rock->arpp, 1, "unbind", 0);
+
+       /* poll every 300 until all three have cleared their slots */
+       for(;;){
+               if(rock->arpp == 0 && rock->read4p == 0 && rock->read6p == 0)
+                       break;
+               tsleep(&up->sleep, return0, 0, 300);
+       }
+
+       /* nobody is using the channels any more */
+       if(rock->mchan4 != nil)
+               cclose(rock->mchan4);
+       if(rock->achan != nil)
+               cclose(rock->achan);
+       if(rock->cchan4 != nil)
+               cclose(rock->cchan4);
+       if(rock->mchan6 != nil)
+               cclose(rock->mchan6);
+       if(rock->cchan6 != nil)
+               cclose(rock->cchan6);
+
+       free(rock);
+}
+
+/*
+ *  called by ipoput with a single block to write with ifc rlock'd.
+ *
+ *  if arpget hands back an arp entry the destination mac is not
+ *  known yet: broadcast and multicast destinations are filled in
+ *  on the spot, anything else triggers address resolution and
+ *  nothing is written now.
+ */
+static void
+etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+       Etherrock *rock;
+       Etherhdr *hdr;
+       Arpent *ent;
+       uchar dst[6];
+
+       rock = ifc->arg;
+
+       /* get mac address of destination */
+       ent = arpget(rock->f->arp, bp, version, ifc, ip, dst);
+       if(ent != nil){
+               /* check for broadcast or multicast */
+               bp = multicastarp(rock->f, ent, ifc->m, dst);
+               if(bp == nil){
+                       /* unresolved unicast: ask the network who has it */
+                       if(version == V4)
+                               sendarp(ifc, ent);
+                       else if(version == V6)
+                               resolveaddr6(ifc, ent);
+                       else
+                               panic("etherbwrite: version %d", version);
+                       return;
+               }
+       }
+
+       /* make it a single block with space for the ether header */
+       bp = padblock(bp, ifc->m->hsize);
+       if(bp->next)
+               bp = concatblock(bp);
+       /* pad short frames up to the interface minimum */
+       if(BLEN(bp) < ifc->mintu)
+               bp = adjustblock(bp, ifc->mintu);
+       hdr = (Etherhdr*)bp->rp;
+
+       /* copy in mac addresses */
+       memmove(hdr->s, ifc->mac, sizeof(hdr->s));
+       memmove(hdr->d, dst, sizeof(hdr->d));
+
+       /* set the ether type and write on the matching data channel */
+       if(version == V4){
+               hdr->t[0] = 0x08;
+               hdr->t[1] = 0x00;
+               devtab[rock->mchan4->type]->bwrite(rock->mchan4, bp, 0);
+       }else if(version == V6){
+               hdr->t[0] = 0x86;
+               hdr->t[1] = 0xDD;
+               devtab[rock->mchan6->type]->bwrite(rock->mchan6, bp, 0);
+       }else
+               panic("etherbwrite2: version %d", version);
+
+       ifc->out++;
+}
+
+
+/*
+ *  process to read v4 packets from the ethernet and
+ *  feed them to the ip stack
+ */
+static void
+etherread4(void *a)
+{
+       Ipifc *ifc;
+       Etherrock *rock;
+       Block *pkt;
+
+       ifc = a;
+       rock = ifc->arg;
+
+       /* hide identity under a rock for unbind */
+       rock->read4p = up;
+       if(waserror()){
+               /* clearing the slot is what etherunbind waits for */
+               rock->read4p = 0;
+               pexit("hangup", 1);
+       }
+
+       while(1){
+               pkt = devtab[rock->mchan4->type]->bread(rock->mchan4, ifc->maxtu, 0);
+
+               /* can't get the interface lock: drop the packet */
+               if(!canrlock(ifc)){
+                       freeb(pkt);
+                       continue;
+               }
+
+               /* release the lock before passing any error outward */
+               if(waserror()){
+                       runlock(ifc);
+                       nexterror();
+               }
+
+               ifc->in++;
+               /* step over the medium header */
+               pkt->rp += ifc->m->hsize;
+               if(ifc->lifc != nil)
+                       ipiput4(rock->f, ifc, pkt);
+               else
+                       freeb(pkt);
+
+               runlock(ifc);
+               poperror();
+       }
+}
+
+
+/*
+ *  process to read v6 packets from the ethernet and
+ *  feed them to the ip stack
+ */
+static void
+etherread6(void *a)
+{
+       Ipifc *ifc;
+       Etherrock *rock;
+       Block *pkt;
+
+       ifc = a;
+       rock = ifc->arg;
+
+       /* hide identity under a rock for unbind */
+       rock->read6p = up;
+       if(waserror()){
+               /* clearing the slot is what etherunbind waits for */
+               rock->read6p = 0;
+               pexit("hangup", 1);
+       }
+
+       while(1){
+               pkt = devtab[rock->mchan6->type]->bread(rock->mchan6, ifc->maxtu, 0);
+
+               /* can't get the interface lock: drop the packet */
+               if(!canrlock(ifc)){
+                       freeb(pkt);
+                       continue;
+               }
+
+               /* release the lock before passing any error outward */
+               if(waserror()){
+                       runlock(ifc);
+                       nexterror();
+               }
+
+               ifc->in++;
+               /* step over the medium header */
+               pkt->rp += ifc->m->hsize;
+               if(ifc->lifc != nil)
+                       ipiput6(rock->f, ifc, pkt);
+               else
+                       freeb(pkt);
+
+               runlock(ifc);
+               poperror();
+       }
+}
+
+/*
+ *  ask the device to start receiving the ethernet multicast
+ *  address that corresponds to ip multicast address a
+ */
+static void
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *)
+{
+       Etherrock *rock;
+       uchar ea[6];
+       char cmd[64];
+       int v;
+
+       rock = ifc->arg;
+       v = multicastea(ea, a);
+       sprint(cmd, "addmulti %E", ea);
+
+       /* the request goes to the control channel of the matching ip version */
+       if(v == V4)
+               devtab[rock->cchan4->type]->write(rock->cchan4, cmd, strlen(cmd), 0);
+       else if(v == V6)
+               devtab[rock->cchan6->type]->write(rock->cchan6, cmd, strlen(cmd), 0);
+       else
+               panic("etheraddmulti: version %d", v);
+}
+
+/*
+ *  ask the device to stop receiving the ethernet multicast
+ *  address that corresponds to ip multicast address a
+ */
+static void
+etherremmulti(Ipifc *ifc, uchar *a, uchar *)
+{
+       Etherrock *rock;
+       uchar ea[6];
+       char cmd[64];
+       int v;
+
+       rock = ifc->arg;
+       v = multicastea(ea, a);
+       sprint(cmd, "remmulti %E", ea);
+
+       /* the request goes to the control channel of the matching ip version */
+       if(v == V4)
+               devtab[rock->cchan4->type]->write(rock->cchan4, cmd, strlen(cmd), 0);
+       else if(v == V6)
+               devtab[rock->cchan6->type]->write(rock->cchan6, cmd, strlen(cmd), 0);
+       else
+               panic("etherremmulti: version %d", v);
+}
+
+/*
+ *  broadcast an ethernet arp request for the address in a
+ *  (only v4, v6 uses the neighbor discovery, rfc1970).
+ *  the arp entry is released on every path.
+ */
+static void
+sendarp(Ipifc *ifc, Arpent *a)
+{
+       Etherrock *rock;
+       Etherarp *req;
+       Block *held, *out;
+       int len;
+
+       rock = ifc->arg;
+
+       /* don't do anything if it's been less than a second since the last */
+       if(NOW - a->ctime < 1000){
+               arprelease(rock->f->arp, a);
+               return;
+       }
+
+       /* remove all but the last message */
+       for(held = a->hold; held != nil && held != a->last; held = a->hold){
+               a->hold = held->list;
+               freeblist(held);
+       }
+
+       /* try to keep it around for a second more */
+       a->ctime = NOW;
+       arprelease(rock->f->arp, a);
+
+       /* the request is padded out to the medium's minimum size */
+       len = sizeof(Etherarp);
+       if(len < a->type->mintu)
+               len = a->type->mintu;
+       out = allocb(len);
+       memset(out->rp, 0, len);
+       req = (Etherarp*)out->rp;
+
+       /* who we are looking for and who is asking */
+       memmove(req->tpa, a->ip+IPv4off, sizeof(req->tpa));
+       ipv4local(ifc, req->spa);
+       memmove(req->sha, ifc->mac, sizeof(req->sha));
+
+       /* ethernet header */
+       memset(req->d, 0xff, sizeof(req->d));           /* ethernet broadcast */
+       memmove(req->s, ifc->mac, sizeof(req->s));
+       hnputs(req->type, ETARP);
+
+       /* fixed arp fields */
+       hnputs(req->hrd, 1);
+       hnputs(req->pro, ETIP4);
+       req->hln = sizeof(req->sha);
+       req->pln = sizeof(req->spa);
+       hnputs(req->op, ARPREQUEST);
+       out->wp += len;
+
+       len = devtab[rock->achan->type]->bwrite(rock->achan, out, 0);
+       if(len < 0)
+               print("arp: send: %r\n");
+}
+
+/*
+ *  v6 counterpart of sendarp: send a neighbor solicitation for
+ *  the address in a, no more often than once per ReTransTimer
+ *  and only while the entry has retransmissions left.
+ *  the arp entry is released on every path.
+ */
+static void
+resolveaddr6(Ipifc *ifc, Arpent *a)
+{
+       Etherrock *rock;
+       Block *held;
+       uchar src[IPaddrlen];
+       int sflag;
+
+       rock = ifc->arg;
+
+       /* too soon since the last attempt */
+       if(NOW - a->ctime < ReTransTimer){
+               arprelease(rock->f->arp, a);
+               return;
+       }
+
+       /* remove all but the last message */
+       for(held = a->hold; held != nil && held != a->last; held = a->hold){
+               a->hold = held->list;
+               freeblist(held);
+       }
+
+       /* note this attempt and when the next one is due */
+       a->ctime = NOW;
+       a->rtime = NOW + ReTransTimer;
+
+       /* out of retransmissions: give up quietly */
+       if(a->rxtsrem <= 0){
+               arprelease(rock->f->arp, a);
+               return;
+       }
+       a->rxtsrem--;
+       arprelease(rock->f->arp, a);
+
+       /* solicit only if the interface has a source address to use */
+       sflag = ipv6anylocal(ifc, src);
+       if(sflag)
+               icmpns(rock->f, src, sflag, a->ip, TARG_MULTI, ifc->mac);
+}
+
+/*
+ *  send a gratuitous arp to refresh arp caches:
+ *  a broadcast request in which ip is both sender and target
+ */
+static void
+sendgarp(Ipifc *ifc, uchar *ip)
+{
+       Etherrock *rock;
+       Etherarp *req;
+       Block *out;
+       int len;
+
+       rock = ifc->arg;
+
+       /* don't arp for our initial non address */
+       if(ipcmp(ip, IPnoaddr) == 0)
+               return;
+
+       /* the request is padded out to the medium's minimum size */
+       len = sizeof(Etherarp);
+       if(len < ifc->m->mintu)
+               len = ifc->m->mintu;
+       out = allocb(len);
+       memset(out->rp, 0, len);
+       req = (Etherarp*)out->rp;
+
+       /* our own address on both sides */
+       memmove(req->tpa, ip+IPv4off, sizeof(req->tpa));
+       memmove(req->spa, ip+IPv4off, sizeof(req->spa));
+       memmove(req->sha, ifc->mac, sizeof(req->sha));
+
+       /* ethernet header */
+       memset(req->d, 0xff, sizeof(req->d));           /* ethernet broadcast */
+       memmove(req->s, ifc->mac, sizeof(req->s));
+       hnputs(req->type, ETARP);
+
+       /* fixed arp fields */
+       hnputs(req->hrd, 1);
+       hnputs(req->pro, ETIP4);
+       req->hln = sizeof(req->sha);
+       req->pln = sizeof(req->spa);
+       hnputs(req->op, ARPREQUEST);
+       out->wp += len;
+
+       len = devtab[rock->achan->type]->bwrite(rock->achan, out, 0);
+       if(len < 0)
+               print("garp: send: %r\n");
+}
+
+/*
+ *  read one packet from the arp channel and act on it.
+ *
+ *  replies are entered into the arp cache unless they claim one
+ *  of our addresses from another mac or carry a broadcast address.
+ *  requests refresh the cache entry for the sender and are
+ *  answered when they ask for an address that is local to the
+ *  interface or that we proxy for.
+ */
+static void
+recvarp(Ipifc *ifc)
+{
+       int n;
+       Block *ebp, *rbp;
+       Etherarp *e, *r;
+       uchar ip[IPaddrlen];
+       static uchar eprinted[4];       /* last conflicting address reported */
+       Etherrock *er = ifc->arg;
+
+       ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
+       if(ebp == nil) {
+               print("arp: rcv: %r\n");
+               return;
+       }
+
+       /* a runt cannot hold a whole arp packet; don't parse past its end */
+       if(BLEN(ebp) < (int)sizeof(Etherarp)){
+               freeb(ebp);
+               return;
+       }
+
+       e = (Etherarp*)ebp->rp;
+       switch(nhgets(e->op)) {
+       default:
+               break;
+
+       case ARPREPLY:
+               /* check for machine using my ip address */
+               v4tov6(ip, e->spa);
+               if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+                       if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+                               print("arprep: 0x%E/0x%E also has ip addr %V\n",
+                                       e->s, e->sha, e->spa);
+                               break;
+                       }
+               }
+
+               /* make sure we're not entering broadcast addresses */
+               if(ipcmp(ip, ipbroadcast) == 0 ||
+                       !memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
+                       /* spa is a 4 byte v4 address, so it is printed with %V */
+                       print("arprep: 0x%E/0x%E cannot register broadcast address %V\n",
+                               e->s, e->sha, e->spa);
+                       break;
+               }
+
+               arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+               break;
+
+       case ARPREQUEST:
+               /* don't answer arps till we know who we are */
+               if(ifc->lifc == 0)
+                       break;
+
+               /* check for machine using my ip or ether address */
+               v4tov6(ip, e->spa);
+               if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+                       if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+                               if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+                                       /* print only once */
+                                       print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+                                       memmove(eprinted, e->spa, sizeof(e->spa));
+                               }
+                       }
+               } else {
+                       if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
+                               print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+                               break;
+                       }
+               }
+
+               /* refresh what we know about sender */
+               arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
+
+               /* answer only requests for our address or systems we're proxying for */
+               v4tov6(ip, e->tpa);
+               if(!iplocalonifc(ifc, ip))
+               if(!ipproxyifc(er->f, ifc, ip))
+                       break;
+
+               n = sizeof(Etherarp);
+               if(n < ifc->mintu)
+                       n = ifc->mintu;
+               rbp = allocb(n);
+               r = (Etherarp*)rbp->rp;
+               /* clear the padding too: all n bytes go out on the wire */
+               memset(r, 0, n);
+               hnputs(r->type, ETARP);
+               hnputs(r->hrd, 1);
+               hnputs(r->pro, ETIP4);
+               r->hln = sizeof(r->sha);
+               r->pln = sizeof(r->spa);
+               hnputs(r->op, ARPREPLY);
+               memmove(r->tha, e->sha, sizeof(r->tha));
+               memmove(r->tpa, e->spa, sizeof(r->tpa));
+               memmove(r->sha, ifc->mac, sizeof(r->sha));
+               memmove(r->spa, e->tpa, sizeof(r->spa));
+               memmove(r->d, e->sha, sizeof(r->d));
+               memmove(r->s, ifc->mac, sizeof(r->s));
+               rbp->wp += n;
+
+               n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+               if(n < 0)
+                       print("arp: write: %r\n");
+       }
+       freeb(ebp);
+}
+
+/*
+ *  process that handles incoming arp packets, one per recvarp call
+ */
+static void
+recvarpproc(void *v)
+{
+       Ipifc *ifc;
+       Etherrock *rock;
+
+       ifc = v;
+       rock = ifc->arg;
+
+       /* leave our identity in the rock for etherunbind */
+       rock->arpp = up;
+       if(waserror()){
+               /* clearing the slot is what etherunbind waits for */
+               rock->arpp = 0;
+               pexit("hangup", 1);
+       }
+
+       while(1)
+               recvarp(ifc);
+}
+
+/*
+ *  derive the ethernet multicast address for ip multicast
+ *  address ip and store it in ea.  returns whatever
+ *  ipismulticast returned; ea is written only for V4 and V6.
+ */
+static int
+multicastea(uchar *ea, uchar *ip)
+{
+       int version;
+
+       version = ipismulticast(ip);
+       if(version == V4){
+               /* 01:00:5e followed by the low 23 bits of the address */
+               ea[0] = 0x01;
+               ea[1] = 0x00;
+               ea[2] = 0x5e;
+               ea[3] = ip[13] & 0x7f;
+               ea[4] = ip[14];
+               ea[5] = ip[15];
+       }else if(version == V6){
+               /* 33:33 followed by the last four bytes of the address */
+               ea[0] = 0x33;
+               ea[1] = 0x33;
+               ea[2] = ip[12];
+               ea[3] = ip[13];
+               ea[4] = ip[14];
+               ea[5] = ip[15];
+       }
+       return version;
+}
+
+/*
+ *  fill in an arp entry for broadcast or multicast
+ *  addresses.  Return the first queued packet for the
+ *  IP address, or nil when the address is neither and
+ *  has to be resolved the normal way.
+ */
+static Block*
+multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
+{
+       int kind, version;
+
+       /* is it broadcast? */
+       kind = ipforme(f, a->ip);
+       if(kind == Runi)
+               return nil;
+       if(kind == Rbcast){
+               memset(mac, 0xff, 6);
+               return arpresolve(f->arp, a, medium, mac);
+       }
+
+       /* if multicast, fill in mac */
+       version = multicastea(mac, a->ip);
+       if(version == V4 || version == V6)
+               return arpresolve(f->arp, a, medium, mac);
+
+       /* let arp take care of it */
+       return nil;
+}
+
+void
+ethermediumlink(void)
+{
+       addipmedium(&ethermedium);      /* the "ether" medium, maxtu 1514 */
+       addipmedium(&gbemedium);        /* the "gbe" medium, maxtu 9014 */
+}
+
+
+/*
+ *  fill the last eight bytes of pref from the six byte
+ *  ethernet address ea: the first three bytes (with bit 0x2
+ *  set in the first), then FF FE, then the last three bytes
+ */
+static void
+etherpref2addr(uchar *pref, uchar *ea)
+{
+       uchar *id;
+
+       id = pref + 8;
+       id[0] = ea[0] | 0x2;
+       id[1] = ea[1];
+       id[2] = ea[2];
+       id[3] = 0xFF;
+       id[4] = 0xFE;
+       id[5] = ea[3];
+       id[6] = ea[4];
+       id[7] = ea[5];
+}
diff --git a/kern/net/gre.c b/kern/net/gre.c
new file mode 100644 (file)
index 0000000..9610633
--- /dev/null
@@ -0,0 +1,282 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+#define DPRINT if(0)print
+
+enum
+{
+       /* bytes grekick prepends and greiput trims from the front */
+       GRE_IPONLY      = 12,
+       /* least number of bytes greiput accepts after trimming */
+       GRE_IPPLUSGRE   = 12,
+       /* ip protocol number used for gre */
+       IP_GREPROTO     = 47,
+
+       GRErxms         = 200,
+       GREtickms       = 100,
+       GREmaxxmit      = 10,
+};
+
+/* a v4 ip header immediately followed by the start of the gre header */
+typedef struct GREhdr GREhdr;
+struct GREhdr
+{
+       /* ip header */
+       uchar   vihl;           /* Version and header length */
+       uchar   tos;            /* Type of service */
+       uchar   len[2];         /* packet length (including headers) */
+       uchar   id[2];          /* Identification */
+       uchar   frag[2];        /* Fragment information */
+       uchar   Unused;
+       uchar   proto;          /* Protocol */
+       uchar   cksum[2];       /* checksum */
+       uchar   src[4];         /* Ip source */
+       uchar   dst[4];         /* Ip destination */
+
+       /* gre header */
+       uchar   flags[2];
+       uchar   eproto[2];      /* encapsulation protocol */
+};
+
+/* state shared by all gre conversations, kept in the Proto's priv */
+typedef struct GREpriv
+{
+       int             raw;                    /* Raw GRE mode, switched by grectl */
+
+       /* non-MIB stats */
+       ulong           csumerr;                /* checksum errors */
+       ulong           lenerr;                 /* short packet */
+} GREpriv;
+
+/* needed by grecreate, defined further down */
+static void grekick(void *x, Block *bp);
+
+/*
+ *  connect a gre conversation.  after the standard connect the
+ *  other conversations are searched; if one already has the same
+ *  remote address and port the addresses of c are cleared and an
+ *  error string is returned.
+ */
+static char*
+greconnect(Conv *c, char **argv, int argc)
+{
+       Proto *proto;
+       Conv *other, **slot, **end;
+       char *err;
+
+       err = Fsstdconnect(c, argv, argc);
+       if(err != nil)
+               return err;
+
+       /* make sure no one is already connected to this other sys */
+       proto = c->p;
+       qlock(proto);
+       end = &proto->conv[proto->nc];
+       for(slot = proto->conv; slot < end; slot++){
+               other = *slot;
+               if(other == nil)
+                       break;
+               if(other == c)
+                       continue;
+               if(other->rport != c->rport || ipcmp(other->raddr, c->raddr) != 0)
+                       continue;
+
+               /* duplicate: back out what the standard connect filled in */
+               err = "already connected to that addr/proto";
+               ipmove(c->laddr, IPnoaddr);
+               ipmove(c->raddr, IPnoaddr);
+               break;
+       }
+       qunlock(proto);
+
+       if(err != nil)
+               return err;
+
+       Fsconnected(c, nil);
+       return nil;
+}
+
+static void
+grecreate(Conv *c)
+{
+       c->rq = qopen(64*1024, Qmsg, 0, c);     /* read queue; greiput passes received packets to it */
+       c->wq = qbypass(grekick, c);            /* write queue set up with grekick as its routine */
+}
+
+static int
+grestate(Conv *c, char *state, int n)
+{
+       USED(c);        /* the conversation is not consulted */
+       return snprint(state, n, "%s", "Datagram");     /* the state text is always "Datagram" */
+}
+
+/*
+ *  gre conversations cannot be announced; always returns
+ *  an error string naming this protocol
+ */
+static char*
+greannounce(Conv*, char**, int)
+{
+       return "gre does not support announce";
+}
+
+/*
+ *  close the queues of a conversation and clear its
+ *  addresses and ports
+ */
+static void
+greclose(Conv *c)
+{
+       qclose(c->rq);
+       qclose(c->wq);
+       qclose(c->eq);
+
+       c->lport = 0;
+       c->rport = 0;
+       ipmove(c->laddr, IPnoaddr);
+       ipmove(c->raddr, IPnoaddr);
+}
+
+int drop;      /* NOTE(review): not referenced by any gre function visible here */
+
+/*
+ *  output routine of the write queue.  the block arrives with
+ *  the gre header in place; room is made in front for the rest
+ *  of the ip header, missing addresses are filled in unless the
+ *  protocol is in raw mode, and the packet goes to ipoput4.
+ */
+static void
+grekick(void *x, Block *bp)
+{
+       Conv *c;
+       GREhdr *hdr;
+       GREpriv *priv;
+       uchar src[IPaddrlen], dst[IPaddrlen];
+
+       c = x;
+       if(bp == nil)
+               return;
+
+       /* Make space to fit ip header (gre header already there) */
+       bp = padblock(bp, GRE_IPONLY);
+       if(bp == nil)
+               return;
+
+       /* make sure the message has a GRE header */
+       bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
+       if(bp == nil)
+               return;
+
+       hdr = (GREhdr*)bp->rp;
+       hdr->vihl = IP_VER4;
+
+       priv = c->p->priv;
+       if(!priv->raw){
+               /* an unset destination becomes the conversation's remote address */
+               v4tov6(dst, hdr->dst);
+               if(ipcmp(dst, v4prefix) == 0)
+                       memmove(hdr->dst, c->raddr + IPv4off, IPv4addrlen);
+
+               /* an unset source becomes the conversation's local address */
+               v4tov6(src, hdr->src);
+               if(ipcmp(src, v4prefix) == 0){
+                       if(ipcmp(c->laddr, IPnoaddr) == 0)
+                               findlocalip(c->p->f, c->laddr, dst); /* pick interface closest to dest */
+                       memmove(hdr->src, c->laddr + IPv4off, IPv4addrlen);
+               }
+
+               /* the remote port doubles as the encapsulated protocol */
+               hnputs(hdr->eproto, c->rport);
+       }
+
+       hdr->proto = IP_GREPROTO;
+       hdr->frag[0] = 0;
+       hdr->frag[1] = 0;
+
+       ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ *  input routine: find the conversation for the packet's
+ *  encapsulated protocol (and, unless in raw mode, its source
+ *  address), trim off the front of the ip header and pass the
+ *  rest to the conversation's read queue.  packets nobody wants
+ *  are freed.
+ */
+static void
+greiput(Proto *gre, Ipifc*, Block *bp)
+{
+       GREpriv *gpriv;
+       GREhdr *hdr;
+       Conv *c, **slot;
+       uchar src[IPaddrlen];
+       ushort eproto;
+       int len;
+
+       gpriv = gre->priv;
+       hdr = (GREhdr*)(bp->rp);
+
+       v4tov6(src, hdr->src);
+       eproto = nhgets(hdr->eproto);
+
+       /* Look for a conversation structure for this port and address */
+       qlock(gre);
+       for(slot = gre->conv; (c = *slot) != nil; slot++){
+               if(c->inuse == 0)
+                       continue;
+               if(c->rport == eproto &&
+                       (gpriv->raw || ipcmp(c->raddr, src) == 0))
+                       break;
+       }
+       qunlock(gre);
+
+       /* ran off the end of the table: nobody is listening */
+       if(c == nil){
+               freeblist(bp);
+               return;
+       }
+
+       /*
+        * Trim the packet down to data size
+        */
+       len = nhgets(hdr->len) - GRE_IPONLY;
+       if(len < GRE_IPPLUSGRE){
+               freeblist(bp);
+               return;
+       }
+       bp = trimblock(bp, GRE_IPONLY, len);
+       if(bp == nil){
+               gpriv->lenerr++;
+               return;
+       }
+
+       /* drop the packet when the reader has fallen too far behind */
+       if(qlen(c->rq) > 64*1024){
+               freeblist(bp);
+               return;
+       }
+
+       /*
+        *  Can't delimit packet so pull it all into one block.
+        */
+       bp = concatblock(bp);
+       if(bp == 0)
+               panic("greiput");
+       qpass(c->rq, bp);
+}
+
+int
+grestats(Proto *gre, char *buf, int len)
+{
+       GREpriv *gpriv;
+
+       gpriv = gre->priv;
+
+       return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);     /* only lenerr is reported; csumerr is not */
+}
+
+/*
+ *  control requests: the single word "raw" or "cooked" switches
+ *  raw mode on or off for the whole protocol.  returns nil on
+ *  success, an error string for anything else.
+ */
+char*
+grectl(Conv *c, char **f, int n)
+{
+       GREpriv *gpriv;
+
+       gpriv = c->p->priv;
+       if(n != 1)
+               return "unknown control request";
+
+       if(strcmp(f[0], "raw") == 0){
+               gpriv->raw = 1;
+               return nil;
+       }
+       if(strcmp(f[0], "cooked") == 0){
+               gpriv->raw = 0;
+               return nil;
+       }
+       return "unknown control request";
+}
+
+/*
+ *  allocate the gre protocol descriptor, fill in its
+ *  operations and register it with the ip stack fs
+ */
+void
+greinit(Fs *fs)
+{
+       Proto *p;
+
+       p = smalloc(sizeof(*p));
+       p->priv = smalloc(sizeof(GREpriv));
+
+       /* identity and sizing; no per conversation protocol state */
+       p->name = "gre";
+       p->ipproto = IP_GREPROTO;
+       p->nc = 64;
+       p->ptclsize = 0;
+
+       /* conversation life cycle */
+       p->create = grecreate;
+       p->connect = greconnect;
+       p->announce = greannounce;
+       p->close = greclose;
+       p->ctl = grectl;
+
+       /* packet input; gre takes no advice */
+       p->rcv = greiput;
+       p->advise = nil;
+
+       /* status reporting */
+       p->state = grestate;
+       p->stats = grestats;
+
+       Fsproto(fs, p);
+}
diff --git a/kern/net/icmp.c b/kern/net/icmp.c
new file mode 100644 (file)
index 0000000..53eaf37
--- /dev/null
@@ -0,0 +1,490 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+typedef struct Icmp {          /* IPv4 header immediately followed by the ICMP header */
+       uchar   vihl;           /* Version and header length */
+       uchar   tos;            /* Type of service */
+       uchar   length[2];      /* packet length */
+       uchar   id[2];          /* Identification */
+       uchar   frag[2];        /* Fragment information */
+       uchar   ttl;            /* Time to live */
+       uchar   proto;          /* Protocol */
+       uchar   ipcksum[2];     /* Header checksum */
+       uchar   src[4];         /* Ip source */
+       uchar   dst[4];         /* Ip destination */
+       uchar   type;           /* ICMP message type (see Packet Types) */
+       uchar   code;           /* type-specific code */
+       uchar   cksum[2];       /* checksum computed over the ICMP part of the packet */
+       uchar   icmpid[2];      /* identifier; compared with a conversation's lport */
+       uchar   seq[2];         /* sequence number */
+       uchar   data[1];        /* first byte of the message body */
+} Icmp;
+
+enum {                 /* Packet Types */
+       EchoReply       = 0,
+       Unreachable     = 3,
+       SrcQuench       = 4,
+       Redirect        = 5,
+       EchoRequest     = 8,
+       TimeExceed      = 11,
+       InParmProblem   = 12,
+       Timestamp       = 13,
+       TimestampReply  = 14,
+       InfoRequest     = 15,
+       InfoReply       = 16,
+       AddrMaskRequest = 17,
+       AddrMaskReply   = 18,
+
+       Maxtype         = 18,   /* largest type that has a name and per-type counters */
+};
+
+enum
+{
+       MinAdvise       = 24,   /* minimum needed for us to advise another protocol */ 
+};
+
+/*
+ * Printable names of the ICMP message types, indexed by type.
+ * Types without an entry are nil; icmpstats prints those by number.
+ * (The AddrMaskReply label used to carry two trailing blanks, which
+ * showed up in the statistics output.)
+ */
+char *icmpnames[Maxtype+1] =
+{
+[EchoReply]            "EchoReply",
+[Unreachable]          "Unreachable",
+[SrcQuench]            "SrcQuench",
+[Redirect]             "Redirect",
+[EchoRequest]          "EchoRequest",
+[TimeExceed]           "TimeExceed",
+[InParmProblem]                "InParmProblem",
+[Timestamp]            "Timestamp",
+[TimestampReply]       "TimestampReply",
+[InfoRequest]          "InfoRequest",
+[InfoReply]            "InfoReply",
+[AddrMaskRequest]      "AddrMaskRequest",
+[AddrMaskReply]                "AddrMaskReply",
+};
+
+enum {
+       IP_ICMPPROTO    = 1,    /* value stored in the IP header's proto field */
+       ICMP_IPSIZE     = 20,   /* bytes of IP header in front of the ICMP header */
+       ICMP_HDRSIZE    = 8,    /* bytes of ICMP header, type through seq */
+};
+
+enum                   /* indices into Icmppriv.stats and statnames */
+{
+       InMsgs,         /* every packet handed to icmpiput */
+       InErrors,       /* bumped together with the header-length, length and checksum failures */
+       OutMsgs,        /* packets sent by icmpkick */
+       CsumErrs,       /* ICMP checksum did not verify */
+       LenErrs,        /* bad length field, or too little quoted data to advise */
+       HlenErrs,       /* packet shorter than IP + ICMP headers */
+
+       Nstats,         /* number of counters */
+};
+
+static char *statnames[Nstats] =       /* labels printed by icmpstats, indexed like stats[] */
+{
+[InMsgs]       "InMsgs",
+[InErrors]     "InErrors",
+[OutMsgs]      "OutMsgs",
+[CsumErrs]     "CsumErrs",
+[LenErrs]      "LenErrs",
+[HlenErrs]     "HlenErrs",
+};
+
+typedef struct Icmppriv Icmppriv;
+struct Icmppriv                /* per-protocol private data, hung off Proto.priv by icmpinit */
+{
+       ulong   stats[Nstats];  /* general counters, indexed by InMsgs..HlenErrs */
+
+       /* message counts */
+       ulong   in[Maxtype+1];  /* received, indexed by ICMP type */
+       ulong   out[Maxtype+1]; /* sent, indexed by ICMP type */
+};
+
+static void icmpkick(void *x, Block*); /* defined below; needed by icmpcreate */
+
+/*
+ * Give a new ICMP conversation its queues: a message-mode read queue
+ * limited to 64K, and a write queue made by qbypass with icmpkick as
+ * its callback.
+ */
+static void
+icmpcreate(Conv *c)
+{
+       enum { Rqlimit = 64*1024 };
+
+       c->rq = qopen(Rqlimit, Qmsg, 0, c);
+       c->wq = qbypass(icmpkick, c);
+}
+
+/*
+ * Connect an ICMP conversation using the standard connect helper.
+ * On success the conversation is marked connected and nil is
+ * returned; otherwise the helper's error string is returned.
+ */
+extern char*
+icmpconnect(Conv *c, char **argv, int argc)
+{
+       char *err;
+
+       err = Fsstdconnect(c, argv, argc);
+       if(err == nil)
+               Fsconnected(c, err);
+       return err;
+}
+
+/*
+ * Describe the conversation's state in at most n bytes of state:
+ * the fixed word "Datagram" and the lengths of the read and write
+ * queues (0 for a queue that does not exist).
+ */
+extern int
+icmpstate(Conv *c, char *state, int n)
+{
+       int nin, nout;
+
+       USED(c);
+       nin = 0;
+       if(c->rq)
+               nin = qlen(c->rq);
+       nout = 0;
+       if(c->wq)
+               nout = qlen(c->wq);
+
+       return snprint(state, n, "%s qin %d qout %d", "Datagram", nin, nout);
+}
+
+/*
+ * Announce on an ICMP conversation using the standard announce helper.
+ * On success the conversation is marked connected and nil is
+ * returned; otherwise the helper's error string is returned.
+ */
+extern char*
+icmpannounce(Conv *c, char **argv, int argc)
+{
+       char *err;
+
+       err = Fsstdannounce(c, argv, argc);
+       if(err == nil)
+               Fsconnected(c, nil);
+       return err;
+}
+
+/*
+ * Close an ICMP conversation: close both queues, then reset the
+ * local and remote addresses and the local port.
+ */
+extern void
+icmpclose(Conv *c)
+{
+       /* queues first */
+       qclose(c->rq);
+       qclose(c->wq);
+
+       /* then forget the endpoint identity */
+       ipmove(c->laddr, IPnoaddr);
+       ipmove(c->raddr, IPnoaddr);
+       c->lport = 0;
+}
+
+/*
+ * Write-side callback of an ICMP conversation (installed by icmpcreate).
+ * bp must already contain room for the IP and ICMP headers; blocks
+ * shorter than that are dropped.  The addresses, protocol, identifier
+ * and ICMP checksum are filled in from the conversation and the packet
+ * is passed to ipoput4.
+ */
+static void
+icmpkick(void *x, Block *bp)
+{
+       Conv *c;
+       Icmp *hdr;
+       Icmppriv *priv;
+
+       if(bp == nil)
+               return;
+       if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+               freeblist(bp);
+               return;
+       }
+
+       c = x;
+       priv = c->p->priv;
+       hdr = (Icmp *)(bp->rp);
+
+       /* count by type when it is one we track, and as an output message */
+       if(hdr->type <= Maxtype)
+               priv->out[hdr->type]++;
+       priv->stats[OutMsgs]++;
+
+       /* IP part */
+       hdr->vihl = IP_VER4;
+       hdr->proto = IP_ICMPPROTO;
+       v6tov4(hdr->dst, c->raddr);
+       v6tov4(hdr->src, c->laddr);
+
+       /* ICMP part; the checksum is taken with its own field zeroed */
+       hnputs(hdr->icmpid, c->lport);
+       memset(hdr->cksum, 0, sizeof(hdr->cksum));
+       hnputs(hdr->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+
+       ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ * Send an ICMP time-exceeded (code 0) message to the source of bp.
+ * ia is the address used as the source of the reply; the reply quotes
+ * the offending packet's IP header plus the next 8 bytes.
+ * bp itself is not freed here.
+ */
+extern void
+icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+{
+       enum { Quoted = ICMP_IPSIZE + 8 };      /* bytes copied from the offending packet */
+       Block   *reply;
+       Icmp    *in, *out;
+
+       in = (Icmp *)bp->rp;
+       netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", in->src);
+
+       reply = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + Quoted);
+       reply->wp += ICMP_IPSIZE + ICMP_HDRSIZE + Quoted;
+       out = (Icmp *)reply->rp;
+
+       /* IP part */
+       out->vihl = IP_VER4;
+       out->proto = IP_ICMPPROTO;
+       memmove(out->dst, in->src, sizeof(out->dst));
+       v6tov4(out->src, ia);
+
+       /* ICMP part */
+       out->type = TimeExceed;
+       out->code = 0;
+       hnputs(out->icmpid, 0);
+       hnputs(out->seq, 0);
+       memmove(out->data, bp->rp, Quoted);
+       memset(out->cksum, 0, sizeof(out->cksum));
+       hnputs(out->cksum, ptclcsum(reply, ICMP_IPSIZE, blocklen(reply) - ICMP_IPSIZE));
+
+       ipoput4(f, reply, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMP destination-unreachable message with the given code to
+ * the source of bp.  seq is stored in the reply's sequence field
+ * (icmpcantfrag passes the MTU there).  Nothing is sent unless the
+ * packet's destination is one of our unicast addresses, nor when its
+ * source is known to ipforme but not as a unicast address.
+ * bp itself is not freed here.
+ */
+static void
+icmpunreachable(Fs *f, Block *bp, int code, int seq)
+{
+       enum { Quoted = ICMP_IPSIZE + 8 };      /* bytes copied from the offending packet */
+       Block   *reply;
+       Icmp    *in, *out;
+       int     kind;
+       uchar   a6[IPaddrlen];
+
+       in = (Icmp *)bp->rp;
+
+       /* only do this for unicast sources and destinations */
+       v4tov6(a6, in->dst);
+       kind = ipforme(f, a6);
+       if(!(kind & Runi))
+               return;
+       v4tov6(a6, in->src);
+       kind = ipforme(f, a6);
+       if(kind != 0 && !(kind & Runi))
+               return;
+
+       netlog(f, Logicmp, "sending icmpnoconv -> %V\n", in->src);
+
+       reply = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + Quoted);
+       reply->wp += ICMP_IPSIZE + ICMP_HDRSIZE + Quoted;
+       out = (Icmp *)reply->rp;
+
+       /* IP part: swap the offending packet's endpoints */
+       out->vihl = IP_VER4;
+       out->proto = IP_ICMPPROTO;
+       memmove(out->dst, in->src, sizeof(out->dst));
+       memmove(out->src, in->dst, sizeof(out->src));
+
+       /* ICMP part */
+       out->type = Unreachable;
+       out->code = code;
+       hnputs(out->icmpid, 0);
+       hnputs(out->seq, seq);
+       memmove(out->data, bp->rp, Quoted);
+       memset(out->cksum, 0, sizeof(out->cksum));
+       hnputs(out->cksum, ptclcsum(reply, ICMP_IPSIZE, blocklen(reply) - ICMP_IPSIZE));
+
+       ipoput4(f, reply, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Report that no conversation wanted bp: send an Unreachable message
+ * with code 3 ("port unreachable" in unreachcode) and sequence 0.
+ */
+extern void
+icmpnoconv(Fs *f, Block *bp)
+{
+       enum { Portunreach = 3 };
+
+       icmpunreachable(f, bp, Portunreach, 0);
+}
+
+/*
+ * Report that bp could not be fragmented: send an Unreachable message
+ * with code 4 ("fragmentation needed and DF set" in unreachcode),
+ * carrying mtu in the sequence field.
+ */
+extern void
+icmpcantfrag(Fs *f, Block *bp, int mtu)
+{
+       enum { Fragneeded = 4 };
+
+       icmpunreachable(f, bp, Fragneeded, mtu);
+}
+
+/*
+ * Deliver a received ICMP packet to the first conversation whose local
+ * port equals the packet's ICMP id and whose remote address equals the
+ * packet's source.  The packet is concatenated into one block before
+ * being queued.  If no conversation matches, the packet is freed.
+ */
+static void
+goticmpkt(Proto *icmp, Block *bp)
+{
+       Conv    **cp, *cv;
+       Icmp    *hdr;
+       uchar   from[IPaddrlen];
+       ushort  id;
+
+       hdr = (Icmp *) bp->rp;
+       v4tov6(from, hdr->src);
+       id = nhgets(hdr->icmpid);
+
+       for(cp = icmp->conv; *cp; cp++) {
+               cv = *cp;
+               if(cv->lport != id || ipcmp(cv->raddr, from) != 0)
+                       continue;
+
+               bp = concatblock(bp);
+               if(bp != nil)
+                       qpass(cv->rq, bp);
+               return;
+       }
+       freeblist(bp);
+}
+
+/*
+ * Turn the echo request in bp into an echo reply in place: swap the
+ * IP source and destination, set the type to EchoReply and recompute
+ * the ICMP checksum.  Returns bp.
+ */
+static Block *
+mkechoreply(Block *bp)
+{
+       Icmp    *hdr;
+       uchar   saved[4];
+
+       hdr = (Icmp *)bp->rp;
+       hdr->vihl = IP_VER4;
+
+       /* swap src and dst through a temporary */
+       memmove(saved, hdr->src, sizeof(hdr->dst));
+       memmove(hdr->src, hdr->dst, sizeof(hdr->src));
+       memmove(hdr->dst, saved, sizeof(hdr->dst));
+
+       hdr->type = EchoReply;
+       memset(hdr->cksum, 0, sizeof(hdr->cksum));
+       hnputs(hdr->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+
+       return bp;
+}
+
+static char *unreachcode[] =   /* text for Unreachable codes 0-5, indexed by code; used by icmpiput */
+{
+[0]    "net unreachable",
+[1]    "host unreachable",
+[2]    "protocol unreachable",
+[3]    "port unreachable",              /* code sent by icmpnoconv */
+[4]    "fragmentation needed and DF set",       /* code sent by icmpcantfrag */
+[5]    "source route failed",
+};
+
+/*
+ * Receive an ICMP packet from the IP layer.
+ *
+ * The packet is dropped (and the matching error counters bumped) when
+ * it is shorter than the IP + ICMP headers, when the IP length field
+ * is larger than the data received or smaller than the two headers,
+ * or when the ICMP checksum fails.  Otherwise:
+ *   - EchoRequest is answered in place with an EchoReply;
+ *   - Unreachable and TimeExceed (code 0) are passed to the advise
+ *     routine of the protocol named in the quoted IP header, if that
+ *     protocol has one; the block then belongs to that routine;
+ *   - everything else is handed to a matching ICMP conversation.
+ *
+ * The lower bound on the length field keeps a negative or too-short
+ * length away from ptclcsum and trimblock.  ICMP messages may have an
+ * odd length, so no parity test is made (the old "iplen % 1" test was
+ * always false).
+ */
+static void
+icmpiput(Proto *icmp, Ipifc*, Block *bp)
+{
+       int     n, iplen;
+       Icmp    *p;
+       Block   *r;
+       Proto   *pr;
+       char    *msg;
+       char    m2[128];
+       Icmppriv *ipriv;
+
+       ipriv = icmp->priv;
+
+       ipriv->stats[InMsgs]++;
+
+       p = (Icmp *)bp->rp;
+       n = blocklen(bp);
+       if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
+               ipriv->stats[InErrors]++;
+               ipriv->stats[HlenErrs]++;
+               netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+               goto raise;
+       }
+       /* type and code are only meaningful once the headers are known to be there */
+       netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+       iplen = nhgets(p->length);
+       if(iplen > n || iplen < ICMP_IPSIZE+ICMP_HDRSIZE){
+               ipriv->stats[LenErrs]++;
+               ipriv->stats[InErrors]++;
+               netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+               goto raise;
+       }
+       if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
+               ipriv->stats[InErrors]++;
+               ipriv->stats[CsumErrs]++;
+               netlog(icmp->f, Logicmp, "icmp checksum error\n");
+               goto raise;
+       }
+       if(p->type <= Maxtype)
+               ipriv->in[p->type]++;
+
+       switch(p->type) {
+       case EchoRequest:
+               /* drop anything beyond the IP length before echoing */
+               if (iplen < n)
+                       bp = trimblock(bp, 0, iplen);
+               r = mkechoreply(bp);
+               ipriv->out[EchoReply]++;
+               ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+               break;
+       case Unreachable:
+               /* codes beyond the table are reported as "host unreachable" */
+               if(p->code > 5)
+                       msg = unreachcode[1];
+               else
+                       msg = unreachcode[p->code];
+
+               /* step over our headers to the quoted packet */
+               bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+               if(blocklen(bp) < MinAdvise){
+                       ipriv->stats[LenErrs]++;
+                       goto raise;
+               }
+               p = (Icmp *)bp->rp;
+               pr = Fsrcvpcolx(icmp->f, p->proto);
+               if(pr != nil && pr->advise != nil) {
+                       (*pr->advise)(pr, bp, msg);
+                       return;
+               }
+
+               /* nobody to advise: restore the block and deliver it as plain ICMP */
+               bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+               goticmpkt(icmp, bp);
+               break;
+       case TimeExceed:
+               if(p->code == 0){
+                       snprint(m2, sizeof(m2), "ttl exceeded at %V", p->src);
+
+                       bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+                       if(blocklen(bp) < MinAdvise){
+                               ipriv->stats[LenErrs]++;
+                               goto raise;
+                       }
+                       p = (Icmp *)bp->rp;
+                       pr = Fsrcvpcolx(icmp->f, p->proto);
+                       if(pr != nil && pr->advise != nil) {
+                               (*pr->advise)(pr, bp, m2);
+                               return;
+                       }
+                       bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+               }
+
+               goticmpkt(icmp, bp);
+               break;
+       default:
+               goticmpkt(icmp, bp);
+               break;
+       }
+       return;
+
+raise:
+       freeblist(bp);
+}
+
+/*
+ * Advise routine for ICMP itself.  bp starts at a quoted IP + ICMP
+ * header; the first conversation whose local port equals the quoted
+ * ICMP id and whose remote address equals the quoted destination has
+ * both its queues hung up with msg.  bp is always freed.
+ */
+void
+icmpadvise(Proto *icmp, Block *bp, char *msg)
+{
+       Conv    **cp, *cv;
+       Icmp    *hdr;
+       uchar   to[IPaddrlen];
+       ushort  id;
+
+       hdr = (Icmp *) bp->rp;
+       v4tov6(to, hdr->dst);
+       id = nhgets(hdr->icmpid);
+
+       for(cp = icmp->conv; *cp; cp++) {
+               cv = *cp;
+               if(cv->lport != id || ipcmp(cv->raddr, to) != 0)
+                       continue;
+               qhangup(cv->rq, msg);
+               qhangup(cv->wq, msg);
+               break;
+       }
+       freeblist(bp);
+}
+
+/*
+ * Format the ICMP statistics into buf (at most len bytes): first the
+ * general counters by name, then the received and sent counts for
+ * every type 0..Maxtype, labelled by name where one exists and by
+ * number otherwise.  Returns the number of bytes produced.
+ */
+int
+icmpstats(Proto *icmp, char *buf, int len)
+{
+       Icmppriv *st;
+       char *out, *end;
+       int t;
+
+       st = icmp->priv;
+       out = buf;
+       end = buf + len;
+
+       for(t = 0; t < Nstats; t++)
+               out = seprint(out, end, "%s: %lud\n", statnames[t], st->stats[t]);
+
+       for(t = 0; t <= Maxtype; t++){
+               if(icmpnames[t] == nil)
+                       out = seprint(out, end, "%d: %lud %lud\n", t, st->in[t], st->out[t]);
+               else
+                       out = seprint(out, end, "%s: %lud %lud\n", icmpnames[t], st->in[t], st->out[t]);
+       }
+       return out - buf;
+}
+       
+/*
+ * Allocate the ICMP protocol descriptor and its private counters,
+ * fill in the protocol entry points and hand the descriptor to
+ * Fsproto.
+ */
+void
+icmpinit(Fs *fs)
+{
+       Proto *p;
+
+       p = smalloc(sizeof(Proto));
+       p->priv = smalloc(sizeof(Icmppriv));
+
+       /* identity and sizing */
+       p->name = "icmp";
+       p->ipproto = IP_ICMPPROTO;
+       p->nc = 128;
+       p->ptclsize = 0;
+
+       /* conversation management */
+       p->create = icmpcreate;
+       p->connect = icmpconnect;
+       p->announce = icmpannounce;
+       p->close = icmpclose;
+       p->state = icmpstate;
+       p->ctl = nil;
+       p->gc = nil;
+
+       /* packet input and reporting */
+       p->rcv = icmpiput;
+       p->advise = icmpadvise;
+       p->stats = icmpstats;
+
+       Fsproto(fs, p);
+}
diff --git a/kern/net/icmp6.c b/kern/net/icmp6.c
new file mode 100644 (file)
index 0000000..bca78a3
--- /dev/null
@@ -0,0 +1,917 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+typedef struct ICMPpkt ICMPpkt;
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+struct ICMPpkt {               /* ICMPv6 header as laid out after the IPv6 header */
+       uchar   type;
+       uchar   code;
+       uchar   cksum[2];
+       uchar   icmpid[2];      /* identifier; compared with a conversation's lport */
+       uchar   seq[2];
+};
+
+struct IPICMP {                /* IPv6 header + ICMPv6 header, overlaid on packet data */
+       Ip6hdr;                 /* unnamed member: its fields are used directly (p->src, p->ttl, ...) */
+       ICMPpkt;                /* unnamed member: p->type, p->code, ... */
+};
+
+struct NdiscC          /* neighbor discovery message up to and including the target address */
+{
+       IPICMP;
+       uchar target[IPaddrlen];
+};
+
+struct Ndpkt           /* NdiscC followed by one link-layer address option */
+{
+       NdiscC;
+       uchar otype;            // option type (SRC_LLADDRESS or TARGET_LLADDRESS below)
+       uchar olen;     // length in units of 8 octets(incl type, code),
+                               // 1 for IEEE 802 addresses
+       uchar lnaddr[6];        // link-layer address
+};
+
+enum { 
+       // ICMPv6 types
+       EchoReply       = 0,
+       UnreachableV6   = 1,
+       PacketTooBigV6  = 2,
+       TimeExceedV6    = 3,
+       SrcQuench       = 4,    // same value as ParamProblemV6
+       ParamProblemV6  = 4,
+       Redirect        = 5,
+       EchoRequest     = 8,
+       TimeExceed      = 11,
+       InParmProblem   = 12,
+       Timestamp       = 13,
+       TimestampReply  = 14,
+       InfoRequest     = 15,
+       InfoReply       = 16,
+       AddrMaskRequest = 17,
+       AddrMaskReply   = 18,
+       EchoRequestV6   = 128,
+       EchoReplyV6     = 129,
+       RouterSolicit   = 133,
+       RouterAdvert    = 134,
+       NbrSolicit      = 135,
+       NbrAdvert       = 136,
+       RedirectV6      = 137,
+
+       Maxtype6        = 137,  // largest type with a slot in the name table and counters
+};
+
+char *icmpnames6[Maxtype6+1] = // type names, indexed by type; unlisted types are nil
+{
+[EchoReply]            "EchoReply",
+[UnreachableV6]                "UnreachableV6",
+[PacketTooBigV6]       "PacketTooBigV6",
+[TimeExceedV6]         "TimeExceedV6",
+[SrcQuench]            "SrcQuench",    // slot 4, which is also ParamProblemV6
+[Redirect]             "Redirect",
+[EchoRequest]          "EchoRequest",
+[TimeExceed]           "TimeExceed",
+[InParmProblem]                "InParmProblem",
+[Timestamp]            "Timestamp",
+[TimestampReply]       "TimestampReply",
+[InfoRequest]          "InfoRequest",
+[InfoReply]            "InfoReply",
+[AddrMaskRequest]      "AddrMaskRequest",
+[AddrMaskReply]                "AddrMaskReply",
+[EchoRequestV6]                "EchoRequestV6",
+[EchoReplyV6]          "EchoReplyV6",
+[RouterSolicit]                "RouterSolicit",
+[RouterAdvert]         "RouterAdvert",
+[NbrSolicit]           "NbrSolicit",
+[NbrAdvert]            "NbrAdvert",
+[RedirectV6]           "RedirectV6",
+};
+
+enum                   /* indices into Icmppriv6.stats and statnames6 */
+{
+       InMsgs6,
+       InErrors6,      /* bumped by valid() for every rejected packet */
+       OutMsgs6,
+       CsumErrs6,      /* pseudo-header checksum did not verify */
+       LenErrs6,       /* payload length larger than the data received */
+       HlenErrs6,      /* packet too short for its headers */
+       HoplimErrs6,    /* neighbor discovery packet whose hop limit is not HOP_LIMIT */
+       IcmpCodeErrs6,  /* neighbor discovery packet with a non-zero code */
+       TargetErrs6,    /* multicast target in a neighbor solicit/advert */
+       OptlenErrs6,    /* option with zero length (or disallowed option) */
+       AddrmxpErrs6,   /* address combination rejected by valid() */
+       RouterAddrErrs6,        /* router advertisement from a non link-local source */
+
+       Nstats6,        /* number of counters */
+};
+
+static char *statnames6[Nstats6] =     /* labels for the counters above, same order */
+{
+[InMsgs6]      "InMsgs",
+[InErrors6]    "InErrors",
+[OutMsgs6]     "OutMsgs",
+[CsumErrs6]    "CsumErrs",
+[LenErrs6]     "LenErrs",
+[HlenErrs6]    "HlenErrs",
+[HoplimErrs6]  "HoplimErrs",
+[IcmpCodeErrs6]        "IcmpCodeErrs",
+[TargetErrs6]  "TargetErrs",
+[OptlenErrs6]  "OptlenErrs",
+[AddrmxpErrs6] "AddrmxpErrs",
+[RouterAddrErrs6]      "RouterAddrErrs",
+};
+
+typedef struct Icmppriv6       /* per-protocol private data, reached through Proto.priv */
+{
+       ulong   stats[Nstats6]; /* general counters, indexed by InMsgs6..RouterAddrErrs6 */
+
+       /* message counts */
+       ulong   in[Maxtype6+1]; /* received, indexed by ICMPv6 type */
+       ulong   out[Maxtype6+1];        /* sent, indexed by ICMPv6 type */
+} Icmppriv6;
+
+typedef struct Icmpcb6         /* per-conversation control block, reached through Conv.ptcl */
+{
+       QLock;
+       uchar headers;          /* set to 6 by the "headers" ctl; icmpkick6 then takes addresses from the written data */
+} Icmpcb6;
+
+static char *unreachcode[] =   /* text for ICMPv6 unreachable codes, indexed by the icmp6_* code constants */
+{
+[icmp6_no_route]       "no route to destination",
+[icmp6_ad_prohib]      "comm with destination administratively prohibited",
+[icmp6_unassigned]     "icmp unreachable: unassigned error code (2)",
+[icmp6_adr_unreach]    "address unreachable",
+[icmp6_port_unreach]   "port unreachable",
+[icmp6_unkn_code]      "icmp unreachable: unknown code",
+};
+
+enum {
+       ICMP_USEAD6     = 40,   /* bytes of user-supplied header icmpkick6 consumes in "headers" mode (8 + two addresses) */
+};
+
+enum {                 /* flag bits; valid() tests Sflag on neighbor advertisements */
+       Oflag   = 1<<5,
+       Sflag   = 1<<6,
+       Rflag   = 1<<7,
+};
+
+enum {                 /* option type codes; valid() looks for slladd in router solicitations */
+       slladd  = 1,
+       tlladd  = 2,
+       prfinfo = 3,
+       redhdr  = 4,
+       mtuopt  = 5,
+};
+
+static void icmpkick6(void *x, Block *bp);     /* defined below; needed by icmpcreate6 */
+
+/*
+ * Give a new ICMPv6 conversation its queues: a message-mode read
+ * queue limited to 64K, and a write queue made by qbypass with
+ * icmpkick6 as its callback.
+ */
+static void
+icmpcreate6(Conv *c)
+{
+       enum { Rqlimit = 64*1024 };
+
+       c->rq = qopen(Rqlimit, Qmsg, 0, c);
+       c->wq = qbypass(icmpkick6, c);
+}
+
+/*
+ * Compute and store the ICMPv6 checksum of the packet in bp.
+ * The IPv6 header is temporarily rewritten to serve as the
+ * pseudo-header: vcf is cleared, the payload length is set from the
+ * block length, and ICMPv6 is placed in the ttl byte with proto
+ * zeroed.  On return proto is ICMPv6 again; the caller must still
+ * set vcf and ttl.
+ */
+static void
+set_cksum(Block *bp)
+{
+       IPICMP *hdr;
+
+       hdr = (IPICMP *)(bp->rp);
+
+       /* borrow IP header as pseudoheader */
+       hnputl(hdr->vcf, 0);
+       hnputs(hdr->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+       hdr->proto = 0;
+       hdr->ttl = ICMPv6;      /* ttl gets set later */
+
+       /* checksum with the checksum field itself zeroed */
+       hnputs(hdr->cksum, 0);
+       hnputs(hdr->cksum, ptclcsum(bp, 0, blocklen(bp)));
+
+       hdr->proto = ICMPv6;
+}
+
+/*
+ * Allocate a block holding packetlen zeroed bytes, with wp already
+ * advanced past them, ready to be filled in as an IPv6/ICMPv6 packet.
+ */
+static Block *
+newIPICMP(int packetlen)
+{
+       Block *b;
+
+       b = allocb(packetlen);
+       memset(b->rp, 0, packetlen);
+       b->wp += packetlen;
+       return b;
+}
+
+/*
+ * Advise routine for ICMPv6 itself.  bp starts at a quoted
+ * IPv6 + ICMPv6 header; the first conversation whose local port
+ * equals the quoted ICMP id and whose remote address equals the
+ * quoted destination has both its queues hung up with msg.
+ * bp is always freed.
+ */
+void
+icmpadvise6(Proto *icmp, Block *bp, char *msg)
+{
+       Conv    **cp, *cv;
+       IPICMP  *hdr;
+       ushort  id;
+
+       hdr = (IPICMP *) bp->rp;
+       id = nhgets(hdr->icmpid);
+
+       for(cp = icmp->conv; *cp; cp++) {
+               cv = *cp;
+               if(cv->lport != id || ipcmp(cv->raddr, hdr->dst) != 0)
+                       continue;
+               qhangup(cv->rq, msg);
+               qhangup(cv->wq, msg);
+               break;
+       }
+       freeblist(bp);
+}
+
+/*
+ * Write-side callback of an ICMPv6 conversation (installed by
+ * icmpcreate6).
+ *
+ * In "headers" mode (headers == 6) the written data starts with
+ * ICMP_USEAD6 bytes: 8 bytes that are skipped, then the local and the
+ * remote address; these are stripped and replaced by room for an IPv6
+ * header.  Otherwise the addresses and the ICMP id come from the
+ * conversation.  Blocks too short for an IPv6 + ICMPv6 header are
+ * dropped.  The packet is checksummed and passed to ipoput6.
+ */
+static void
+icmpkick6(void *x, Block *bp)
+{
+       Conv *c = x;
+       IPICMP *hdr;
+       uchar src[IPaddrlen], dst[IPaddrlen];
+       Icmppriv6 *priv = c->p->priv;
+       Icmpcb6 *cb = (Icmpcb6*)c->ptcl;
+       int userhdr;
+
+       if(bp == nil)
+               return;
+
+       userhdr = cb->headers == 6;
+       if(userhdr) {
+               /* get user specified addresses */
+               bp = pullupblock(bp, ICMP_USEAD6);
+               if(bp == nil)
+                       return;
+               ipmove(src, bp->rp + 8);
+               ipmove(dst, bp->rp + 8 + IPaddrlen);
+               bp->rp += 8 + 2*IPaddrlen;
+               bp = padblock(bp, sizeof(Ip6hdr));
+       }
+
+       if(blocklen(bp) < sizeof(IPICMP)){
+               freeblist(bp);
+               return;
+       }
+
+       hdr = (IPICMP *)(bp->rp);
+       if(userhdr) {
+               ipmove(hdr->dst, dst);
+               ipmove(hdr->src, src);
+       } else {
+               ipmove(hdr->dst, c->raddr);
+               ipmove(hdr->src, c->laddr);
+               hnputs(hdr->icmpid, c->lport);
+       }
+
+       set_cksum(bp);
+       hdr->vcf[0] = 0x06 << 4;        /* set_cksum cleared vcf; restore the version */
+       if(hdr->type <= Maxtype6)
+               priv->out[hdr->type]++;
+       ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+/*
+ * Handle a control request on an ICMPv6 conversation.  The single
+ * word "headers" switches the conversation to headers mode (see
+ * icmpkick6).  Returns nil on success, otherwise an error string.
+ */
+char*
+icmpctl6(Conv *c, char **argv, int argc)
+{
+       Icmpcb6 *cb = (Icmpcb6*) c->ptcl;
+
+       if(argc != 1 || strcmp(argv[0], "headers") != 0)
+               return "unknown control request";
+
+       cb->headers = 6;
+       return nil;
+}
+
+/*
+ * Deliver a received ICMPv6 packet to the first matching conversation.
+ * With muxkey == 0 the match is on the packet's ICMP id and source
+ * address; otherwise on muxkey and the packet's destination address.
+ * The packet is concatenated into one block before being queued.
+ * If no conversation matches, the packet is freed.
+ */
+static void
+goticmpkt6(Proto *icmp, Block *bp, int muxkey)
+{
+       Conv    **cp, *cv;
+       IPICMP  *hdr = (IPICMP *)bp->rp;
+       ushort  id;
+       uchar   *peer;
+
+       id = muxkey;
+       peer = hdr->dst;
+       if(muxkey == 0) {
+               id = nhgets(hdr->icmpid);
+               peer = hdr->src;
+       }
+
+       for(cp = icmp->conv; *cp; cp++){
+               cv = *cp;
+               if(cv->lport != id || ipcmp(cv->raddr, peer) != 0)
+                       continue;
+
+               bp = concatblock(bp);
+               if(bp != nil)
+                       qpass(cv->rq, bp);
+               return;
+       }
+
+       freeblist(bp);
+}
+
+/*
+ * Turn the echo request in bp into an echo reply in place: swap the
+ * IPv6 source and destination, set the type to EchoReplyV6 and
+ * recompute the checksum.  Returns bp.
+ */
+static Block *
+mkechoreply6(Block *bp)
+{
+       IPICMP  *hdr;
+       uchar   saved[IPaddrlen];
+
+       hdr = (IPICMP *)(bp->rp);
+
+       /* swap src and dst through a temporary */
+       ipmove(saved, hdr->src);
+       ipmove(hdr->src, hdr->dst);
+       ipmove(hdr->dst, saved);
+
+       hdr->type = EchoReplyV6;
+       set_cksum(bp);
+       return bp;
+}
+
+/*
+ * Send an ICMPv6 neighbor solicitation for targ.
+ *     suni == SRC_UNSPEC: the source is the unspecified address and no
+ *             link-layer option is sent; otherwise src is the source
+ *             and mac is sent as the source link-layer address option.
+ *     tuni == TARG_UNI: sent straight to targ (neighbor reachability);
+ *             otherwise sent to targ's solicited-node multicast
+ *             address (address resolution).
+ */
+
+extern void
+icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
+{
+       Block   *pkt;
+       Ndpkt   *nd;
+       Proto   *icmp = f->t2p[ICMPv6];
+       Icmppriv6 *priv = icmp->priv;
+
+       pkt = newIPICMP(sizeof(Ndpkt));
+       nd = (Ndpkt*) pkt->rp;
+
+       /* destination and target */
+       if(tuni == TARG_UNI)
+               memmove(nd->dst, targ, IPaddrlen);
+       else
+               ipv62smcast(nd->dst, targ);
+       nd->type = NbrSolicit;
+       nd->code = 0;
+       memmove(nd->target, targ, IPaddrlen);
+
+       /* source, and the link-layer option that goes with a real source */
+       if(suni == SRC_UNSPEC) {
+               memmove(nd->src, v6Unspecified, IPaddrlen);
+               /* no option: shorten the packet to the bare solicitation */
+               pkt->wp -= sizeof(Ndpkt)-sizeof(NdiscC);
+       }
+       else {
+               memmove(nd->src, src, IPaddrlen);
+               nd->otype = SRC_LLADDRESS;
+               nd->olen = 1;   /* 1+1+6 = 8 = 1 8-octet */
+               memmove(nd->lnaddr, mac, sizeof(nd->lnaddr));
+       }
+
+       set_cksum(pkt);
+       nd->ttl = HOP_LIMIT;
+       nd->vcf[0] = 0x06 << 4;
+       priv->out[NbrSolicit]++;
+       netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ);
+       ipoput6(f, pkt, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMPv6 neighbor advertisement for targ from src to dst.
+ * flags is stored in the first byte of the id field; mac is sent as
+ * the target link-layer address option.
+ */
+extern void
+icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
+{
+       Block   *pkt;
+       Ndpkt   *nd;
+       Proto   *icmp = f->t2p[ICMPv6];
+       Icmppriv6 *priv = icmp->priv;
+
+       pkt = newIPICMP(sizeof(Ndpkt));
+       nd = (Ndpkt*) pkt->rp;
+
+       /* addresses */
+       memmove(nd->src, src, IPaddrlen);
+       memmove(nd->dst, dst, IPaddrlen);
+       memmove(nd->target, targ, IPaddrlen);
+
+       /* message */
+       nd->type = NbrAdvert;
+       nd->code = 0;
+       nd->icmpid[0] = flags;
+
+       /* target link-layer address option */
+       nd->otype = TARGET_LLADDRESS;
+       nd->olen = 1;
+       memmove(nd->lnaddr, mac, sizeof(nd->lnaddr));
+
+       set_cksum(pkt);
+       nd->ttl = HOP_LIMIT;
+       nd->vcf[0] = 0x06 << 4;
+       priv->out[NbrAdvert]++;
+       netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+       ipoput6(f, pkt, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMPv6 destination-unreachable message with the given code
+ * in response to bp, quoting as much of bp's first block as fits in
+ * v6MINTU.
+ *
+ * If free is non-zero the reply is fed back through ipiput6 on ifc
+ * and bp is freed here on every path; if free is zero the reply is
+ * sent with ipoput6 and bp is left to the caller on every path.
+ * Nothing is sent when bp's source is multicast or when ifc has no
+ * usable local address.
+ *
+ * ifc's read lock is taken only around the code that needs it and is
+ * released on every exit.  Earlier versions unlocked without locking
+ * on the multicast path, returned with the lock held when free was
+ * zero, and freed bp on the multicast path even when free was zero.
+ */
+extern void
+icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+{
+       Block *nbp;
+       IPICMP *np;
+       Ip6hdr  *p;
+       int osz = BLEN(bp);
+       int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+       Proto   *icmp = f->t2p[ICMPv6];
+       Icmppriv6 *ipriv = icmp->priv;
+
+       p = (Ip6hdr *) bp->rp;
+
+       /* no lock is held yet, so clean must not unlock */
+       if(isv6mcast(p->src))
+               goto clean;
+
+       nbp = newIPICMP(sz);
+       np = (IPICMP *) nbp->rp;
+
+       rlock(ifc);
+       if(!ipv6anylocal(ifc, np->src)) {
+               netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
+               runlock(ifc);
+               freeblist(nbp);
+               goto clean;
+       }
+       netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
+
+       memmove(np->dst, p->src, IPaddrlen);
+       np->type = UnreachableV6;
+       np->code = code;
+       memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+       set_cksum(nbp);
+       np->ttl = HOP_LIMIT;
+       np->vcf[0] = 0x06 << 4;
+       ipriv->out[UnreachableV6]++;
+
+       if(free) {
+               /* local delivery still runs with ifc read-locked, as before */
+               ipiput6(f, ifc, nbp);
+               runlock(ifc);
+       }
+       else {
+               /* the source address has been copied; ifc is no longer needed */
+               runlock(ifc);
+               ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+       }
+
+clean:
+       if(free)
+               freeblist(bp);
+}
+
+/*
+ * Send an ICMPv6 time-exceeded (code 0) message to the source of bp,
+ * quoting as much of bp's first block as fits in v6MINTU.  Nothing is
+ * sent when bp's source is multicast or when ifc has no usable local
+ * address; in the latter case the reply block allocated here is
+ * freed (it used to be leaked).  bp itself is not freed here.
+ */
+extern void
+icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
+{
+       Block *nbp;
+       IPICMP *np;
+       Ip6hdr  *p;
+       int osz = BLEN(bp);
+       int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+       Proto   *icmp = f->t2p[ICMPv6];
+       Icmppriv6 *ipriv = icmp->priv;
+
+       p = (Ip6hdr *) bp->rp;
+
+       if(isv6mcast(p->src)) 
+               return;
+
+       nbp = newIPICMP(sz);
+       np = (IPICMP *) nbp->rp;
+
+       if(ipv6anylocal(ifc, np->src)) {
+               netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
+       }
+       else {
+               netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
+               freeblist(nbp);         /* reply will never be sent */
+               return;
+       }
+
+       memmove(np->dst, p->src, IPaddrlen);
+       np->type = TimeExceedV6;
+       np->code = 0;
+       memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+       set_cksum(nbp);
+       np->ttl = HOP_LIMIT;
+       np->vcf[0] = 0x06 << 4;
+       ipriv->out[TimeExceedV6]++;
+       ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * Send an ICMPv6 packet-too-big message to the source of bp, quoting
+ * as much of bp's first block as fits in v6MINTU.  The 4 bytes that
+ * normally hold id and seq carry ifc's maxtu minus the medium's
+ * header size.  Nothing is sent when bp's source is multicast or when
+ * ifc has no usable local address; in the latter case the reply block
+ * allocated here is freed (it used to be leaked).  bp itself is not
+ * freed here.
+ */
+extern void
+icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
+{
+       Block *nbp;
+       IPICMP *np;
+       Ip6hdr  *p;
+       int osz = BLEN(bp);
+       int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+       Proto   *icmp = f->t2p[ICMPv6];
+       Icmppriv6 *ipriv = icmp->priv;
+
+       p = (Ip6hdr *) bp->rp;
+
+       if(isv6mcast(p->src)) 
+               return;
+
+       nbp = newIPICMP(sz);
+       np = (IPICMP *) nbp->rp;
+
+       if(ipv6anylocal(ifc, np->src)) {
+               netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
+       }
+       else {
+               netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
+               freeblist(nbp);         /* reply will never be sent */
+               return;
+       }
+
+       memmove(np->dst, p->src, IPaddrlen);
+       np->type = PacketTooBigV6;
+       np->code = 0;
+       /* deliberately 4 bytes wide: overlays icmpid and seq */
+       hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
+       memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+       set_cksum(nbp);
+       np->ttl = HOP_LIMIT;
+       np->vcf[0] = 0x06 << 4;
+       ipriv->out[PacketTooBigV6]++;
+       ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * RFC 2461, pages 39-40, pages 57-58.
+ *
+ * Check a received ICMPv6 packet.  Returns 1 if it is acceptable and
+ * 0 otherwise; on failure InErrors6 and, where one exists, a more
+ * specific counter are incremented.  The caller keeps ownership of bp.
+ *
+ * All packets: long enough for the IPv6 + ICMPv6 headers, payload
+ * length no larger than the data received, no extension headers, and
+ * a correct checksum (verified by temporarily turning the IPv6 header
+ * into the pseudo-header; it is restored only when the checksum is
+ * good).  Neighbor discovery packets additionally need hop limit
+ * HOP_LIMIT, code 0, and the per-type checks below.
+ *
+ * ICMPv6 messages may have an odd length, so no parity test is made
+ * (the old "iplen % 1" test was always false).
+ */
+static int
+valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
+       int     sz, osz, unsp, n, ttl, iplen;
+       int     pktsz = BLEN(bp);
+       uchar   *packet = bp->rp;
+       IPICMP  *p = (IPICMP *) packet;
+       Ndpkt   *np;
+
+       USED(ifc);
+       n = blocklen(bp);
+       if(n < sizeof(IPICMP)) {
+               ipriv->stats[HlenErrs6]++;
+               netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+               goto err;
+       }
+
+       iplen = nhgets(p->ploadlen);
+       if(iplen > n-IPV6HDR_LEN) {
+               ipriv->stats[LenErrs6]++;
+               netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+               goto err;
+       }
+
+       // Rather than construct explicit pseudoheader, overwrite IPv6 header
+       if(p->proto != ICMPv6) {
+               // This code assumes no extension headers!!!
+               netlog(icmp->f, Logicmp, "icmp error: extension header\n");
+               goto err;
+       }
+       memset(packet, 0, 4);
+       ttl = p->ttl;
+       p->ttl = p->proto;
+       p->proto = 0;
+       if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+               ipriv->stats[CsumErrs6]++;
+               netlog(icmp->f, Logicmp, "icmp checksum error\n");
+               goto err;
+       }
+       p->proto = p->ttl;
+       p->ttl = ttl;
+
+       /* additional tests for some pkt types */
+       if( (p->type == NbrSolicit) ||
+               (p->type == NbrAdvert) ||
+               (p->type == RouterAdvert) ||
+               (p->type == RouterSolicit) ||
+               (p->type == RedirectV6) ) {
+
+               if(p->ttl != HOP_LIMIT) {
+                       ipriv->stats[HoplimErrs6]++; 
+                       goto err; 
+               }
+               if(p->code != 0) {
+                       ipriv->stats[IcmpCodeErrs6]++; 
+                       goto err; 
+               }
+
+               switch (p->type) {
+               case NbrSolicit:
+               case NbrAdvert:
+                       /* the target address is read below; it must lie inside this block */
+                       if(pktsz < sizeof(NdiscC)) {
+                               ipriv->stats[HlenErrs6]++; 
+                               goto err; 
+                       }
+                       np = (Ndpkt*) p;
+                       if(isv6mcast(np->target)) {
+                               ipriv->stats[TargetErrs6]++; 
+                               goto err; 
+                       }
+                       if(optexsts(np) && (np->olen == 0)) {
+                               ipriv->stats[OptlenErrs6]++; 
+                               goto err; 
+                       }
+               
+                       /* an unspecified source must go to a solicited-node address, without options */
+                       if(p->type == NbrSolicit) {
+                               if(ipcmp(np->src, v6Unspecified) == 0) { 
+                                       if(!issmcast(np->dst) || optexsts(np))  {
+                                               ipriv->stats[AddrmxpErrs6]++; 
+                                               goto err;
+                                       }
+                               }
+                       }
+               
+                       /* a multicast advertisement must not have Sflag set */
+                       if(p->type == NbrAdvert) {
+                               if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
+                                       ipriv->stats[AddrmxpErrs6]++; 
+                                       goto err; 
+                               }
+                       }
+                       break;
+       
+               case RouterAdvert:
+                       if(pktsz - sizeof(Ip6hdr) < 16) {
+                               ipriv->stats[HlenErrs6]++; 
+                               goto err; 
+                       }
+                       if(!islinklocal(p->src)) {
+                               ipriv->stats[RouterAddrErrs6]++; 
+                               goto err; 
+                       }
+                       /* walk the options; each must have a non-zero length */
+                       sz = sizeof(IPICMP) + 8;
+                       while ((sz+1) < pktsz) {
+                               osz = *(packet+sz+1);
+                               if(osz <= 0) {
+                                       ipriv->stats[OptlenErrs6]++; 
+                                       goto err; 
+                               }       
+                               sz += 8*osz;
+                       }
+                       break;
+       
+               case RouterSolicit:
+                       if(pktsz - sizeof(Ip6hdr) < 8) {
+                               ipriv->stats[HlenErrs6]++; 
+                               goto err; 
+                       }
+                       unsp = (ipcmp(p->src, v6Unspecified) == 0);
+                       /* walk the options; no source link-layer option from an unspecified source */
+                       sz = sizeof(IPICMP) + 8;
+                       while ((sz+1) < pktsz) {
+                               osz = *(packet+sz+1);
+                               if((osz <= 0) ||
+                                       (unsp && (*(packet+sz) == slladd)) ) {
+                                       ipriv->stats[OptlenErrs6]++; 
+                                       goto err; 
+                               }
+                               sz += 8*osz;
+                       }
+                       break;
+       
+               case RedirectV6:
+                       //to be filled in
+                       break;
+       
+               default:
+                       goto err;
+               }
+       }
+
+       return 1;
+
+err:
+       ipriv->stats[InErrors6]++; 
+       return 0;
+}
+
+/*
+ * Classify target with respect to interface ifc.
+ * Returns t_uniproxy when ipproxyifc() reports the address as proxied
+ * on ifc, t_unitent or t_unirany when it equals one of ifc's local
+ * addresses (tentative or not, respectively), and 0 when nothing
+ * matches.  ifc is read-locked for the duration of the lookup.
+ */
+static int
+targettype(Fs *f, Ipifc *ifc, uchar *target)
+{
+       Iplifc *l;
+       int kind;
+
+       kind = 0;
+       rlock(ifc);
+       if(ipproxyifc(f, ifc, target))
+               kind = t_uniproxy;
+       else
+               for(l = ifc->lifc; l != nil; l = l->next) {
+                       if(ipcmp(l->local, target) != 0)
+                               continue;
+                       /* first matching local address decides */
+                       if(l->tentative)
+                               kind = t_unitent;
+                       else
+                               kind = t_unirany;
+                       break;
+               }
+       runlock(ifc);
+       return kind;
+}
+
+/*
+ * Receive routine for ICMPv6: bp holds a packet that arrived on ipifc.
+ *
+ * The packet is first checked by valid(); packets that fail, or whose
+ * type exceeds Maxtype6, are freed.  Otherwise the per-type input
+ * counter is bumped and the packet is dispatched:
+ *   - echo requests are answered via mkechoreply6()/ipoput6();
+ *   - unreachable and ttl-exceeded errors are offered to the advise
+ *     routine of the protocol named in the embedded header, falling
+ *     back to goticmpkt6();
+ *   - router solicitations/advertisements go to goticmpkt6() with
+ *     their type;
+ *   - neighbour solicitations/advertisements update the ARP table and
+ *     may trigger a neighbour advertisement.
+ *
+ * Every path that does not hand bp to another routine frees it here.
+ */
+static void
+icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+{
+       uchar   *packet = bp->rp;
+       IPICMP  *p = (IPICMP *)packet;
+       Icmppriv6 *ipriv = icmp->priv;
+       Block   *r;
+       Proto   *pr;
+       char    *msg, m2[128];
+       Ndpkt* np;
+       uchar pktflags;
+       uchar lsrc[IPaddrlen];
+       int refresh = 1;
+       Iplifc *lifc;
+
+       if(!valid(icmp, ipifc, bp, ipriv)) 
+               goto raise;
+
+       /* in[] is indexed by message type; anything beyond Maxtype6 is dropped */
+       if(p->type <= Maxtype6)
+               ipriv->in[p->type]++;
+       else
+               goto raise;
+
+       switch(p->type) {
+       case EchoRequestV6:
+               r = mkechoreply6(bp);
+               ipriv->out[EchoReply]++;
+               ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+               break;
+
+       case UnreachableV6:
+               if(p->code > 4)
+                       msg = unreachcode[icmp6_unkn_code];
+               else
+                       msg = unreachcode[p->code];
+
+               /* step over our header to the header of the offending packet */
+               bp->rp += sizeof(IPICMP);
+               if(blocklen(bp) < 8){
+                       ipriv->stats[LenErrs6]++;
+                       goto raise;
+               }
+               p = (IPICMP *)bp->rp;
+               pr = Fsrcvpcolx(icmp->f, p->proto);
+               if(pr != nil && pr->advise != nil) {
+                       /* bp is passed on and not touched again here */
+                       (*pr->advise)(pr, bp, msg);
+                       return;
+               }
+
+               /* nobody to advise: restore rp and deliver as plain icmp */
+               bp->rp -= sizeof(IPICMP);
+               goticmpkt6(icmp, bp, 0);
+               break;
+
+       case TimeExceedV6:
+               if(p->code == 0){
+                       sprint(m2, "ttl exceeded at %I", p->src);
+
+                       bp->rp += sizeof(IPICMP);
+                       if(blocklen(bp) < 8){
+                               ipriv->stats[LenErrs6]++;
+                               goto raise;
+                       }
+                       p = (IPICMP *)bp->rp;
+                       pr = Fsrcvpcolx(icmp->f, p->proto);
+                       if(pr != nil && pr->advise != nil) {
+                               (*pr->advise)(pr, bp, m2);
+                               return;
+                       }
+                       bp->rp -= sizeof(IPICMP);
+               }
+
+               goticmpkt6(icmp, bp, 0);
+               break;
+
+       case RouterAdvert:
+       case RouterSolicit:
+               /* using lsrc as a temp, munge hdr for goticmp6 
+               memmove(lsrc, p->src, IPaddrlen);
+               memmove(p->src, p->dst, IPaddrlen);
+               memmove(p->dst, lsrc, IPaddrlen); */
+
+               goticmpkt6(icmp, bp, p->type);
+               break;
+
+       case NbrSolicit:
+               np = (Ndpkt*) p;
+               pktflags = 0;
+               switch (targettype(icmp->f, ipifc, np->target)) {
+               case t_unirany:
+                       pktflags |= Oflag;
+                       /* fall through */
+
+               case t_uniproxy: 
+                       if(ipcmp(np->src, v6Unspecified) != 0) {
+                               arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+                               pktflags |= Sflag;
+                       }
+                       if(ipv6local(ipifc, lsrc)) {
+                               icmpna(icmp->f, lsrc, 
+                                  (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
+                                  np->target, ipifc->mac, pktflags); 
+                       }
+                       /*
+                        * icmpna is not given bp, so the solicitation
+                        * must be released here whether or not an
+                        * advertisement was sent; it used to leak when
+                        * ipv6local() succeeded.
+                        */
+                       freeblist(bp);
+                       break;
+
+               case t_unitent:
+                       /* not clear what needs to be done. send up
+                        * an icmp mesg saying don't use this address? */
+
+               default:
+                       freeblist(bp);
+               }
+
+               break;
+
+       case NbrAdvert:
+               np = (Ndpkt*) p;
+
+               /* if the target address matches one of the local interface 
+                * address and the local interface address has tentative bit set, 
+                * then insert into ARP table. this is so the duplication address 
+                * detection part of ipconfig can discover duplication through 
+                * the arp table
+                */
+               lifc = iplocalonifc(ipifc, np->target);
+               if(lifc && lifc->tentative)
+                       refresh = 0;
+               arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+               freeblist(bp);
+               break;
+
+       case PacketTooBigV6:
+
+       default:
+               goticmpkt6(icmp, bp, 0);
+               break;
+       }
+       return;
+
+raise:
+       freeblist(bp);
+
+}
+
+/*
+ * Format the ICMPv6 counters of icmp6 into buf, writing at most len
+ * bytes.  One "name: value" line is produced per general statistic,
+ * followed by one "name: in out" line per message type that has an
+ * entry in icmpnames6; unnamed types are not reported.
+ * Returns the number of bytes produced.
+ */
+int
+icmpstats6(Proto *icmp6, char *buf, int len)
+{
+       Icmppriv6 *ipriv;
+       char *cur, *end;
+       int k;
+
+       ipriv = icmp6->priv;
+       end = buf + len;
+       cur = buf;
+
+       k = 0;
+       while(k < Nstats6){
+               cur = seprint(cur, end, "%s: %lud\n", statnames6[k], ipriv->stats[k]);
+               k++;
+       }
+
+       for(k = 0; k <= Maxtype6; k++){
+               if(icmpnames6[k] == nil)
+                       continue;
+               cur = seprint(cur, end, "%s: %lud %lud\n", icmpnames6[k], ipriv->in[k], ipriv->out[k]);
+       }
+       return cur - buf;
+}
+
+
+/*
+ * Conversation handlers that icmp6init installs as the state,
+ * announce, connect and close hooks of the ICMPv6 Proto.
+ */
+// need to import from icmp.c
+extern int     icmpstate(Conv *c, char *state, int n);
+extern char*   icmpannounce(Conv *c, char **argv, int argc);
+extern char*   icmpconnect(Conv *c, char **argv, int argc);
+extern void    icmpclose(Conv *c);
+
+/*
+ * Build the ICMPv6 protocol descriptor, with its private counter
+ * block, and register it with the stack instance fs via Fsproto().
+ */
+void
+icmp6init(Fs *fs)
+{
+       Proto *p;
+
+       p = smalloc(sizeof(*p));
+       p->priv = smalloc(sizeof(Icmppriv6));
+
+       /* identity and sizing */
+       p->name = "icmpv6";
+       p->ipproto = ICMPv6;
+       p->nc = 16;
+       p->ptclsize = sizeof(Icmpcb6);
+
+       /* conversation hooks declared extern above */
+       p->state = icmpstate;
+       p->announce = icmpannounce;
+       p->connect = icmpconnect;
+       p->close = icmpclose;
+
+       /* ICMPv6-specific hooks */
+       p->create = icmpcreate6;
+       p->rcv = icmpiput6;
+       p->ctl = icmpctl6;
+       p->advise = icmpadvise6;
+       p->stats = icmpstats6;
+       p->gc = nil;
+
+       Fsproto(fs, p);
+}
+
diff --git a/kern/net/igmp.c b/kern/net/igmp.c
new file mode 100644 (file)
index 0000000..109df30
--- /dev/null
@@ -0,0 +1,291 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+/*
+ * IGMP constants: header sizes in bytes, the IP protocol number,
+ * the message types found in the low four bits of IGMPpkt.vertype,
+ * and the timing of delayed reports.
+ */
+enum
+{
+       IGMP_IPHDRSIZE  = 20,           /* size of ip header */
+       IGMP_HDRSIZE    = 8,            /* size of IGMP header */
+       IP_IGMPPROTO    = 2,
+
+       IGMPquery       = 1,
+       IGMPreport      = 2,
+
+       MSPTICK         = 100,          /* milliseconds igmpproc sleeps between passes */
+       MAXTIMEOUT      = 10000/MSPTICK,        /* at most 10 secs for a response */
+};
+
+/*
+ * An IGMP message together with the IP header in front of it, as
+ * overlaid on a Block by igmpsendreport and igmpiput.
+ *
+ * NOTE(review): src, dst and group are IPaddrlen bytes wide while the
+ * code around this struct treats the IP header as IGMP_IPHDRSIZE (20)
+ * bytes and stores 4-byte values with hnputl/nhgetl.  If IPaddrlen is
+ * the 16-byte v6 length, this layout does not match the offsets given
+ * to ptclcsum -- confirm before relying on it.
+ */
+typedef struct IGMPpkt IGMPpkt;
+struct IGMPpkt
+{
+       /* ip header */
+       uchar   vihl;           /* Version and header length */
+       uchar   tos;            /* Type of service */
+       uchar   len[2];         /* packet length (including headers) */
+       uchar   id[2];          /* Identification */
+       uchar   frag[2];        /* Fragment information */
+       uchar   Unused; 
+       uchar   proto;          /* Protocol */
+       uchar   cksum[2];       /* checksum of ip portion */
+       uchar   src[IPaddrlen];         /* Ip source */
+       uchar   dst[IPaddrlen];         /* Ip destination */
+
+       /* igmp header */
+       uchar   vertype;        /* version and type */
+       uchar   unused;
+       uchar   igmpcksum[2];           /* checksum of igmp portion */
+       uchar   group[IPaddrlen];       /* multicast group */
+};
+
+/*
+ *  lists for group reports
+ *
+ *  One IGMPrep exists per medium that is currently answering a query.
+ *  multi is the list of groups still to be reported; igmpproc sends
+ *  and unlinks an entry once ticks reaches that entry's timeout, and
+ *  igmpiput unlinks entries that another host has already reported.
+ */
+typedef struct IGMPrep IGMPrep;
+struct IGMPrep
+{
+       IGMPrep         *next;          /* next medium with pending reports */
+       Media           *m;             /* medium the reports are for */
+       int             ticks;          /* passes igmpproc has made over this entry */
+       Multicast       *multi;         /* groups still to be reported */
+};
+
+/*
+ * Global IGMP state: the list of pending report sets, the lock taken
+ * around it by igmpproc and igmpiput, and the rendezvous igmpproc
+ * sleeps on until igmpiput queues a report set.
+ */
+typedef struct IGMP IGMP;
+struct IGMP
+{
+       Lock;
+       Rendez  r;
+       IGMPrep *reports;
+};
+
+/* the single instance of the IGMP report state */
+IGMP igmpalloc;
+
+/* protocol descriptor filled in and registered by igmpinit */
+       Proto   igmp;
+extern Fs      fs;
+
+/*
+ * Message counters printed by igmpstats.  Only inqueries, inreports
+ * and outreports are incremented in this file.
+ */
+static struct Stats
+{
+       ulong   inqueries;
+       ulong   outqueries;
+       ulong   inreports;
+       ulong   outreports;
+} stats;
+
+/*
+ * Build and send an IGMP report for group addr on medium m.
+ * The report is addressed to Ipallsys and sent with a TTL of 1.
+ * Nothing is sent if no block can be allocated.
+ */
+void
+igmpsendreport(Media *m, uchar *addr)
+{
+       IGMPpkt *p;
+       Block *bp;
+
+       bp = allocb(sizeof(IGMPpkt));
+       if(bp == nil)
+               return;
+       p = (IGMPpkt*)bp->wp;
+       bp->wp += sizeof(IGMPpkt);
+
+       /*
+        * Zero the packet before filling in any field; clearing it
+        * after storing vihl would wipe that store again.
+        */
+       memset(p, 0, sizeof(IGMPpkt));
+       p->vihl = IP_VER4;
+       hnputl(p->src, Mediagetaddr(m));
+       hnputl(p->dst, Ipallsys);
+       p->vertype = (1<<4) | IGMPreport;
+       p->proto = IP_IGMPPROTO;
+       memmove(p->group, addr, IPaddrlen);
+       /* checksum covers the IGMP header that follows the IP header */
+       hnputs(p->igmpcksum, ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE));
+       netlog(Logigmp, "igmpreport %I\n", p->group);
+       stats.outreports++;
+       ipoput4(bp, 0, 1, DFLTTOS, nil);        /* TTL of 1 */
+}
+
+/*
+ * Sleep condition for igmpproc: true once at least one report set
+ * is queued on igmpalloc.reports.  The argument is unused.
+ */
+static int
+isreport(void *a)
+{
+       USED(a);
+       if(igmpalloc.reports == 0)
+               return 0;
+       return 1;
+}
+
+
+/*
+ * Kernel process that sends the delayed IGMP reports queued by
+ * igmpiput.  It sleeps until a report set exists, then repeatedly
+ * scans the sets under the igmpalloc lock: each pass bumps every
+ * set's tick count, unlinks the first group whose timeout has been
+ * reached and reports it, and frees sets that have no groups left.
+ * When nothing is due it sleeps MSPTICK milliseconds before the
+ * next pass.  The argument is unused.
+ */
+void
+igmpproc(void *a)
+{
+       IGMPrep *rp, **lrp;
+       Multicast *mp, **lmp;
+       uchar ip[IPaddrlen];
+
+       USED(a);
+
+       for(;;){
+               sleep(&igmpalloc.r, isreport, 0);
+               for(;;){
+                       lock(&igmpalloc);
+
+                       /* leaves the inner loop with the lock held; released below */
+                       if(igmpalloc.reports == nil)
+                               break;
+
+                       /* look for a single report */
+                       lrp = &igmpalloc.reports;
+                       mp = nil;
+                       for(rp = *lrp; rp; rp = *lrp){
+                               /*
+                                * NOTE(review): ticks also advances on passes
+                                * that follow a send without sleeping, so it
+                                * is not a strict count of MSPTICK intervals.
+                                */
+                               rp->ticks++;
+                               lmp = &rp->multi;
+                               for(mp = *lmp; mp; mp = *lmp){
+                                       if(rp->ticks >= mp->timeout){
+                                               /* due: unlink it, report it below */
+                                               *lmp = mp->next;
+                                               break;
+                                       }
+                                       lmp = &mp->next;
+                               }
+                               if(mp != nil)
+                                       break;
+
+                               if(rp->multi != nil){
+                                       lrp = &rp->next;
+                                       continue;
+                               } else {
+                                       /* nothing left to report for this medium */
+                                       *lrp = rp->next;
+                                       free(rp);
+                               }
+                       }
+                       unlock(&igmpalloc);
+
+                       if(mp){
+                               /* do a single report and try again */
+                               hnputl(ip, mp->addr);
+                               igmpsendreport(rp->m, ip);
+                               free(mp);
+                               continue;
+                       }
+
+                       tsleep(&up->sleep, return0, 0, MSPTICK);
+               }
+               /* pairs with the lock taken just before the break above */
+               unlock(&igmpalloc);
+       }
+
+}
+
+/*
+ * Receive routine for IGMP: bp is a packet that arrived on medium m.
+ *
+ * Packets that are too short, carry a version other than 1, or fail
+ * the IGMP checksum are dropped.  A query starts a new report set for
+ * m (unless one is already pending) with a random timeout per group
+ * and wakes igmpproc.  A report from another host removes the reported
+ * group from m's pending set.  bp is always freed before returning.
+ */
+void
+igmpiput(Media *m, Ipifc *, Block *bp)
+{
+       int n;
+       IGMPpkt *ghp;
+       Ipaddr group;
+       IGMPrep *rp, **lrp;
+       Multicast *mp, **lmp, *next;
+
+       /* check the length before looking at any header field */
+       n = blocklen(bp);
+       if(n < IGMP_IPHDRSIZE+IGMP_HDRSIZE){
+               netlog(Logigmp, "igmpiput: bad len\n");
+               goto error;
+       }
+
+       ghp = (IGMPpkt*)(bp->rp);
+       netlog(Logigmp, "igmpiput: %d %I\n", ghp->vertype, ghp->group);
+
+       if((ghp->vertype>>4) != 1){
+               netlog(Logigmp, "igmpiput: bad igmp type\n");
+               goto error;
+       }
+       if(ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)){
+               netlog(Logigmp, "igmpiput: checksum error %I\n", ghp->src);
+               goto error;
+       }
+
+       group = nhgetl(ghp->group);
+
+       lock(&igmpalloc);
+       switch(ghp->vertype & 0xf){
+       case IGMPquery:
+               /*
+                *  start reporting groups that we're a member of.
+                */
+               stats.inqueries++;
+               for(rp = igmpalloc.reports; rp; rp = rp->next)
+                       if(rp->m == m)
+                               break;
+               if(rp != nil)
+                       break;  /* already reporting */
+
+               mp = Mediacopymulti(m);
+               if(mp == nil)
+                       break;
+
+               rp = malloc(sizeof(*rp));
+               if(rp == nil){
+                       /*
+                        * No entry to hang the copied list on: release
+                        * it here, element by element as elsewhere in
+                        * this file, instead of leaking it.
+                        */
+                       for(; mp != nil; mp = next){
+                               next = mp->next;
+                               free(mp);
+                       }
+                       break;
+               }
+
+               rp->m = m;
+               rp->multi = mp;
+               rp->ticks = 0;
+               /* spread the reports over up to MAXTIMEOUT ticks */
+               for(; mp; mp = mp->next)
+                       mp->timeout = nrand(MAXTIMEOUT);
+               rp->next = igmpalloc.reports;
+               igmpalloc.reports = rp;
+
+               wakeup(&igmpalloc.r);
+
+               break;
+       case IGMPreport:
+               /*
+                *  find report list for this medium
+                */
+               stats.inreports++;
+               lrp = &igmpalloc.reports;
+               for(rp = *lrp; rp; rp = *lrp){
+                       if(rp->m == m)
+                               break;
+                       lrp = &rp->next;
+               }
+               if(rp == nil)
+                       break;
+
+               /*
+                *  if someone else has reported a group,
+                *  we don't have to.
+                */
+               lmp = &rp->multi;
+               for(mp = *lmp; mp; mp = *lmp){
+                       if(mp->addr == group){
+                               *lmp = mp->next;
+                               free(mp);
+                               break;
+                       }
+                       lmp = &mp->next;
+               }
+
+               break;
+       }
+       unlock(&igmpalloc);
+
+error:
+       freeb(bp);
+}
+
+/*
+ * Format the IGMP counters into buf (at most len bytes): received
+ * queries and reports on one line, sent queries and reports on the
+ * next.  Returns the value of snprint.
+ */
+int
+igmpstats(char *buf, int len)
+{
+       /* the counters are ulong, so they need %lud rather than %d */
+       return snprint(buf, len, "\trcvd %lud %lud\n\tsent %lud %lud\n",
+               stats.inqueries, stats.inreports,
+               stats.outqueries, stats.outreports);
+}
+
+/*
+ * Fill in the global igmp protocol descriptor, install
+ * igmpsendreport as the report hook, start the igmpproc kernel
+ * process and register the protocol with fs.
+ */
+void
+igmpinit(Fs *fs)
+{
+       /* identity; IGMP has no conversations */
+       igmp.name = "igmp";
+       igmp.ipproto = IP_IGMPPROTO;
+       igmp.nc = 0;
+       igmp.ptclsize = 0;
+
+       /* only packet input and statistics are implemented */
+       igmp.rcv = igmpiput;
+       igmp.stats = igmpstats;
+       igmp.connect = nil;
+       igmp.announce = nil;
+       igmp.ctl = nil;
+       igmp.state = nil;
+       igmp.close = nil;
+
+       igmpreportfn = igmpsendreport;
+       kproc("igmpproc", igmpproc, 0, 0);
+
+       Fsproto(fs, &igmp);
+}
diff --git a/kern/net/ihbootp.c b/kern/net/ihbootp.c
new file mode 100644 (file)
index 0000000..68b14b1
--- /dev/null
@@ -0,0 +1,323 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+
+/*
+ * Addresses learned from the most recent bootp reply, held as host
+ * integers.  rcvbootp and parsevend set them; rbootp and rbootpread
+ * read them.
+ */
+static ulong   fsip;
+static ulong   auip;
+static ulong   gwip;
+static ulong   ipmask;
+static ulong   ipaddr;
+static ulong   dnsip;
+
+/* values of the Bootp op field */
+enum
+{
+       Bootrequest = 1,
+       Bootreply   = 2,
+};
+
+/*
+ * A bootp packet as read from and written to the udp data file: the
+ * udp "oldheaders" address prefix followed by the bootp message.
+ */
+typedef struct Bootp
+{
+       /* udp.c oldheader */
+       uchar   raddr[IPaddrlen];
+       uchar   laddr[IPaddrlen];
+       uchar   rport[2];
+       uchar   lport[2];
+       /* bootp itself */
+       uchar   op;             /* opcode */
+       uchar   htype;          /* hardware type */
+       uchar   hlen;           /* hardware address len */
+       uchar   hops;           /* hops */
+       uchar   xid[4];         /* a random number */
+       uchar   secs[2];        /* elapsed since client started booting */
+       uchar   pad[2];
+       uchar   ciaddr[4];      /* client IP address (client tells server) */
+       uchar   yiaddr[4];      /* client IP address (server tells client) */
+       uchar   siaddr[4];      /* server IP address */
+       uchar   giaddr[4];      /* gateway IP address */
+       uchar   chaddr[16];     /* client hardware address */
+       uchar   sname[64];      /* server host name (optional) */
+       uchar   file[128];      /* boot file name */
+       uchar   vend[128];      /* vendor-specific goo */
+} Bootp;
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d
+ * dnsip d.d.d.d"
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+ */
+
+/* state shared between rbootp and the rcvbootp kernel process */
+static Bootp   req;            /* the request being broadcast */
+static Proc*   rcvprocp;       /* receiver process, while it is running */
+static int     recv;           /* set by rcvbootp when it finishes */
+static int     done;           /* set by rbootp to stop the receiver */
+static Rendez  bootpr;         /* rbootp waits here between retries */
+static char    rcvbuf[512];    /* receive buffer for replies */
+static int     bootpdebug;     /* non-zero enables diagnostic prints */
+
+/*
+ * Assemble the four network-order bytes at p into a host integer.
+ * Each byte is widened before shifting so a high bit in p[0] can
+ * neither overflow an int nor sign-extend into the result.
+ */
+static ulong
+vendaddr(uchar *p)
+{
+       return ((ulong)p[0]<<24) | ((ulong)p[1]<<16) | ((ulong)p[2]<<8) | (ulong)p[3];
+}
+
+/*
+ * Parse the vendor specific fields according to RFC 1084.
+ * We are overloading the "cookie server" to be the Inferno 
+ * authentication server and the "resource location server"
+ * to be the Inferno file server.
+ *
+ * If the vendor specific field is formatted properly, it
+ * will begin with the four bytes 99.130.83.99 and end with
+ * an 0xFF byte.
+ *
+ * vend must point at a buffer of at least sizeof(req.vend) bytes;
+ * parsing never looks beyond that many bytes, and stops at the first
+ * option whose length would run past them.  Recognised options set
+ * ipmask, gwip, dnsip, auip and fsip; everything else is skipped.
+ */
+static void
+parsevend(uchar* vend)
+{
+       uchar *end;
+       int i;
+
+       end = vend + sizeof(req.vend);
+
+       /* The field must start with 99.130.83.99 to be compliant */
+       if ((vend[0] != 99) || (vend[1] != 130) ||
+           (vend[2] != 83) || (vend[3] != 99)){
+               if(bootpdebug)
+                       print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
+               return;
+       }
+
+       /* Skip over the magic cookie */
+       vend += 4;
+
+       while ((vend < end) && (vend[0] != 0) && (vend[0] != 0xFF)) {
+               /* the length byte and the whole value must lie inside the field */
+               if(end - vend < 2 || vend[1] > end - vend - 2)
+                       return;
+
+               if(bootpdebug){
+                       print("vend %d [%d]", vend[0], vend[1]);
+                       /* dump the value, which starts after the tag and length bytes */
+                       for(i=0; i<vend[1]; i++)
+                               print(" %2.2x", vend[2+i]);
+                       print("\n");
+               }
+               switch (vend[0]) {
+               case 1: /* Subnet mask field */
+                       /* There must be only one subnet mask */
+                       if (vend[1] != 4)
+                               return;
+
+                       ipmask = vendaddr(vend+2);
+                       break;
+
+               case 3: /* Gateway/router field */
+                       /* We are only concerned with first address */
+                       if (vend[1] < 4)
+                               break;
+
+                       gwip = vendaddr(vend+2);
+                       break;
+
+               case 6: /* DNS server */
+                       /* We are only concerned with first address */
+                       if (vend[1] < 4)
+                               break;
+
+                       dnsip = vendaddr(vend+2);
+                       break;
+
+               case 8: /* "Cookie server" (auth server) field */
+                       /* We are only concerned with first address */
+                       if (vend[1] < 4)
+                               break;
+
+                       auip = vendaddr(vend+2);
+                       break;
+
+               case 11:        /* "Resource loc server" (file server) field */
+                       /* We are only concerned with first address */
+                       if (vend[1] < 4)
+                               break;
+
+                       fsip = vendaddr(vend+2);
+                       break;
+
+               default:        /* Ignore everything else */
+                       break;
+               }
+
+               /* Skip over the field */
+               vend += vend[1] + 2;
+       }
+}
+
+/*
+ * Kernel process started by rbootp to wait for a bootp reply.
+ * a carries the data file descriptor, cast to a pointer.
+ *
+ * Packets are read until done is set, a read fails, or a packet
+ * arrives whose hardware address matches the request (ethernet,
+ * six-byte address).  The matching packet supplies ipaddr, and its
+ * vendor area is handed to parsevend.  On the way out the process
+ * sets recv, wakes rbootp and exits.
+ */
+static void
+rcvbootp(void *a)
+{
+       int n, fd;
+       Bootp *rp;
+
+       if(waserror())
+               pexit("", 0);
+       rcvprocp = up;  /* store for postnote below */
+       fd = (int)a;
+       while(done == 0) {
+               n = kread(fd, rcvbuf, sizeof(rcvbuf));
+               if(n <= 0)
+                       break;
+               rp = (Bootp*)rcvbuf;
+
+               /* ignore packets that end before the vendor area begins */
+               if(n < (int)(sizeof(Bootp) - sizeof(rp->vend)))
+                       continue;
+               /*
+                * Clear whatever this read did not fill, so that bytes
+                * left over from an earlier, longer packet are never
+                * taken for vendor options.
+                */
+               if(n < (int)sizeof(rcvbuf))
+                       memset(rcvbuf+n, 0, sizeof(rcvbuf)-n);
+
+               if (memcmp(req.chaddr, rp->chaddr, 6) == 0 &&
+                  rp->htype == 1 && rp->hlen == 6) {
+                       /* widen before shifting: a high first octet must not sign-extend */
+                       ipaddr = ((ulong)rp->yiaddr[0]<<24)|
+                                ((ulong)rp->yiaddr[1]<<16)|
+                                ((ulong)rp->yiaddr[2]<<8)|
+                                 (ulong)rp->yiaddr[3];
+                       parsevend(rp->vend);
+                       break;
+               }
+       }
+       poperror();
+       rcvprocp = nil;
+
+       recv = 1;
+       wakeup(&bootpr);
+       pexit("", 0);
+}
+
+/*
+ * Configure ifc by bootp.
+ *
+ * A temporary 0.0.0.0 address is added to ifc, udp port 68 is
+ * announced, and a request carrying ifc's MAC address is broadcast
+ * to port 67 once a second until rcvbootp reports completion or ten
+ * retries have passed.  The temporary address is then removed, the
+ * address and mask held in ipaddr/ipmask are added, and a default
+ * route through gwip is installed if gwip is non-zero.
+ *
+ * Returns nil, or an error string if the udp conversation could not
+ * be set up.
+ *
+ * NOTE(review): a timeout is only printed; the interface is still
+ * configured from whatever ipaddr/ipmask hold and nil is returned.
+ * Confirm that callers expect this.
+ */
+static char*
+rbootp(Ipifc *ifc)
+{
+       int cfd, dfd, tries, n;
+       char ia[5+3*16], im[16], *av[3];
+       uchar nipaddr[4], ngwip[4], nipmask[4];
+       char dir[Maxpath];
+       static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
+
+       /*
+        * av[0] is the verb slot of the argument vector; give it a
+        * defined value rather than passing an uninitialised pointer.
+        * NOTE(review): presumably ipifcadd/ipifcrem do not read it.
+        */
+       av[0] = "add";
+       av[1] = "0.0.0.0";
+       av[2] = "0.0.0.0";
+       ipifcadd(ifc, av, 3, 0, nil);
+
+       cfd = kannounce("udp!*!68", dir);
+       if(cfd < 0)
+               return "bootp announce failed";
+       strcat(dir, "/data");
+       if(kwrite(cfd, "headers", 7) < 0){
+               kclose(cfd);
+               return "bootp ctl headers failed";
+       }
+       kwrite(cfd, "oldheaders", 10);
+       dfd = kopen(dir, ORDWR);
+       if(dfd < 0){
+               kclose(cfd);
+               return "bootp open data failed";
+       }
+       kclose(cfd);
+
+       /* create request */
+       memset(&req, 0, sizeof(req));
+       ipmove(req.raddr, IPv4bcast);
+       hnputs(req.rport, 67);
+       req.op = Bootrequest;
+       req.htype = 1;                  /* ethernet (all we know) */
+       req.hlen = 6;                   /* ethernet (all we know) */
+
+       /* Hardware MAC address */
+       memmove(req.chaddr, ifc->mac, 6);
+       /* Fill in the local IP address if we know it */
+       ipv4local(ifc, req.ciaddr);
+       memset(req.file, 0, sizeof(req.file));
+       memmove(req.vend, vend_rfc1048, 4);
+
+       done = 0;
+       recv = 0;
+
+       kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
+
+       /*
+        * broadcast bootp's till we get a reply,
+        * or fixed number of tries
+        */
+       tries = 0;
+       while(recv == 0) {
+               if(kwrite(dfd, &req, sizeof(req)) < 0)
+                       print("bootp: write: %r");
+
+               tsleep(&bootpr, return0, 0, 1000);
+               if(++tries > 10) {
+                       print("bootp: timed out\n");
+                       break;
+               }
+       }
+       kclose(dfd);
+       done = 1;
+       /* a receiver that is still running is told to give up */
+       if(rcvprocp != nil){
+               postnote(rcvprocp, 1, "timeout", 0);
+               rcvprocp = nil;
+       }
+
+       /* drop the temporary address */
+       av[0] = "remove";
+       av[1] = "0.0.0.0";
+       av[2] = "0.0.0.0";
+       ipifcrem(ifc, av, 3);
+
+       /* install the address and mask now held in ipaddr/ipmask */
+       hnputl(nipaddr, ipaddr);
+       sprint(ia, "%V", nipaddr);
+       hnputl(nipmask, ipmask);
+       sprint(im, "%V", nipmask);
+       av[0] = "add";
+       av[1] = ia;
+       av[2] = im;
+       ipifcadd(ifc, av, 3, 0, nil);
+
+       if(gwip != 0) {
+               hnputl(ngwip, gwip);
+               n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
+               routewrite(ifc->conv->p->f, nil, ia, n);
+       }
+       return nil;
+}
+
+/*
+ * Read routine for the bootp results: formats fsip, auip, gwip,
+ * ipmask, ipaddr and dnsip, one "name address" line each, into a
+ * temporary READSTR buffer and copies the part selected by offset
+ * and len into bp with readstr().  Returns what readstr() returns.
+ * The buffer is freed on both the normal and the error path.
+ */
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+       char *text;
+       uchar v4[4];
+       int used;
+
+       text = smalloc(READSTR);
+       if(waserror()){
+               free(text);
+               nexterror();
+       }
+
+       used = 0;
+
+       hnputl(v4, fsip);
+       used += snprint(text + used, READSTR-used, "fsip %15V\n", v4);
+
+       hnputl(v4, auip);
+       used += snprint(text + used, READSTR-used, "auip %15V\n", v4);
+
+       hnputl(v4, gwip);
+       used += snprint(text + used, READSTR-used, "gwip %15V\n", v4);
+
+       hnputl(v4, ipmask);
+       used += snprint(text + used, READSTR-used, "ipmask %15V\n", v4);
+
+       hnputl(v4, ipaddr);
+       used += snprint(text + used, READSTR-used, "ipaddr %15V\n", v4);
+
+       /* last line: the running length is not needed afterwards */
+       hnputl(v4, dnsip);
+       snprint(text + used, READSTR-used, "dnsip %15V\n", v4);
+
+       len = readstr(offset, bp, len, text);
+       poperror();
+       free(text);
+       return len;
+}
+
+/* externally visible hooks, pointing at the implementations in this file */
+char*  (*bootp)(Ipifc*) = rbootp;
+int    (*bootpread)(char*, ulong, int) = rbootpread;
diff --git a/kern/net/ip.c b/kern/net/ip.c
new file mode 100644 (file)
index 0000000..b0d3f5a
--- /dev/null
@@ -0,0 +1,805 @@
+#include       "u.h"
+#include       "../port/lib.h"
+#include       "mem.h"
+#include       "dat.h"
+#include       "fns.h"
+#include       "../port/error.h"
+
+#include       "ip.h"
+
+/* short names for the structures defined further down in this file */
+typedef struct Ip4hdr          Ip4hdr;
+typedef struct IP              IP;
+typedef struct Fragment4       Fragment4;
+typedef struct Fragment6       Fragment6;
+typedef struct Ipfrag          Ipfrag;
+
+/* header sizes (bytes), header-length/fragment flag values and the packet size limit */
+enum
+{
+       IP4HDR          = 20,           /* sizeof(Ip4hdr) */
+       IP6HDR          = 40,           /* sizeof(Ip6hdr) */
+       IP_HLEN4        = 0x05,         /* Header length in words */
+       IP_DF           = 0x4000,       /* Don't fragment */
+       IP_MF           = 0x2000,       /* More fragments */
+       IP6FHDR         = 8,            /* sizeof(Fraghdr6) */
+       IP_MAX          = 64*1024,      /* Maximum Internet packet size */
+};
+
+/* high nibble of the first header byte of block xp, i.e. the version bits of vihl */
+#define BLKIPVER(xp)   (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
+
+/*
+ * IPv4 header without options, as overlaid on the read pointer of a
+ * Block.  All fields are byte arrays; the fields add up to IP4HDR
+ * (20) bytes.
+ */
+struct Ip4hdr
+{
+       uchar   vihl;           /* Version and header length */
+       uchar   tos;            /* Type of service */
+       uchar   length[2];      /* packet length */
+       uchar   id[2];          /* ip->identification */
+       uchar   frag[2];        /* Fragment information */
+       uchar   ttl;            /* Time to live */
+       uchar   proto;          /* Protocol */
+       uchar   cksum[2];       /* Header checksum */
+       uchar   src[4];         /* IP source */
+       uchar   dst[4];         /* IP destination */
+};
+
+/*
+ * MIB II counters
+ *
+ * Indices into IP.stats; statnames holds the label for each index
+ * and Nstats is the number of counters.
+ */
+enum
+{
+       Forwarding,
+       DefaultTTL,
+       InReceives,
+       InHdrErrors,
+       InAddrErrors,
+       ForwDatagrams,
+       InUnknownProtos,
+       InDiscards,
+       InDelivers,
+       OutRequests,
+       OutDiscards,
+       OutNoRoutes,
+       ReasmTimeout,
+       ReasmReqds,
+       ReasmOKs,
+       ReasmFails,
+       FragOKs,
+       FragFails,
+       FragCreates,
+
+       Nstats,
+};
+
+/*
+ * One entry of the IPv4 fragment lists kept in struct IP
+ * (flisthead4 and fragfree4), linked through next.
+ * NOTE(review): src, dst and id presumably identify the datagram
+ * being reassembled and age its expiry -- confirm against
+ * ip4reassemble.
+ */
+struct Fragment4
+{
+       Block*  blist;
+       Fragment4*      next;
+       ulong   src;
+       ulong   dst;
+       ushort  id;
+       ulong   age;
+};
+
+/*
+ * IPv6 counterpart of Fragment4: addresses are IPaddrlen-byte arrays
+ * and id is a uint.  Entries live on IP.flisthead6 and IP.fragfree6.
+ */
+struct Fragment6
+{
+       Block*  blist;
+       Fragment6*      next;
+       uchar   src[IPaddrlen];
+       uchar   dst[IPaddrlen];
+       uint    id;
+       ulong   age;
+};
+
+/*
+ * Per-block fragment bookkeeping, accessed through the BKFG macro,
+ * which overlays it on the base of a Block.
+ * NOTE(review): foff/flen look like fragment offset and length --
+ * confirm against the reassembly code.
+ */
+struct Ipfrag
+{
+       ushort  foff;
+       ushort  flen;
+};
+
+/*
+ * an instance of IP
+ *
+ * Allocated by ip_init and stored in Fs.ip.  Holds the MIB counters,
+ * separate v4 and v6 fragment lists (each with its own QLock, free
+ * list and id counter) and the routing flag set by iprouting().
+ */
+struct IP
+{
+       ulong           stats[Nstats];
+
+       QLock           fraglock4;
+       Fragment4*      flisthead4;
+       Fragment4*      fragfree4;
+       Ref             id4;
+
+       QLock           fraglock6;
+       Fragment6*      flisthead6;
+       Fragment6*      fragfree6;
+       Ref             id6;
+
+       int             iprouting;      /* true if we route like a gateway */
+};
+
+/* printable label for each MIB counter, indexed by the counter's enum value */
+static char *statnames[] =
+{
+[Forwarding]   "Forwarding",
+[DefaultTTL]   "DefaultTTL",
+[InReceives]   "InReceives",
+[InHdrErrors]  "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams]        "ForwDatagrams",
+[InUnknownProtos]      "InUnknownProtos",
+[InDiscards]   "InDiscards",
+[InDelivers]   "InDelivers",
+[OutRequests]  "OutRequests",
+[OutDiscards]  "OutDiscards",
+[OutNoRoutes]  "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds]   "ReasmReqds",
+[ReasmOKs]     "ReasmOKs",
+[ReasmFails]   "ReasmFails",
+[FragOKs]      "FragOKs",
+[FragFails]    "FragFails",
+[FragCreates]  "FragCreates",
+};
+
+/* view the data at a block's read pointer as an IPv4 header */
+#define BLKIP(xp)      ((Ip4hdr*)((xp)->rp))
+/*
+ * This sleazy macro relies on the media header size being
+ * larger than sizeof(Ipfrag). ipreassemble checks this is true
+ *
+ * It overlays an Ipfrag on the very start (base) of the block.
+ */
+#define BKFG(xp)       ((Ipfrag*)((xp)->base))
+
+/* forward declarations for the header checksum and the v4 fragment routines */
+ushort         ipcsum(uchar*);
+Block*         ip4reassemble(IP*, int, Block*, Ip4hdr*);
+void           ipfragfree4(IP*, Fragment4*);
+Fragment4*     ipfragallo4(IP*);
+
+
+/*
+ * Allocate the IPv6 parameter block for f and load its defaults:
+ * the router parameters (rp), the host retransmission timer (hp)
+ * and a cdrouter of -1.  The block is attached to f as f->v6p.
+ */
+void
+ip_init_6(Fs *f)
+{
+       V6params *params;
+
+       params = smalloc(sizeof(*params));
+
+       /* router parameters: flags */
+       params->rp.mflag = 0;                   // default not managed
+       params->rp.oflag = 0;
+
+       /* router parameters: advertisement interval, in millisecs */
+       params->rp.maxraint = 600000;
+       params->rp.minraint = 200000;
+       params->rp.routerlt = 3*(params->rp.maxraint);
+
+       /* router parameters: remaining fields */
+       params->rp.linkmtu = 0;                 // no mtu sent
+       params->rp.reachtime = 0;
+       params->rp.rxmitra = 0;
+       params->rp.ttl = MAXTTL;
+
+       /* host parameters */
+       params->hp.rxmithost = 1000;            // v6 RETRANS_TIMER
+
+       params->cdrouter = -1;
+
+       f->v6p = params;
+}
+
+/*
+ * Allocate size Fragment4 and size Fragment6 entries for ip and
+ * chain each array into a nil-terminated free list (ip->fragfree4
+ * and ip->fragfree6).  Panics if size is less than one or if either
+ * allocation fails.
+ */
+void
+initfrag(IP *ip, int size)
+{
+       int i;
+
+       /* the last entry is addressed as [size-1] below, so size must be positive */
+       if(size < 1)
+               panic("initfrag");
+
+       ip->fragfree4 = (Fragment4*)malloc(sizeof(Fragment4) * size);
+       if(ip->fragfree4 == nil)
+               panic("initfrag");
+
+       /* link every entry to its successor; the last one ends the list */
+       for(i = 0; i < size-1; i++)
+               ip->fragfree4[i].next = &ip->fragfree4[i+1];
+       ip->fragfree4[size-1].next = nil;
+
+       ip->fragfree6 = (Fragment6*)malloc(sizeof(Fragment6) * size);
+       if(ip->fragfree6 == nil)
+               panic("initfrag");
+
+       for(i = 0; i < size-1; i++)
+               ip->fragfree6[i].next = &ip->fragfree6[i+1];
+       ip->fragfree6[size-1].next = nil;
+}
+
+/*
+ * Create the IP instance for f: allocate it, give it fragment free
+ * lists of 100 entries each, attach it as f->ip, then set up the
+ * IPv6 parameters with ip_init_6().
+ */
+void
+ip_init(Fs *f)
+{
+       enum { Nfrag = 100 };   /* entries per fragment free list */
+       IP *inst;
+
+       inst = smalloc(sizeof(*inst));
+       initfrag(inst, Nfrag);
+       f->ip = inst;
+
+       ip_init_6(f);
+}
+
+/*
+ * Record in f's IP instance whether it routes like a gateway, and
+ * mirror the setting in the Forwarding counter: 1 when on is
+ * non-zero, 2 when it is zero.
+ */
+void
+iprouting(Fs *f, int on)
+{
+       IP *inst;
+
+       inst = f->ip;
+       inst->iprouting = on;
+       inst->stats[Forwarding] = (on != 0) ? 1 : 2;
+}
+/*
+ *  ipoput4 - send an IPv4 datagram, fragmenting it when it does not fit
+ *  the outgoing interface.
+ *
+ *  bp holds the datagram starting at its Ip4hdr.  With gating == 0 the
+ *  version/header-length byte, tos, id and fragment field are filled in
+ *  here; with gating != 0 (ipiput4 forwards this way) they are kept from
+ *  the header already in bp and only ttl is overwritten.  c is passed
+ *  to v4lookup for the route lookup.
+ *
+ *  Returns -1 only when no route is found; every other path, including
+ *  the discards, returns 0.  On the normal paths bp is either handed to
+ *  the medium's bwrite or released with freeblist.
+ */
+int
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+       Ipifc *ifc;
+       uchar *gate;
+       ulong fragoff;
+       Block *xp, *nb;
+       Ip4hdr *eh, *feh;
+       int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
+       Route *r, *sr;
+       IP *ip;
+       int rv = 0;
+
+       ip = f->ip;
+
+       /* Fill out the ip header */
+       eh = (Ip4hdr*)(bp->rp);
+
+       ip->stats[OutRequests]++;
+
+       /* Number of uchars in data and ip header to write */
+       len = blocklen(bp);
+       /* forwarded packets: drop if the header claims more than we hold, trim if it claims less */
+       if(gating){
+               chunk = nhgets(eh->length);
+               if(chunk > len){
+                       ip->stats[OutDiscards]++;
+                       netlog(f, Logip, "short gated packet\n");
+                       goto free;
+               }
+               if(chunk < len)
+                       len = chunk;
+       }
+       if(len >= IP_MAX){
+               ip->stats[OutDiscards]++;
+               netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+               goto free;
+       }
+
+       r = v4lookup(f, eh->dst, c);
+       if(r == nil){
+               ip->stats[OutNoRoutes]++;
+               netlog(f, Logip, "no interface %V\n", eh->dst);
+               rv = -1;
+               goto free;
+       }
+       /*
+        *  Choose interface and next hop: interface and unicast-self
+        *  routes go straight to the destination; broadcast/multicast
+        *  also do, but leave through the interface owning the source
+        *  address when that address has a unicast-self route; anything
+        *  else is sent to the route's gateway.
+        */
+       ifc = r->ifc;
+       if(r->type & (Rifc|Runi))
+               gate = eh->dst;
+       else
+       if(r->type & (Rbcast|Rmulti)) {
+               gate = eh->dst;
+               sr = v4lookup(f, eh->src, nil);
+               if(sr != nil && (sr->type & Runi))
+                       ifc = sr->ifc;
+       }
+       else
+               gate = r->v4.gate;
+
+       if(!gating)
+               eh->vihl = IP_VER4|IP_HLEN4;
+       eh->ttl = ttl;
+       if(!gating)
+               eh->tos = tos;
+       /* give up (rv stays 0) if the interface cannot be read-locked */
+       if(!canrlock(ifc))
+               goto free;
+       if(waserror()){         /* error raised below: drop the lock, propagate */
+               runlock(ifc);
+               nexterror();
+       }
+       if(ifc->m == nil)       /* interface has no medium attached */
+               goto raise;
+
+       /* If we dont need to fragment just send it */
+       medialen = ifc->maxtu - ifc->m->hsize;  /* room left after the medium header */
+       if(len <= medialen) {
+               if(!gating)
+                       hnputs(eh->id, incref(&ip->id4));
+               hnputs(eh->length, len);
+               if(!gating){
+                       eh->frag[0] = 0;
+                       eh->frag[1] = 0;
+               }
+               eh->cksum[0] = 0;
+               eh->cksum[1] = 0;
+               hnputs(eh->cksum, ipcsum(&eh->vihl));
+               ifc->m->bwrite(ifc, bp, V4, gate);
+               runlock(ifc);
+               poperror();
+               return 0;
+       }
+
+if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
+       /* DF is set: count the failure, report it through icmpcantfrag and drop */
+       if(eh->frag[0] & (IP_DF>>8)){
+               ip->stats[FragFails]++;
+               ip->stats[OutDiscards]++;
+               icmpcantfrag(f, bp, medialen);
+               netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+               goto raise;
+       }
+       /* payload bytes per fragment, rounded down to a multiple of 8 since offsets are stored as fragoff>>3 */
+       seglen = (medialen - IP4HDR) & ~7;
+       if(seglen < 8){
+               ip->stats[FragFails]++;
+               ip->stats[OutDiscards]++;
+               netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+               goto raise;
+       }
+
+       dlen = len - IP4HDR;
+       xp = bp;
+       if(gating)
+               lid = nhgets(eh->id);
+       else
+               lid = incref(&ip->id4);
+       /* step xp/rp past the original header so xp->rp is the first payload byte */
+       offset = IP4HDR;
+       while(xp != nil && offset && offset >= BLEN(xp)) {
+               offset -= BLEN(xp);
+               xp = xp->next;
+       }
+       xp->rp += offset;
+       /*
+        *  A forwarded packet may itself be a fragment: start from the
+        *  offset it already carries.  NOTE(review): the flag bits of
+        *  eh->frag are not masked off before the shift - confirm that
+        *  this is intended.
+        */
+       if(gating)
+               fragoff = nhgets(eh->frag)<<3;
+       else
+               fragoff = 0;
+       dlen += fragoff;        /* dlen is now the end offset of the data */
+       for(; fragoff < dlen; fragoff += seglen) {
+               nb = allocb(IP4HDR+seglen);
+               feh = (Ip4hdr*)(nb->rp);
+
+               memmove(nb->wp, eh, IP4HDR);
+               nb->wp += IP4HDR;
+               /* final piece: shrink seglen to what is left and do not add IP_MF */
+               if((fragoff + seglen) >= dlen) {
+                       seglen = dlen - fragoff;
+                       hnputs(feh->frag, fragoff>>3);
+               }
+               else
+                       hnputs(feh->frag, (fragoff>>3)|IP_MF);
+
+               hnputs(feh->length, seglen + IP4HDR);
+               hnputs(feh->id, lid);
+
+               /* Copy up the data area */
+               chunk = seglen;
+               while(chunk) {
+                       if(!xp) {       /* source chain ran out before seglen bytes were copied */
+                               ip->stats[OutDiscards]++;
+                               ip->stats[FragFails]++;
+                               freeblist(nb);
+                               netlog(f, Logip, "!xp: chunk %d\n", chunk);
+                               goto raise;
+                       }
+                       blklen = chunk;
+                       if(BLEN(xp) < chunk)
+                               blklen = BLEN(xp);
+                       memmove(nb->wp, xp->rp, blklen);
+                       nb->wp += blklen;
+                       xp->rp += blklen;
+                       chunk -= blklen;
+                       if(xp->rp == xp->wp)
+                               xp = xp->next;
+               }
+               /* the header checksum is computed with the cksum field zeroed */
+               feh->cksum[0] = 0;
+               feh->cksum[1] = 0;
+               hnputs(feh->cksum, ipcsum(&feh->vihl));
+               ifc->m->bwrite(ifc, nb, V4, gate);
+               ip->stats[FragCreates]++;
+       }
+       ip->stats[FragOKs]++;
+raise:                         /* reached with the interface read-locked */
+       runlock(ifc);
+       poperror();
+free:                          /* common exit: the original bp is released here */
+       freeblist(bp);
+       return rv;
+}
+
+/*
+ *  ipiput4 - input routine for IP datagrams arriving on ifc.
+ *
+ *  Blocks that are not IPv4 are handed to ipiput6.  For IPv4 the header
+ *  checksum is verified (unless the block is flagged Bipck), options are
+ *  stripped from packets addressed to this host, and the datagram is
+ *  then either forwarded through ipoput4 (not for us, routing enabled)
+ *  or reassembled if necessary and passed to the rcv routine of the
+ *  protocol named in the header.
+ *
+ *  Ownership of bp passes to this routine; the caller must not use it
+ *  afterwards.  Every drop path releases the whole block list with
+ *  freeblist.
+ */
+void
+ipiput4(Fs *f, Ipifc *ifc, Block *bp)
+{
+       int hl;
+       int hop, tos, proto, olen;
+       Ip4hdr *h;
+       Proto *p;
+       ushort frag;
+       int notforme;
+       uchar *dp, v6dst[IPaddrlen];
+       IP *ip;
+       Route *r;
+
+       if(BLKIPVER(bp) != IP_VER4) {
+               ipiput6(f, ifc, bp);
+               return;
+       }
+
+       ip = f->ip;
+       ip->stats[InReceives]++;
+
+       /*
+        *  Ensure we have all the header info in the first
+        *  block.  Make life easier for other protocols by
+        *  collecting up to the first 64 bytes in the first block.
+        */
+       if(BLEN(bp) < 64) {
+               hl = blocklen(bp);
+               if(hl < IP4HDR)
+                       hl = IP4HDR;
+               if(hl > 64)
+                       hl = 64;
+               bp = pullupblock(bp, hl);
+               if(bp == nil)
+                       return;
+       }
+
+       h = (Ip4hdr*)(bp->rp);
+
+       /* dump anything whose header doesn't checksum */
+       if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
+               ip->stats[InHdrErrors]++;
+               netlog(f, Logip, "ip: checksum error %V\n", h->src);
+               freeblist(bp);
+               return;
+       }
+       v4tov6(v6dst, h->dst);
+       notforme = ipforme(f, v6dst) == 0;
+
+       /* Check header length and version */
+       if((h->vihl&0x0F) != IP_HLEN4) {
+               hl = (h->vihl&0xF)<<2;
+               if(hl < (IP_HLEN4<<2)) {
+                       ip->stats[InHdrErrors]++;
+                       netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
+                       freeblist(bp);
+                       return;
+               }
+         /* If this is not routed strip off the options */
+               if(notforme == 0) {
+                       /* slide the fixed header up against the payload and fix the length */
+                       olen = nhgets(h->length);
+                       dp = bp->rp + (hl - (IP_HLEN4<<2));
+                       memmove(dp, h, IP_HLEN4<<2);
+                       bp->rp = dp;
+                       h = (Ip4hdr*)(bp->rp);
+                       h->vihl = (IP_VER4|IP_HLEN4);
+                       hnputs(h->length, olen-hl+(IP_HLEN4<<2));
+               }
+       }
+
+       /* route */
+       if(notforme) {
+               Conv conv;
+
+               if(!ip->iprouting){
+                       /*
+                        *  bp may be a chain of blocks; release all of
+                        *  them, as every other drop path here does
+                        *  (freeb would leave the rest of the chain).
+                        */
+                       freeblist(bp);
+                       return;
+               }
+
+               /* don't forward to source's network */
+               conv.r = nil;   /* stack Conv used only as v4lookup's route cache */
+               r = v4lookup(f, h->dst, &conv);
+               if(r == nil || r->ifc == ifc){
+                       ip->stats[OutDiscards]++;
+                       freeblist(bp);
+                       return;
+               }
+
+               /* don't forward if packet has timed out */
+               hop = h->ttl;
+               if(hop < 1) {
+                       ip->stats[InHdrErrors]++;
+                       icmpttlexceeded(f, ifc->lifc->local, bp);
+                       freeblist(bp);
+                       return;
+               }
+
+               /* reassemble if the interface expects it */
+if(r->ifc == nil) panic("nil route rfc");
+               if(r->ifc->reassemble){
+                       frag = nhgets(h->frag);
+                       if(frag) {
+                               /* tos doubles as the "more fragments" flag read by ip4reassemble */
+                               h->tos = 0;
+                               if(frag & IP_MF)
+                                       h->tos = 1;
+                               bp = ip4reassemble(ip, frag, bp, h);
+                               if(bp == nil)
+                                       return;
+                               h = (Ip4hdr*)(bp->rp);
+                       }
+               }
+
+               ip->stats[ForwDatagrams]++;
+               tos = h->tos;
+               hop = h->ttl;
+               ipoput4(f, bp, 1, hop - 1, tos, &conv);
+               return;
+       }
+
+       frag = nhgets(h->frag);
+       if(frag) {
+               /* tos doubles as the "more fragments" flag read by ip4reassemble */
+               h->tos = 0;
+               if(frag & IP_MF)
+                       h->tos = 1;
+               bp = ip4reassemble(ip, frag, bp, h);
+               if(bp == nil)
+                       return;
+               h = (Ip4hdr*)(bp->rp);
+       }
+
+       /* don't let any frag info go up the stack */
+       h->frag[0] = 0;
+       h->frag[1] = 0;
+
+       proto = h->proto;
+       p = Fsrcvpcol(f, proto);
+       if(p != nil && p->rcv != nil) {
+               ip->stats[InDelivers]++;
+               (*p->rcv)(p, ifc, bp);
+               return;
+       }
+       ip->stats[InDiscards]++;
+       ip->stats[InUnknownProtos]++;
+       freeblist(bp);
+}
+
+/*
+ *  ipstats - format the IP statistics of stack f into buf.
+ *
+ *  Emits one "name: value" line per counter in statnames order; seprint
+ *  is given buf+len as its limit.  DefaultTTL is set to MAXTTL first.
+ *  Returns the distance from buf to the pointer seprint last returned.
+ */
+int
+ipstats(Fs *f, char *buf, int len)
+{
+       IP *ip;
+       char *cur, *lim;
+       int n;
+
+       ip = f->ip;
+       ip->stats[DefaultTTL] = MAXTTL;
+
+       lim = buf + len;
+       cur = buf;
+       n = 0;
+       while(n < Nstats){
+               cur = seprint(cur, lim, "%s: %lud\n", statnames[n], ip->stats[n]);
+               n++;
+       }
+       return cur - buf;
+}
+/*
+ *  ip4reassemble - add fragment bp to its reassembly queue.
+ *
+ *  offset is the raw 16-bit fragment field of the header (flags and
+ *  offset) and ih the header at bp->rp.  The callers in this file set
+ *  ih->tos to 1 when IP_MF is present and to 0 otherwise before the
+ *  call; it is read here as the "more fragments" indicator.
+ *
+ *  Returns bp itself when it is not a fragment, the complete datagram
+ *  (a block list, length field rewritten) once the pieces from offset 0
+ *  up to one without IP_MF are all queued, and nil otherwise: either
+ *  the fragment was queued or it was freed as fully overlapped.
+ *  ip->fraglock4 is held while the queues are examined and changed.
+ */
+Block*
+ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+{
+       int fend;
+       ushort id;
+       Fragment4 *f, *fnext;
+       ulong src, dst;
+       Block *bl, **l, *last, *prev;
+       int ovlap, len, fragsize, pktposn;
+
+       src = nhgetl(ih->src);
+       dst = nhgetl(ih->dst);
+       id = nhgets(ih->id);
+
+       /*
+        *  block lists are too hard, pullupblock into a single block
+        */
+       if(bp->next){
+               bp = pullupblock(bp, blocklen(bp));
+               ih = (Ip4hdr*)(bp->rp);
+       }
+
+       qlock(&ip->fraglock4);
+
+       /*
+        *  find a reassembly queue for this fragment
+        */
+       for(f = ip->flisthead4; f; f = fnext){
+               fnext = f->next;        /* because ipfragfree4 changes the list */
+               if(f->src == src && f->dst == dst && f->id == id)
+                       break;
+               if(f->age < NOW){       /* expired queue met during the search */
+                       ip->stats[ReasmTimeout]++;
+                       ipfragfree4(ip, f);
+               }
+       }
+
+       /*
+        *  if this isn't a fragmented packet, accept it
+        *  and get rid of any fragments that might go
+        *  with it.
+        */
+       if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+               if(f != nil) {
+                       ipfragfree4(ip, f);
+                       ip->stats[ReasmFails]++;
+               }
+               qunlock(&ip->fraglock4);
+               return bp;
+       }
+       /* make room in front of rp for an Ipfrag; NOTE(review): presumably where BKFG() keeps its record - confirm */
+       if(bp->base+sizeof(Ipfrag) >= bp->rp){
+               bp = padblock(bp, sizeof(Ipfrag));
+               bp->rp += sizeof(Ipfrag);
+       }
+       /* NOTE(review): offset still includes the flag bits when shifted; confirm foff is narrow enough to drop them */
+       BKFG(bp)->foff = offset<<3;
+       BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+
+       /* First fragment allocates a reassembly queue */
+       if(f == nil) {
+               f = ipfragallo4(ip);
+               f->id = id;
+               f->src = src;
+               f->dst = dst;
+
+               f->blist = bp;
+
+               qunlock(&ip->fraglock4);
+               ip->stats[ReasmReqds]++;
+               return nil;
+       }
+
+       /*
+        *  find the new fragment's position in the queue
+        *  (the queue is kept sorted by foff)
+        */
+       prev = nil;
+       l = &f->blist;
+       bl = f->blist;
+       while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+               prev = bl;
+               l = &bl->next;
+               bl = bl->next;
+       }
+
+       /* Check overlap of a previous fragment - trim away as necessary */
+       if(prev) {
+               ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+               if(ovlap > 0) {
+                       if(ovlap >= BKFG(bp)->flen) {   /* new fragment adds nothing */
+                               freeblist(bp);
+                               qunlock(&ip->fraglock4);
+                               return nil;
+                       }
+                       BKFG(prev)->flen -= ovlap;
+               }
+       }
+
+       /* Link onto assembly queue */
+       bp->next = *l;
+       *l = bp;
+
+       /* Check to see if succeeding segments overlap */
+       if(bp->next) {
+               l = &bp->next;
+               fend = BKFG(bp)->foff + BKFG(bp)->flen;
+               /* Take completely covered segments out */
+               while(*l) {
+                       ovlap = fend - BKFG(*l)->foff;
+                       if(ovlap <= 0)
+                               break;
+                       if(ovlap < BKFG(*l)->flen) {    /* partly covered: trim its front */
+                               BKFG(*l)->flen -= ovlap;
+                               BKFG(*l)->foff += ovlap;
+                               /* move up ih hdrs */
+                               memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
+                               (*l)->rp += ovlap;
+                               break;
+                       }
+                       last = (*l)->next;
+                       (*l)->next = nil;
+                       freeblist(*l);
+                       *l = last;
+               }
+       }
+
+       /*
+        *  look for a complete packet.  if we get to a fragment
+        *  without IP_MF set, we're done.
+        */
+       pktposn = 0;
+       for(bl = f->blist; bl; bl = bl->next) {
+               if(BKFG(bl)->foff != pktposn)   /* gap: still waiting for data */
+                       break;
+               if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
+                       bl = f->blist;
+                       len = nhgets(BLKIP(bl)->length);
+                       bl->wp = bl->rp + len;  /* first block keeps its header and declared length */
+
+                       /* Pullup all the fragment headers and
+                        * return a complete packet
+                        */
+                       for(bl = bl->next; bl; bl = bl->next) {
+                               fragsize = BKFG(bl)->flen;
+                               len += fragsize;
+                               bl->rp += IP4HDR;
+                               bl->wp = bl->rp + fragsize;
+                       }
+
+                       bl = f->blist;
+                       f->blist = nil;         /* so ipfragfree4 does not free the blocks being returned */
+                       ipfragfree4(ip, f);
+                       ih = BLKIP(bl);
+                       hnputs(ih->length, len);
+                       qunlock(&ip->fraglock4);
+                       ip->stats[ReasmOKs]++;
+                       return bl;
+               }
+               pktposn += BKFG(bl)->flen;
+       }
+       qunlock(&ip->fraglock4);
+       return nil;
+}
+
+/*
+ * ipfragfree4 - release a reassembly queue together with any blocks
+ * still on it and return it to the free list; the caller is assumed to
+ * hold fraglock4
+ */
+void
+ipfragfree4(IP *ip, Fragment4 *frag)
+{
+       Fragment4 **link;
+
+       /* drop whatever fragments were collected so far */
+       if(frag->blist != nil)
+               freeblist(frag->blist);
+       frag->blist = nil;
+       frag->id = 0;
+       frag->src = 0;
+
+       /* unlink from the active list, if it is on it */
+       for(link = &ip->flisthead4; *link != nil; link = &(*link)->next) {
+               if(*link == frag) {
+                       *link = frag->next;
+                       break;
+               }
+       }
+
+       /* push onto the free list */
+       frag->next = ip->fragfree4;
+       ip->fragfree4 = frag;
+}
+
+/*
+ * ipfragallo4 - take a reassembly queue from the free list and put it
+ * at the head of the active list; the caller is assumed to hold
+ * fraglock4.  While the free list is empty, the entry at the tail of
+ * the active list is freed to make one available.  The age of the
+ * returned queue is set to NOW + 30000.
+ */
+Fragment4 *
+ipfragallo4(IP *ip)
+{
+       Fragment4 *q, *tail;
+
+       while(ip->fragfree4 == nil) {
+               /* walk to the last entry of the active list and recycle it */
+               tail = ip->flisthead4;
+               while(tail->next != nil)
+                       tail = tail->next;
+               ipfragfree4(ip, tail);
+       }
+
+       q = ip->fragfree4;
+       ip->fragfree4 = q->next;
+
+       q->next = ip->flisthead4;
+       ip->flisthead4 = q;
+       q->age = NOW + 30000;
+
+       return q;
+}
+
+/*
+ *  ipcsum - checksum of an IPv4 header.
+ *
+ *  addr points at the first header byte; its low nibble gives the
+ *  header length in 4-byte units.  The header is summed as big-endian
+ *  16-bit words, the carries are folded back in twice, and the low 16
+ *  bits are returned complemented.  ipiput4 treats a non-zero result
+ *  over a received header as a checksum error.
+ */
+ushort
+ipcsum(uchar *addr)
+{
+       int i, nbytes;
+       ulong acc;
+
+       nbytes = (addr[0]&0xf)<<2;
+
+       acc = 0;
+       for(i = 0; i < nbytes; i += 2)
+               acc += addr[i]<<8 | addr[i+1];
+
+       /* fold twice: the first fold can itself carry out of 16 bits */
+       acc = (acc >> 16) + (acc & 0xffff);
+       acc = (acc >> 16) + (acc & 0xffff);
+
+       return (acc^0xffff);
+}
diff --git a/kern/net/ip.h b/kern/net/ip.h
new file mode 100644 (file)
index 0000000..5d50416
--- /dev/null
@@ -0,0 +1,672 @@
+typedef struct Conv    Conv;
+typedef struct Fs      Fs;
+typedef union  Hwaddr  Hwaddr;
+typedef struct IP      IP;
+typedef struct IPaux   IPaux;
+typedef struct Ipself  Ipself;
+typedef struct Ipselftab       Ipselftab;
+typedef struct Iplink  Iplink;
+typedef struct Iplifc  Iplifc;
+typedef struct Ipmulti Ipmulti;
+typedef struct IProuter IProuter;
+typedef struct Ipifc   Ipifc;
+typedef struct Iphash  Iphash;
+typedef struct Ipht    Ipht;
+typedef struct Netlog  Netlog;
+typedef struct Ifclog  Ifclog;
+typedef struct Medium  Medium;
+typedef struct Proto   Proto;
+typedef struct Arpent  Arpent;
+typedef struct Arp Arp;
+typedef struct Route   Route;
+/* names for the v6 router/host parameter structures defined further down */
+typedef struct Routerparams    Routerparams;
+typedef struct         Hostparams      Hostparams;
+typedef struct         V6router        V6router;
+typedef struct V6params        V6params;
+/* NOTE(review): Plan 9 compiler pragma, presumably marking types left opaque to includers - confirm how this toolchain treats it */
+#pragma incomplete Arp
+#pragma        incomplete Ifclog
+#pragma incomplete Ipself
+#pragma incomplete Ipselftab
+#pragma incomplete IP
+#pragma incomplete Netlog
+/* sizes and constants shared by the IP stack */
+enum
+{
+       Addrlen=        64,
+       Maxproto=       20,             /* Fs.p has Maxproto+1 slots */
+       Nhash=          64,
+       Maxincall=      5,
+       Nchans=         256,
+       MAClen=         16,             /* longest mac address */
+
+       MAXTTL=         255,            /* reported as DefaultTTL by ipstats */
+       DFLTTOS=        0,
+
+       IPaddrlen=      16,             /* addresses are held in 16-byte form; ipiput4 converts with v4tov6 */
+       IPv4addrlen=    4,
+       IPv4off=        12,
+       IPllen=         4,              /* ulongs per address in V6route */
+
+       /* ip versions */
+       V4=             4,              /* passed to Medium.bwrite as the version argument */
+       V6=             6,
+       IP_VER4=        0x40,           /* as stored in Ip4hdr.vihl: ipoput4 writes IP_VER4|IP_HLEN4 */
+       IP_VER6=        0x60,
+
+       /* 2^Lroot trees in the root table */
+       Lroot=          10,
+
+       Maxpath =       64,
+};
+/* NOTE(review): presumably the values taken by Conv.state - the users are outside this header, confirm */
+enum
+{
+       Idle=           0,
+       Announcing=     1,
+       Announced=      2,
+       Connecting=     3,
+       Connected=      4,
+};
+
+/*
+ *  one per conversation directory
+ */
+struct Conv
+{
+       QLock;                          /* unnamed lock member (Plan 9 C extension) */
+
+       int     x;                      /* conversation index */
+       Proto*  p;
+
+       int     restricted;             /* remote port is restricted */
+       uint    ttl;                    /* max time to live */
+       uint    tos;                    /* type of service */
+       int     ignoreadvice;           /* don't terminate connection on icmp errors */
+
+       uchar   ipversion;
+       uchar   laddr[IPaddrlen];       /* local IP address */
+       uchar   raddr[IPaddrlen];       /* remote IP address */
+       ushort  lport;                  /* local port number */
+       ushort  rport;                  /* remote port number */
+
+       char    *owner;                 /* protections */
+       int     perm;
+       int     inuse;                  /* opens of listen/data/ctl */
+       int     length;
+       int     state;                  /* NOTE(review): presumably Idle..Connected from the enum above - confirm */
+
+       /* udp specific */
+       int     headers;                /* data src/dst headers in udp */
+       int     reliable;               /* true if reliable udp */
+
+       Conv*   incall;                 /* calls waiting to be listened for */
+       Conv*   next;
+
+       Queue*  rq;                     /* queued data waiting to be read */
+       Queue*  wq;                     /* queued data waiting to be written */
+       Queue*  eq;                     /* returned error packets */
+       Queue*  sq;                     /* snooping queue */
+       Ref     snoopers;               /* number of processes with snoop open */
+
+       Rendez  cr;
+       char    cerr[ERRMAX];
+
+       QLock   listenq;
+       Rendez  listenr;
+
+       Ipmulti *multi;                 /* multicast bindings for this interface */
+
+       void*   ptcl;                   /* protocol specific stuff */
+
+       Route   *r;                     /* last route used; ipiput4 clears it in a stack Conv before v4lookup */
+       ulong   rgen;                   /* routetable generation for *r */
+};
+/*
+ *  Parameters and operations of one kind of medium.  An Ipifc refers
+ *  to its Medium through Ipifc.m; ipoput4 subtracts hsize from the
+ *  interface's maxtu to get the room left for a datagram and transmits
+ *  by calling bwrite(ifc, block, V4, next-hop address).
+ */
+struct Medium
+{
+       char    *name;
+       int     hsize;          /* medium header size */
+       int     mintu;          /* default min mtu */
+       int     maxtu;          /* default max mtu */
+       int     maclen;         /* mac address length  */
+       void    (*bind)(Ipifc*, int, char**);
+       void    (*unbind)(Ipifc*);
+       void    (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip);
+
+       /* for arming interfaces to receive multicast */
+       void    (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+       void    (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+       /* process packets written to 'data' */
+       void    (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
+
+       /* routes for router boards */
+       void    (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+       void    (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
+       void    (*flushroutes)(Ipifc *ifc);
+
+       /* for routing multicast groups */
+       void    (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+       void    (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+       /* address resolution */
+       void    (*ares)(Fs*, int, uchar*, uchar*, int, int);    /* resolve */
+       void    (*areg)(Ipifc*, uchar*);                        /* register */
+
+       /* v6 address generation */
+       void    (*pref2addr)(uchar *pref, uchar *ea);
+
+       int     unbindonclose;  /* if non-zero, unbind on last close */
+};
+
+/* logical interface associated with a physical one */
+struct Iplifc
+{
+       uchar   local[IPaddrlen];       /* ipiput4 passes the first lifc's local to icmpttlexceeded */
+       uchar   mask[IPaddrlen];
+       uchar   remote[IPaddrlen];
+       uchar   net[IPaddrlen];
+       uchar   tentative;      /* =1 => v6 dup disc on, =0 => confirmed unique */
+       uchar   onlink;         /* =1 => onlink, =0 offlink. */
+       uchar   autoflag;       /* v6 autonomous flag */
+       long    validlt;        /* v6 valid lifetime */
+       long    preflt;         /* v6 preferred lifetime */
+       long    origint;        /* time when addr was added */
+       Iplink  *link;          /* addresses linked to this lifc */
+       Iplifc  *next;          /* NOTE(review): presumably the next lifc on the same Ipifc (head is Ipifc.lifc) - confirm */
+};
+
+/* binding between an Ipself (a local address) and an Iplifc (a logical interface) */
+struct Iplink
+{
+       Ipself  *self;
+       Iplifc  *lifc;
+       Iplink  *selflink;      /* next link for this local address */
+       Iplink  *lifclink;      /* next link for this ifc */
+       ulong   expire;         /* NOTE(review): time base not visible in this header */
+       Iplink  *next;          /* free list */
+       int     ref;
+};
+
+/* rfc 2461, pp.40--43. */
+
+/* default values, one per stack */
+struct Routerparams {
+       int     mflag;          /* initialized to 0 in ip.c: not managed */
+       int     oflag;          /* initialized to 0 in ip.c */
+       int     maxraint;       /* milliseconds; initialized to 600000 in ip.c */
+       int     minraint;       /* initialized to 200000 in ip.c */
+       int     linkmtu;        /* 0 means no mtu is sent */
+       int     reachtime;      /* initialized to 0 in ip.c */
+       int     rxmitra;        /* initialized to 0 in ip.c */
+       int     ttl;            /* initialized to MAXTTL in ip.c */
+       int     routerlt;       /* initialized to 3*maxraint in ip.c */
+};
+
+struct Hostparams {
+       int     rxmithost;      /* v6 RETRANS_TIMER; initialized to 1000 in ip.c */
+};
+/* one per physical interface; its logical interfaces hang off lifc */
+struct Ipifc
+{
+       RWlock;                 /* unnamed member: ipoput4 read-locks the Ipifc itself with canrlock(ifc) */
+       
+       Conv    *conv;          /* link to its conversation structure */
+       char    dev[64];        /* device we're attached to */
+       Medium  *m;             /* Media pointer; ipoput4 drops the packet when this is nil */
+       int     maxtu;          /* Maximum transfer unit */
+       int     mintu;          /* Minimum transfer unit */
+       int     mbps;           /* megabits per second */
+       void    *arg;           /* medium specific */
+       int     reassemble;     /* reassemble IP packets before forwarding */
+
+       /* these are used so that we can unbind on the fly */
+       Lock    idlock;
+       uchar   ifcid;          /* incremented each 'bind/unbind/add/remove' */
+       int     ref;            /* number of proc's using this ipifc */
+       Rendez  wait;           /* where unbinder waits for ref == 0 */
+       int     unbinding;
+
+       uchar   mac[MAClen];    /* MAC address */
+
+       Iplifc  *lifc;          /* logical interfaces on this physical one */
+
+       ulong   in, out;        /* message statistics */
+       ulong   inerr, outerr;  /* ... */
+
+       uchar   sendra6;        /* == 1 => send router advs on this ifc */
+       uchar   recvra6;        /* == 1 => recv router advs on this ifc */
+       Routerparams rp;        /* router parameters as in RFC 2461, pp.40--43. 
+                                       used only if node is router */
+};
+
+/*
+ *  one per multicast-lifc pair used by a Conv
+ */
+struct Ipmulti
+{
+       uchar   ma[IPaddrlen];  /* NOTE(review): presumably the multicast address - confirm */
+       uchar   ia[IPaddrlen];  /* NOTE(review): presumably the interface address it is bound on - confirm */
+       Ipmulti *next;
+};
+
+/*
+ *  hash table for 2 ip addresses + 2 ports
+ *  (Nipht is the number of buckets in Ipht.tab; IPmatchany through
+ *  IPmatchpa take the implicit values 1 through 4)
+ */
+enum
+{
+       Nipht=          521,    /* convenient prime */
+
+       IPmatchexact=   0,      /* match on 4 tuple */
+       IPmatchany,             /* *!* */
+       IPmatchport,            /* *!port */
+       IPmatchaddr,            /* addr!* */
+       IPmatchpa,              /* addr!port */
+};
+struct Iphash
+{
+       Iphash  *next;
+       Conv    *c;
+       int     match;          /* NOTE(review): presumably one of the IPmatch* values above - confirm */
+};
+struct Ipht
+{
+       Lock;                   /* unnamed lock member (Plan 9 C extension) */
+       Iphash  *tab[Nipht];    /* one chain of Iphash entries per bucket */
+};
+void iphtadd(Ipht*, Conv*);
+void iphtrem(Ipht*, Conv*);
+Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);  /* NOTE(review): sa/sp and da/dp look like source and destination address/port - confirm */
+
+/*
+ *  one per multiplexed protocol
+ */
+struct Proto
+{
+       QLock;                          /* unnamed lock member (Plan 9 C extension) */
+       char*           name;           /* protocol name */
+       int             x;              /* protocol index */
+       int             ipproto;        /* ip protocol type */
+
+       char*           (*connect)(Conv*, char**, int);
+       char*           (*announce)(Conv*, char**, int);
+       char*           (*bind)(Conv*, char**, int);
+       int             (*state)(Conv*, char*, int);
+       void            (*create)(Conv*);
+       void            (*close)(Conv*);
+       void            (*rcv)(Proto*, Ipifc*, Block*); /* ipiput4 delivers received datagrams here; may be nil */
+       char*           (*ctl)(Conv*, char**, int);
+       void            (*advise)(Proto*, Block*, char*);
+       int             (*stats)(Proto*, char*, int);
+       int             (*local)(Conv*, char*, int);
+       int             (*remote)(Conv*, char*, int);
+       int             (*inuse)(Conv*);
+       int             (*gc)(Proto*);  /* returns true if any conversations are freed */
+
+       Fs              *f;             /* file system this proto is part of */
+       Conv            **conv;         /* array of conversations */
+       int             ptclsize;       /* size of per protocol ctl block */
+       int             nc;             /* number of conversations */
+       int             ac;
+       Qid             qid;            /* qid for protocol directory */
+       ushort          nextport;
+       ushort          nextrport;
+
+       void            *priv;
+};
+
+/*
+ *  Stream for sending packets to user level
+ */
+struct IProuter {
+       QLock;
+       int     opens;
+       Queue   *q;             /* NOTE(review): presumably the queue those packets are placed on - confirm */
+};
+
+/*
+ *  one per IP protocol stack
+ */
+struct Fs
+{
+       RWlock;
+       int     dev;
+
+       int     np;
+       Proto*  p[Maxproto+1];          /* list of supported protocols */
+       Proto*  t2p[256];               /* vector of all protocols */
+       Proto*  ipifc;                  /* kludge for ipifcremroute & ipifcaddroute */
+       Proto*  ipmux;                  /* kludge for finding an ip multiplexor */
+
+       IP      *ip;                    /* set by ip_init; holds stats and reassembly queues */
+       Ipselftab       *self;
+       Arp     *arp;
+       V6params        *v6p;           /* set by the v6 initialization code in ip.c */
+       IProuter iprouter;
+
+       Route   *v4root[1<<Lroot];      /* v4 routing forest */
+       Route   *v6root[1<<Lroot];      /* v6 routing forest */
+       Route   *queue;                 /* used as temp when reinjecting routes */
+
+       Netlog  *alog;
+       Ifclog  *ilog;
+
+       char    ndb[1024];              /* an ndb entry for this interface */
+       int     ndbvers;
+       long    ndbmtime;
+};
+
+/* one per default router known to host */
+struct V6router {
+       uchar   inuse;
+       Ipifc   *ifc;
+       int     ifcid;          /* NOTE(review): presumably compared with ifc->ifcid to detect a rebound interface - confirm */
+       uchar   routeraddr[IPaddrlen];
+       long    ltorigin;
+       Routerparams    rp;
+};
+
+struct V6params
+{
+       Routerparams    rp;             /* v6 params, one copy per node now */
+       Hostparams      hp;
+       V6router        v6rlist[3];     /* max 3 default routers, currently */
+       int             cdrouter;       /* uses only v6rlist[cdrouter] if   */ 
+                                       /* cdrouter >= 0; initialized to -1 in ip.c */
+};
+
+
+int    Fsconnected(Conv*, char*);
+Conv*  Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar);
+int    Fspcolstats(char*, int);
+int    Fsproto(Fs*, Proto*);
+int    Fsbuiltinproto(Fs*, uchar);
+Conv*  Fsprotoclone(Proto*, char*);
+Proto* Fsrcvpcol(Fs*, uchar);  /* ipiput4: protocol number from the header -> Proto, nil if none */
+Proto* Fsrcvpcolx(Fs*, uchar);
+char*  Fsstdconnect(Conv*, char**, int);
+char*  Fsstdannounce(Conv*, char**, int);
+char*  Fsstdbind(Conv*, char**, int);
+ulong  scalednconv(void);
+
+/* 
+ *  logging: one bit per subsystem, passed as the second argument of
+ *  netlog (ip.c uses Logip).  Bit 0 is not assigned.
+ */
+enum
+{
+       Logip=          1<<1,
+       Logtcp=         1<<2,
+       Logfs=          1<<3,
+       Logil=          1<<4,
+       Logicmp=        1<<5,
+       Logudp=         1<<6,
+       Logcompress=    1<<7,
+       Logilmsg=       1<<8,
+       Loggre=         1<<9,
+       Logppp=         1<<10,
+       Logtcprxmt=     1<<11,
+       Logigmp=        1<<12,
+       Logudpmsg=      1<<13,
+       Logipmsg=       1<<14,
+       Logrudp=        1<<15,
+       Logrudpmsg=     1<<16,
+       Logesp=         1<<17,
+       Logtcpwin=      1<<18,
+};
+
+void   netloginit(Fs*);
+void   netlogopen(Fs*);
+void   netlogclose(Fs*);
+void   netlogctl(Fs*, char*, int);
+long   netlogread(Fs*, void*, ulong, long);
+void   netlog(Fs*, int, char*, ...);   /* called as netlog(f, Logip, fmt, ...) in ip.c; the int is a Log* bit */
+void   ifcloginit(Fs*);
+long   ifclogread(Fs*, Chan *,void*, ulong, long);
+void   ifclog(Fs*, uchar *, int);
+void   ifclogopen(Fs*, Chan*);
+void   ifclogclose(Fs*, Chan*);
+
+/*
+ *  iproute.c: routing table types and entry points
+ */
+typedef        struct RouteTree RouteTree;
+typedef struct Routewalk Routewalk;
+typedef struct V4route V4route;
+typedef struct V6route V6route;
+
+enum
+{
+
+       /* type bits (RouteTree.type); ipoput4 tests Rifc|Runi and Rbcast|Rmulti to pick the next hop */
+       Rv4=            (1<<0),         /* this is a version 4 route */
+       Rifc=           (1<<1),         /* this route is a directly connected interface */
+       Rptpt=          (1<<2),         /* this route is a pt to pt interface */
+       Runi=           (1<<3),         /* a unicast self address */
+       Rbcast=         (1<<4),         /* a broadcast self address */
+       Rmulti=         (1<<5),         /* a multicast self address */
+       Rproxy=         (1<<6),         /* this route should be proxied */
+};
+/* state handed to ipwalkroutes; NOTE(review): field meanings are not evident from this header - p/e look like a buffer cursor and limit, confirm */
+struct Routewalk
+{
+       int     o;
+       int     h;
+       char*   p;
+       char*   e;
+       void*   state;
+       void    (*walk)(Route*, Routewalk*);
+};
+
+struct RouteTree
+{
+       Route*  right;
+       Route*  left;
+       Route*  mid;
+       uchar   depth;
+       uchar   type;           /* R* type bits from the enum above */
+       uchar   ifcid;          /* must match ifc->ifcid */
+       Ipifc   *ifc;           /* interface the route leaves by; read by ipoput4 and ipiput4 */
+       char    tag[4];
+       int     ref;
+};
+
+struct V4route
+{
+       ulong   address;        /* NOTE(review): address/endaddress presumably bound the range the route covers - confirm */
+       ulong   endaddress;
+       uchar   gate[IPv4addrlen];      /* next hop ipoput4 uses when the route is not a self or interface route */
+};
+
+struct V6route /* IPv6 arm of the union inside struct Route */
+{
+       ulong   address[IPllen];        /* each bound is an array of IPllen ulongs; NOTE(review): presumably a range as in V4route -- confirm */
+       ulong   endaddress[IPllen];
+       uchar   gate[IPaddrlen];        /* gateway, stored as IPaddrlen bytes */
+};
+
+struct Route   /* one routing entry: the RouteTree fields followed by a v6-or-v4 payload */
+{
+       RouteTree;      /* unnamed member (Plan 9 C extension): RouteTree's fields are reached directly through Route */
+
+       union { /* anonymous union: v6 and v4 share the same storage */
+               V6route v6;
+               V4route v4;
+       };      /* NOTE(review): presumably the Rv4 type bit says which arm is in use -- confirm */
+};
+extern void    v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);    /* each v4 routine here has a v6 twin with an identical parameter list */
+extern void    v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void    v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);  /* NOTE(review): dolock presumably tells the routine to take the route lock itself -- confirm */
+extern void    v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern Route*  v4lookup(Fs *f, uchar *a, Conv *c);     /* returns a Route*; NOTE(review): presumably nil when nothing matches -- confirm */
+extern Route*  v6lookup(Fs *f, uchar *a, Conv *c);
+extern long    routeread(Fs *f, char*, ulong, int);
+extern long    routewrite(Fs *f, Chan*, char*, int);
+extern void    routetype(int, char*);
+extern void    ipwalkroutes(Fs*, Routewalk*); /* takes the Routewalk whose walk member holds the per-route callback */
+extern void    convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+
+/*
+ *  devip.c
+ */
+
+/*
+ *  Hanging off every ip channel's ->aux is the following structure.
+ *  It maintains the state used by devip and iproute.
+ */
+struct IPaux   /* per-channel state stored in an ip channel's ->aux */
+{
+       char    *owner;         /* the user that did the attach */
+       char    tag[4]; /* four bytes, the same size as RouteTree.tag */
+};
+
+extern IPaux*  newipaux(char*, char*); /* returns an IPaux*; NOTE(review): the two char* presumably supply owner and tag -- confirm in devip.c */
+
+/*
+ *  arp.c
+ */
+struct Arpent  /* one ARP table entry pairing an IP address with a MAC address */
+{
+       uchar   ip[IPaddrlen];  /* address stored as IPaddrlen bytes */
+       uchar   mac[MAClen];    /* hardware address stored as MAClen bytes */
+       Medium  *type;                  /* media type */
+       Arpent* hash;   /* NOTE(review): presumably the next entry in the same hash chain -- confirm in arp.c */
+       Block*  hold;   /* NOTE(review): hold/last presumably are head and tail of Blocks queued on this entry -- confirm */
+       Block*  last;
+       uint    ctime;                  /* time entry was created or refreshed */
+       uint    utime;                  /* time entry was last used */
+       uchar   state;
+       Arpent  *nextrxt;               /* re-transmit chain */
+       uint    rtime;                  /* time for next retransmission */
+       uchar   rxtsrem;        /* NOTE(review): presumably the number of retransmissions remaining -- confirm */
+       Ipifc   *ifc;
+       uchar   ifcid;                  /* must match ifc->id */
+};
+
+extern void    arpinit(Fs*);   /* note the mix below: arpinit, arpwrite and arpenter take an Fs*, the rest take an Arp* */
+extern int     arpread(Arp*, char*, ulong, int);
+extern int     arpwrite(Fs*, char*, int);
+extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h); /* returns an Arpent*; pairs with arprelease(), which takes one */
+extern void    arprelease(Arp*, Arpent *a);
+extern Block*  arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);  /* returns a Block*; NOTE(review): presumably the packets that were held on the entry -- confirm */
+extern void    arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+
+/*
+ * ipaux.c
+ */
+
+extern int     myetheraddr(uchar*, char*);
+extern ulong   parseip(uchar*, char*);        /* NOTE(review): presumably parses the char* text into the uchar* address buffer -- confirm */
+extern ulong   parseipmask(uchar*, char*);
+extern char*   v4parseip(uchar*, char*);      /* returns char*, unlike parseip which returns ulong */
+extern void    maskip(uchar *from, uchar *mask, uchar *to);
+extern int     parsemac(uchar *to, char *from, int len);
+extern uchar*  defmask(uchar*);
+extern int     isv4(uchar*);
+extern void    v4tov6(uchar *v6, uchar *v4);  /* destination is the first parameter in both converters */
+extern int     v6tov4(uchar *v4, uchar *v6);  /* returns int where v4tov6 returns void; NOTE(review): presumably a status for addresses with no v4 form -- confirm */
+extern int     eipfmt(Fmt*);   /* NOTE(review): presumably a print-verb handler for addresses -- confirm */
+
+#define        ipmove(x, y) memmove(x, y, IPaddrlen)   /* copy IPaddrlen bytes from y to x */
+#define        ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) ) /* 0 if equal, 1 if different (not ordered like memcmp); checks the last byte before calling memcmp; evaluates x and y twice, so pass no side effects */
+
+extern uchar IPv4bcast[IPaddrlen];     /* declarations only: each of these addresses is IPaddrlen bytes and is defined elsewhere */
+extern uchar IPv4bcastobs[IPaddrlen];
+extern uchar IPv4allsys[IPaddrlen];
+extern uchar IPv4allrouter[IPaddrlen];
+extern uchar IPnoaddr[IPaddrlen];
+extern uchar v4prefix[IPaddrlen];
+extern uchar IPallbits[IPaddrlen];
+
+#define        NOW     TK2MS(MACHP(0)->ticks)  /* ticks field of MACHP(0) passed through TK2MS; NOTE(review): presumably ticks-to-milliseconds -- confirm */
+
+/*
+ *  media
+ */
+extern Medium  ethermedium;    /* declarations only: the Medium objects themselves are defined elsewhere */
+extern Medium  nullmedium;
+extern Medium  pktmedium;
+extern Medium  tripmedium;
+
+/*
+ *  ipifc.c
+ */
+extern Medium* ipfindmedium(char *name);       /* returns a Medium* for a name; NOTE(review): presumably only finds media registered via addipmedium() -- confirm */
+extern void    addipmedium(Medium *med);
+extern int     ipforme(Fs*, uchar *addr);
+extern int     iptentative(Fs*, uchar *addr);
+extern int     ipisbm(uchar *);
+extern int     ipismulticast(uchar *);
+extern Ipifc*  findipifc(Fs*, uchar *remote, int type);
+extern void    findprimaryip(Fs*, uchar*);
+extern void    findlocalip(Fs*, uchar *local, uchar *remote);  /* NOTE(review): presumably writes the chosen address into local -- confirm */
+extern int     ipv4local(Ipifc *ifc, uchar *addr);     /* v4 and v6 variants take the same parameters */
+extern int     ipv6local(Ipifc *ifc, uchar *addr);
+extern int     ipv6anyloca