Adds the e1000, rtl8169, and ethermii drivers
author Barret Rhoden <brho@cs.berkeley.edu>
Fri, 28 Mar 2014 01:27:25 +0000 (18:27 -0700)
committer Barret Rhoden <brho@cs.berkeley.edu>
Sat, 29 Mar 2014 01:17:06 +0000 (18:17 -0700)
Ported to Akaros already, from the old 9namespace tree and
external-devices repo.

kern/drivers/net/Kbuild
kern/drivers/net/ether8169.c [new file with mode: 0644]
kern/drivers/net/etherigbe.c [new file with mode: 0644]
kern/drivers/net/ethermii.c [new file with mode: 0644]
kern/drivers/net/ethermii.h [new file with mode: 0644]

index c3892e1..c90822b 100644 (file)
@@ -1,6 +1,9 @@
 # need at least one obj file to build for Kbuild
 obj-y                                                  += dummy.o
 obj-y                                                  += ether8139.o
+obj-y                                                  += ether8169.o
+obj-y                                                  += ethermii.o
+obj-y                                                  += etherigbe.o
 # Following are from gpxe and need a lot of help.
 #obj-y                                                 += mii.o
 #obj-y                                                 += r8169.o
diff --git a/kern/drivers/net/ether8169.c b/kern/drivers/net/ether8169.c
new file mode 100644 (file)
index 0000000..a995a84
--- /dev/null
@@ -0,0 +1,1239 @@
+/* This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file. */
+
+/*
+ * Realtek RTL8110S/8169S.
+ * Mostly there. There are some magic register values used
+ * which are not described in any datasheet or driver but seem
+ * to be necessary.
+ * No tuning has been done. Only tested on an RTL8110S, there
+ * are slight differences between the chips in the series so some
+ * tweaks may be needed.
+ */
+#include <vfs.h>
+#include <kfs.h>
+#include <slab.h>
+#include <kmalloc.h>
+#include <kref.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <cpio.h>
+#include <pmap.h>
+#include <smp.h>
+#include <arch/pci.h>
+#include <assert.h>
+#include <ip.h>
+#include <ns.h>
+
+#define ilock(x) spin_lock_irqsave(x)
+#define iunlock(x) spin_unlock_irqsave(x)
+
+#include "ethermii.h"
+
+#define HOWMANY(x, y)  (((x)+((y)-1))/(y))
+/*
+ * Register offsets. The csr*r/csr*w accessors add these to ctlr->port.
+ */
+enum {                                 /* registers */
+       Idr0            = 0x00,         /* MAC address */
+       Mar0            = 0x08,         /* Multicast address */
+       Dtccr           = 0x10,         /* Dump Tally Counter Command */
+       Tnpds           = 0x20,         /* Transmit Normal Priority Descriptors */
+       Thpds           = 0x28,         /* Transmit High Priority Descriptors */
+       Flash           = 0x30,         /* Flash Memory Read/Write */
+       Erbcr           = 0x34,         /* Early Receive Byte Count */
+       Ersr            = 0x36,         /* Early Receive Status */
+       Cr              = 0x37,         /* Command Register */
+       Tppoll          = 0x38,         /* Transmit Priority Polling */
+       Imr             = 0x3C,         /* Interrupt Mask */
+       Isr             = 0x3E,         /* Interrupt Status */
+       Tcr             = 0x40,         /* Transmit Configuration */
+       Rcr             = 0x44,         /* Receive Configuration */
+       Tctr            = 0x48,         /* Timer Count */
+       Mpc             = 0x4C,         /* Missed Packet Counter */
+       Cr9346          = 0x50,         /* 9346 Command Register */
+       Config0         = 0x51,         /* Configuration Register 0 */
+       Config1         = 0x52,         /* Configuration Register 1 */
+       Config2         = 0x53,         /* Configuration Register 2 */
+       Config3         = 0x54,         /* Configuration Register 3 */
+       Config4         = 0x55,         /* Configuration Register 4 */
+       Config5         = 0x56,         /* Configuration Register 5 */
+       Timerint        = 0x58,         /* Timer Interrupt */
+       Mulint          = 0x5C,         /* Multiple Interrupt Select */
+       Phyar           = 0x60,         /* PHY Access */
+       Tbicsr0         = 0x64,         /* TBI Control and Status */
+       Tbianar         = 0x68,         /* TBI Auto-Negotiation Advertisement */
+       Tbilpar         = 0x6A,         /* TBI Auto-Negotiation Link Partner */
+
+       Rms             = 0xDA,         /* Receive Packet Maximum Size */
+       Cplusc          = 0xE0,         /* C+ Command */
+       Rdsar           = 0xE4,         /* Receive Descriptor Start Address */
+       Mtps            = 0xEC,         /* Max. Transmit Packet Size */
+};
+
+enum {                                 /* Dtccr */
+       Cmd             = 0x00000008,   /* Command: set to start a dump, polled until clear (rtl8169ifstat) */
+};
+
+enum {                                 /* Cr */
+       Te              = 0x04,         /* Transmitter Enable */
+       Re              = 0x08,         /* Receiver Enable */
+       Rst             = 0x10,         /* Software Reset; rtl8169reset polls until it clears */
+};
+
+/* Bit values for the Tppoll register. */
+enum {                                 /* Tppoll */
+       Fswint          = 0x01,         /* Forced Software Interrupt */
+       Npq             = 0x40,         /* Normal Priority Queue polling */
+       Hpq             = 0x80,         /* High Priority Queue polling */
+};
+
+/*
+ * Interrupt bits, used for both the mask (Imr) and status (Isr) registers.
+ * rtl8169init enables every source listed here except Tok, Tdu and Swint.
+ */
+enum {                                 /* Imr/Isr */
+       Rok             = 0x0001,       /* Receive OK */
+       Rer             = 0x0002,       /* Receive Error */
+       Tok             = 0x0004,       /* Transmit OK */
+       Ter             = 0x0008,       /* Transmit Error */
+       Rdu             = 0x0010,       /* Receive Descriptor Unavailable */
+       Punlc           = 0x0020,       /* Packet Underrun or Link Change */
+       Fovw            = 0x0040,       /* Receive FIFO Overflow */
+       Tdu             = 0x0080,       /* Transmit Descriptor Unavailable */
+       Swint           = 0x0100,       /* Software Interrupt */
+       Timeout         = 0x4000,       /* Timer */
+       Serr            = 0x8000,       /* System Error */
+};
+
+/*
+ * Tcr bits. The Macv* constants are hardware-version patterns that lie
+ * within HwveridMASK; rtl8169mii and rtl8169init compare ctlr->macv
+ * against them.
+ */
+enum {                                 /* Tcr */
+       MtxdmaSHIFT     = 8,            /* Max. DMA Burst Size */
+       MtxdmaMASK      = 0x00000700,
+       Mtxdmaunlimited = 0x00000700,
+       Acrc            = 0x00010000,   /* Append CRC (not) */
+       Lbk0            = 0x00020000,   /* Loopback Test 0 */
+       Lbk1            = 0x00040000,   /* Loopback Test 1 */
+       Ifg2            = 0x00080000,   /* Interframe Gap 2 */
+       HwveridSHIFT    = 23,           /* Hardware Version ID */
+       HwveridMASK     = 0x7C800000,
+       Macv01          = 0x00000000,   /* RTL8169 */
+       Macv02          = 0x00800000,   /* RTL8169S/8110S */
+       Macv03          = 0x04000000,   /* RTL8169S/8110S */
+       Macv04          = 0x10000000,   /* RTL8169SB/8110SB */
+       Macv05          = 0x18000000,   /* RTL8169SC/8110SC */
+       Macv11          = 0x30000000,   /* RTL8168B/8111B */
+       Macv12          = 0x38000000,   /* RTL8169B/8111B */
+       Macv13          = 0x34000000,   /* RTL8101E */
+       Macv14          = 0x30800000,   /* RTL8100E */
+       Macv15          = 0x38800000,   /* RTL8100E */
+       Ifg0            = 0x01000000,   /* Interframe Gap 0 */
+       Ifg1            = 0x02000000,   /* Interframe Gap 1 */
+};
+
+/*
+ * Rcr bits. rtl8169init programs Rxfthnone|Mrxdmaunlimited|Ab|Am|Apm;
+ * rtl8169promiscuous toggles Aap and rtl8169multicast sets Am.
+ */
+enum {                                 /* Rcr */
+       Aap             = 0x00000001,   /* Accept All Packets */
+       Apm             = 0x00000002,   /* Accept Physical Match */
+       Am              = 0x00000004,   /* Accept Multicast */
+       Ab              = 0x00000008,   /* Accept Broadcast */
+       Ar              = 0x00000010,   /* Accept Runt */
+       Aer             = 0x00000020,   /* Accept Error */
+       Sel9356         = 0x00000040,   /* 9356 EEPROM used */
+       MrxdmaSHIFT     = 8,            /* Max. DMA Burst Size */
+       MrxdmaMASK      = 0x00000700,
+       Mrxdmaunlimited = 0x00000700,
+       RxfthSHIFT      = 13,           /* Receive Buffer Length */
+       RxfthMASK       = 0x0000E000,
+       Rxfth256        = 0x00008000,
+       Rxfthnone       = 0x0000E000,
+       Rer8            = 0x00010000,   /* Accept Error Packets > 8 bytes */
+       MulERINT        = 0x01000000,   /* Multiple Early Interrupt Select */
+};
+
+/*
+ * Cr9346 bits. rtl8169init writes Eem1|Eem0 before touching the
+ * configuration registers and writes 0 once it is done.
+ */
+enum {                                 /* Cr9346 */
+       Eedo            = 0x01,         /* */
+       Eedi            = 0x02,         /* */
+       Eesk            = 0x04,         /* */
+       Eecs            = 0x08,         /* */
+       Eem0            = 0x40,         /* Operating Mode */
+       Eem1            = 0x80,
+};
+
+enum {                                 /* Phyar */
+       DataMASK        = 0x0000FFFF,   /* 16-bit GMII/MII Register Data */
+       DataSHIFT       = 0,
+       RegaddrMASK     = 0x001F0000,   /* 5-bit GMII/MII Register Address */
+       RegaddrSHIFT    = 16,
+       Flag            = 0x80000000,   /* written 1 for a write, 0 for a read; polled until it flips (rtl8169miimiw/miimir) */
+};
+
+/*
+ * Cplusc bits. Cplusc is accessed with 16-bit reads and writes
+ * (csr16r/csr16w) in rtl8169init, which sets Mulrw.
+ */
+enum {                                 /* Cplusc */
+       Mulrw           = 0x0008,       /* PCI Multiple R/W Enable */
+       Dac             = 0x0010,       /* PCI Dual Address Cycle Enable */
+       Rxchksum        = 0x0020,       /* Receive Checksum Offload Enable */
+       Rxvlan          = 0x0040,       /* Receive VLAN De-tagging Enable */
+       Endian          = 0x0200,       /* Endian Mode */
+};
+
+/*
+ * One 16-byte ring entry, shared by the transmit and receive rings.
+ */
+typedef struct D D;                    /* Transmit/Receive Descriptor */
+struct D {
+       uint32_t        control;        /* Own/Eor/Fs/Ls flags plus length and status bits */
+       uint32_t        vlan;
+       uint32_t        addrlo;         /* paddr_low32() of the buffer */
+       uint32_t        addrhi;         /* paddr_high32() of the buffer */
+};
+
+/* Bits of D.control that apply to transmit descriptors. */
+enum {                                 /* Transmit Descriptor control */
+       TxflMASK        = 0x0000FFFF,   /* Transmit Frame Length */
+       TxflSHIFT       = 0,
+       Tcps            = 0x00010000,   /* TCP Checksum Offload */
+       Udpcs           = 0x00020000,   /* UDP Checksum Offload */
+       Ipcs            = 0x00040000,   /* IP Checksum Offload */
+       Lgsen           = 0x08000000,   /* Large Send */
+};
+
+/* Bits of D.control that apply to receive descriptors. */
+enum {                                 /* Receive Descriptor control */
+       RxflMASK        = 0x00003FFF,   /* Receive Frame Length */
+       RxflSHIFT       = 0,
+       Tcpf            = 0x00004000,   /* TCP Checksum Failure */
+       Udpf            = 0x00008000,   /* UDP Checksum Failure */
+       Ipf             = 0x00010000,   /* IP Checksum Failure */
+       Pid0            = 0x00020000,   /* Protocol ID0 */
+       Pid1            = 0x00040000,   /* Protocol ID1 */
+       Crce            = 0x00080000,   /* CRC Error */
+       Runt            = 0x00100000,   /* Runt Packet */
+       Res             = 0x00200000,   /* Receive Error Summary */
+       Rwt             = 0x00400000,   /* Receive Watchdog Timer Expired */
+       Fovf            = 0x00800000,   /* FIFO Overflow */
+       Bovf            = 0x01000000,   /* Buffer Overflow */
+       Bar             = 0x02000000,   /* Broadcast Address Received */
+       Pam             = 0x04000000,   /* Physical Address Matched */
+       Mar             = 0x08000000,   /* Multicast Address Received */
+};
+
+/*
+ * Bits of D.control common to both rings. rtl8169init marks the last
+ * entry of each ring with Eor; the driver sets Own when it hands a
+ * descriptor over (rtl8169replenish, rtl8169transmit).
+ */
+enum {                                 /* General Descriptor control */
+       Ls              = 0x10000000,   /* Last Segment Descriptor */
+       Fs              = 0x20000000,   /* First Segment Descriptor */
+       Eor             = 0x40000000,   /* End of Descriptor Ring */
+       Own             = 0x80000000,   /* Ownership */
+};
+
+/*
+ * Ring geometry. rtl8169attach allocates Ntd transmit and Nrd receive
+ * descriptors. Mps is the size of each receive buffer (rtl8169replenish)
+ * and the value programmed into Rms (rtl8169init).
+ */
+enum {                                 /* Ring sizes  (<= 1024) */
+       Ntd             = 32,           /* Transmit Ring */
+       Nrd             = 128,          /* Receive Ring */
+};
+
+#define Mps ROUNDUP(ETHERMAXTU + 4, 128)
+
+/*
+ * Tally counter block. rtl8169ifstat writes this structure's physical
+ * address to Dtccr with Cmd set, waits for Cmd to clear, then reads
+ * the fields. rtl8169attach allocates it with 64-byte alignment.
+ */
+typedef struct Dtcc Dtcc;
+struct Dtcc {
+       uint64_t        txok;
+       uint64_t        rxok;
+       uint64_t        txer;
+       uint32_t        rxer;
+       uint16_t        misspkt;
+       uint16_t        fae;
+       uint32_t        tx1col;
+       uint32_t        txmcol;
+       uint64_t        rxokph;
+       uint64_t        rxokbrd;
+       uint32_t        rxokmu;
+       uint16_t        txabt;
+       uint16_t        txundrn;
+};
+
+/*
+ * Values compared against ctlr->pciv in rtl8169init.
+ * NOTE(review): the encoding looks like (PCI device id << 16) | vendor id;
+ * confirm against the code that fills in pciv.
+ */
+enum {                                         /* Variants */
+       Rtl8100e        = (0x8136<<16)|0x10EC,  /* RTL810[01]E: pci-e */
+       Rtl8169c        = (0x0116<<16)|0x16EC,  /* RTL8169C+ (USR997902) */
+       Rtl8169sc       = (0x8167<<16)|0x10EC,  /* RTL8169SC */
+       Rtl8168b        = (0x8168<<16)|0x10EC,  /* RTL8168B: pci-e */
+       Rtl8169         = (0x8169<<16)|0x10EC,  /* RTL8169 */
+};
+
+/*
+ * Per-controller driver state: I/O port base, descriptor rings and the
+ * buffers attached to them, cached register values and statistics.
+ */
+struct ctlr {
+       int     port;                   /* I/O port base used by the csr*r/csr*w accessors */
+       struct pci_device *pci;
+       struct ctlr*    next;
+       int     active;
+
+       qlock_t alock;                  /* attach */
+       spinlock_t      ilock;                  /* init */
+       int     init;                   /* set to 1 once rtl8169attach has allocated the rings and run rtl8169init */
+
+       int     pciv;                   /* chip variant, compared against the Rtl81xx constants */
+       int     macv;                   /* MAC version */
+       int     phyv;                   /* PHY version */
+       int     pcie;                   /* flag: pci-express device? */
+
+       uint64_t        mchash;                 /* multicast hash */
+
+       struct mii*     mii;
+
+       spinlock_t      tlock;                  /* transmit */
+       D*      td;                     /* descriptor ring */
+       struct block**  tb;                     /* transmit buffers */
+       int     ntd;
+
+       int     tdh;                    /* head - oldest in-flight descriptor; rtl8169transmit reclaims from here */
+       int     tdt;                    /* tail - slot rtl8169transmit fills next */
+       int     ntdfree;
+       int     ntq;
+
+       int     mtps;                   /* Max. Transmit Packet Size */
+
+       spinlock_t      rlock;                  /* receive */
+       D*      rd;                     /* descriptor ring */
+       struct block**  rb;                     /* receive buffers */
+       int     nrd;
+
+       int     rdh;                    /* head - producer index (NIC) */
+       int     rdt;                    /* tail - next slot rtl8169replenish hands to the NIC */
+       int     nrdfree;
+
+       int     tcr;                    /* transmit configuration register */
+       int     rcr;                    /* receive configuration register */
+       int     imr;
+
+       qlock_t slock;                  /* statistics */
+       Dtcc*   dtcc;
+       unsigned int    txdu;
+       unsigned int    tcpf;
+       unsigned int    udpf;
+       unsigned int    ipf;
+       unsigned int    fovf;
+       unsigned int    ierrs;
+       unsigned int    rer;
+       unsigned int    rdu;
+       unsigned int    punlc;
+       unsigned int    fovw;
+       unsigned int    mcast;
+};
+
+/*
+ * Head and tail of the controller list.
+ * NOTE(review): presumably chained through ctlr->next by the probe code,
+ * which is outside this view.
+ */
+static struct ctlr* rtl8169ctlrhead;
+static struct ctlr* rtl8169ctlrtail;
+
+/*
+ * Port-I/O register accessors: c is a struct ctlr*, r a register offset
+ * relative to c->port. The write forms truncate the value to the access
+ * width.
+ */
+#define csr8r(c, r)    (inb((c)->port+(r)))
+#define csr16r(c, r)   (inw((c)->port+(r)))
+#define csr32r(c, r)   (inl((c)->port+(r)))
+#define csr8w(c, r, b) (outb((c)->port+(r), (uint8_t)(b)))
+#define csr16w(c, r, w)        (outw((c)->port+(r), (uint16_t)(w)))
+#define csr32w(c, r, l)        (outl((c)->port+(r), (uint32_t)(l)))
+
+/*
+ * Read MII register ra of the PHY at address pa through Phyar.
+ * Only pa == 1 is accepted. The register address is posted with Flag
+ * clear, then Phyar is polled (up to 2000 times, 100us apart) until
+ * Flag reads back set.
+ * Returns the 16-bit register value, or -1 on a bad pa or timeout.
+ */
+static int
+rtl8169miimir(struct ctlr* ctlr, int pa, int ra)
+{
+       unsigned int phyar;
+       int tries;
+
+       if(pa != 1)
+               return -1;
+
+       csr32w(ctlr, Phyar, (ra<<RegaddrSHIFT) & RegaddrMASK);
+       udelay(1000*1);
+
+       tries = 0;
+       do{
+               phyar = csr32r(ctlr, Phyar);
+               if(phyar & Flag)
+                       return (phyar & DataMASK)>>DataSHIFT;
+               udelay(100);
+       }while(++tries < 2000);
+
+       return -1;
+}
+
+/*
+ * Write data to MII register ra of the PHY at address pa through Phyar.
+ * Only pa == 1 is accepted. The request is posted with Flag set, then
+ * Phyar is polled (up to 2000 times, 100us apart) until Flag reads
+ * back clear.
+ * Returns 0 on success, -1 on a bad pa or timeout.
+ */
+static int
+rtl8169miimiw(struct ctlr* ctlr, int pa, int ra, int data)
+{
+       unsigned int phyar;
+       int tries;
+
+       if(pa != 1)
+               return -1;
+
+       phyar = Flag;
+       phyar |= (ra<<RegaddrSHIFT) & RegaddrMASK;
+       phyar |= (data<<DataSHIFT) & DataMASK;
+       csr32w(ctlr, Phyar, phyar);
+       udelay(1000*1);
+
+       tries = 0;
+       do{
+               if(!(csr32r(ctlr, Phyar) & Flag))
+                       return 0;
+               udelay(100);
+       }while(++tries < 2000);
+
+       return -1;
+}
+
+/*
+ * MII access callback handed to miiattach: dispatches to the write or
+ * read helper depending on write. data is ignored for reads.
+ */
+static int
+rtl8169miirw(struct mii* mii, int write, int pa, int ra, int data)
+{
+       return write ? rtl8169miimiw(mii->ctlr, pa, ra, data)
+                    : rtl8169miimir(mii->ctlr, pa, ra);
+}
+
+/*
+ * Link management: record the PHY revision, apply the Macv02 quirk,
+ * attach the MII layer and, if the link status cannot be read, reset the
+ * PHY and restart auto-negotiation.
+ * Returns the attached struct mii, or NULL if miiattach fails.
+ */
+static struct mii*
+rtl8169mii(struct ctlr* ctlr)
+{
+       struct mii* mii;
+       struct miiphy *phy;
+
+       /*
+        * Get rev number out of Phyidr2 so can config properly.
+        * There's probably more special stuff for Macv0[234] needed here.
+        * NOTE(review): a failed read returns -1, which leaves phyv == 0x0F.
+        */
+       ctlr->phyv = rtl8169miimir(ctlr, 1, Phyidr2) & 0x0F;
+       if(ctlr->macv == Macv02){
+               csr8w(ctlr, 0x82, 1);                           /* magic */
+               rtl8169miimiw(ctlr, 1, 0x0B, 0x0000);           /* magic */
+       }
+       if((mii = miiattach(ctlr, (1<<1), rtl8169miirw)) == NULL)
+               return NULL;
+
+       phy = mii->curphy;
+       /* C printf verbs; the Plan 9 "%#ux" form is "%#u" plus a literal 'x' */
+       printd("oui 0x%x phyno %d, macv = 0x%08x phyv = 0x%04x\n",
+               phy->oui, phy->phyno, ctlr->macv, ctlr->phyv);
+
+       if(miistatus(mii) < 0){
+               miireset(mii);
+               miiane(mii, ~0, ~0, ~0);
+       }
+
+       return mii;
+}
+
+/*
+ * Turn promiscuous reception on or off by setting or clearing Aap in
+ * the cached receive configuration and writing it to Rcr, all under
+ * ctlr->ilock. arg is the struct ether for the interface.
+ */
+static void
+rtl8169promiscuous(void* arg, int on)
+{
+       struct ether *edev = arg;
+       struct ctlr *ctlr = edev->ctlr;
+
+       ilock(&ctlr->ilock);
+       ctlr->rcr = on ? (ctlr->rcr | Aap) : (ctlr->rcr & ~Aap);
+       csr32w(ctlr, Rcr, ctlr->rcr);
+       iunlock(&ctlr->ilock);
+}
+
+/* Constants for ethercrcbe (CRC polynomial) and swabl (byte mask). */
+enum {
+       /*
+        * everyone else uses 0x04c11db7, but they both produce the same crc:
+        * ethercrcbe ORs the low bit back in whenever it applies the polynomial
+        */
+       Etherpolybe = 0x04c11db6,
+       Bytemask = (1<<8) - 1,
+};
+
+/*
+ * Bitwise CRC-32 over len bytes at addr: the register shifts left
+ * (most significant bit out) while each byte is fed least significant
+ * bit first. Starts from all ones; no final inversion is applied.
+ */
+static uint32_t
+ethercrcbe(uint8_t *addr, long len)
+{
+       uint32_t crc, byte, msb, lsb;
+       int idx, bit;
+
+       crc = ~0U;
+       for (idx = 0; idx < len; idx++) {
+               byte = addr[idx];
+               for (bit = 8; bit > 0; bit--) {
+                       msb = (crc >> 31) & 1;
+                       lsb = byte & 1;
+                       crc <<= 1;
+                       byte >>= 1;
+                       /* feedback when the outgoing crc bit and data bit differ */
+                       if (msb != lsb)
+                               crc = (crc ^ Etherpolybe) | 1;
+               }
+       }
+       return crc;
+}
+
+/* Reverse the byte order of a 32-bit value. */
+static uint32_t
+swabl(uint32_t l)
+{
+       uint32_t out;
+
+       out  = l << 24;                         /* byte 0 -> byte 3 */
+       out |= (l << 8) & (Bytemask << 16);     /* byte 1 -> byte 2 */
+       out |= (l >> 8) & (Bytemask << 8);      /* byte 2 -> byte 1 */
+       out |= l >> 24;                         /* byte 3 -> byte 0 */
+       return out;
+}
+
+/*
+ * Add a multicast address to the hardware hash filter. The top six bits
+ * of ethercrcbe(eaddr) select one bit of the 64-bit hash, which is then
+ * written to Mar0/Mar0+4 with Am enabled in Rcr. Removals (add == 0) are
+ * ignored, so the filter only ever grows.
+ */
+static void
+rtl8169multicast(void* ether, uint8_t *eaddr, int add)
+{
+       struct ether *edev;
+       struct ctlr *ctlr;
+       uint32_t lo, hi;
+
+       if (!add)
+               return; /* ok to keep receiving on old mcast addrs */
+
+       edev = ether;
+       ctlr = edev->ctlr;
+
+       ilock(&ctlr->ilock);
+
+       ctlr->mchash |= 1ULL << (ethercrcbe(eaddr, Eaddrlen) >> 26);
+       lo = ctlr->mchash;
+       hi = ctlr->mchash >> 32;
+
+       ctlr->rcr |= Am;
+       csr32w(ctlr, Rcr, ctlr->rcr);
+
+       /* pci-e variants reverse the order of the hash byte registers */
+       if (ctlr->pcie) {
+               csr32w(ctlr, Mar0,   swabl(hi));
+               csr32w(ctlr, Mar0+4, swabl(lo));
+       } else {
+               csr32w(ctlr, Mar0,   lo);
+               csr32w(ctlr, Mar0+4, hi);
+       }
+
+       iunlock(&ctlr->ilock);
+}
+
+/*
+ * Refresh the interface statistics and, when n != 0, format them as
+ * text into a at the given offset.
+ *
+ * The chip is asked to dump its tally counters into ctlr->dtcc (Cmd in
+ * Dtccr, polled for up to 1000 x 1ms); Eio is raised if it never
+ * completes and Enomem if the text buffer cannot be allocated.
+ * ctlr->slock is held throughout and released on every exit path.
+ * Returns the number of bytes produced by readstr, or 0 when n == 0.
+ */
+static long
+rtl8169ifstat(struct ether* edev, void* a, long n, uint32_t offset)
+{
+       ERRSTACK(2);
+       struct ctlr *ctlr;
+       Dtcc *dtcc;
+       int timeo;
+       char *alloc, *e, *p;
+
+       ctlr = edev->ctlr;
+       qlock(&ctlr->slock);
+
+       alloc = NULL;
+       if(waserror()){
+               qunlock(&ctlr->slock);
+               kfree(alloc);
+               nexterror();
+       }
+
+       csr32w(ctlr, Dtccr+4, 0);
+       csr32w(ctlr, Dtccr, paddr_low32(ctlr->dtcc)|Cmd);
+       for(timeo = 0; timeo < 1000; timeo++){
+               if(!(csr32r(ctlr, Dtccr) & Cmd))
+                       break;
+               udelay(1000*1);
+       }
+       if(csr32r(ctlr, Dtccr) & Cmd)
+               error(Eio);
+       dtcc = ctlr->dtcc;
+
+       edev->netif.oerrs = dtcc->txer;
+       edev->netif.crcs = dtcc->rxer;
+       edev->netif.frames = dtcc->fae;
+       edev->netif.buffs = dtcc->misspkt;
+       edev->netif.overflows = ctlr->txdu+ctlr->rdu;
+
+       /* caller only wanted the netif counters refreshed */
+       if(n == 0){
+               qunlock(&ctlr->slock);
+               poperror();
+               return 0;
+       }
+
+       if((alloc = kzmalloc(READSTR, 0)) == NULL)
+               error(Enomem);
+       e = alloc+READSTR;
+
+       p = seprintf(alloc, e, "TxOk: %llu\n", dtcc->txok);
+       p = seprintf(p, e, "RxOk: %llu\n", dtcc->rxok);
+       p = seprintf(p, e, "TxEr: %llu\n", dtcc->txer);
+       p = seprintf(p, e, "RxEr: %u\n", dtcc->rxer);
+       p = seprintf(p, e, "MissPkt: %u\n", dtcc->misspkt);
+       p = seprintf(p, e, "FAE: %u\n", dtcc->fae);
+       p = seprintf(p, e, "Tx1Col: %u\n", dtcc->tx1col);
+       p = seprintf(p, e, "TxMCol: %u\n", dtcc->txmcol);
+       p = seprintf(p, e, "RxOkPh: %llu\n", dtcc->rxokph);
+       p = seprintf(p, e, "RxOkBrd: %llu\n", dtcc->rxokbrd);
+       p = seprintf(p, e, "RxOkMu: %u\n", dtcc->rxokmu);
+       p = seprintf(p, e, "TxAbt: %u\n", dtcc->txabt);
+       p = seprintf(p, e, "TxUndrn: %u\n", dtcc->txundrn);
+
+       p = seprintf(p, e, "txdu: %u\n", ctlr->txdu);
+       p = seprintf(p, e, "tcpf: %u\n", ctlr->tcpf);
+       p = seprintf(p, e, "udpf: %u\n", ctlr->udpf);
+       p = seprintf(p, e, "ipf: %u\n", ctlr->ipf);
+       p = seprintf(p, e, "fovf: %u\n", ctlr->fovf);
+       p = seprintf(p, e, "ierrs: %u\n", ctlr->ierrs);
+       p = seprintf(p, e, "rer: %u\n", ctlr->rer);
+       p = seprintf(p, e, "rdu: %u\n", ctlr->rdu);
+       p = seprintf(p, e, "punlc: %u\n", ctlr->punlc);
+       p = seprintf(p, e, "fovw: %u\n", ctlr->fovw);
+
+       /* register images are shown in hex, matching their "0x" prefix */
+       p = seprintf(p, e, "tcr: 0x%08x\n", ctlr->tcr);
+       p = seprintf(p, e, "rcr: 0x%08x\n", ctlr->rcr);
+       p = seprintf(p, e, "multicast: %u\n", ctlr->mcast);
+
+       if(ctlr->mii != NULL && ctlr->mii->curphy != NULL)
+               miidumpphy(ctlr->mii, p, e);
+
+       n = readstr(offset, a, n, alloc);
+
+       qunlock(&ctlr->slock);
+       poperror();
+       kfree(alloc);
+
+       return n;
+}
+
+/*
+ * Quiesce the chip: clear the command register (Te and Re off), mask
+ * every interrupt source, then write all ones to Isr.
+ * NOTE(review): the Isr write presumably acknowledges anything pending;
+ * confirm against the datasheet.
+ */
+static void
+rtl8169halt(struct ctlr* ctlr)
+{
+       csr8w(ctlr, Cr, 0);
+       csr16w(ctlr, Imr, 0);
+       csr16w(ctlr, Isr, ~0);
+}
+
+/*
+ * Soft reset the controller: set Rst in Cr and poll (up to 1000 times,
+ * 1ms apart) for the chip to clear it. The chip is halted afterwards
+ * whether or not the reset completed.
+ * Returns 0 on success, -1 if Rst never cleared.
+ */
+static int
+rtl8169reset(struct ctlr* ctlr)
+{
+       int tries, stuck;
+
+       csr8w(ctlr, Cr, Rst);
+
+       stuck = 1;
+       for(tries = 0; tries < 1000; tries++){
+               if(!(csr8r(ctlr, Cr) & Rst)){
+                       stuck = 0;
+                       break;
+               }
+               udelay(1000*1);
+       }
+       rtl8169halt(ctlr);
+
+       return stuck ? -1 : 0;
+}
+
+/*
+ * Hand receive descriptors to the NIC, starting at ctlr->rdt and
+ * stopping one slot short of ctlr->rdh. Slots without a buffer get a
+ * fresh Mps-byte block; if allocation fails the walk stops there and is
+ * resumed from the same slot on the next call.
+ */
+static void
+rtl8169replenish(struct ctlr* ctlr)
+{
+       D *desc;
+       int slot, next;
+       struct block *buf;
+
+       slot = ctlr->rdt;
+       for(;;){
+               next = NEXT_RING(slot, ctlr->nrd);
+               if(next == ctlr->rdh)
+                       break;
+
+               desc = &ctlr->rd[slot];
+               if(ctlr->rb[slot] == NULL){
+                       /*
+                        * Simple allocation for now.
+                        * This better be aligned on 8.
+                        */
+                       buf = iallocb(Mps);
+                       if(buf == NULL){
+                               printk("no available buffers\n");
+                               break;
+                       }
+                       ctlr->rb[slot] = buf;
+                       desc->addrlo = paddr_low32(buf->rp);
+                       desc->addrhi = paddr_high32(buf->rp);
+               }
+               /* buffer address must be written before ownership passes */
+               wmb();
+               desc->control |= Own|Mps;
+               ctlr->nrdfree++;
+               slot = next;
+       }
+       ctlr->rdt = slot;
+}
+
+static int
+rtl8169init(struct ether* edev)
+{
+       int i;
+       uint32_t r;
+       struct block *bp;
+       struct ctlr *ctlr;
+       uint8_t cplusc;
+
+       ctlr = edev->ctlr;
+       ilock(&ctlr->ilock);
+
+       rtl8169halt(ctlr);
+
+       /*
+        * MAC Address.
+        * Must put chip into config register write enable mode.
+        */
+       csr8w(ctlr, Cr9346, Eem1|Eem0);
+       r = (edev->ea[3]<<24)|(edev->ea[2]<<16)|(edev->ea[1]<<8)|edev->ea[0];
+       csr32w(ctlr, Idr0, r);
+       r = (edev->ea[5]<<8)|edev->ea[4];
+       csr32w(ctlr, Idr0+4, r);
+
+       /*
+        * Transmitter.
+        */
+       memset(ctlr->td, 0, sizeof(D)*ctlr->ntd);
+       ctlr->tdh = ctlr->tdt = 0;
+       ctlr->td[ctlr->ntd-1].control = Eor;
+
+       /*
+        * Receiver.
+        * Need to do something here about the multicast filter.
+        */
+       memset(ctlr->rd, 0, sizeof(D)*ctlr->nrd);
+       ctlr->nrdfree = ctlr->rdh = ctlr->rdt = 0;
+       ctlr->rd[ctlr->nrd-1].control = Eor;
+
+       for(i = 0; i < ctlr->nrd; i++){
+               if((bp = ctlr->rb[i]) != NULL){
+                       ctlr->rb[i] = NULL;
+                       freeb(bp);
+               }
+       }
+       rtl8169replenish(ctlr);
+       ctlr->rcr = Rxfthnone|Mrxdmaunlimited|Ab|Am|Apm;
+
+       /*
+        * Mtps is in units of 128 except for the RTL8169
+        * where is is 32. If using jumbo frames should be
+        * set to 0x3F.
+        * Setting Mulrw in Cplusc disables the Tx/Rx DMA burst
+        * settings in Tcr/Rcr; the (1<<14) is magic.
+        */
+       ctlr->mtps = HOWMANY(Mps, 128);
+       cplusc = csr16r(ctlr, Cplusc) & ~(1<<14);
+       cplusc |= /*Rxchksum|*/Mulrw;
+       switch(ctlr->macv){
+       default:
+               printd("rtl8169: unsupported macv %#ux\n", ctlr->macv);
+               break;  /* perhaps it just works */
+       case Macv01:
+               ctlr->mtps = HOWMANY(Mps, 32);
+               break;
+       case Macv02:
+       case Macv03:
+               cplusc |= (1<<14);                      /* magic */
+               break;
+       case Macv05:
+               /*
+                * This is interpreted from clearly bogus code
+                * in the manufacturer-supplied driver, it could
+                * be wrong. Untested.
+                */
+               printk("untested\n");
+               break;
+#if 0        
+               r = csr8r(ctlr, Config2) & 0x07;
+               if(r == 0x01)                           /* 66MHz PCI */
+                       csr32w(ctlr, 0x7C, 0x0007FFFF); /* magic */
+               else
+                       csr32w(ctlr, 0x7C, 0x0007FF00); /* magic */
+               pciclrmwi(ctlr->pcidev);
+#endif
+               break;
+       case Macv13:
+               printk("untested macv13 write\n");
+               break;
+#if 0
+               /*
+                * This is interpreted from clearly bogus code
+                * in the manufacturer-supplied driver, it could
+                * be wrong. Untested.
+                */
+               pcicfgw8(ctlr->pcidev, 0x68, 0x00);     /* magic */
+               pcicfgw8(ctlr->pcidev, 0x69, 0x08);     /* magic */
+               break;
+#endif
+       case Macv04:
+       case Macv11:
+       case Macv12:
+       case Macv14:
+       case Macv15:
+               break;
+       }
+
+       /*
+        * Enable receiver/transmitter.
+        * Need to do this first or some of the settings below
+        * won't take.
+        */
+       switch(ctlr->pciv){
+       default:
+               csr8w(ctlr, Cr, Te|Re);
+               csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+               csr32w(ctlr, Rcr, ctlr->rcr);
+               csr32w(ctlr, Mar0,   0);
+               csr32w(ctlr, Mar0+4, 0);
+               ctlr->mchash = 0;
+       case Rtl8169sc:
+       case Rtl8168b:
+               break;
+       }
+
+       /*
+        * Interrupts.
+        * Disable Tdu|Tok for now, the transmit routine will tidy.
+        * Tdu means the NIC ran out of descriptors to send, so it
+        * doesn't really need to ever be on.
+        */
+       csr32w(ctlr, Timerint, 0);
+       ctlr->imr = Serr|Timeout|Fovw|Punlc|Rdu|Ter|Rer|Rok;
+       csr16w(ctlr, Imr, ctlr->imr);
+
+       /*
+        * Clear missed-packet counter;
+        * initial early transmit threshold value;
+        * set the descriptor ring base addresses;
+        * set the maximum receive packet size;
+        * no early-receive interrupts.
+        */
+       csr32w(ctlr, Mpc, 0);
+       csr8w(ctlr, Mtps, ctlr->mtps);
+       csr32w(ctlr, Tnpds + 4, paddr_high32(ctlr->td));
+       csr32w(ctlr, Tnpds, paddr_low32(ctlr->td));
+       csr32w(ctlr, Rdsar + 4, paddr_high32(ctlr->rd));
+       csr32w(ctlr, Rdsar, paddr_low32(ctlr->rd));
+       csr16w(ctlr, Rms, Mps);
+       r = csr16r(ctlr, Mulint) & 0xF000;
+       csr16w(ctlr, Mulint, r);
+       csr16w(ctlr, Cplusc, cplusc);
+
+       /*
+        * Set configuration.
+        */
+       switch(ctlr->pciv){
+       default:
+               break;
+       case Rtl8169sc:
+               csr16w(ctlr, 0xE2, 0);                  /* magic */
+               csr8w(ctlr, Cr, Te|Re);
+               csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+               csr32w(ctlr, Rcr, ctlr->rcr);
+               break;
+       case Rtl8168b:
+       case Rtl8169c:
+               csr16w(ctlr, 0xE2, 0);                  /* magic */
+               csr16w(ctlr, Cplusc, 0x2000);           /* magic */
+               csr8w(ctlr, Cr, Te|Re);
+               csr32w(ctlr, Tcr, Ifg1|Ifg0|Mtxdmaunlimited);
+               csr32w(ctlr, Rcr, ctlr->rcr);
+               csr16w(ctlr, Rms, 0x0800);
+               csr8w(ctlr, Mtps, 0x3F);
+               break;
+       }
+       ctlr->tcr = csr32r(ctlr, Tcr);
+       csr8w(ctlr, Cr9346, 0);
+
+       iunlock(&ctlr->ilock);
+
+//     rtl8169mii(ctlr);
+
+       return 0;
+}
+
+/*
+ * Attach the interface. On the first call (ctlr->init == 0) the
+ * descriptor rings, buffer pointer arrays and tally counter block are
+ * allocated and the chip is initialised, serialised by ctlr->alock.
+ * Every call then waits up to 350 x 10ms for miistatus to report the
+ * link ready, provided a PHY is bound to the controller.
+ */
+static void
+rtl8169attach(struct ether* edev)
+{
+       int timeo;
+       struct ctlr *ctlr;
+       struct miiphy *phy;
+
+       ctlr = edev->ctlr;
+       qlock(&ctlr->alock);
+       if(ctlr->init == 0){
+               /*
+                * Handle allocation/init errors here.
+                */
+               ctlr->td = kzmalloc_align(sizeof(D) * Ntd, KMALLOC_WAIT, 256);
+               ctlr->tb = kzmalloc(Ntd * sizeof(struct block *), KMALLOC_WAIT);
+               ctlr->ntd = Ntd;
+               ctlr->rd = kzmalloc_align(sizeof(D) * Nrd, KMALLOC_WAIT, 256);
+               ctlr->rb = kzmalloc(Nrd * sizeof(struct block *), KMALLOC_WAIT);
+               ctlr->nrd = Nrd;
+               ctlr->dtcc = kzmalloc_align(sizeof(Dtcc), KMALLOC_WAIT, 64);
+               rtl8169init(edev);
+               ctlr->init = 1;
+       }
+       qunlock(&ctlr->alock);
+
+       /*
+        * No MII/PHY bound to this controller: nothing to poll.
+        * Same guard as rtl8169link and rtl8169ifstat.
+        */
+       if(ctlr->mii == NULL || ctlr->mii->curphy == NULL)
+               return;
+
+       /*
+        * Wait for link to be ready.
+        */
+       for(timeo = 0; timeo < 350; timeo++){
+               if(miistatus(ctlr->mii) == 0)
+                       break;
+               udelay_sched(10000);
+       }
+       phy = ctlr->mii->curphy;
+       printd("%s: speed %d fd %d link %d rfc %d tfc %d\n",
+               edev->netif.name, phy->speed, phy->fd, phy->link, phy->rfc, phy->tfc);
+}
+
+/*
+ * Re-read the PHY status and publish it on the interface: sets
+ * netif.link, updates netif.mbps for 10/100/1000 and resizes the
+ * output queue limit (65KB at 10Mbps, 256KB otherwise). Does nothing
+ * when no PHY is bound.
+ */
+static void
+rtl8169link(struct ether* edev)
+{
+       struct ctlr *ctlr = edev->ctlr;
+       struct mii *mii = ctlr->mii;
+       struct miiphy *phy;
+       int qlimit;
+
+       /*
+        * Maybe the link changed - do we care very much?
+        * Could stall transmits if no link, maybe?
+        */
+       if(mii == NULL)
+               return;
+       phy = mii->curphy;
+       if(phy == NULL)
+               return;
+
+       if(miistatus(mii) < 0){
+               // TODO : no name here
+               printk("%slink n: speed %d fd %d link %d rfc %d tfc %d\n",
+                       edev->netif.name, phy->speed, phy->fd, phy->link,
+                       phy->rfc, phy->tfc);
+               edev->netif.link = 0;
+               return;
+       }
+       edev->netif.link = 1;
+
+       qlimit = 256*1024;
+       switch(phy->speed){
+       case 10:
+               edev->netif.mbps = 10;
+               qlimit = 65*1024;
+               break;
+       case 100:
+               edev->netif.mbps = 100;
+               break;
+       case 1000:
+               edev->netif.mbps = 1000;
+               break;
+       default:
+               /* unknown speed: leave mbps as it was */
+               break;
+       }
+       printk("%slink y: speed %d fd %d link %d rfc %d tfc %d\n",
+               edev->netif.name, phy->speed, phy->fd, phy->link,
+               phy->rfc, phy->tfc);
+
+       if(edev->oq != NULL)
+               qsetlimit(edev->oq, qlimit);
+}
+
+/*
+ * Transmit routine, installed as edev->transmit by rtl8169pnp and also
+ * called from rtl8169interrupt on Tdu/Ter/Tok.  Runs under tlock.
+ *
+ * Pass 1 walks the ring from tdh and frees the block of each
+ * descriptor whose Own bit is clear, stopping when ntq reaches zero or
+ * a descriptor still has Own set.
+ * Pass 2 takes blocks from edev->oq and loads them into descriptors
+ * from tdt onwards while fewer than ntd-1 descriptors are in use.
+ */
+static void
+rtl8169transmit(struct ether* edev)
+{
+       D *d;
+       struct block *bp;
+       struct ctlr *ctlr;
+       int control, x;
+
+       ctlr = edev->ctlr;
+
+       ilock(&ctlr->tlock);
+       for(x = ctlr->tdh; ctlr->ntq > 0; x = NEXT_RING(x, ctlr->ntd)){
+               d = &ctlr->td[x];
+               /* Own still set: presumably the NIC has not finished with it */
+               if((control = d->control) & Own)
+                       break;
+
+               /*
+                * Check errors and log here.
+                */
+
+               /*
+                * Free it up.
+                * Need to clean the descriptor here? Not really.
+                * Simple freeb for now (no chain and freeblist).
+                * Use ntq count for now.
+                */
+               freeb(ctlr->tb[x]);
+               ctlr->tb[x] = NULL;
+               /* keep only the Eor bit, clear everything else */
+               d->control &= Eor;
+
+               ctlr->ntq--;
+       }
+       ctlr->tdh = x;
+
+       x = ctlr->tdt;
+       while(ctlr->ntq < (ctlr->ntd-1)){
+               if((bp = qget(edev->oq)) == NULL)
+                       break;
+
+               d = &ctlr->td[x];
+               d->addrlo = paddr_low32(bp->rp);
+               d->addrhi = paddr_high32(bp->rp);
+               ctlr->tb[x] = bp;
+               /* buffer address is written before the barrier, Own after it */
+               wmb();
+               d->control |= Own|Fs|Ls|((BLEN(bp)<<TxflSHIFT) & TxflMASK);
+
+               x = NEXT_RING(x, ctlr->ntd);
+               ctlr->ntq++;
+       }
+       if(x != ctlr->tdt){
+               /* at least one descriptor was queued: record the new tail, write Npq to Tppoll */
+               ctlr->tdt = x;
+               csr8w(ctlr, Tppoll, Npq);
+       }
+       else if(ctlr->ntq >= (ctlr->ntd-1))
+               /* nothing queued and the ring is at its ntd-1 limit */
+               ctlr->txdu++;
+
+       iunlock(&ctlr->tlock);
+}
+
+/*
+ * Receive routine, called from rtl8169interrupt.
+ *
+ * Walks the receive ring from rdh until it meets a descriptor with
+ * Own set.  A descriptor with Fs and Ls set and Res clear has its
+ * block passed to etheriq(); anything else is dropped silently.
+ * Each visited descriptor is cleared down to its Eor bit and counted
+ * out of nrdfree; rtl8169replenish() is called whenever nrdfree falls
+ * below half the ring.
+ */
+static void
+rtl8169receive(struct ether* edev)
+{
+       D *d;
+       int rdh;
+       struct block *bp;
+       struct ctlr *ctlr;
+       uint32_t control;
+
+       ctlr = edev->ctlr;
+
+       rdh = ctlr->rdh;
+       for(;;){
+               d = &ctlr->rd[rdh];
+
+               if(d->control & Own)
+                       break;
+
+               control = d->control;
+               if((control & (Fs|Ls|Res)) == (Fs|Ls)){
+                       bp = ctlr->rb[rdh];
+                       ctlr->rb[rdh] = NULL;
+                       /* length field minus 4; presumably drops the trailing CRC - TODO confirm */
+                       bp->wp = bp->rp + ((control & RxflMASK)>>RxflSHIFT)-4;
+                       bp->next = NULL;
+
+                       if(control & Fovf)
+                               ctlr->fovf++;
+                       if(control & Mar)
+                               ctlr->mcast++;
+
+                       /*
+                        * Pid1/Pid0 select which checksum status applies.
+                        * A set failure bit only bumps a counter; otherwise
+                        * the block is flagged as checksum-verified.
+                        */
+                       switch(control & (Pid1|Pid0)){
+                       default:
+                               break;
+                       case Pid0:
+                               if(control & Tcpf){
+                                       ctlr->tcpf++;
+                                       break;
+                               }
+                               bp->flag |= Btcpck;
+                               break;
+                       case Pid1:
+                               if(control & Udpf){
+                                       ctlr->udpf++;
+                                       break;
+                               }
+                               bp->flag |= Budpck;
+                               break;
+                       case Pid1|Pid0:
+                               if(control & Ipf){
+                                       ctlr->ipf++;
+                                       break;
+                               }
+                               bp->flag |= Bipck;
+                               break;
+                       }
+                       etheriq(edev, bp, 1);
+               }
+               else{
+                       /*
+                        * Error stuff here.
+                       print("control %#8.8ux\n", control);
+                        */
+               }
+               /* keep only the Eor bit, then advance the head */
+               d->control &= Eor;
+               ctlr->nrdfree--;
+               rdh = NEXT_RING(rdh, ctlr->nrd);
+
+               if(ctlr->nrdfree < ctlr->nrd/2)
+                       rtl8169replenish(ctlr);
+       }
+       ctlr->rdh = rdh;
+}
+
+/*
+ * Interrupt handler, installed as edev->interrupt by rtl8169pnp;
+ * arg is the struct ether of the interrupting interface.
+ *
+ * Loops while Isr reads neither 0 nor 0xFFFF.  Each value read is
+ * written back to Isr (presumably acknowledging those causes), and the
+ * loop stops as soon as none of the pending bits are enabled in
+ * ctlr->imr.  Receive, transmit and link causes are dispatched in that
+ * order, each clearing its bits from the local copy.
+ */
+static void
+rtl8169interrupt(struct hw_trapframe *hw_tf, void *arg)
+{
+       struct ctlr *ctlr;
+       struct ether *edev;
+       uint32_t isr;
+
+       edev = arg;
+       ctlr = edev->ctlr;
+
+       while((isr = csr16r(ctlr, Isr)) != 0 && isr != 0xFFFF){
+               csr16w(ctlr, Isr, isr);
+               if((isr & ctlr->imr) == 0)
+                       break;
+               if(isr & (Fovw|Punlc|Rdu|Rer|Rok)){
+                       rtl8169receive(edev);
+                       if(!(isr & (Punlc|Rok)))
+                               ctlr->ierrs++;
+                       if(isr & Rer)
+                               ctlr->rer++;
+                       if(isr & Rdu)
+                               ctlr->rdu++;
+                       if(isr & Punlc)
+                               ctlr->punlc++;
+                       if(isr & Fovw)
+                               ctlr->fovw++;
+                       /* Punlc is left set so the link code below still runs */
+                       isr &= ~(Fovw|Rdu|Rer|Rok);
+               }
+
+               if(isr & (Tdu|Ter|Tok)){
+                       rtl8169transmit(edev);
+                       isr &= ~(Tdu|Ter|Tok);
+               }
+
+               if(isr & Punlc){
+                       rtl8169link(edev);
+                       isr &= ~Punlc;
+               }
+
+               /*
+                * Some of the reserved bits get set sometimes...
+                * Every other named cause was cleared above, so only
+                * Serr or Timeout can still be set here.
+                * The Plan 9 verb "%#4.4ux" is not a hex conversion for
+                * printf-style formatting, so use plain hex as the
+                * printk in rtl8169pci does.
+                */
+               if(isr & (Serr|Timeout|Tdu|Fovw|Punlc|Rdu|Ter|Tok|Rer|Rok))
+                       panic("rtl8169interrupt: imr 0x%04x isr 0x%04x\n",
+                               csr16r(ctlr, Imr), isr);
+       }
+}
+
+/*
+ * Scan pci_devices for supported Realtek controllers (plus the Corega
+ * CG-LAPCIGT, which is treated as an Rtl8169).
+ *
+ * For each match a struct ctlr is allocated, its locks initialised,
+ * the chip reset and the mii set up.  Controllers that pass all steps
+ * get PCI bus mastering enabled and are appended to the list headed
+ * by rtl8169ctlrhead; the others are freed and skipped.
+ */
+static void
+rtl8169pci(void)
+{
+       struct pci_device *pcidev;
+
+       struct ctlr *ctlr;
+       int id, port, pcie;
+
+       STAILQ_FOREACH(pcidev, &pci_devices, all_dev) {
+               /* This checks that pcidev is a Network Controller for Ethernet */
+               if (pcidev->class != 0x02 || pcidev->subclass != 0x00)
+                       continue;
+               id = pcidev->dev_id << 16 | pcidev->ven_id;
+
+               pcie = 0;
+               switch(id) {
+               default:
+                       continue;
+               case Rtl8100e:                  /* RTL810[01]E ? */
+               case Rtl8168b:                  /* RTL8168B */
+                       pcie = 1;
+                       break;
+               case Rtl8169c:                  /* RTL8169C */
+               case Rtl8169sc:                 /* RTL8169SC */
+               case Rtl8169:                   /* RTL8169 */
+                       break;
+               case (0xC107<<16)|0x1259:       /* Corega CG-LAPCIGT */
+                       id = Rtl8169;
+                       break;
+               }
+               printk("rtl8169 driver found 0x%04x:%04x at %02x:%02x.%x\n",
+                      pcidev->ven_id, pcidev->dev_id,
+                      pcidev->bus, pcidev->dev, pcidev->func);
+
+               /*
+                * The controller is addressed through ctlr->port, and
+                * rtl8169pnp treats port 0 as "not specified", so a
+                * device whose BAR 0 yields no port cannot be driven.
+                */
+               port = pcidev->bar[0].pio_base;
+               if(port == 0){
+                       printk("rtl8169: BAR 0 has no I/O port, skipping device\n");
+                       continue;
+               }
+
+               ctlr = kzmalloc(sizeof(struct ctlr), KMALLOC_WAIT);
+               spinlock_init_irqsave(&ctlr->ilock);
+               spinlock_init_irqsave(&ctlr->tlock);
+               spinlock_init_irqsave(&ctlr->rlock);
+               qlock_init(&ctlr->alock);
+               qlock_init(&ctlr->slock);
+
+               ctlr->port = port;
+               ctlr->pci = pcidev;
+               ctlr->pciv = id;
+               ctlr->pcie = pcie;
+
+               /* pcipms is something related to power mgmt, i think */
+               #if 0
+               if(pcigetpms(p) > 0){
+                       pcisetpms(p, 0);
+
+                       for(int i = 0; i < 6; i++)
+                               pcicfgw32(p, PciBAR0+i*4, p->mem[i].bar);
+                       pcicfgw8(p, PciINTL, p->intl);
+                       pcicfgw8(p, PciLTR, p->ltr);
+                       pcicfgw8(p, PciCLS, p->cls);
+                       pcicfgw16(p, PciPCR, p->pcr);
+               }
+               #endif
+
+               if(rtl8169reset(ctlr)){
+                       kfree(ctlr);
+                       continue;
+               }
+
+               /*
+                * Extract the chip hardware version,
+                * needed to configure each properly.
+                */
+               ctlr->macv = csr32r(ctlr, Tcr) & HwveridMASK;
+               if((ctlr->mii = rtl8169mii(ctlr)) == NULL){
+                       kfree(ctlr);
+                       continue;
+               }
+
+               pci_set_bus_master(pcidev);
+
+               /* append to the singly linked controller list */
+               if(rtl8169ctlrhead != NULL)
+                       rtl8169ctlrtail->next = ctlr;
+               else
+                       rtl8169ctlrhead = ctlr;
+               rtl8169ctlrtail = ctlr;
+       }
+}
+
+/*
+ * Probe routine registered with addethercard().
+ *
+ * Runs the PCI scan once, claims the first controller that is not yet
+ * active and whose port matches edev->port (any controller when
+ * edev->port is 0), copies its settings into edev and installs the
+ * driver entry points.
+ * Returns 0 on success, -1 when no controller could be claimed.
+ */
+static int
+rtl8169pnp(struct ether* edev)
+{
+       int i;
+       uint32_t lo, hi;
+       struct ctlr *ctlr;
+       uint8_t unset[Eaddrlen];
+
+       run_once(rtl8169pci());
+
+       /*
+        * Any adapter matches if no edev->port is supplied,
+        * otherwise the ports must match.
+        */
+       ctlr = rtl8169ctlrhead;
+       while(ctlr != NULL){
+               if(!ctlr->active && (edev->port == 0 || edev->port == ctlr->port))
+                       break;
+               ctlr = ctlr->next;
+       }
+       if(ctlr == NULL)
+               return -1;
+       ctlr->active = 1;
+
+       edev->ctlr = ctlr;
+       edev->port = ctlr->port;
+       edev->irq = ctlr->pci->irqline;
+       edev->netif.mbps = 100;
+
+       /*
+        * An all-zero edev->ea means no override was given, so the
+        * station address is read from the device: four bytes from
+        * Idr0 and two more from Idr0+4, low byte first.
+        */
+       memset(unset, 0, Eaddrlen);
+       if(memcmp(edev->ea, unset, Eaddrlen) == 0){
+               lo = csr32r(ctlr, Idr0);
+               hi = csr32r(ctlr, Idr0+4);
+               for(i = 0; i < 4; i++)
+                       edev->ea[i] = lo >> (8*i);
+               edev->ea[4] = hi;
+               edev->ea[5] = hi>>8;
+       }
+
+       edev->tbdf = MKBUS(BusPCI, ctlr->pci->bus, ctlr->pci->dev,
+                          ctlr->pci->func);
+
+       /* driver entry points */
+       edev->attach = rtl8169attach;
+       edev->transmit = rtl8169transmit;
+       edev->interrupt = rtl8169interrupt;
+       edev->ifstat = rtl8169ifstat;
+
+       edev->netif.arg = edev;
+       edev->netif.promiscuous = rtl8169promiscuous;
+       edev->netif.multicast = rtl8169multicast;
+//     edev->netif.shutdown = rtl8169shutdown;
+
+       rtl8169link(edev);
+
+       return 0;
+}
+
+/*
+ * Register this driver under the name "rtl8169" with rtl8169pnp as its
+ * probe routine.
+ * NOTE(review): linker_func_3 is presumably a boot-time init hook;
+ * confirm when it runs relative to PCI enumeration.
+ */
+linker_func_3(ether8169link)
+{
+       addethercard("rtl8169", rtl8169pnp);
+}
diff --git a/kern/drivers/net/etherigbe.c b/kern/drivers/net/etherigbe.c
new file mode 100644 (file)
index 0000000..aa2c471
--- /dev/null
@@ -0,0 +1,2111 @@
+/* This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file. */
+
+/*
+ * Intel 8254[340]NN Gigabit Ethernet PCI Controllers
+ * as found on the Intel PRO/1000 series of adapters:
+ *     82543GC Intel PRO/1000 T
+ *     82544EI Intel PRO/1000 XT
+ *     82540EM Intel PRO/1000 MT
+ *     82541[GP]I
+ *     82547GI
+ *     82546GB
+ *     82546EB
+ * To Do:
+ *     finish autonegotiation code;
+ *     integrate fiber stuff back in (this ONLY handles
+ *     the CAT5 cards at the moment);
+ *     add checksum-offload;
+ *     add tuning control via ctl file;
+ *     this driver is little-endian specific.
+ *
+ * Modified by brho:
+ *     ported to Akaros
+ *     fixed mii bugs (allocation, startup, miirw, etc)
+ *     fixed CLS bug (continue -> break)
+ *     made sure igbepci only runs once, even if it fails */
+
+#include <vfs.h>
+#include <kfs.h>
+#include <slab.h>
+#include <kmalloc.h>
+#include <kref.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <cpio.h>
+#include <pmap.h>
+#include <smp.h>
+#include <arch/pci.h>
+#include <ip.h>
+#include <ns.h>
+#include "ethermii.h"
+
+/* Map the ilock/iunlock names used below onto the irqsave spinlock calls. */
+#define ilock(x) spin_lock_irqsave(x)
+#define iunlock(x) spin_unlock_irqsave(x)
+
+/*
+ * Supported controllers.  Each value is (device id << 16) | 0x8086;
+ * 0x8086 is presumably the PCI vendor id (Intel).
+ */
+enum {
+       i82542          = (0x1000<<16)|0x8086,
+       i82543gc        = (0x1004<<16)|0x8086,
+       i82544ei        = (0x1008<<16)|0x8086,
+       i82544eif       = (0x1009<<16)|0x8086,
+       i82544gc        = (0x100d<<16)|0x8086,
+       i82540em        = (0x100E<<16)|0x8086,
+       i82540eplp      = (0x101E<<16)|0x8086,
+       i82545em        = (0x100F<<16)|0x8086,
+       i82545gmc       = (0x1026<<16)|0x8086,
+       i82547ei        = (0x1019<<16)|0x8086,
+       i82547gi        = (0x1075<<16)|0x8086,
+       i82541ei        = (0x1013<<16)|0x8086,
+       i82541gi        = (0x1076<<16)|0x8086,
+       i82541gi2       = (0x1077<<16)|0x8086,
+       i82541pi        = (0x107c<<16)|0x8086,
+       i82546gb        = (0x1079<<16)|0x8086,
+       i82546eb        = (0x1010<<16)|0x8086,
+};
+
+/*
+ * Register offsets.
+ * The entries between Statistics and Nstatistics (Gorcl, Gotcl, Torl,
+ * Totl) are written as a byte offset divided by 4; presumably they
+ * index 32-bit words inside the statistics area - TODO confirm.
+ */
+enum {
+       Ctrl            = 0x00000000,   /* Device Control */
+       Ctrldup         = 0x00000004,   /* Device Control Duplicate */
+       Status          = 0x00000008,   /* Device Status */
+       Eecd            = 0x00000010,   /* EEPROM/Flash Control/Data */
+       Ctrlext         = 0x00000018,   /* Extended Device Control */
+       Mdic            = 0x00000020,   /* MDI Control */
+       Fcal            = 0x00000028,   /* Flow Control Address Low */
+       Fcah            = 0x0000002C,   /* Flow Control Address High */
+       Fct             = 0x00000030,   /* Flow Control Type */
+       Icr             = 0x000000C0,   /* Interrupt Cause Read */
+       Ics             = 0x000000C8,   /* Interrupt Cause Set */
+       Ims             = 0x000000D0,   /* Interrupt Mask Set/Read */
+       Imc             = 0x000000D8,   /* Interrupt mask Clear */
+       Rctl            = 0x00000100,   /* Receive Control */
+       Fcttv           = 0x00000170,   /* Flow Control Transmit Timer Value */
+       Txcw            = 0x00000178,   /* Transmit Configuration Word */
+       Rxcw            = 0x00000180,   /* Receive Configuration Word */
+       /* on the oldest cards (8254[23]), the Mta register is at 0x200 */
+       Tctl            = 0x00000400,   /* Transmit Control */
+       Tipg            = 0x00000410,   /* Transmit IPG */
+       Tbt             = 0x00000448,   /* Transmit Burst Timer */
+       Ait             = 0x00000458,   /* Adaptive IFS Throttle */
+       Fcrtl           = 0x00002160,   /* Flow Control RX Threshold Low */
+       Fcrth           = 0x00002168,   /* Flow Control Rx Threshold High */
+       Rdfh            = 0x00002410,   /* Receive data fifo head */
+       Rdft            = 0x00002418,   /* Receive data fifo tail */
+       Rdfhs           = 0x00002420,   /* Receive data fifo head saved */
+       Rdfts           = 0x00002428,   /* Receive data fifo tail saved */
+       Rdfpc           = 0x00002430,   /* Receive data fifo packet count */
+       Rdbal           = 0x00002800,   /* Rd Base Address Low */
+       Rdbah           = 0x00002804,   /* Rd Base Address High */
+       Rdlen           = 0x00002808,   /* Receive Descriptor Length */
+       Rdh             = 0x00002810,   /* Receive Descriptor Head */
+       Rdt             = 0x00002818,   /* Receive Descriptor Tail */
+       Rdtr            = 0x00002820,   /* Receive Descriptor Timer Ring */
+       Rxdctl          = 0x00002828,   /* Receive Descriptor Control */
+       Radv            = 0x0000282C,   /* Receive Interrupt Absolute Delay Timer */
+       Txdmac          = 0x00003000,   /* Transfer DMA Control */
+       Ett             = 0x00003008,   /* Early Transmit Control */
+       Tdfh            = 0x00003410,   /* Transmit data fifo head */
+       Tdft            = 0x00003418,   /* Transmit data fifo tail */
+       Tdfhs           = 0x00003420,   /* Transmit data Fifo Head saved */
+       Tdfts           = 0x00003428,   /* Transmit data fifo tail saved */
+       Tdfpc           = 0x00003430,   /* Transmit data Fifo packet count */
+       Tdbal           = 0x00003800,   /* Td Base Address Low */
+       Tdbah           = 0x00003804,   /* Td Base Address High */
+       Tdlen           = 0x00003808,   /* Transmit Descriptor Length */
+       Tdh             = 0x00003810,   /* Transmit Descriptor Head */
+       Tdt             = 0x00003818,   /* Transmit Descriptor Tail */
+       Tidv            = 0x00003820,   /* Transmit Interrupt Delay Value */
+       Txdctl          = 0x00003828,   /* Transmit Descriptor Control */
+       Tadv            = 0x0000382C,   /* Transmit Interrupt Absolute Delay Timer */
+
+       Statistics      = 0x00004000,   /* Start of Statistics Area */
+       Gorcl           = 0x88/4,       /* Good Octets Received Count */
+       Gotcl           = 0x90/4,       /* Good Octets Transmitted Count */
+       Torl            = 0xC0/4,       /* Total Octets Received */
+       Totl            = 0xC8/4,       /* Total Octets Transmitted */
+       Nstatistics     = 64,
+
+       Rxcsum          = 0x00005000,   /* Receive Checksum Control */
+       Mta             = 0x00005200,   /* Multicast Table Array */
+       Ral             = 0x00005400,   /* Receive Address Low */
+       Rah             = 0x00005404,   /* Receive Address High */
+       Manc            = 0x00005820,   /* Management Control */
+};
+
+/*
+ * Bits and fields of the Ctrl (Device Control) register.
+ * xxxMASK/xxxSHIFT pairs describe multi-bit fields.
+ */
+enum {                                 /* Ctrl */
+       Bem             = 0x00000002,   /* Big Endian Mode */
+       Prior           = 0x00000004,   /* Priority on the PCI bus */
+       Lrst            = 0x00000008,   /* Link Reset */
+       Asde            = 0x00000020,   /* Auto-Speed Detection Enable */
+       Slu             = 0x00000040,   /* Set Link Up */
+       Ilos            = 0x00000080,   /* Invert Loss of Signal (LOS) */
+       SspeedMASK      = 0x00000300,   /* Speed Selection */
+       SspeedSHIFT     = 8,
+       Sspeed10        = 0x00000000,   /* 10Mb/s */
+       Sspeed100       = 0x00000100,   /* 100Mb/s */
+       Sspeed1000      = 0x00000200,   /* 1000Mb/s */
+       Frcspd          = 0x00000800,   /* Force Speed */
+       Frcdplx         = 0x00001000,   /* Force Duplex */
+       SwdpinsloMASK   = 0x003C0000,   /* Software Defined Pins - lo nibble */
+       SwdpinsloSHIFT  = 18,
+       SwdpioloMASK    = 0x03C00000,   /* Software Defined Pins - I or O */
+       SwdpioloSHIFT   = 22,
+       Devrst          = 0x04000000,   /* Device Reset */
+       Rfce            = 0x08000000,   /* Receive Flow Control Enable */
+       Tfce            = 0x10000000,   /* Transmit Flow Control Enable */
+       Vme             = 0x40000000,   /* VLAN Mode Enable */
+};
+
+/*
+ * can't find Tckok nor Rbcok in any Intel docs,
+ * but even 82543gc docs define Lanid.
+ * The two commented-out values (0x4 and 0x8) are exactly the bits
+ * covered by the Lanid mask (0xC).
+ */
+enum {                                 /* Status */
+       Lu              = 0x00000002,   /* Link Up */
+       Lanid           = 0x0000000C,   /* mask for Lan ID. (function id) */
+//     Tckok           = 0x00000004,   /* Transmit clock is running */
+//     Rbcok           = 0x00000008,   /* Receive clock is running */
+       Txoff           = 0x00000010,   /* Transmission Paused */
+       Tbimode         = 0x00000020,   /* TBI Mode Indication */
+       LspeedMASK      = 0x000000C0,   /* Link Speed Setting */
+       LspeedSHIFT     = 6,
+       Lspeed10        = 0x00000000,   /* 10Mb/s */
+       Lspeed100       = 0x00000040,   /* 100Mb/s */
+       Lspeed1000      = 0x00000080,   /* 1000Mb/s */
+       Mtxckok         = 0x00000400,   /* MTX clock is running */
+       Pci66           = 0x00000800,   /* PCI Bus speed indication */
+       Bus64           = 0x00001000,   /* PCI Bus width indication */
+       Pcixmode        = 0x00002000,   /* PCI-X mode */
+       PcixspeedMASK   = 0x0000C000,   /* PCI-X bus speed */
+       PcixspeedSHIFT  = 14,
+       Pcix66          = 0x00000000,   /* 50-66MHz */
+       Pcix100         = 0x00004000,   /* 66-100MHz */
+       Pcix133         = 0x00008000,   /* 100-133MHz */
+};
+
+/* Bits that have the same position in both the Ctrl and Status registers. */
+enum {                                 /* Ctrl and Status */
+       Fd              = 0x00000001,   /* Full-Duplex */
+       AsdvMASK        = 0x00000300,
+       AsdvSHIFT       = 8,
+       Asdv10          = 0x00000000,   /* 10Mb/s */
+       Asdv100         = 0x00000100,   /* 100Mb/s */
+       Asdv1000        = 0x00000200,   /* 1000Mb/s */
+};
+
+/* Bits of the Eecd (EEPROM/Flash Control/Data) register. */
+enum {                                 /* Eecd */
+       Sk              = 0x00000001,   /* Clock input to the EEPROM */
+       Cs              = 0x00000002,   /* Chip Select */
+       Di              = 0x00000004,   /* Data Input to the EEPROM */
+       Do              = 0x00000008,   /* Data Output from the EEPROM */
+       Areq            = 0x00000040,   /* EEPROM Access Request */
+       Agnt            = 0x00000080,   /* EEPROM Access Grant */
+       Eepresent       = 0x00000100,   /* EEPROM Present */
+       Eesz256         = 0x00000200,   /* EEPROM is 256 words not 64 */
+       Eeszaddr        = 0x00000400,   /* EEPROM size for 8254[17] */
+       Spi             = 0x00002000,   /* EEPROM is SPI not Microwire */
+};
+
+/* Bits and fields of the Ctrlext (Extended Device Control) register. */
+enum {                                 /* Ctrlext */
+       Gpien           = 0x0000000F,   /* General Purpose Interrupt Enables */
+       SwdpinshiMASK   = 0x000000F0,   /* Software Defined Pins - hi nibble */
+       SwdpinshiSHIFT  = 4,
+       SwdpiohiMASK    = 0x00000F00,   /* Software Defined Pins - I or O */
+       SwdpiohiSHIFT   = 8,
+       Asdchk          = 0x00001000,   /* ASD Check */
+       Eerst           = 0x00002000,   /* EEPROM Reset */
+       Ips             = 0x00004000,   /* Invert Power State */
+       Spdbyps         = 0x00008000,   /* Speed Select Bypass */
+};
+
+/*
+ * Locations of fields within the EEPROM image.
+ * NOTE(review): presumably in units of 16-bit words - confirm against
+ * the EEPROM read code.
+ */
+enum {                                 /* EEPROM content offsets */
+       Ea              = 0x00,         /* Ethernet Address */
+       Cf              = 0x03,         /* Compatibility Field */
+       Pba             = 0x08,         /* Printed Board Assembly number */
+       Icw1            = 0x0A,         /* Initialization Control Word 1 */
+       Sid             = 0x0B,         /* Subsystem ID */
+       Svid            = 0x0C,         /* Subsystem Vendor ID */
+       Did             = 0x0D,         /* Device ID */
+       Vid             = 0x0E,         /* Vendor ID */
+       Icw2            = 0x0F,         /* Initialization Control Word 2 */
+};
+
+/*
+ * Layout of the Mdic (MDI Control) register: a 16-bit data field, a
+ * PHY register number, a PHY address and the operation/status bits.
+ */
+enum {                                 /* Mdic */
+       MDIdMASK        = 0x0000FFFF,   /* Data */
+       MDIdSHIFT       = 0,
+       MDIrMASK        = 0x001F0000,   /* PHY Register Address */
+       MDIrSHIFT       = 16,
+       MDIpMASK        = 0x03E00000,   /* PHY Address */
+       MDIpSHIFT       = 21,
+       MDIwop          = 0x04000000,   /* Write Operation */
+       MDIrop          = 0x08000000,   /* Read Operation */
+       MDIready        = 0x10000000,   /* End of Transaction */
+       MDIie           = 0x20000000,   /* Interrupt Enable */
+       MDIe            = 0x40000000,   /* Error */
+};
+
+/* Interrupt cause bits; the same bit positions apply to Icr, Ics, Ims and Imc. */
+enum {                                 /* Icr, Ics, Ims, Imc */
+       Txdw            = 0x00000001,   /* Transmit Descriptor Written Back */
+       Txqe            = 0x00000002,   /* Transmit Queue Empty */
+       Lsc             = 0x00000004,   /* Link Status Change */
+       Rxseq           = 0x00000008,   /* Receive Sequence Error */
+       Rxdmt0          = 0x00000010,   /* Rd Minimum Threshold Reached */
+       Rxo             = 0x00000040,   /* Receiver Overrun */
+       Rxt0            = 0x00000080,   /* Receiver Timer Interrupt */
+       Mdac            = 0x00000200,   /* MDIO Access Completed */
+       Rxcfg           = 0x00000400,   /* Receiving /C/ ordered sets */
+       Gpi0            = 0x00000800,   /* General Purpose Interrupts */
+       Gpi1            = 0x00001000,
+       Gpi2            = 0x00002000,
+       Gpi3            = 0x00004000,
+};
+
+/*
+ * The Mdic register isn't implemented on the 82543GC,
+ * the software defined pins are used instead.
+ * These definitions work for the Intel PRO/1000 T Server Adapter.
+ * The direction pin bits are read from the EEPROM.
+ *
+ * Each value is a pin bit shifted into the software-defined-pin field
+ * it belongs to: Mdd/Mddo/Mdc/Mdco use the Ctrl "lo" shifts, Mdr/Mdro
+ * use the Ctrlext "hi" shifts.
+ */
+enum {
+       Mdd             = ((1<<2)<<SwdpinsloSHIFT),     /* data */
+       Mddo            = ((1<<2)<<SwdpioloSHIFT),      /* pin direction */
+       Mdc             = ((1<<3)<<SwdpinsloSHIFT),     /* clock */
+       Mdco            = ((1<<3)<<SwdpioloSHIFT),      /* pin direction */
+       Mdr             = ((1<<0)<<SwdpinshiSHIFT),     /* reset */
+       Mdro            = ((1<<0)<<SwdpiohiSHIFT),      /* pin direction */
+};
+
+/*
+ * Bits and fields of the Txcw (Transmit Configuration Word) register.
+ * NOTE(review): TxcwAne (0x80000000) does not fit in a 32-bit int, so
+ * it relies on the compiler accepting wider enumerator values.
+ */
+enum {                                 /* Txcw */
+       TxcwFd          = 0x00000020,   /* Full Duplex */
+       TxcwHd          = 0x00000040,   /* Half Duplex */
+       TxcwPauseMASK   = 0x00000180,   /* Pause */
+       TxcwPauseSHIFT  = 7,
+       TxcwPs          = (1<<TxcwPauseSHIFT),  /* Pause Supported */
+       TxcwAs          = (2<<TxcwPauseSHIFT),  /* Asymmetric FC desired */
+       TxcwRfiMASK     = 0x00003000,   /* Remote Fault Indication */
+       TxcwRfiSHIFT    = 12,
+       TxcwNpr         = 0x00008000,   /* Next Page Request */
+       TxcwConfig      = 0x40000000,   /* Transmit Config Control */
+       TxcwAne         = 0x80000000,   /* Auto-Negotiation Enable */
+};
+
+/*
+ * Bits of the Rxcw (Receive Configuration Word) register.
+ * NOTE(review): Anc (0x80000000) does not fit in a 32-bit int, so it
+ * relies on the compiler accepting wider enumerator values.
+ */
+enum {                                 /* Rxcw */
+       Rxword          = 0x0000FFFF,   /* Data from auto-negotiation process */
+       Rxnocarrier     = 0x04000000,   /* Carrier Sense indication */
+       Rxinvalid       = 0x08000000,   /* Invalid Symbol during configuration */
+       Rxchange        = 0x10000000,   /* Change to the Rxword indication */
+       Rxconfig        = 0x20000000,   /* /C/ order set reception indication */
+       Rxsync          = 0x40000000,   /* Lost bit synchronization indication */
+       Anc             = 0x80000000,   /* Auto Negotiation Complete */
+};
+
+/*
+ * Bits and fields of the Rctl (Receive Control) register.
+ * The Bsize values are only meaningful together with the Bsex bit:
+ * Bsize1024 and Bsize16384 share the same field encoding and differ
+ * only in whether Bsex is set.
+ */
+enum {                                 /* Rctl */
+       Rrst            = 0x00000001,   /* Receiver Software Reset */
+       Ren             = 0x00000002,   /* Receiver Enable */
+       Sbp             = 0x00000004,   /* Store Bad Packets */
+       Upe             = 0x00000008,   /* Unicast Promiscuous Enable */
+       Mpe             = 0x00000010,   /* Multicast Promiscuous Enable */
+       Lpe             = 0x00000020,   /* Long Packet Reception Enable */
+       LbmMASK         = 0x000000C0,   /* Loopback Mode */
+       LbmOFF          = 0x00000000,   /* No Loopback */
+       LbmTBI          = 0x00000040,   /* TBI Loopback */
+       LbmMII          = 0x00000080,   /* GMII/MII Loopback */
+       LbmXCVR         = 0x000000C0,   /* Transceiver Loopback */
+       RdtmsMASK       = 0x00000300,   /* Rd Minimum Threshold Size */
+       RdtmsHALF       = 0x00000000,   /* Threshold is 1/2 Rdlen */
+       RdtmsQUARTER    = 0x00000100,   /* Threshold is 1/4 Rdlen */
+       RdtmsEIGHTH     = 0x00000200,   /* Threshold is 1/8 Rdlen */
+       MoMASK          = 0x00003000,   /* Multicast Offset */
+       Mo47b36         = 0x00000000,   /* bits [47:36] of received address */
+       Mo46b35         = 0x00001000,   /* bits [46:35] of received address */
+       Mo45b34         = 0x00002000,   /* bits [45:34] of received address */
+       Mo43b32         = 0x00003000,   /* bits [43:32] of received address */
+       Bam             = 0x00008000,   /* Broadcast Accept Mode */
+       BsizeMASK       = 0x00030000,   /* Receive Buffer Size */
+       Bsize2048       = 0x00000000,   /* Bsex = 0 */
+       Bsize1024       = 0x00010000,   /* Bsex = 0 */
+       Bsize512        = 0x00020000,   /* Bsex = 0 */
+       Bsize256        = 0x00030000,   /* Bsex = 0 */
+       Bsize16384      = 0x00010000,   /* Bsex = 1 */
+       Vfe             = 0x00040000,   /* VLAN Filter Enable */
+       Cfien           = 0x00080000,   /* Canonical Form Indicator Enable */
+       Cfi             = 0x00100000,   /* Canonical Form Indicator value */
+       Dpf             = 0x00400000,   /* Discard Pause Frames */
+       Pmcf            = 0x00800000,   /* Pass MAC Control Frames */
+       Bsex            = 0x02000000,   /* Buffer Size Extension */
+       Secrc           = 0x04000000,   /* Strip CRC from incoming packet */
+};
+
+/* Bits and fields of the Tctl (Transmit Control) register. */
+enum {                                 /* Tctl */
+       Trst            = 0x00000001,   /* Transmitter Software Reset */
+       Ten             = 0x00000002,   /* Transmit Enable */
+       Psp             = 0x00000008,   /* Pad Short Packets */
+       CtMASK          = 0x00000FF0,   /* Collision Threshold */
+       CtSHIFT         = 4,
+       ColdMASK        = 0x003FF000,   /* Collision Distance */
+       ColdSHIFT       = 12,
+       Swxoff          = 0x00400000,   /* Software XOFF Transmission */
+       Pbe             = 0x00800000,   /* Packet Burst Enable */
+       Rtlc            = 0x01000000,   /* Re-transmit on Late Collision */
+       Nrtu            = 0x02000000,   /* No Re-transmit on Underrun */
+};
+
+/*
+ * Fields shared by the Rxdctl and Txdctl descriptor control registers.
+ * NOTE(review): LthreshMASK (0xFE000000) does not fit in a 32-bit int,
+ * so it relies on the compiler accepting wider enumerator values.
+ */
+enum {                                 /* [RT]xdctl */
+       PthreshMASK     = 0x0000003F,   /* Prefetch Threshold */
+       PthreshSHIFT    = 0,
+       HthreshMASK     = 0x00003F00,   /* Host Threshold */
+       HthreshSHIFT    = 8,
+       WthreshMASK     = 0x003F0000,   /* Writeback Threshold */
+       WthreshSHIFT    = 16,
+       Gran            = 0x01000000,   /* Granularity */
+       LthreshMASK     = 0xFE000000,   /* Low Threshold */
+       LthreshSHIFT    = 25,
+};
+
+/* Bits and fields of the Rxcsum (Receive Checksum Control) register. */
+enum {                                 /* Rxcsum */
+       PcssMASK        = 0x000000FF,   /* Packet Checksum Start */
+       PcssSHIFT       = 0,
+       Ipofl           = 0x00000100,   /* IP Checksum Off-load Enable */
+       Tuofl           = 0x00000200,   /* TCP/UDP Checksum Off-load Enable */
+};
+
+/* Bits of the Manc (Management Control) register; only one is defined here. */
+enum {                                 /* Manc */
+       Arpen           = 0x00002000,   /* Enable ARP Request Filtering */
+};
+
+/*
+ * Fields of the receive delay timer register (presumably Rdtr).
+ * NOTE(review): Fpd (0x80000000) does not fit in a 32-bit int, so it
+ * relies on the compiler accepting wider enumerator values.
+ */
+enum {                                 /* Receive Delay Timer Ring */
+       DelayMASK       = 0x0000FFFF,   /* delay timer in 1.024us increments */
+       DelaySHIFT      = 0,
+       Fpd             = 0x80000000,   /* Flush partial Descriptor Block */
+};
+
+/*
+ * Receive descriptor.  The fields add up to 16 bytes when unsigned int
+ * is 32 bits wide.  addr[] holds the buffer address as two words
+ * (presumably low word first - TODO confirm); status and errors take
+ * the "Rd status" and "Rd errors" bits defined below.
+ */
+typedef struct Rd {                    /* Receive Descriptor */
+       unsigned int    addr[2];
+       uint16_t        length;
+       uint16_t        checksum;
+       uint8_t status;
+       uint8_t errors;
+       uint16_t        special;
+} Rd;
+
+/* Bits of the status byte of a receive descriptor (Rd.status). */
+enum {                                 /* Rd status */
+       Rdd             = 0x01,         /* Descriptor Done */
+       Reop            = 0x02,         /* End of Packet */
+       Ixsm            = 0x04,         /* Ignore Checksum Indication */
+       Vp              = 0x08,         /* Packet is 802.1Q (matched VET) */
+       Tcpcs           = 0x20,         /* TCP Checksum Calculated on Packet */
+       Ipcs            = 0x40,         /* IP Checksum Calculated on Packet */
+       Pif             = 0x80,         /* Passed in-exact filter */
+};
+
+enum {                                 /* Rd errors */
+       Ce              = 0x01,         /* CRC Error or Alignment Error */
+       Se              = 0x02,         /* Symbol Error */
+       Seq             = 0x04,         /* Sequence Error */
+       Cxe             = 0x10,         /* Carrier Extension Error */
+       Tcpe            = 0x20,         /* TCP/UDP Checksum Error */
+       Ipe             = 0x40,         /* IP Checksum Error */
+       Rxe             = 0x80,         /* RX Data Error */
+};
+
+/*
+ * Transmit descriptor, one entry of the ring at ctlr->tdba.
+ * The first 8 bytes are either a buffer address (data descriptor) or the
+ * checksum-offload context fields (context descriptor); igbetransmit only
+ * builds data descriptors.
+ */
+typedef struct Td Td;
+struct Td {                            /* Transmit Descriptor */
+       union {
+               unsigned int    addr[2];        /* Data */
+               struct {                /* Context */
+                       uint8_t ipcss;
+                       uint8_t ipcso;
+                       uint16_t        ipcse;
+                       uint8_t tucss;
+                       uint8_t tucso;
+                       uint16_t        tucse;
+               };
+       };
+       unsigned int    control;        /* 'Td control' bits, including the length */
+       unsigned int    status;         /* 'Td status' bits */
+};
+
+/*
+ * Bits of Td.control.  Some values are shared between two names; the
+ * (CD)/(DD) notes say whether the name applies to a context descriptor or a
+ * data descriptor.
+ */
+enum {                                 /* Td control */
+       LenMASK         = 0x000FFFFF,   /* Data/Packet Length Field */
+       LenSHIFT        = 0,
+       DtypeCD         = 0x00000000,   /* Data Type 'Context Descriptor' */
+       DtypeDD         = 0x00100000,   /* Data Type 'Data Descriptor' */
+       PtypeTCP        = 0x01000000,   /* TCP/UDP Packet Type (CD) */
+       Teop            = 0x01000000,   /* End of Packet (DD) */
+       PtypeIP         = 0x02000000,   /* IP Packet Type (CD) */
+       Ifcs            = 0x02000000,   /* Insert FCS (DD) */
+       Tse             = 0x04000000,   /* TCP Segmentation Enable */
+       Rs              = 0x08000000,   /* Report Status */
+       Rps             = 0x10000000,   /* Report Status Sent */
+       Dext            = 0x20000000,   /* Descriptor Extension */
+       Vle             = 0x40000000,   /* VLAN Packet Enable */
+       Ide             = 0x80000000,   /* Interrupt Delay Enable */
+};
+
+/*
+ * Bits of Td.status.  Several fields overlap (e.g. Iixsm/Itxsm with
+ * HdrlenMASK, VlanMASK with MssMASK); presumably which one applies depends
+ * on the descriptor type -- confirm against the datasheet.
+ */
+enum {                                 /* Td status */
+       Tdd             = 0x00000001,   /* Descriptor Done */
+       Ec              = 0x00000002,   /* Excess Collisions */
+       Lc              = 0x00000004,   /* Late Collision */
+       Tu              = 0x00000008,   /* Transmit Underrun */
+       Iixsm           = 0x00000100,   /* Insert IP Checksum */
+       Itxsm           = 0x00000200,   /* Insert TCP/UDP Checksum */
+       HdrlenMASK      = 0x0000FF00,   /* Header Length (Tse) */
+       HdrlenSHIFT     = 8,
+       VlanMASK        = 0x0FFF0000,   /* VLAN Identifier */
+       VlanSHIFT       = 16,
+       Tcfi            = 0x10000000,   /* Canonical Form Indicator */
+       PriMASK         = 0xE0000000,   /* User Priority */
+       PriSHIFT        = 29,
+       MssMASK         = 0xFFFF0000,   /* Maximum Segment Size (Tse) */
+       MssSHIFT        = 16,
+};
+
+/* Ring and buffer sizing used by igbeattach. */
+enum {
+       Nrd             = 256,          /* multiple of 8 */
+       Ntd             = 64,           /* multiple of 8 */
+       Nrb             = 1024,         /* private receive buffers per Ctlr */
+       Rbsz            = 2048,         /* size of each receive buffer, in bytes */
+};
+
+/*
+ * Per-controller driver state.  Controllers are chained through 'next'
+ * (see igbectlrhead/igbectlrtail); an attached controller points back at its
+ * struct ether through 'edev'.
+ */
+struct ctlr {
+       int     port;
+       struct pci_device *pci;
+       struct ctlr*    next;
+       struct ether*   edev;           /* set by igbeattach */
+       int     active;
+       int     started;
+       int     id;                     /* chip variant (i82543gc, i82540em, ...) */
+       int     cls;
+       uint16_t        eeprom[0x40];   /* EEPROM words, dumped by igbeifstat */
+
+       qlock_t alock;                  /* attach */
+       void*   alloc;                  /* receive/transmit descriptors */
+       int     nrd;
+       int     ntd;
+       int     nrb;                    /* how many this Ctlr has in the pool */
+
+       int*    nic;                    /* register base, indexed in 32-bit words by csr32r/csr32w */
+       spinlock_t      imlock;         /* protects im and the Ims/Imc writes */
+       int     im;                     /* interrupt mask */
+
+       struct mii*     mii;
+       struct rendez   lrendez;        /* igbelproc sleeps here until lim != 0 */
+       int     lim;                    /* link cause latched by igbeinterrupt */
+
+       int     link;
+
+       qlock_t slock;                  /* held by igbeifstat */
+       unsigned int    statistics[Nstatistics];        /* accumulated hardware counters */
+       unsigned int    lsleep;         /* times igbelproc went to sleep */
+       unsigned int    lintr;          /* link interrupts seen */
+       unsigned int    rsleep;         /* times igberproc went to sleep */
+       unsigned int    rintr;          /* receive interrupts seen */
+       unsigned int    txdw;           /* descriptors queued with Rs set */
+       unsigned int    tintr;          /* transmit interrupts seen */
+       unsigned int    ixsm;           /* packets with Ixsm clear */
+       unsigned int    ipcs;           /* packets with Ipcs set */
+       unsigned int    tcpcs;          /* packets with Tcpcs set */
+
+       uint8_t ra[Eaddrlen];           /* receive address */
+       uint32_t        mta[128];               /* multicast table array */
+
+       struct rendez   rrendez;        /* igberproc sleeps here until rim != 0 */
+       int     rim;                    /* receive causes latched by igbeinterrupt */
+       int     rdfree;
+       Rd*     rdba;                   /* receive descriptor base address */
+       struct block**  rb;                     /* receive buffers */
+       int     rdh;                    /* receive descriptor head */
+       int     rdt;                    /* receive descriptor tail */
+       int     rdtr;                   /* receive delay timer ring value */
+
+       spinlock_t      tlock;          /* held by igbetransmit */
+       int     tbusy;
+       int     tdfree;
+       Td*     tdba;                   /* transmit descriptor base address */
+       struct block**  tb;                     /* transmit buffers */
+       int     tdh;                    /* transmit descriptor head */
+       int     tdt;                    /* transmit descriptor tail */
+
+       int     txcw;
+       int     fcrtl;
+       int     fcrth;
+};
+
+/*
+ * 32-bit device register access.  'c' is a struct ctlr pointer and 'r' is the
+ * register's byte offset from the start of the register window (c->nic).
+ * The access goes through a volatile-qualified pointer so the compiler
+ * performs every read and write, in program order; without it, back-to-back
+ * writes to the same register (as used for MDIO clocking) or repeated polls
+ * could be merged or dropped.
+ */
+#define csr32r(c, r)   (*((volatile int *)((c)->nic) + ((r)/4)))
+#define csr32w(c, r, v)        (*((volatile int *)((c)->nic) + ((r)/4)) = (v))
+
+/* head and tail of the list of controllers, linked through ctlr->next */
+static struct ctlr* igbectlrhead;
+static struct ctlr* igbectlrtail;
+
+/* lock for igberbpool (free receive Blocks) */
+static spinlock_t igberblock = SPINLOCK_INITIALIZER_IRQSAVE;
+static struct block* igberbpool;       /* receive Blocks for all igbe controllers */
+
+/*
+ * Printable names for the statistics registers, indexed by register number
+ * (igbeifstat reads register i at Statistics+i*4).  A NULL entry is skipped
+ * by igbeifstat; presumably these are reserved registers or the high words
+ * of the 64-bit octet counters.
+ */
+static char* statistics[Nstatistics] = {
+       "CRC Error",
+       "Alignment Error",
+       "Symbol Error",
+       "RX Error",
+       "Missed Packets",
+       "Single Collision",
+       "Excessive Collisions",
+       "Multiple Collision",
+       "Late Collisions",
+       NULL,
+       "Collision",
+       "Transmit Underrun",
+       "Defer",
+       "Transmit - No CRS",
+       "Sequence Error",
+       "Carrier Extension Error",
+       "Receive Error Length",
+       NULL,
+       "XON Received",
+       "XON Transmitted",
+       "XOFF Received",
+       "XOFF Transmitted",
+       "FC Received Unsupported",
+       "Packets Received (64 Bytes)",
+       "Packets Received (65-127 Bytes)",
+       "Packets Received (128-255 Bytes)",
+       "Packets Received (256-511 Bytes)",
+       "Packets Received (512-1023 Bytes)",
+       "Packets Received (1024-1522 Bytes)",
+       "Good Packets Received",
+       "Broadcast Packets Received",
+       "Multicast Packets Received",
+       "Good Packets Transmitted",
+       NULL,
+       "Good Octets Received",
+       NULL,
+       "Good Octets Transmitted",
+       NULL,
+       NULL,
+       NULL,
+       "Receive No Buffers",
+       "Receive Undersize",
+       "Receive Fragment",
+       "Receive Oversize",
+       "Receive Jabber",
+       NULL,
+       NULL,
+       NULL,
+       "Total Octets Received",
+       NULL,
+       "Total Octets Transmitted",
+       NULL,
+       "Total Packets Received",
+       "Total Packets Transmitted",
+       "Packets Transmitted (64 Bytes)",
+       "Packets Transmitted (65-127 Bytes)",
+       "Packets Transmitted (128-255 Bytes)",
+       "Packets Transmitted (256-511 Bytes)",
+       "Packets Transmitted (512-1023 Bytes)",
+       "Packets Transmitted (1024-1522 Bytes)",
+       "Multicast Packets Transmitted",
+       "Broadcast Packets Transmitted",
+       "TCP Segmentation Context Transmitted",
+       "TCP Segmentation Context Fail",
+};
+
+/*
+ * Keep the running output length inside the READSTR-byte scratch buffer.
+ * snprintf may report the length it wanted to write rather than what fit;
+ * without the clamp READSTR-l could go negative on the next call.
+ */
+static int
+igbeifstatclamp(int l)
+{
+       return l < READSTR ? l : READSTR-1;
+}
+
+/*
+ * Format the controller statistics, interrupt counters, EEPROM contents and
+ * (when a PHY is present) the MII registers as text, then copy the part
+ * selected by offset/n into 'a' with readstr().
+ *
+ * Every statistics register is read and added to ctlr->statistics[]; each
+ * line shows the running total followed by the value just read.  Gorcl,
+ * Gotcl, Torl and Totl are 64-bit counters held in two consecutive 32-bit
+ * registers, low word first.
+ *
+ * Returns whatever readstr() returns.  Raises Enomem if the scratch buffer
+ * cannot be allocated.
+ */
+static long
+igbeifstat(struct ether* edev, void* a, long n, uint32_t offset)
+{
+       struct ctlr *ctlr;
+       char *p, *s;
+       int i, l, r;
+       uint64_t tuvl, ruvl;
+
+       ctlr = edev->ctlr;
+       qlock(&ctlr->slock);
+       p = kzmalloc(READSTR, 0);
+       if(p == NULL) {
+               qunlock(&ctlr->slock);
+               error(Enomem);
+       }
+       l = 0;
+       for(i = 0; i < Nstatistics; i++){
+               /* read even the unnamed registers, as the original did */
+               r = csr32r(ctlr, Statistics+i*4);
+               if((s = statistics[i]) == NULL)
+                       continue;
+               switch(i){
+               case Gorcl:
+               case Gotcl:
+               case Torl:
+               case Totl:
+                       /*
+                        * r is a signed int: widen through uint32_t so a set
+                        * top bit is not sign-extended into the high word.
+                        */
+                       ruvl = (uint32_t)r;
+                       ruvl += ((uint64_t)(uint32_t)csr32r(ctlr, Statistics+(i+1)*4))<<32;
+                       tuvl = ruvl;
+                       tuvl += ctlr->statistics[i];
+                       tuvl += ((uint64_t)ctlr->statistics[i+1])<<32;
+                       if(tuvl == 0)
+                               continue;
+                       ctlr->statistics[i] = tuvl;
+                       ctlr->statistics[i+1] = tuvl>>32;
+                       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l,
+                               "%s: %llu %llu\n", s,
+                               (unsigned long long)tuvl,
+                               (unsigned long long)ruvl));
+                       /* the high word has been consumed; skip it */
+                       i++;
+                       break;
+
+               default:
+                       ctlr->statistics[i] += r;
+                       if(ctlr->statistics[i] == 0)
+                               continue;
+                       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l,
+                               "%s: %u %u\n",
+                               s, ctlr->statistics[i], (unsigned int)r));
+                       break;
+               }
+       }
+
+       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "lintr: %u %u\n",
+               ctlr->lintr, ctlr->lsleep));
+       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "rintr: %u %u\n",
+               ctlr->rintr, ctlr->rsleep));
+       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "tintr: %u %u\n",
+               ctlr->tintr, ctlr->txdw));
+       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "ixcs: %u %u %u\n",
+               ctlr->ixsm, ctlr->ipcs, ctlr->tcpcs));
+       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "rdtr: %d\n",
+               ctlr->rdtr));
+       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "Ctrlext: %08x\n",
+               csr32r(ctlr, Ctrlext)));
+
+       /* EEPROM dump, eight 16-bit words per line */
+       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "eeprom:"));
+       for(i = 0; i < 0x40; i++){
+               if(i && ((i & 0x07) == 0))
+                       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l,
+                               "\n       "));
+               l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, " %04x",
+                       ctlr->eeprom[i]));
+       }
+       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "\n"));
+
+       /* PHY register dump, eight registers per line */
+       if(ctlr->mii != NULL && ctlr->mii->curphy != NULL){
+               l = igbeifstatclamp(l + snprintf(p+l, READSTR-l, "phy:   "));
+               for(i = 0; i < NMiiPhyr; i++){
+                       if(i && ((i & 0x07) == 0))
+                               l = igbeifstatclamp(l + snprintf(p+l,
+                                       READSTR-l, "\n       "));
+                       r = miimir(ctlr->mii, i);
+                       l = igbeifstatclamp(l + snprintf(p+l, READSTR-l,
+                               " %04x", r));
+               }
+               snprintf(p+l, READSTR-l, "\n");
+       }
+       n = readstr(offset, a, n, p);
+       kfree(p);
+       qunlock(&ctlr->slock);
+
+       return n;
+}
+
+/* Indices of the control messages understood by igbectl. */
+enum {
+       CMrdtr,
+};
+
+/* Command table passed to lookupcmd(): index, command word, field count. */
+static struct cmdtab igbectlmsg[] = {
+       {CMrdtr,        "rdtr", 2},
+};
+
+/*
+ * Handle a control message written to the device.
+ *
+ * The only command is "rdtr <value>", which stores <value> (0..0xFFFF) as
+ * the receive delay timer and writes it to the Rdtr register together with
+ * Fpd.
+ *
+ * Returns n on success.  Raises Enonexist if the device has no controller and
+ * Ebadarg for a missing or out-of-range value; lookupcmd() raises for
+ * anything it does not find in igbectlmsg.
+ */
+static long
+igbectl(struct ether* edev, void* buf, long n)
+{
+       ERRSTACK(2);
+       long v;
+       char *p;
+       struct ctlr *ctlr;
+       struct cmdbuf *cb;
+       struct cmdtab *ct;
+
+       if((ctlr = edev->ctlr) == NULL)
+               error(Enonexist);
+
+       cb = parsecmd(buf, n);
+       if(waserror()){
+               kfree(cb);
+               nexterror();
+       }
+
+       ct = lookupcmd(cb, igbectlmsg, ARRAY_SIZE(igbectlmsg));
+       switch(ct->index){
+       case CMrdtr:
+               /*
+                * Range-check the full strtol result: narrowing to int first
+                * would let a value such as 0x100000001 slip through as 1.
+                */
+               v = strtol(cb->f[1], &p, 0);
+               if(p == cb->f[1] || v < 0 || v > 0xFFFF)
+                       error(Ebadarg);
+               ctlr->rdtr = v;
+               csr32w(ctlr, Rdtr, Fpd|ctlr->rdtr);
+               break;
+       }
+       kfree(cb);
+       poperror();
+
+       return n;
+}
+
+/*
+ * Turn promiscuous reception on or off.
+ * arg is the struct ether for the interface.  The multicast-offset field of
+ * Rctl is always reset to Mo47b36; Upe and Mpe follow 'on', except that Mpe
+ * is forced on in the value actually written (see the note below).
+ */
+static void
+igbepromiscuous(void* arg, int on)
+{
+       struct ether *ether = arg;
+       struct ctlr *c = ether->ctlr;
+       int val;
+
+       val = (csr32r(c, Rctl) & ~MoMASK) | Mo47b36;
+       val = on ? (val | (Upe|Mpe)) : (val & ~(Upe|Mpe));
+
+       /* temporarily keep Mpe on */
+       csr32w(c, Rctl, val|Mpe);
+}
+
+/*
+ * Add a multicast address to the hardware filter.
+ * arg is the struct ether for the interface and addr the 6-byte address.
+ * The top 12 bits of the address's last two bytes select one bit of the
+ * 128-word multicast table: addr[5]>>1 picks the word and the remaining five
+ * bits pick the bit within it.
+ *
+ * Removal (add == 0) only rewrites the current table word: multiple ether
+ * addresses can hash to the same filter bit, so it's never safe to clear a
+ * filter bit.  To clear bits we would need to keep track of all the
+ * multicast addresses in use, clear all the filter bits, then set the ones
+ * corresponding to in-use addresses.
+ */
+static void
+igbemulticast(void* arg, uint8_t* addr, int add)
+{
+       int bit, x;
+       struct ctlr *ctlr;
+       struct ether *edev;
+
+       edev = arg;
+       ctlr = edev->ctlr;
+
+       x = addr[5]>>1;
+       bit = ((addr[5] & 1)<<4)|(addr[4]>>4);
+
+       /* bit can be 31: shift an unsigned 1 so the result is well defined */
+       if(add)
+               ctlr->mta[x] |= 1U<<bit;
+
+       csr32w(ctlr, Mta+x*4, ctlr->mta[x]);
+}
+
+/*
+ * Pop one receive Block off the shared free pool.
+ * Returns the Block with its next pointer cleared, or NULL when the pool is
+ * empty.  No extra reference is taken on the Block.
+ */
+static struct block*
+igberballoc(void)
+{
+       struct block *head;
+
+       ilock(&igberblock);
+       head = igberbpool;
+       if(head != NULL){
+               igberbpool = head->next;
+               head->next = NULL;
+       }
+       iunlock(&igberblock);
+
+       return head;
+}
+
+/*
+ * Free routine installed on receive Blocks (bp->free): instead of releasing
+ * the memory, reset the Block to empty and push it back on the shared pool.
+ */
+static void
+igberbfree(struct block* bp)
+{
+       /* empty buffer with Rbsz bytes of room before lim */
+       bp->wp = bp->rp = bp->lim - Rbsz;
+       /* forget any checksum state left from the previous packet */
+       bp->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
+
+       ilock(&igberblock);
+       bp->next = igberbpool;
+       igberbpool = bp;
+       iunlock(&igberblock);
+}
+
+/*
+ * Enable the interrupt causes in 'im' in addition to those already enabled:
+ * the new mask is recorded in ctlr->im and written to Ims, under imlock.
+ */
+static void
+igbeim(struct ctlr* ctlr, int im)
+{
+       int enabled;
+
+       ilock(&ctlr->imlock);
+       enabled = ctlr->im | im;
+       ctlr->im = enabled;
+       csr32w(ctlr, Ims, enabled);
+       iunlock(&ctlr->imlock);
+}
+
+/* rendez_sleep condition for igbelproc: true once a link cause is latched. */
+static int
+igbelim(void* ctlr)
+{
+       struct ctlr *c = ctlr;
+
+       return c->lim != 0;
+}
+
+/*
+ * Link-management kernel task, started by igbeattach with the struct ether
+ * as arg.  Each pass reads the PHY state via miistatus(), programs Ctrl
+ * (speed/duplex on the chips that need it, flow control) and Tctl
+ * (collision distance) to match, then re-enables the Lsc interrupt and
+ * sleeps until igbeinterrupt latches a link cause in ctlr->lim.
+ * The task exits, freeing its own name, if there is no mii or current PHY.
+ */
+static void
+igbelproc(void* arg)
+{
+       struct ctlr *ctlr;
+       struct ether *edev;
+       struct miiphy *phy;
+       int ctrl, r;
+
+       edev = arg;
+       ctlr = edev->ctlr;
+       for(;;){
+               /* plan9 originally had a busy loop here (just called continue).  though
+                * either you have the mii or you don't.  i don't think it'll magically
+                * show up later (it should have been initialized during pnp/pci, which
+                * is before attach, which is before lproc).  -brho */
+               if (ctlr->mii == NULL || ctlr->mii->curphy == NULL) {
+                       printk("[kernel] igbelproc can't find a mii/curphy, aborting!\n");
+                       /* name alloc'd in attach */
+                       kfree(per_cpu_info[core_id()].cur_kthread->name);
+                       return;
+               }
+               /*
+                * To do:
+                *      logic to manage status change,
+                *      this is incomplete but should work
+                *      one time to set up the hardware.
+                *
+                *      MiiPhy.speed, etc. should be in Mii.
+                */
+               /* on failure skip the reprogramming, but still re-arm and sleep */
+               if(miistatus(ctlr->mii) < 0)
+                       //continue;     /* this comment out was plan9, not brho */
+                       goto enable;
+
+               phy = ctlr->mii->curphy;
+               ctrl = csr32r(ctlr, Ctrl);
+
+               switch(ctlr->id){
+               case i82543gc:
+               case i82544ei:
+               case i82544eif:
+               default:
+                       /* unless Asde is set, force speed and duplex from the PHY */
+                       if(!(ctrl & Asde)){
+                               ctrl &= ~(SspeedMASK|Ilos|Fd);
+                               ctrl |= Frcdplx|Frcspd;
+                               if(phy->speed == 1000)
+                                       ctrl |= Sspeed1000;
+                               else if(phy->speed == 100)
+                                       ctrl |= Sspeed100;
+                               if(phy->fd)
+                                       ctrl |= Fd;
+                       }
+                       break;
+
+               /* these chips get no speed/duplex override */
+               case i82540em:
+               case i82540eplp:
+               case i82547gi:
+               case i82541gi:
+               case i82541gi2:
+               case i82541pi:
+                       break;
+               }
+
+               /*
+                * Collision Distance.
+                */
+               r = csr32r(ctlr, Tctl);
+               r &= ~ColdMASK;
+               if(phy->fd)
+                       r |= 64<<ColdSHIFT;
+               else
+                       r |= 512<<ColdSHIFT;
+               csr32w(ctlr, Tctl, r);
+
+               /*
+                * Flow control.
+                */
+               if(phy->rfc)
+                       ctrl |= Rfce;
+               if(phy->tfc)
+                       ctrl |= Tfce;
+               csr32w(ctlr, Ctrl, ctrl);
+
+enable:
+               /* clear the latched cause before unmasking Lsc, then wait */
+               ctlr->lim = 0;
+               igbeim(ctlr, Lsc);
+
+               ctlr->lsleep++;
+               rendez_sleep(&ctlr->lrendez, igbelim, ctlr);
+       }
+}
+
+/*
+ * Initialise the transmit unit: Tctl, the inter-packet gap (chip
+ * dependent), the descriptor ring base/length/head/tail registers, the
+ * interrupt delay timers and the Txdctl thresholds.  Any Blocks still held
+ * in ctlr->tb are freed and all descriptors zeroed.  Transmission is enabled
+ * (Ten) as the last step.
+ */
+static void
+igbetxinit(struct ctlr* ctlr)
+{
+       int i, r;
+       struct block *bp;
+
+       csr32w(ctlr, Tctl, (0x0F<<CtSHIFT)|Psp|(66<<ColdSHIFT));
+       /* r is the low field of Tipg: 8 for the listed chips, 6 otherwise */
+       switch(ctlr->id){
+       default:
+               r = 6;
+               break;
+       case i82543gc:
+       case i82544ei:
+       case i82544eif:
+       case i82544gc:
+       case i82540em:
+       case i82540eplp:
+       case i82541ei:
+       case i82541gi:
+       case i82541gi2:
+       case i82541pi:
+       case i82545em:
+       case i82545gmc:
+       case i82546gb:
+       case i82546eb:
+       case i82547ei:
+       case i82547gi:
+               r = 8;
+               break;
+       }
+       csr32w(ctlr, Tipg, (6<<20)|(8<<10)|r);
+       csr32w(ctlr, Ait, 0);
+       csr32w(ctlr, Txdmac, 0);
+       csr32w(ctlr, Tdbal, paddr_low32(ctlr->tdba));
+       csr32w(ctlr, Tdbah, paddr_high32(ctlr->tdba));
+       csr32w(ctlr, Tdlen, ctlr->ntd*sizeof(Td));
+       /*
+        * The software head starts one slot behind the hardware head:
+        * igbetransmit reclaims slots only while NEXT_RING(tdh) differs from
+        * the hardware Tdh.
+        */
+       ctlr->tdh = PREV_RING(0, ctlr->ntd);
+       csr32w(ctlr, Tdh, 0);
+       ctlr->tdt = 0;
+       csr32w(ctlr, Tdt, 0);
+
+       /* drop anything left over from a previous run */
+       for(i = 0; i < ctlr->ntd; i++){
+               if((bp = ctlr->tb[i]) != NULL){
+                       ctlr->tb[i] = NULL;
+                       freeb(bp);
+               }
+               memset(&ctlr->tdba[i], 0, sizeof(Td));
+       }
+       ctlr->tdfree = ctlr->ntd;
+
+       csr32w(ctlr, Tidv, 128);
+       r = (4<<WthreshSHIFT)|(4<<HthreshSHIFT)|(8<<PthreshSHIFT);
+
+       /* the listed chips instead modify the current Txdctl and also set Tadv */
+       switch(ctlr->id){
+       default:
+               break;
+       case i82540em:
+       case i82540eplp:
+       case i82547gi:
+       case i82545em:
+       case i82545gmc:
+       case i82546gb:
+       case i82546eb:
+       case i82541gi:
+       case i82541gi2:
+       case i82541pi:
+               r = csr32r(ctlr, Txdctl);
+               r &= ~WthreshMASK;
+               r |= Gran|(4<<WthreshSHIFT);
+
+               csr32w(ctlr, Tadv, 64);
+               break;
+       }
+
+       csr32w(ctlr, Txdctl, r);
+
+       r = csr32r(ctlr, Tctl);
+       r |= Ten;
+       csr32w(ctlr, Tctl, r);
+}
+
+/*
+ * Transmit entry point, also called from igbeinterrupt on Txdw.
+ * Under tlock it first reclaims descriptors the hardware has finished with
+ * (freeing their Blocks), then moves Blocks from edev->oq into free
+ * descriptors and advances the hardware tail after each one.  When the ring
+ * becomes full, the last descriptor queued asks for a status report (Rs) and
+ * the Txdw interrupt is enabled so the ring gets drained later.
+ */
+static void
+igbetransmit(struct ether* edev)
+{
+       Td *td;
+       struct block *bp;
+       struct ctlr *ctlr;
+       int tdh, tdt;
+
+       ctlr = edev->ctlr;
+
+       ilock(&ctlr->tlock);
+
+       /*
+        * Free any completed packets
+        */
+       tdh = ctlr->tdh;
+       while(NEXT_RING(tdh, ctlr->ntd) != csr32r(ctlr, Tdh)){
+               if((bp = ctlr->tb[tdh]) != NULL){
+                       ctlr->tb[tdh] = NULL;
+                       freeb(bp);
+               }
+               memset(&ctlr->tdba[tdh], 0, sizeof(Td));
+               tdh = NEXT_RING(tdh, ctlr->ntd);
+       }
+       ctlr->tdh = tdh;
+
+       /*
+        * Try to fill the ring back up.
+        */
+       tdt = ctlr->tdt;
+       while(NEXT_RING(tdt, ctlr->ntd) != tdh){
+               if((bp = qget(edev->oq)) == NULL)
+                       break;
+               td = &ctlr->tdba[tdt];
+               td->addr[0] = paddr_low32(bp->rp);
+               td->addr[1] = paddr_high32(bp->rp);
+               td->control = ((BLEN(bp) & LenMASK)<<LenSHIFT);
+               td->control |= Dext|Ifcs|Teop|DtypeDD;
+               ctlr->tb[tdt] = bp;
+               tdt = NEXT_RING(tdt, ctlr->ntd);
+               /* ring is now full: request a report and arm the interrupt */
+               if(NEXT_RING(tdt, ctlr->ntd) == tdh){
+                       td->control |= Rs;
+                       ctlr->txdw++;
+                       ctlr->tdt = tdt;
+                       csr32w(ctlr, Tdt, tdt);
+                       igbeim(ctlr, Txdw);
+                       break;
+               }
+               ctlr->tdt = tdt;
+               csr32w(ctlr, Tdt, tdt);
+       }
+
+       iunlock(&ctlr->tlock);
+}
+
+/*
+ * Give receive descriptors back to the hardware.
+ * Walks from the software tail towards the head; each slot without a Block
+ * gets one from the pool and has its address written into the descriptor.
+ * Every slot passed has its status cleared and counts towards rdfree.
+ * Stops early if the pool runs dry.  The new tail is then written to Rdt.
+ */
+static void
+igbereplenish(struct ctlr* ctlr)
+{
+       Rd *rd;
+       int rdt;
+       struct block *bp;
+
+       rdt = ctlr->rdt;
+       while(NEXT_RING(rdt, ctlr->nrd) != ctlr->rdh){
+               rd = &ctlr->rdba[rdt];
+               if(ctlr->rb[rdt] == NULL){
+                       bp = igberballoc();
+                       if(bp == NULL){
+                               /* needs to be a safe print for interrupt level */
+                               printk("#l%d: igbereplenish: no available buffers\n",
+                                       ctlr->edev->ctlrno);
+                               break;
+                       }
+                       ctlr->rb[rdt] = bp;
+                       rd->addr[0] = paddr_low32(bp->rp);
+                       rd->addr[1] = paddr_high32(bp->rp);
+               }
+               wmb();  /* ensure prev rd writes come before status = 0. */
+               rd->status = 0;
+               rdt = NEXT_RING(rdt, ctlr->nrd);
+               ctlr->rdfree++;
+       }
+       ctlr->rdt = rdt;
+       csr32w(ctlr, Rdt, rdt);
+}
+
+/*
+ * Initialise the receive unit: Rctl (without Ren; igberproc sets that),
+ * the descriptor ring base/length/head/tail registers and the delay timer.
+ * Blocks still held in ctlr->rb are freed and the ring is refilled through
+ * igbereplenish.  Finally Radv (on the listed chips), Rxdctl and the
+ * checksum offload register are programmed.
+ */
+static void
+igberxinit(struct ctlr* ctlr)
+{
+       int i;
+       struct block *bp;
+
+       /* temporarily keep Mpe on */
+       csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF|Mpe);
+       csr32w(ctlr, Rdbal, paddr_low32(ctlr->rdba));
+       csr32w(ctlr, Rdbah, paddr_high32(ctlr->rdba));
+       csr32w(ctlr, Rdlen, ctlr->nrd*sizeof(Rd));
+       ctlr->rdh = 0;
+       csr32w(ctlr, Rdh, 0);
+       ctlr->rdt = 0;
+       csr32w(ctlr, Rdt, 0);
+       ctlr->rdtr = 0;
+       csr32w(ctlr, Rdtr, Fpd|0);
+
+       /* drop anything left over from a previous run */
+       for(i = 0; i < ctlr->nrd; i++){
+               if((bp = ctlr->rb[i]) != NULL){
+                       ctlr->rb[i] = NULL;
+                       freeb(bp);
+               }
+       }
+       igbereplenish(ctlr);
+
+       switch(ctlr->id){
+       case i82540em:
+       case i82540eplp:
+       case i82541gi:
+       case i82541gi2:
+       case i82541pi:
+       case i82545em:
+       case i82545gmc:
+       case i82546gb:
+       case i82546eb:
+       case i82547gi:
+               csr32w(ctlr, Radv, 64);
+               break;
+       }
+       csr32w(ctlr, Rxdctl, (8<<WthreshSHIFT)|(8<<HthreshSHIFT)|4);
+
+       /*
+        * Enable checksum offload.
+        */
+       csr32w(ctlr, Rxcsum, Tuofl|Ipofl|(ETHERHDRSIZE<<PcssSHIFT));
+}
+
+/* rendez_sleep condition for igberproc: true once a receive cause is latched. */
+static int
+igberim(void* ctlr)
+{
+       struct ctlr *c = ctlr;
+
+       return c->rim != 0;
+}
+
+/*
+ * Receive kernel task, started by igbeattach with the struct ether as arg.
+ * After initialising and enabling the receiver it loops forever: unmask the
+ * receive interrupts, sleep until igbeinterrupt latches a cause in
+ * ctlr->rim, then walk the ring from the software head while descriptors
+ * are marked done.  Good packets are passed up with etheriq(); anything else
+ * is freed.  The ring is replenished when fewer than half the descriptors
+ * are free or the hardware signalled Rxdmt0.
+ */
+static void
+igberproc(void* arg)
+{
+       Rd *rd;
+       struct block *bp;
+       struct ctlr *ctlr;
+       int r, rdh;
+       struct ether *edev;
+
+       edev = arg;
+       ctlr = edev->ctlr;
+
+       igberxinit(ctlr);
+       r = csr32r(ctlr, Rctl);
+       r |= Ren;
+       csr32w(ctlr, Rctl, r);
+
+       for(;;){
+               /* clear the latched causes before unmasking, then wait */
+               ctlr->rim = 0;
+               igbeim(ctlr, Rxt0|Rxo|Rxdmt0|Rxseq);
+               ctlr->rsleep++;
+               rendez_sleep(&ctlr->rrendez, igberim, ctlr);
+
+               rdh = ctlr->rdh;
+               for(;;){
+                       rd = &ctlr->rdba[rdh];
+
+                       if(!(rd->status & Rdd))
+                               break;
+
+                       /*
+                        * Accept eop packets with no errors.
+                        * With no errors and the Ixsm bit clear,
+                        * the descriptor status Tcpcs and Ipcs bits give
+                        * an indication of whether the checksums were
+                        * calculated and valid.
+                        */
+                       if((rd->status & Reop) && rd->errors == 0){
+                               bp = ctlr->rb[rdh];
+                               ctlr->rb[rdh] = NULL;
+                               bp->wp += rd->length;
+                               bp->next = NULL;
+                               if(!(rd->status & Ixsm)){
+                                       ctlr->ixsm++;
+                                       if(rd->status & Ipcs){
+                                               /*
+                                                * IP checksum calculated
+                                                * (and valid as errors == 0).
+                                                */
+                                               ctlr->ipcs++;
+                                               bp->flag |= Bipck;
+                                       }
+                                       if(rd->status & Tcpcs){
+                                               /*
+                                                * TCP/UDP checksum calculated
+                                                * (and valid as errors == 0).
+                                                */
+                                               ctlr->tcpcs++;
+                                               bp->flag |= Btcpck|Budpck;
+                                       }
+                                       bp->checksum = rd->checksum;
+                                       bp->flag |= Bpktck;
+                               }
+                               etheriq(edev, bp, 1);
+                       }
+                       /* bad or partial packet: return its Block to the pool */
+                       else if(ctlr->rb[rdh] != NULL){
+                               freeb(ctlr->rb[rdh]);
+                               ctlr->rb[rdh] = NULL;
+                       }
+
+                       memset(rd, 0, sizeof(Rd));
+                       wmb();  /* make sure the zeroing happens before free (i think) */
+                       ctlr->rdfree--;
+                       rdh = NEXT_RING(rdh, ctlr->nrd);
+               }
+               ctlr->rdh = rdh;
+
+               if(ctlr->rdfree < ctlr->nrd/2 || (ctlr->rim & Rxdmt0))
+                       igbereplenish(ctlr);
+       }
+}
+
+/*
+ * Attach the interface: on first call allocate the descriptor rings and the
+ * Block pointer arrays, stock the shared receive pool with up to Nrb
+ * buffers, start the link and receive kernel tasks and initialise the
+ * transmitter.  Later calls return immediately (ctlr->alloc already set).
+ * Runs under ctlr->alock.  Raises Enomem if an allocation fails; the error
+ * handler releases whatever had been allocated and drops the lock.
+ */
+static void
+igbeattach(struct ether* edev)
+{
+       ERRSTACK(1);
+       struct block *bp;
+       struct ctlr *ctlr;
+       char *name;
+
+       ctlr = edev->ctlr;
+       ctlr->edev = edev;                      /* point back to Ether* */
+       qlock(&ctlr->alock);
+       if(ctlr->alloc != NULL){                        /* already allocated? */
+               qunlock(&ctlr->alock);
+               return;
+       }
+
+       ctlr->tb = NULL;
+       ctlr->rb = NULL;
+       ctlr->alloc = NULL;
+       ctlr->nrb = 0;
+       if(waserror()){
+               /* take back this controller's pool buffers and really free them */
+               while(ctlr->nrb > 0){
+                       bp = igberballoc();
+                       bp->free = NULL;
+                       freeb(bp);
+                       ctlr->nrb--;
+               }
+               kfree(ctlr->tb);
+               ctlr->tb = NULL;
+               kfree(ctlr->rb);
+               ctlr->rb = NULL;
+               kfree(ctlr->alloc);
+               ctlr->alloc = NULL;
+               qunlock(&ctlr->alock);
+               nexterror();
+       }
+
+       ctlr->nrd = Nrd;
+       ctlr->ntd = Ntd;
+       /* both rings in one allocation, with slack for 128-byte alignment */
+       ctlr->alloc = kzmalloc(ctlr->nrd * sizeof(Rd) + ctlr->ntd * sizeof(Td) + 127, 0);
+       if(ctlr->alloc == NULL) {
+               printd("igbe: can't allocate ctlr->alloc\n");
+               error(Enomem);
+       }
+       ctlr->rdba = (Rd*)ROUNDUP((uintptr_t)ctlr->alloc, 128);
+       ctlr->tdba = (Td*)(ctlr->rdba+ctlr->nrd);
+
+       ctlr->rb = kzmalloc(ctlr->nrd * sizeof(struct block *), 0);
+       ctlr->tb = kzmalloc(ctlr->ntd * sizeof(struct block *), 0);
+       if (ctlr->rb == NULL || ctlr->tb == NULL) {
+               printd("igbe: can't allocate ctlr->rb or ctlr->tb\n");
+               error(Enomem);
+       }
+
+       /* freeb() on a Block whose free routine is igberbfree puts it in the pool */
+       for(ctlr->nrb = 0; ctlr->nrb < Nrb; ctlr->nrb++){
+               if((bp = allocb(Rbsz)) == NULL)
+                       break;
+               bp->free = igberbfree;
+               freeb(bp);
+       }
+
+       /* the ktasks should free these names, if they ever exit */
+       name = kmalloc(KNAMELEN, KMALLOC_WAIT);
+       snprintf(name, KNAMELEN, "#l%dlproc", edev->ctlrno);
+       ktask(name, igbelproc, edev);
+
+       name = kmalloc(KNAMELEN, KMALLOC_WAIT);
+       snprintf(name, KNAMELEN, "#l%drproc", edev->ctlrno);
+       ktask(name, igberproc, edev);
+
+       igbetxinit(ctlr);
+
+       qunlock(&ctlr->alock);
+       poperror();
+}
+
+/*
+ * Interrupt handler; arg is the struct ether.
+ * Under imlock it masks every interrupt (Imc), then repeatedly reads Icr and
+ * handles the enabled causes: link and receive causes are latched in
+ * ctlr->lim / ctlr->rim and their kernel tasks woken; Txdw is noted.  Each
+ * handled cause is removed from the mask, so it stays disabled until the
+ * corresponding task (or igbetransmit) re-enables it through igbeim.  The
+ * reduced mask is written back to Ims, and igbetransmit is called outside
+ * the lock if Txdw was seen.
+ */
+static void igbeinterrupt(struct hw_trapframe *hw_tf, void *arg)
+{
+       struct ctlr *ctlr;
+       struct ether *edev;
+       int icr, im, txdw;
+
+       edev = arg;
+       ctlr = edev->ctlr;
+
+       ilock(&ctlr->imlock);
+       csr32w(ctlr, Imc, ~0);
+       im = ctlr->im;
+       txdw = 0;
+
+       /* note: causes are filtered with ctlr->im, which only changes after the loop */
+       while((icr = csr32r(ctlr, Icr) & ctlr->im) != 0){
+               if(icr & Lsc){
+                       im &= ~Lsc;
+                       ctlr->lim = icr & Lsc;
+                       rendez_wakeup(&ctlr->lrendez);
+                       ctlr->lintr++;
+               }
+               if(icr & (Rxt0|Rxo|Rxdmt0|Rxseq)){
+                       im &= ~(Rxt0|Rxo|Rxdmt0|Rxseq);
+                       ctlr->rim = icr & (Rxt0|Rxo|Rxdmt0|Rxseq);
+                       rendez_wakeup(&ctlr->rrendez);
+                       ctlr->rintr++;
+               }
+               if(icr & Txdw){
+                       im &= ~Txdw;
+                       txdw++;
+                       ctlr->tintr++;
+               }
+       }
+
+       ctlr->im = im;
+       csr32w(ctlr, Ims, im);
+       iunlock(&ctlr->imlock);
+
+       if(txdw)
+               igbetransmit(edev);
+}
+
+/*
+ * Read n bits from the Management Data I/O Interface by bit-banging Ctrl.
+ * Bits arrive most-significant first; each one is sampled from Mdd and then
+ * followed by a clock pulse (Mdc raised, then lowered).  Ctrl is restored to
+ * its original value afterwards.  Returns the bits read.
+ */
+static int
+i82543mdior(struct ctlr* ctlr, int n)
+{
+       int saved, idle, value, bit;
+
+       saved = csr32r(ctlr, Ctrl);
+       /* clock pin driven, data pin left as input */
+       idle = (saved & ~Mddo)|Mdco;
+
+       value = 0;
+       bit = n;
+       while(--bit >= 0){
+               if(csr32r(ctlr, Ctrl) & Mdd)
+                       value |= (1<<bit);
+               csr32w(ctlr, Ctrl, Mdc|idle);
+               csr32w(ctlr, Ctrl, idle);
+       }
+       csr32w(ctlr, Ctrl, saved);
+
+       return value;
+}
+
+/*
+ * Write the low n bits of 'bits' to the Management Data I/O Interface by
+ * bit-banging Ctrl, most-significant bit first.  Each bit is placed on Mdd
+ * and followed by a clock pulse (Mdc raised, then lowered).  Ctrl is
+ * restored to its original value afterwards.  Always returns 0.
+ */
+static int
+i82543mdiow(struct ctlr* ctlr, int bits, int n)
+{
+       int ctrl, i, r;
+
+       ctrl = csr32r(ctlr, Ctrl);
+       /* drive both the clock and the data pin */
+       r = Mdco|Mddo|ctrl;
+       for(i = n-1; i >= 0; i--){
+               /*
+                * Callers pass n == 32, so i reaches 31: test the bit with
+                * unsigned arithmetic, as 1<<31 overflows a signed int.
+                */
+               if((unsigned int)bits & (1U<<i))
+                       r |= Mdd;
+               else
+                       r &= ~Mdd;
+               csr32w(ctlr, Ctrl, Mdc|r);
+               csr32w(ctlr, Ctrl, r);
+       }
+       csr32w(ctlr, Ctrl, ctrl);
+
+       return 0;
+}
+
+/*
+ * MII Management Interface Read over the bit-banged MDIO pins.
+ *
+ * Frame sent: 32-bit preamble of ones, then 14 bits of
+ * ST+OP+PHYAD+REGAD (0x1800 carries ST and OP; pa and ra are the PHY and
+ * register addresses).  18 bits are then read back: TA + 16 data bits.
+ *
+ * Returns the 16 data bits, or -1 if bit 16 of the reply is set.
+ */
+static int
+i82543miimir(struct mii* mii, int pa, int ra)
+{
+       struct ctlr *c = mii->ctlr;
+       int reply;
+
+       i82543mdiow(c, 0xFFFFFFFF, 32);
+       i82543mdiow(c, 0x1800|(pa<<5)|ra, 14);
+       reply = i82543mdior(c, 18);
+
+       return (reply & 0x10000) ? -1 : (reply & 0xFFFF);
+}
+
+/*
+ * MII Management Interface Write over the bit-banged MDIO pins.
+ *
+ * Frame sent: 32-bit preamble of ones, then one 32-bit word holding
+ * ST+OP+PHYAD+REGAD+TA + 16 data bits (only the low 16 bits of data are
+ * used).  Always returns 0.
+ */
+static int
+i82543miimiw(struct mii* mii, int pa, int ra, int data)
+{
+       struct ctlr *c = mii->ctlr;
+       int frame;
+
+       i82543mdiow(c, 0xFFFFFFFF, 32);
+
+       frame = data & 0xFFFF;
+       frame |= (0x05<<(5+5+2+16))|(pa<<(5+2+16))|(ra<<(2+16))|(0x02<<16);
+       i82543mdiow(c, frame, 32);
+
+       return 0;
+}
+
+static int
+igbemiimir(struct mii* mii, int pa, int ra)
+{
+       struct ctlr *ctlr;
+       int status, tries;
+
+       ctlr = mii->ctlr;
+
+       /*
+        * Start a read of register 'ra' on PHY 'pa' through the Mdic
+        * register, then poll Mdic up to 64 times (udelay(1) between
+        * polls) until it reports MDIready or MDIe.
+        */
+       csr32w(ctlr, Mdic, MDIrop|(pa<<MDIpSHIFT)|(ra<<MDIrSHIFT));
+       status = 0;
+       tries = 64;
+       while(tries-- > 0){
+               status = csr32r(ctlr, Mdic);
+               if(status & (MDIe|MDIready))
+                       break;
+               udelay(1);
+       }
+
+       /* Anything other than "ready without error" is a failure. */
+       if((status & (MDIe|MDIready)) != MDIready)
+               return -1;
+       return status & 0xFFFF;
+}
+
+static int
+igbemiimiw(struct mii* mii, int pa, int ra, int data)
+{
+       struct ctlr *ctlr;
+       int status, tries;
+
+       ctlr = mii->ctlr;
+
+       /*
+        * Start a write of 'data' (masked with MDIdMASK) to register
+        * 'ra' on PHY 'pa' through the Mdic register, then poll Mdic
+        * up to 64 times (udelay(1) between polls) until it reports
+        * MDIready or MDIe.
+        */
+       data &= MDIdMASK;
+       csr32w(ctlr, Mdic, MDIwop|(pa<<MDIpSHIFT)|(ra<<MDIrSHIFT)|data);
+       status = 0;
+       tries = 64;
+       while(tries-- > 0){
+               status = csr32r(ctlr, Mdic);
+               if(status & (MDIe|MDIready))
+                       break;
+               udelay(1);
+       }
+
+       /* Anything other than "ready without error" is a failure. */
+       if((status & (MDIe|MDIready)) != MDIready)
+               return -1;
+       return 0;
+}
+
+/*
+ * MII read/write entry point for the bit-banged (82543) interface:
+ * a non-zero 'write' selects a register write of 'data', otherwise
+ * the register is read and 'data' is ignored.
+ */
+static int
+i82543miirw(struct mii* mii, int write, int pa, int ra, int data)
+{
+       return write ? i82543miimiw(mii, pa, ra, data)
+                    : i82543miimir(mii, pa, ra);
+}
+
+/*
+ * MII read/write entry point for the Mdic-register interface:
+ * a non-zero 'write' selects a register write of 'data', otherwise
+ * the register is read and 'data' is ignored.
+ */
+static int
+igbemiirw(struct mii* mii, int write, int pa, int ra, int data)
+{
+       return write ? igbemiimiw(mii, pa, ra, data)
+                    : igbemiimir(mii, pa, ra);
+}
+
+/*
+ * Set up the MII side of the controller: pick the register access
+ * routine for this chip, attach to the PHY(s) found by miiattach and,
+ * for chips not listed in the second switch, program PHY registers
+ * 16 and 20, reset the PHY and kick off auto-negotiation.
+ *
+ * Returns 0 on success. Returns -1 if Status reports Tbimode, if the
+ * controller id is not one handled here, if Mdro is not set in
+ * Ctrlext on the 82543GC, or if miiattach finds no PHY.
+ */
+static int
+igbemii(struct ctlr* ctlr)
+{
+       int ctrl, p, r;
+       int (*rw)(struct mii*, int unused_int, int, int, int);
+
+       r = csr32r(ctlr, Status);
+       if(r & Tbimode)
+               return -1;
+
+       /* Slu is set in Ctrl for every supported chip. */
+       ctrl = csr32r(ctlr, Ctrl);
+       ctrl |= Slu;
+
+       switch(ctlr->id){
+       case i82543gc:
+               /* 82543GC: speed/duplex forced, MII is bit-banged through Ctrl. */
+               ctrl |= Frcdplx|Frcspd;
+               csr32w(ctlr, Ctrl, ctrl);
+
+               /*
+                * The reset pin direction (Mdro) should already
+                * be set from the EEPROM load.
+                * If it's not set this configuration is unexpected
+                * so bail.
+                */
+               r = csr32r(ctlr, Ctrlext);
+               if(!(r & Mdro))
+                       return -1;
+               /*
+                * Write Ctrlext back unchanged, then clear Mdr, then set
+                * Mdr again, with a udelay(20*1000) pause after each write.
+                */
+               csr32w(ctlr, Ctrlext, r);
+               udelay(20*1000);
+               r = csr32r(ctlr, Ctrlext);
+               r &= ~Mdr;
+               csr32w(ctlr, Ctrlext, r);
+               udelay(20*1000);
+               r = csr32r(ctlr, Ctrlext);
+               r |= Mdr;
+               csr32w(ctlr, Ctrlext, r);
+               udelay(20*1000);
+
+               rw = i82543miirw;
+               break;
+       case i82544ei:
+       case i82544eif:
+       case i82544gc:
+       case i82540em:
+       case i82540eplp:
+       case i82547ei:
+       case i82547gi:
+       case i82541ei:
+       case i82541gi:
+       case i82541gi2:
+       case i82541pi:
+       case i82545em:
+       case i82545gmc:
+       case i82546gb:
+       case i82546eb:
+               /* These chips: speed/duplex not forced, MII goes through Mdic. */
+               ctrl &= ~(Frcdplx|Frcspd);
+               csr32w(ctlr, Ctrl, ctrl);
+               rw = igbemiirw;
+               break;
+       default:
+               return -1;
+       }
+
+       /* A mask of ~0 asks miiattach to probe every PHY address. */
+       if (!(ctlr->mii = miiattach(ctlr, ~0, rw)))
+               return -1;
+       // print("oui %X phyno %d\n", phy->oui, phy->phyno);
+
+       /*
+        * 8254X-specific PHY registers not in 802.3:
+        *      0x10    PHY specific control
+        *      0x14    extended PHY specific control
+        * Set appropriate values then reset the PHY to have
+        * changes noted.
+        */
+       switch(ctlr->id){
+       case i82547gi:
+       case i82541gi:
+       case i82541gi2:
+       case i82541pi:
+       case i82545em:
+       case i82545gmc:
+       case i82546gb:
+       case i82546eb:
+               /* No PHY-specific setup is done for these ids. */
+               break;
+       default:
+               r = miimir(ctlr->mii, 16);
+               r |= 0x0800;                    /* assert CRS on Tx */
+               r |= 0x0060;                    /* auto-crossover all speeds */
+               r |= 0x0002;                    /* polarity reversal enabled */
+               miimiw(ctlr->mii, 16, r);
+
+               r = miimir(ctlr->mii, 20);
+               r |= 0x0070;                    /* +25MHz clock */
+               r &= ~0x0F00;
+               r |= 0x0100;                    /* 1x downshift */
+               miimiw(ctlr->mii, 20, r);
+
+               miireset(ctlr->mii);
+               /*
+                * Pause bits to pass to miiane are derived from the
+                * TxcwPs/TxcwAs bits saved in ctlr->txcw; the ability and
+                * 1000BASE-T arguments are left at ~0.
+                */
+               p = 0;
+               if(ctlr->txcw & TxcwPs)
+                       p |= AnaP;
+               if(ctlr->txcw & TxcwAs)
+                       p |= AnaAP;
+               miiane(ctlr->mii, ~0, p, ~0);
+               break;
+       }
+       return 0;
+}
+
+/*
+ * Drive the EEPROM pins in the Eecd register by interpreting the
+ * command string 'op', one character at a time:
+ *      ' '     ignored
+ *      ':'N    start of a loop of N iterations (8 if N is omitted)
+ *      ';'     end of loop
+ *      'C' 'c' set / clear Sk (clock)
+ *      'D'     set Di from bit 'loop' of 'data' (only valid in a loop)
+ *      'O'     sample Do into bit 'loop' of the result (or into the
+ *              whole result when not in a loop)
+ *      'I' 'i' set / clear Di
+ *      'S' 's' set / clear Cs
+ * The loop counter runs from N-1 down to 0, so 'D' and 'O' handle
+ * the most significant bit first.
+ *
+ * Returns the bits collected by 'O' (0 if there were none), or -1 for
+ * an unknown command character, a ';' with no open loop, a 'D'
+ * outside a loop, or a loop still open at the end of the string.
+ */
+static int
+at93c46io(struct ctlr* ctlr, char* op, int data)
+{
+       char *lp, *p;
+       int i, loop, eecd, r;
+
+       eecd = csr32r(ctlr, Eecd);
+
+       r = 0;
+       loop = -1;
+       lp = NULL;
+       for(p = op; *p != '\0'; p++){
+               switch(*p){
+               default:
+                       return -1;
+               case ' ':
+                       continue;
+               case ':':                       /* start of loop */
+                       /*
+                        * lp is left on the last character of the count
+                        * (or on the ':' itself when there is no count, in
+                        * which case the count defaults to 8), so that the
+                        * p++ of the for loop lands on the first command of
+                        * the loop body, both now and on each repeat.
+                        */
+                       loop = strtol(p+1, &lp, 0)-1;
+                       lp--;
+                       if(p == lp)
+                               loop = 7;
+                       p = lp;
+                       continue;
+               case ';':                       /* end of loop */
+                       if(lp == NULL)
+                               return -1;
+                       loop--;
+                       if(loop >= 0)
+                               p = lp;
+                       else
+                               lp = NULL;
+                       continue;
+               case 'C':                       /* assert clock */
+                       eecd |= Sk;
+                       break;
+               case 'c':                       /* deassert clock */
+                       eecd &= ~Sk;
+                       break;
+               case 'D':                       /* next bit in 'data' byte */
+                       if(loop < 0)
+                               return -1;
+                       if(data & (1<<loop))
+                               eecd |= Di;
+                       else
+                               eecd &= ~Di;
+                       break;
+               case 'O':                       /* collect data output */
+                       /* read-only step: skips the Eecd write and delay below */
+                       i = (csr32r(ctlr, Eecd) & Do) != 0;
+                       if(loop >= 0)
+                               r |= (i<<loop);
+                       else
+                               r = i;
+                       continue;
+               case 'I':                       /* assert data input */
+                       eecd |= Di;
+                       break;
+               case 'i':                       /* deassert data input */
+                       eecd &= ~Di;
+                       break;
+               case 'S':                       /* enable chip select */
+                       eecd |= Cs;
+                       break;
+               case 's':                       /* disable chip select */
+                       eecd &= ~Cs;
+                       break;
+               }
+               /* Commands that changed a pin: push the new state, then wait. */
+               csr32w(ctlr, Eecd, eecd);
+               udelay(50);
+       }
+       if(loop >= 0)
+               return -1;
+       return r;
+}
+
+/*
+ * Read the 0x40 16-bit words of the serial EEPROM into ctlr->eeprom[]
+ * and return their 16-bit sum. Returns 0 without reading anything
+ * when Eecd reports an SPI EEPROM. If access is not granted, or an
+ * address cannot be set, the sum of the words read so far is returned.
+ */
+static int
+at93c46r(struct ctlr* ctlr)
+{
+       char cmd[20];
+       uint16_t cksum;
+       int abits, arbitrate, reg, tries, w, word;
+
+       reg = csr32r(ctlr, Eecd);
+       if(reg & Spi){
+               printd("igbe: SPI EEPROM access not implemented\n");
+               return 0;
+       }
+       /* Number of address bits in each read command. */
+       abits = (reg & (Eeszaddr|Eesz256)) ? 8 : 6;
+       cksum = 0;
+
+       switch(ctlr->id){
+       case i82540em:
+       case i82540eplp:
+       case i82541ei:
+       case i82541gi:
+       case i82541gi2:
+       case i82541pi:
+       case i82545em:
+       case i82545gmc:
+       case i82546gb:
+       case i82546eb:
+       case i82547ei:
+       case i82547gi:
+               /*
+                * These chips need the EEPROM requested first: set Areq
+                * and poll (up to 1000 times) for Agnt.
+                */
+               arbitrate = 1;
+               csr32w(ctlr, Eecd, reg|Areq);
+               for(tries = 0; tries < 1000; tries++){
+                       reg = csr32r(ctlr, Eecd);
+                       if(reg & Agnt)
+                               break;
+                       udelay(5);
+               }
+               if(!(reg & Agnt)){
+                       printd("igbe: not granted EEPROM access\n");
+                       goto release;
+               }
+               break;
+       default:
+               arbitrate = 0;
+               break;
+       }
+
+       /* Select the chip, then clock out abits+3 bits of opcode and address. */
+       snprintf(cmd, sizeof(cmd), "S :%dDCc;", abits+3);
+
+       for(w = 0; w < 0x40; w++){
+               /*
+                * Read the word at address 'w' from the Atmel AT93C46
+                * 3-Wire Serial EEPROM or compatible: send the read
+                * command, clock 16 data bits in, then deselect the chip.
+                * See the AT93C46 datasheet for protocol details.
+                */
+               if(at93c46io(ctlr, cmd, (0x06<<abits)|w) != 0){
+                       printd("igbe: can't set EEPROM address 0x%2.2X\n", w);
+                       goto release;
+               }
+               word = at93c46io(ctlr, ":16COc;", 0);
+               at93c46io(ctlr, "sic", 0);
+               ctlr->eeprom[w] = word;
+               cksum += word;
+       }
+
+release:
+       /* Drop the access request if one was made. */
+       if(arbitrate)
+               csr32w(ctlr, Eecd, reg & ~Areq);
+       return cksum;
+}
+
+/*
+ * Quiesce and reset the controller.
+ *
+ * Returns 0 on success, or -1 if Devrst or Eerst is still set after
+ * polling, or if Icr still reads non-zero at the end. Each wait loop
+ * polls up to 1000 times with udelay(1*1000) between polls.
+ */
+static int
+igbedetach(struct ctlr* ctlr)
+{
+       int r, timeo;
+
+       /*
+        * Perform a device reset to get the chip back to the
+        * power-on state, followed by an EEPROM reset to read
+        * the defaults for some internal registers.
+        */
+       /* All bits written to Imc; Rctl and Tctl zeroed. */
+       csr32w(ctlr, Imc, ~0);
+       csr32w(ctlr, Rctl, 0);
+       csr32w(ctlr, Tctl, 0);
+
+       udelay(10*1000);
+
+       /* Device reset: wait for Devrst to read back as clear. */
+       csr32w(ctlr, Ctrl, Devrst);
+       udelay(1*1000);
+       for(timeo = 0; timeo < 1000; timeo++){
+               if(!(csr32r(ctlr, Ctrl) & Devrst))
+                       break;
+               udelay(1*1000);
+       }
+       if(csr32r(ctlr, Ctrl) & Devrst)
+               return -1;
+       /* EEPROM reset: wait for Eerst to read back as clear. */
+       r = csr32r(ctlr, Ctrlext);
+       csr32w(ctlr, Ctrlext, r|Eerst);
+       udelay(1*1000);
+
+       for(timeo = 0; timeo < 1000; timeo++){
+               if(!(csr32r(ctlr, Ctrlext) & Eerst))
+                       break;
+               udelay(1*1000);
+       }
+       if(csr32r(ctlr, Ctrlext) & Eerst)
+               return -1;
+
+       /* On the chips listed below, clear Arpen in Manc. */
+       switch(ctlr->id){
+       default:
+               break;
+       case i82540em:
+       case i82540eplp:
+       case i82541gi:
+       case i82541gi2:
+       case i82541pi:
+       case i82545em:
+       case i82545gmc:
+       case i82547gi:
+       case i82546gb:
+       case i82546eb:
+               r = csr32r(ctlr, Manc);
+               r &= ~Arpen;
+               csr32w(ctlr, Manc, r);
+               break;
+       }
+
+       /*
+        * Write Imc again after the reset, then wait for Icr to read 0.
+        * NOTE(review): Icr is presumably clear-on-read, in which case
+        * each poll also acknowledges pending causes - confirm against
+        * the datasheet before changing the number of reads here.
+        */
+       csr32w(ctlr, Imc, ~0);
+       udelay(1*1000);
+       for(timeo = 0; timeo < 1000; timeo++){
+               if(!csr32r(ctlr, Icr))
+                       break;
+               udelay(1*1000);
+       }
+       if(csr32r(ctlr, Icr))
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Shutdown hook for the generic ethernet layer: reset the controller
+ * attached to 'ether'. The result of the reset is not reported.
+ */
+static void
+igbeshutdown(struct ether* ether)
+{
+       struct ctlr *ctlr;
+
+       ctlr = ether->ctlr;
+       igbedetach(ctlr);
+}
+
+/*
+ * Bring the controller from reset to a configured-but-idle state:
+ * reset it, read and checksum the EEPROM, load the station address
+ * into the first receive address slot (clearing the other 15), clear
+ * the multicast table, apply the EEPROM defaults by hand on the
+ * 82543GC, program flow control and, unless Status reports Tbimode,
+ * set up the PHY through igbemii.
+ *
+ * Returns 0 on success, -1 if the reset fails, the EEPROM checksum
+ * is not 0xBABA, or igbemii fails.
+ */
+static int
+igbereset(struct ctlr* ctlr)
+{
+       int ctrl, i, pause, r, swdpio, txcw;
+
+       if(igbedetach(ctlr))
+               return -1;
+
+       /*
+        * Read the EEPROM, validate the checksum
+        * then get the device back to a power-on state.
+        */
+       if((r = at93c46r(ctlr)) != 0xBABA){
+               /* plain %x: the Plan 9 "%uX" verb is not a hex conversion here */
+               printd("igbe: bad EEPROM checksum - 0x%04x\n", r);
+               return -1;
+       }
+
+       /*
+        * Snarf and set up the receive addresses.
+        * There are 16 addresses. The first should be the MAC address.
+        * The others are cleared and not marked valid (MS bit of Rah).
+        */
+       if ((ctlr->id == i82546gb || ctlr->id == i82546eb) &&
+           (pci_config_addr(ctlr->pci->bus, ctlr->pci->dev, 0, 0) ==
+                pci_config_addr(0, 1, 0, 0)))
+               ctlr->eeprom[Ea+2] += 0x100;            /* second interface */
+       if(ctlr->id == i82541gi && ctlr->eeprom[Ea] == 0xFFFF)
+               ctlr->eeprom[Ea] = 0xD000;
+       /* Each EEPROM word holds two address bytes, low byte first. */
+       for(i = Ea; i < Eaddrlen/2; i++){
+               ctlr->ra[2*i] = ctlr->eeprom[i];
+               ctlr->ra[2*i+1] = ctlr->eeprom[i]>>8;
+       }
+       /* lan id seems to vary on 82543gc; don't use it */
+       if (ctlr->id != i82543gc) {
+               r = (csr32r(ctlr, Status) & Lanid) >> 2;
+               ctlr->ra[5] += r;               /* ea ctlr[1] = ea ctlr[0]+1 */
+       }
+
+       r = (ctlr->ra[3]<<24)|(ctlr->ra[2]<<16)|(ctlr->ra[1]<<8)|ctlr->ra[0];
+       csr32w(ctlr, Ral, r);
+       /* 0x80000000 is the MS bit of Rah that marks the address valid. */
+       r = 0x80000000|(ctlr->ra[5]<<8)|ctlr->ra[4];
+       csr32w(ctlr, Rah, r);
+       for(i = 1; i < 16; i++){
+               csr32w(ctlr, Ral+i*8, 0);
+               csr32w(ctlr, Rah+i*8, 0);
+       }
+
+       /*
+        * Clear the Multicast Table Array.
+        * It's a 4096 bit vector accessed as 128 32-bit registers.
+        */
+       memset(ctlr->mta, 0, sizeof(ctlr->mta));
+       for(i = 0; i < 128; i++)
+               csr32w(ctlr, Mta+i*4, 0);
+
+       /*
+        * Just in case the Eerst didn't load the defaults
+        * (doesn't appear to fully on the 82543GC), do it manually.
+        */
+       if (ctlr->id == i82543gc) {
+               txcw = csr32r(ctlr, Txcw);
+               txcw &= ~(TxcwAne|TxcwPauseMASK|TxcwFd);
+               ctrl = csr32r(ctlr, Ctrl);
+               ctrl &= ~(SwdpioloMASK|Frcspd|Ilos|Lrst|Fd);
+
+               /* Ctrl bits taken from EEPROM word Icw1. */
+               if(ctlr->eeprom[Icw1] & 0x0400){
+                       ctrl |= Fd;
+                       txcw |= TxcwFd;
+               }
+               if(ctlr->eeprom[Icw1] & 0x0200)
+                       ctrl |= Lrst;
+               if(ctlr->eeprom[Icw1] & 0x0010)
+                       ctrl |= Ilos;
+               if(ctlr->eeprom[Icw1] & 0x0800)
+                       ctrl |= Frcspd;
+               swdpio = (ctlr->eeprom[Icw1] & 0x01E0)>>5;
+               ctrl |= swdpio<<SwdpioloSHIFT;
+               csr32w(ctlr, Ctrl, ctrl);
+
+               /* Ctrlext bits taken from EEPROM words Icw1 and Icw2. */
+               ctrl = csr32r(ctlr, Ctrlext);
+               ctrl &= ~(Ips|SwdpiohiMASK);
+               swdpio = (ctlr->eeprom[Icw2] & 0x00F0)>>4;
+               if(ctlr->eeprom[Icw1] & 0x1000)
+                       ctrl |= Ips;
+               ctrl |= swdpio<<SwdpiohiSHIFT;
+               csr32w(ctlr, Ctrlext, ctrl);
+
+               /*
+                * Txcw and the flow control thresholds are chosen from
+                * the 2-bit pause field of Icw2.
+                */
+               if(ctlr->eeprom[Icw2] & 0x0800)
+                       txcw |= TxcwAne;
+               pause = (ctlr->eeprom[Icw2] & 0x3000)>>12;
+               txcw |= pause<<TxcwPauseSHIFT;
+               switch(pause){
+               default:
+                       ctlr->fcrtl = 0x00002000;
+                       ctlr->fcrth = 0x00004000;
+                       txcw |= TxcwAs|TxcwPs;
+                       break;
+               case 0:
+                       ctlr->fcrtl = 0x00002000;
+                       ctlr->fcrth = 0x00004000;
+                       break;
+               case 2:
+                       ctlr->fcrtl = 0;
+                       ctlr->fcrth = 0;
+                       txcw |= TxcwAs;
+                       break;
+               }
+               ctlr->txcw = txcw;
+               csr32w(ctlr, Txcw, txcw);
+       }
+
+       /*
+        * Flow control - values from the datasheet.
+        */
+       csr32w(ctlr, Fcal, 0x00C28001);
+       csr32w(ctlr, Fcah, 0x00000100);
+       csr32w(ctlr, Fct, 0x00008808);
+       csr32w(ctlr, Fcttv, 0x00000100);
+
+       csr32w(ctlr, Fcrtl, ctlr->fcrtl);
+       csr32w(ctlr, Fcrth, ctlr->fcrth);
+
+       /* FYI, igbemii checks status right away too. */
+       if(!(csr32r(ctlr, Status) & Tbimode) && igbemii(ctlr) < 0) {
+               printk("igbemii failed!  igbe failing to reset!\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * Walk the PCI device list and, for every supported Intel gigabit
+ * controller, map BAR 0, sanitise the cache line size register,
+ * allocate and reset a struct ctlr, enable bus mastering and append
+ * the controller to the igbectlrhead/igbectlrtail list.
+ * Devices that cannot be mapped or fail to reset are skipped.
+ */
+static void
+igbepci(void)
+{
+       int cls, id;
+       struct pci_device *pcidev;
+       struct ctlr *ctlr;
+       void *mem;
+       uintptr_t mmio_paddr;
+
+       STAILQ_FOREACH(pcidev, &pci_devices, all_dev) {
+               /* This checks that pcidev is a Network Controller for Ethernet */
+               if (pcidev->class != 0x02 || pcidev->subclass != 0x00)
+                       continue;
+               /* The i825xx id constants are device id (high 16) | vendor id. */
+               id = pcidev->dev_id << 16 | pcidev->ven_id;
+               switch (id) {
+               default:
+                       continue;
+               case i82543gc:
+               case i82544ei:
+               case i82544eif:
+               case i82544gc:
+               case i82547ei:
+               case i82547gi:
+               case i82540em:
+               case i82540eplp:
+               case i82541ei:
+               case i82541gi:
+               case i82541gi2:
+               case i82541pi:
+               case i82545em:
+               case i82545gmc:
+               case i82546gb:
+               case i82546eb:
+                       break;
+               }
+               printk("igbe/e1000 driver found 0x%04x:%04x at %02x:%02x.%x\n",
+                      pcidev->ven_id, pcidev->dev_id,
+                      pcidev->bus, pcidev->dev, pcidev->func);
+
+               /* Use the 32-bit BAR address if present, else the 64-bit one. */
+               mmio_paddr = pcidev->bar[0].mmio_base32 ?
+                            pcidev->bar[0].mmio_base32 :
+                            pcidev->bar[0].mmio_base64;
+               mem = (void*)vmap_pmem(mmio_paddr, pcidev->bar[0].mmio_sz);
+               if(mem == NULL){
+                       /* report the address actually used, as a pointer */
+                       printd("igbe: can't map %p\n", (void*)mmio_paddr);
+                       continue;
+               }
+               cls = pcidev_read8(pcidev, PCI_CLSZ_REG);
+               switch(cls){
+                       default:
+                               printd("igbe: unexpected CLS - %d\n", cls*4);
+                               break;
+                       case 0x00:
+                       case 0xFF:
+                               /* bogus value; use a sane default.  cls is set in DWORD (u32)
+                                * units, so scale by the size of a u32, not of a long (which
+                                * is 8 bytes on 64-bit builds). */
+                               cls = ARCH_CL_SIZE / sizeof(uint32_t);
+                               pcidev_write8(pcidev, PCI_CLSZ_REG, cls);
+                               break;
+                       case 0x08:
+                       case 0x10:
+                               break;
+               }
+               ctlr = kzmalloc(sizeof(struct ctlr), 0);
+               if(ctlr == NULL) {
+                       vunmap_vmem((uintptr_t)mem, pcidev->bar[0].mmio_sz);
+                       error(Enomem);
+               }
+               spinlock_init_irqsave(&ctlr->imlock);
+               spinlock_init_irqsave(&ctlr->tlock);
+               qlock_init(&ctlr->alock);
+               qlock_init(&ctlr->slock);
+               rendez_init(&ctlr->lrendez);
+               rendez_init(&ctlr->rrendez);
+               /* port seems to be unused, and only used for some comparison with edev.
+                * plan9 just used the top of the raw bar, regardless of the type. */
+               ctlr->port = pcidev->bar[0].raw_bar & ~0x0f;
+               ctlr->pci = pcidev;
+               ctlr->id = id;
+               /* cache line size in bytes: DWORD count times 4 */
+               ctlr->cls = cls * sizeof(uint32_t);
+               ctlr->nic = mem;
+
+               if(igbereset(ctlr)){
+                       kfree(ctlr);
+                       vunmap_vmem((uintptr_t)mem, pcidev->bar[0].mmio_sz);
+                       continue;
+               }
+               pci_set_bus_master(pcidev);
+
+               /* Append to the singly linked list of discovered controllers. */
+               if(igbectlrhead != NULL)
+                       igbectlrtail->next = ctlr;
+               else
+                       igbectlrhead = ctlr;
+               igbectlrtail = ctlr;
+       }
+}
+
+/*
+ * Probe routine handed to addethercard: claim the first discovered
+ * controller that is not yet active and whose port matches
+ * edev->port (any controller matches when edev->port is 0), then
+ * fill in 'edev' with its parameters and driver entry points.
+ * Returns 0 on success, -1 if no controller is available.
+ */
+static int
+igbepnp(struct ether* edev)
+{
+       struct ctlr *ctlr;
+
+       /* The PCI scan is only performed on the first call. */
+       run_once(igbepci());
+
+       ctlr = igbectlrhead;
+       while(ctlr != NULL){
+               if(!ctlr->active &&
+                  (edev->port == 0 || edev->port == ctlr->port))
+                       break;
+               ctlr = ctlr->next;
+       }
+       if(ctlr == NULL)
+               return -1;
+       ctlr->active = 1;
+
+       /* Controller parameters. */
+       edev->ctlr = ctlr;
+       edev->port = ctlr->port;
+       edev->irq = ctlr->pci->irqline;
+       edev->netif.mbps = 1000;
+       memmove(edev->ea, ctlr->ra, Eaddrlen);
+       edev->tbdf = MKBUS(BusPCI, ctlr->pci->bus, ctlr->pci->dev,
+                          ctlr->pci->func);
+
+       /* Entry points used by the generic ethernet driver. */
+       edev->attach = igbeattach;
+       edev->transmit = igbetransmit;
+       edev->interrupt = igbeinterrupt;
+       edev->ifstat = igbeifstat;
+       edev->ctl = igbectl;
+       edev->shutdown = igbeshutdown;
+
+       /* Hooks for the netif layer. */
+       edev->netif.arg = edev;
+       edev->netif.promiscuous = igbepromiscuous;
+       edev->netif.multicast = igbemulticast;
+
+       return 0;
+}
+
+/*
+ * Register this driver's probe routine, igbepnp, with the ethernet
+ * layer under two card names: "i82543" and "igbe".
+ */
+linker_func_3(etherigbelink)
+{
+       addethercard("i82543", igbepnp);
+       addethercard("igbe", igbepnp);
+}
diff --git a/kern/drivers/net/ethermii.c b/kern/drivers/net/ethermii.c
new file mode 100644 (file)
index 0000000..30da877
--- /dev/null
@@ -0,0 +1,301 @@
+/* This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file. */
+
+#include <vfs.h>
+#include <kfs.h>
+#include <slab.h>
+#include <kmalloc.h>
+#include <kref.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <cpio.h>
+#include <pmap.h>
+#include <smp.h>
+#include "ethermii.h"
+
+static int
+miiprobe(struct mii* mii, int mask)
+{
+       struct miiphy *miiphy;
+       int bit, oui, phyno, r, rmask;
+       unsigned int id;
+
+       /*
+        * Probe through mii for PHYs in mask;
+        * return the mask of those found in the current probe.
+        * If the PHY has not already been probed, update
+        * the Mii information.
+        */
+       rmask = 0;
+       for(phyno = 0; phyno < NMiiPhy; phyno++){
+               /* unsigned shift: 1<<31 is not representable in an int */
+               bit = (int)(1U<<phyno);
+               if(!(mask & bit))
+                       continue;
+               /* already known from an earlier probe */
+               if(mii->mask & bit){
+                       rmask |= bit;
+                       continue;
+               }
+               if(mii->rw(mii, 0, phyno, Bmsr, 0) == -1)
+                       continue;
+               /*
+                * Assemble the 32-bit identifier in unsigned arithmetic;
+                * a failed read (-1) leaves all ones, which is rejected
+                * by the oui test below.
+                */
+               id = (unsigned int)mii->rw(mii, 0, phyno, Phyidr1, 0)<<16;
+               id |= (unsigned int)mii->rw(mii, 0, phyno, Phyidr2, 0);
+               r = (int)id;
+               oui = (id>>10) & 0xffff;
+               if(oui == 0xffff || oui == 0)
+                       continue;
+
+               /* an allocation failure just leaves this PHY unrecorded */
+               if((miiphy = kzmalloc(sizeof(struct miiphy), 0)) == NULL)
+                       continue;
+
+               miiphy->mii = mii;
+               miiphy->phyno = phyno;
+               miiphy->phyid = r;
+               miiphy->oui = oui;
+
+               /* ~0 marks "not yet chosen"; miiane fills these in */
+               miiphy->anar = ~0;
+               miiphy->fc = ~0;
+               miiphy->mscr = ~0;
+
+               mii->phy[phyno] = miiphy;
+               /* the first PHY found becomes the current one */
+               if(mii->curphy == NULL)
+                       mii->curphy = miiphy;
+               mii->mask |= bit;
+               mii->nphy++;
+
+               rmask |= bit;
+       }
+       return rmask;
+}
+
+/*
+ * Read register 'r' of the current PHY.
+ * Returns the value from the controller's rw routine, or -1 when
+ * there is no mii, no controller or no current PHY.
+ */
+int
+miimir(struct mii* mii, int r)
+{
+       struct miiphy *phy;
+
+       if(mii == NULL)
+               return -1;
+       if(mii->ctlr == NULL)
+               return -1;
+       phy = mii->curphy;
+       if(phy == NULL)
+               return -1;
+
+       return mii->rw(mii, 0, phy->phyno, r, 0);
+}
+
+/*
+ * Write 'data' to register 'r' of the current PHY.
+ * Returns the value from the controller's rw routine, or -1 when
+ * there is no mii, no controller or no current PHY.
+ */
+int
+miimiw(struct mii* mii, int r, int data)
+{
+       struct miiphy *phy;
+
+       if(mii == NULL)
+               return -1;
+       if(mii->ctlr == NULL)
+               return -1;
+       phy = mii->curphy;
+       if(phy == NULL)
+               return -1;
+
+       return mii->rw(mii, 1, phy->phyno, r, data);
+}
+
+/*
+ * Reset the current PHY by setting BmcrR in Bmcr and polling (up to
+ * 1000 times, udelay(1) apart) for it to clear again. If the PHY
+ * comes back with BmcrI set, that bit is cleared.
+ * Returns 0 on success, -1 if there is no current PHY or the reset
+ * bit never clears.
+ */
+int
+miireset(struct mii* mii)
+{
+       int ctl, remaining;
+
+       if(mii == NULL || mii->ctlr == NULL || mii->curphy == NULL)
+               return -1;
+
+       ctl = mii->rw(mii, 0, mii->curphy->phyno, Bmcr, 0);
+       mii->rw(mii, 1, mii->curphy->phyno, Bmcr, BmcrR|ctl);
+
+       remaining = 1000;
+       do{
+               ctl = mii->rw(mii, 0, mii->curphy->phyno, Bmcr, 0);
+               if((ctl & BmcrR) == 0)
+                       break;
+               udelay(1);
+       }while(--remaining > 0);
+
+       if(ctl & BmcrR)
+               return -1;
+       if(ctl & BmcrI)
+               mii->rw(mii, 1, mii->curphy->phyno, Bmcr, ctl & ~BmcrI);
+       return 0;
+}
+
+/*
+ * Program the current PHY's advertisement registers and (re)start
+ * auto-negotiation.
+ *
+ *  a   10/100 abilities to advertise (Ana* bits), or ~0 to reuse the
+ *      value cached in the PHY or, failing that, derive it from Bmsr
+ *  p   pause bits to advertise (AnaAP|AnaP), or ~0 to reuse the
+ *      cached value
+ *  e   1000BASE-T abilities (Mscr1000T* bits), or ~0 to reuse the
+ *      cached value or, failing that, derive it from Esr
+ *
+ * The values chosen are cached in curphy->anar, ->fc and ->mscr.
+ * Returns 0 on success, -1 if there is no current PHY or Bmsr does
+ * not have BmsrAna set.
+ */
+int
+miiane(struct mii* mii, int a, int p, int e)
+{
+       int anar, bmsr, mscr, r, phyno;
+
+       if(mii == NULL || mii->ctlr == NULL || mii->curphy == NULL)
+               return -1;
+       phyno = mii->curphy->phyno;
+
+       /* NOTE(review): the purpose of writing 0 to Bmsr before reading it is not evident here */
+       mii->rw(mii, 1, phyno, Bmsr, 0);
+       bmsr = mii->rw(mii, 0, phyno, Bmsr, 0);
+       if(!(bmsr & BmsrAna))
+               return -1;
+
+       /* 10/100 abilities: explicit argument, cached value, or from Bmsr. */
+       if(a != ~0)
+               anar = (AnaTXFD|AnaTXHD|Ana10FD|Ana10HD) & a;
+       else if(mii->curphy->anar != ~0)
+               anar = mii->curphy->anar;
+       else{
+               anar = mii->rw(mii, 0, phyno, Anar, 0);
+               anar &= ~(AnaAP|AnaP|AnaT4|AnaTXFD|AnaTXHD|Ana10FD|Ana10HD);
+               if(bmsr & Bmsr10THD)
+                       anar |= Ana10HD;
+               if(bmsr & Bmsr10TFD)
+                       anar |= Ana10FD;
+               if(bmsr & Bmsr100TXHD)
+                       anar |= AnaTXHD;
+               if(bmsr & Bmsr100TXFD)
+                       anar |= AnaTXFD;
+       }
+       mii->curphy->anar = anar;
+
+       /* Pause bits: explicit argument or cached value, merged into anar. */
+       if(p != ~0)
+               anar |= (AnaAP|AnaP) & p;
+       else if(mii->curphy->fc != ~0)
+               anar |= mii->curphy->fc;
+       mii->curphy->fc = (AnaAP|AnaP) & anar;
+
+       /* 1000BASE-T abilities, only when Bmsr has BmsrEs set. */
+       if(bmsr & BmsrEs){
+               mscr = mii->rw(mii, 0, phyno, Mscr, 0);
+               mscr &= ~(Mscr1000TFD|Mscr1000THD);
+               if(e != ~0)
+                       mscr |= (Mscr1000TFD|Mscr1000THD) & e;
+               else if(mii->curphy->mscr != ~0)
+                       mscr = mii->curphy->mscr;
+               else{
+                       r = mii->rw(mii, 0, phyno, Esr, 0);
+                       if(r & Esr1000THD)
+                               mscr |= Mscr1000THD;
+                       if(r & Esr1000TFD)
+                               mscr |= Mscr1000TFD;
+               }
+               mii->curphy->mscr = mscr;
+               mii->rw(mii, 1, phyno, Mscr, mscr);
+       }
+       else
+               mii->curphy->mscr = 0;
+       mii->rw(mii, 1, phyno, Anar, anar);
+
+       /* Enable and restart auto-negotiation unless Bmcr reads back with BmcrR set. */
+       r = mii->rw(mii, 0, phyno, Bmcr, 0);
+       if(!(r & BmcrR)){
+               r |= BmcrAne|BmcrRan;
+               mii->rw(mii, 1, phyno, Bmcr, r);
+       }
+
+       return 0;
+}
+
+/*
+ * Refresh the link state of the current PHY: sets phy->link and, when
+ * the link is up, phy->speed (10, 100 or 1000), phy->fd (full duplex)
+ * and the phy->rfc / phy->tfc flow control flags.
+ * Returns 0 when the link is up with a resolved speed, otherwise -1.
+ */
+int
+miistatus(struct mii* mii)
+{
+       struct miiphy *phy;
+       int addr, status, gig, partner, common, ours, theirs;
+
+       if(mii == NULL || mii->ctlr == NULL || mii->curphy == NULL)
+               return -1;
+       phy = mii->curphy;
+       addr = phy->phyno;
+
+       /*
+        * Auto-negotiation must be available or complete and the link
+        * up. Bmsr is read twice because the Ls bit is sticky.
+        */
+       status = mii->rw(mii, 0, addr, Bmsr, 0);
+       if((status & (BmsrAnc|BmsrAna)) == 0)
+               return -1;
+
+       status = mii->rw(mii, 0, addr, Bmsr, 0);
+       if((status & BmsrLs) == 0){
+               phy->link = 0;
+               return -1;
+       }
+
+       phy->tfc = 0;
+       phy->rfc = 0;
+       phy->fd = 0;
+       phy->speed = 0;
+
+       /* Gigabit first: needs both our advertisement and the status bit. */
+       if(phy->mscr != 0){
+               gig = mii->rw(mii, 0, addr, Mssr, 0);
+               if((phy->mscr & Mscr1000TFD) && (gig & Mssr1000TFD)){
+                       phy->speed = 1000;
+                       phy->fd = 1;
+               }
+               else if((phy->mscr & Mscr1000THD) && (gig & Mssr1000THD))
+                       phy->speed = 1000;
+       }
+
+       /* Otherwise the best 10/100 mode both ends advertise. */
+       partner = mii->rw(mii, 0, addr, Anlpar, 0);
+       if(phy->speed == 0){
+               common = phy->anar & partner;
+               if(common & (AnaTXFD|AnaTXHD)){
+                       phy->speed = 100;
+                       if(common & AnaTXFD)
+                               phy->fd = 1;
+               }
+               else if(common & (Ana10FD|Ana10HD)){
+                       phy->speed = 10;
+                       if(common & Ana10FD)
+                               phy->fd = 1;
+               }
+       }
+       if(phy->speed == 0)
+               return -1;
+
+       /* Flow control is only resolved for full duplex links. */
+       if(phy->fd){
+               ours = phy->fc;
+               theirs = partner & (AnaAP|AnaP);
+               if(ours == AnaAP && theirs == (AnaAP|AnaP))
+                       phy->tfc = 1;
+               else if(ours == (AnaAP|AnaP) && theirs == AnaAP)
+                       phy->rfc = 1;
+               else if((ours & AnaP) && (theirs & AnaP))
+                       phy->rfc = phy->tfc = 1;
+       }
+
+       phy->link = 1;
+
+       return 0;
+}
+
+/*
+ * Append a hex dump of all NMiiPhyr registers of the current PHY,
+ * eight per line, to the buffer starting at 'p' and ending at 'e'.
+ * Returns the new end of the text as reported by seprintf; returns
+ * 'p' unchanged when there is no current PHY.
+ */
+char*
+miidumpphy(struct mii* mii, char* p, char* e)
+{
+       int i, r;
+
+       if(mii == NULL || mii->curphy == NULL)
+               return p;
+
+       p = seprintf(p, e, "phy:   ");
+       for(i = 0; i < NMiiPhyr; i++){
+               /* start a new, aligned line every eight registers */
+               if(i && ((i & 0x07) == 0))
+                       p = seprintf(p, e, "\n       ");
+               r = mii->rw(mii, 0, mii->curphy->phyno, i, 0);
+               /* plain %x: the Plan 9 "%ux" verb is not a hex conversion here */
+               p = seprintf(p, e, " %04x", r);
+       }
+       p = seprintf(p, e, "\n");
+
+       return p;
+}
+
+/*
+ * Release everything allocated by miiattach/miiprobe: each recorded
+ * PHY structure, then the mii structure itself. A NULL mii is
+ * ignored. The caller must not use 'mii' afterwards.
+ */
+void
+miidetach(struct mii* mii)
+{
+       int i;
+
+       if(mii == NULL)
+               return;
+
+       for(i = 0; i < NMiiPhy; i++){
+               if(mii->phy[i] == NULL)
+                       continue;
+               /* free the PHY entry, not the mii that is still being walked */
+               kfree(mii->phy[i]);
+               mii->phy[i] = NULL;
+       }
+       kfree(mii);
+}
+
+/*
+ * Allocate a struct mii bound to 'ctlr' and its register access
+ * routine 'rw', then probe the PHY addresses selected by 'mask'.
+ * Returns the new mii, or NULL if the allocation fails or no PHY
+ * is found.
+ */
+struct mii*
+miiattach(void* ctlr, int mask, int (*rw)(struct mii*, int unused_int, int, int, int))
+{
+       struct mii* mii;
+
+       mii = kzmalloc(sizeof(struct mii), 0);
+       if(mii == NULL)
+               return NULL;
+
+       mii->ctlr = ctlr;
+       mii->rw = rw;
+
+       /* Keep the mii only if at least one PHY answered the probe. */
+       if(miiprobe(mii, mask) != 0)
+               return mii;
+
+       kfree(mii);
+       return NULL;
+}
diff --git a/kern/drivers/net/ethermii.h b/kern/drivers/net/ethermii.h
new file mode 100644 (file)
index 0000000..f6f69f5
--- /dev/null
@@ -0,0 +1,124 @@
+/* This file is part of the UCB release of Plan 9. It is subject to the license
+ * terms in the LICENSE file found in the top-level directory of this
+ * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
+ * part of the UCB release of Plan 9, including this file, may be copied,
+ * modified, propagated, or distributed except according to the terms contained
+ * in the LICENSE file. */
+
+enum {                                 /* MII management register numbers */
+       Bmcr            = 0x00,         /* Basic Mode Control */
+       Bmsr            = 0x01,         /* Basic Mode Status */
+       Phyidr1         = 0x02,         /* PHY Identifier #1 */
+       Phyidr2         = 0x03,         /* PHY Identifier #2 */
+       Anar            = 0x04,         /* Auto-Negotiation Advertisement */
+       Anlpar          = 0x05,         /* AN Link Partner Ability */
+       Aner            = 0x06,         /* AN Expansion */
+       Annptr          = 0x07,         /* AN Next Page TX */
+       Annprr          = 0x08,         /* AN Next Page RX */
+       Mscr            = 0x09,         /* MASTER-SLAVE Control */
+       Mssr            = 0x0a,         /* MASTER-SLAVE Status */
+       Esr             = 0x0f,         /* Extended Status */
+
+       NMiiPhyr        = 32,           /* registers read per PHY by miidumpphy() */
+       NMiiPhy         = 32,           /* number of slots in mii.phy[] */
+};
+
+enum {                                 /* Bmcr (register 0x00) bit masks */
+       BmcrSs1         = 0x0040,       /* Speed Select[1] */
+       BmcrCte         = 0x0080,       /* Collision Test Enable */
+       BmcrDm          = 0x0100,       /* Duplex Mode */
+       BmcrRan         = 0x0200,       /* Restart Auto-Negotiation */
+       BmcrI           = 0x0400,       /* Isolate */
+       BmcrPd          = 0x0800,       /* Power Down */
+       BmcrAne         = 0x1000,       /* Auto-Negotiation Enable */
+       BmcrSs0         = 0x2000,       /* Speed Select[0] */
+       BmcrLe          = 0x4000,       /* Loopback Enable */
+       BmcrR           = 0x8000,       /* Reset */
+};
+
+enum {                                 /* Bmsr (register 0x01) bit masks */
+       BmsrEc          = 0x0001,       /* Extended Capability */
+       BmsrJd          = 0x0002,       /* Jabber Detect */
+       BmsrLs          = 0x0004,       /* Link Status */
+       BmsrAna         = 0x0008,       /* Auto-Negotiation Ability */
+       BmsrRf          = 0x0010,       /* Remote Fault */
+       BmsrAnc         = 0x0020,       /* Auto-Negotiation Complete */
+       BmsrPs          = 0x0040,       /* Preamble Suppression Capable */
+       BmsrEs          = 0x0100,       /* Extended Status */
+       Bmsr100T2HD     = 0x0200,       /* 100BASE-T2 HD Capable */
+       Bmsr100T2FD     = 0x0400,       /* 100BASE-T2 FD Capable */
+       Bmsr10THD       = 0x0800,       /* 10BASE-T HD Capable */
+       Bmsr10TFD       = 0x1000,       /* 10BASE-T FD Capable */
+       Bmsr100TXHD     = 0x2000,       /* 100BASE-TX HD Capable */
+       Bmsr100TXFD     = 0x4000,       /* 100BASE-TX FD Capable */
+       Bmsr100T4       = 0x8000,       /* 100BASE-T4 Capable */
+};
+
+enum {                                 /* Anar/Anlpar (registers 0x04/0x05) bit masks */
+       Ana10G          = 0x0001,       /* NOTE(review): undocumented in this file; bit 0 -- confirm meaning */
+
+       Ana10HD         = 0x0020,       /* Advertise 10BASE-T */
+       Ana10FD         = 0x0040,       /* Advertise 10BASE-T FD */
+       AnaTXHD         = 0x0080,       /* Advertise 100BASE-TX */
+       AnaTXFD         = 0x0100,       /* Advertise 100BASE-TX FD */
+       AnaT4           = 0x0200,       /* Advertise 100BASE-T4 */
+       AnaP            = 0x0400,       /* Pause */
+       AnaAP           = 0x0800,       /* Asymmetrical Pause */
+       AnaRf           = 0x2000,       /* Remote Fault */
+       AnaAck          = 0x4000,       /* Acknowledge */
+       AnaNp           = 0x8000,       /* Next Page Indication */
+};
+
+enum {                                 /* Mscr (register 0x09) bit masks */
+       Mscr1000THD     = 0x0100,       /* Advertise 1000BASE-T HD */
+       Mscr1000TFD     = 0x0200,       /* Advertise 1000BASE-T FD */
+};
+
+enum {                                 /* Mssr (register 0x0a) bit masks */
+       Mssr1000THD     = 0x0400,       /* Link Partner 1000BASE-T HD able */
+       Mssr1000TFD     = 0x0800,       /* Link Partner 1000BASE-T FD able */
+};
+
+enum {                                 /* Esr (register 0x0f) bit masks */
+       Esr1000THD      = 0x1000,       /* 1000BASE-T HD Capable */
+       Esr1000TFD      = 0x2000,       /* 1000BASE-T FD Capable */
+       Esr1000XHD      = 0x4000,       /* 1000BASE-X HD Capable */
+       Esr1000XFD      = 0x8000,       /* 1000BASE-X FD Capable */
+};
+
+struct mii {                           /* MII handle; allocated by miiattach(), released by miidetach() */
+       spinlock_t lock;                /* NOTE(review): presumably serializes access to this handle -- confirm */
+       int     nphy;                   /* presumably the number of PHYs found by the probe -- confirm */
+       int     mask;                   /* presumably the probe mask given to miiattach() -- confirm */
+       struct miiphy*  phy[NMiiPhy];   /* PHY entries; NULL slots are skipped by miidetach() */
+       struct miiphy*  curphy;         /* PHY whose registers miidumpphy() reads; may be NULL */
+
+       void*   ctlr;                   /* controller pointer handed to miiattach() */
+       int     (*rw)(struct mii*, int unused_int, int, int, int);      /* register accessor handed to miiattach() */
+};
+
+struct miiphy {                        /* state kept for one PHY */
+       struct mii*     mii;            /* presumably the owning handle -- confirm */
+       int     phyno;                  /* PHY number passed to mii->rw() (see miidumpphy) */
+       int     phyid;                  /* presumably derived from Phyidr1/Phyidr2 -- confirm */
+       int     oui;                    /* presumably the vendor OUI part of the id -- confirm */
+
+       int     anar;                   /* presumably the advertised Anar value -- confirm */
+       int     fc;                     /* local AnaP/AnaAP bits; miistatus() compares them with Anlpar */
+       int     mscr;                   /* presumably the advertised Mscr value -- confirm */
+
+       int     link;                   /* set to 1 by miistatus() just before it returns 0 */
+       int     speed;                  /* set by miistatus(), e.g. 10 for 10BASE-T; 0 makes it return -1 */
+       int     fd;                     /* set to 1 by miistatus() for a full-duplex result */
+       int     rfc;                    /* set by miistatus() from the pause bits; presumably RX flow control -- confirm */
+       int     tfc;                    /* set by miistatus() from the pause bits; presumably TX flow control -- confirm */
+};
+
+int miimir(struct mii *mii, int r);    /* presumably reads register r of the current PHY -- confirm */
+int miimiw(struct mii *mii, int r, int data);  /* presumably writes data to register r of the current PHY -- confirm */
+int miireset(struct mii *mii); /* defined outside this header; presumably resets the current PHY -- confirm */
+int miiane(struct mii *mii, int a, int p, int e);      /* presumably configures auto-negotiation; argument meanings not shown here -- confirm */
+int miistatus(struct mii *mii);        /* returns 0 after setting link; returns -1 if no speed was resolved */
+char *miidumpphy(struct mii *mii, char *p, char *e);   /* seprintf's the current PHY's registers at p (limit e); returns the new p */
+void miidetach(struct mii *mii);       /* frees the handle; mii must not be used afterwards */
+struct mii *miiattach(void *ctlr, int mask, int (*rw)(struct mii *, int unused_int, int unused2, int unused3, int unused4));   /* returns a new handle, or NULL on allocation failure or when miiprobe() returns 0 */