akaros/kern/drivers/net/ether82563.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008-2014
   3 * erik quanstrom
   4 *
   5 * This software is provided `as-is,' without any express or implied
   6 * warranty.  In no event will the author be held liable for any damages
   7 * arising from the use of this software.
   8 *
   9 * Permission is granted to anyone to use this software for any purpose,
  10 * including commercial applications, and to alter it and redistribute it
  11 * freely, subject to the following restrictions:
  12 *
  13 * 1.  The origin of this software must not be misrepresented; you must
  14 * not claim that you wrote the original software.  If you use this
  15 * software in a product, an acknowledgment in the product documentation
  16 * would be appreciated but is not required.
  17 *
  18 * 2.  Altered source versions must be plainly marked as such, and must
  19 * not be misrepresented as being the original software.
  20 *
  21 * 3.  This notice may not be removed or altered from any source
  22 * distribution.
  23 */
  24/* This code has been modified by UC Berkeley and Google to work in Akaros. */
  25/*
  26 * Intel Gigabit Ethernet PCI-Express Controllers.
  27 *      8256[367], 8257[1-79], 21[078]
  28 * Pretty basic, does not use many of the chip smarts.
  29 * The interrupt mitigation tuning for each chip variant
  30 * is probably different. The reset/initialisation
  31 * sequence needs straightened out. Doubt the PHY code
  32 * for the 82575eb is right.
  33 *
  34 * on the assumption that allowing jumbo packets makes the controller
  35 * much slower (as is true of the 82579), never allow jumbos.
  36 */
  37#include <assert.h>
  38#include <cpio.h>
  39#include <error.h>
  40#include <kmalloc.h>
  41#include <kref.h>
  42#include <net/ip.h>
  43#include <pmap.h>
  44#include <slab.h>
  45#include <smp.h>
  46#include <stdio.h>
  47#include <string.h>
  48
  49#define now() TK2MS(MACHP(0)->ticks)
  50
  51/*
  52 * these are in the order they appear in the manual, not numeric order.
  53 * It was too hard to find them in the book. Ref 21489, rev 2.6
  54 */
  55
  56enum {
  57        /* General */
  58        Ctrl = 0x0000,       /* Device Control */
  59        Status = 0x0008,     /* Device Status */
  60        Eec = 0x0010,        /* EEPROM/Flash Control/Data */
  61        Fextnvm6 = 0x0010,   /* Future Extended NVM 6 */
  62        Eerd = 0x0014,       /* EEPROM Read */
  63        Ctrlext = 0x0018,    /* Extended Device Control */
  64        Fla = 0x001c,        /* Flash Access */
  65        Mdic = 0x0020,       /* MDI Control */
  66        Seresctl = 0x0024,   /* Serdes ana */
  67        Fcal = 0x0028,       /* Flow Control Address Low */
  68        Fcah = 0x002C,       /* Flow Control Address High */
  69        Fct = 0x0030,        /* Flow Control Type */
  70        Kumctrlsta = 0x0034, /* MAC-PHY Interface */
  71        Vet = 0x0038,        /* VLAN EtherType */
  72        Fcttv = 0x0170,      /* Flow Control Transmit Timer Value */
  73        Txcw = 0x0178,       /* Transmit Configuration Word */
  74        Rxcw = 0x0180,       /* Receive Configuration Word */
  75        Ledctl = 0x0E00,     /* LED control */
  76        Pba = 0x1000,        /* Packet Buffer Allocation */
  77        Pbs = 0x1008,        /* Packet Buffer Size */
  78
  79        /* Interrupt */
  80        Icr = 0x00C0, /* Interrupt Cause Read */
  81        Itr = 0x00c4, /* Interrupt Throttling Rate */
  82        Ics = 0x00C8, /* Interrupt Cause Set */
  83        Ims = 0x00D0, /* Interrupt Mask Set/Read */
  84        Imc = 0x00D8, /* Interrupt mask Clear */
  85        Iam = 0x00E0, /* Interrupt acknowledge Auto Mask */
  86
  87        /* Receive */
  88        Rctl = 0x0100,    /* Control */
  89        Ert = 0x2008,     /* Early Receive Threshold (573[EVL], 579 only) */
  90        Fcrtl = 0x2160,   /* Flow Control RX Threshold Low */
  91        Fcrth = 0x2168,   /* Flow Control Rx Threshold High */
  92        Psrctl = 0x2170,  /* Packet Split Receive Control */
  93        Rdbal = 0x2800,   /* Rdesc Base Address Low Queue 0 */
  94        Rdbah = 0x2804,   /* Rdesc Base Address High Queue 0 */
  95        Rdlen = 0x2808,   /* Descriptor Length Queue 0 */
  96        Srrctl = 0x280c,  /* split and replication rx control (82575) */
  97        Rdh = 0x2810,     /* Descriptor Head Queue 0 */
  98        Rdt = 0x2818,     /* Descriptor Tail Queue 0 */
  99        Rdtr = 0x2820,    /* Descriptor Timer Ring */
 100        Rxdctl = 0x2828,  /* Descriptor Control */
 101        Radv = 0x282C,    /* Interrupt Absolute Delay Timer */
 102        Rdbal1 = 0x2900,  /* Rdesc Base Address Low Queue 1 */
 103        Rdbah1 = 0x2804,  /* Rdesc Base Address High Queue 1 */
 104        Rdlen1 = 0x2908,  /* Descriptor Length Queue 1 */
 105        Rdh1 = 0x2910,    /* Descriptor Head Queue 1 */
 106        Rdt1 = 0x2918,    /* Descriptor Tail Queue 1 */
 107        Rxdctl1 = 0x2928, /* Descriptor Control Queue 1 */
 108        Rsrpd = 0x2c00,   /* Small Packet Detect */
 109        Raid = 0x2c08,    /* ACK interrupt delay */
 110        Cpuvec = 0x2c10,  /* CPU Vector */
 111        Rxcsum = 0x5000,  /* Checksum Control */
 112        Rmpl = 0x5004,    /* rx maximum packet length (82575) */
 113        Rfctl = 0x5008,   /* Filter Control */
 114        Mta = 0x5200,     /* Multicast Table Array */
 115        Ral = 0x5400,     /* Receive Address Low */
 116        Rah = 0x5404,     /* Receive Address High */
 117        Vfta = 0x5600,    /* VLAN Filter Table Array */
 118        Mrqc = 0x5818,    /* Multiple Receive Queues Command */
 119        Rssim = 0x5864,   /* RSS Interrupt Mask */
 120        Rssir = 0x5868,   /* RSS Interrupt Request */
 121        Reta = 0x5c00,    /* Redirection Table */
 122        Rssrk = 0x5c80,   /* RSS Random Key */
 123
 124        /* Transmit */
 125        Tctl = 0x0400, /* Transmit Control */
 126        Tipg = 0x0410, /* Transmit IPG */
 127        Tkabgtxd =
 128            0x3004,      /* glci afe band gap transmit ref data, or something */
 129        Tdbal = 0x3800,  /* Tdesc Base Address Low */
 130        Tdbah = 0x3804,  /* Tdesc Base Address High */
 131        Tdlen = 0x3808,  /* Descriptor Length */
 132        Tdh = 0x3810,    /* Descriptor Head */
 133        Tdt = 0x3818,    /* Descriptor Tail */
 134        Tidv = 0x3820,   /* Interrupt Delay Value */
 135        Txdctl = 0x3828, /* Descriptor Control */
 136        Tadv = 0x382C,   /* Interrupt Absolute Delay Timer */
 137        Tarc0 = 0x3840,  /* Arbitration Counter Queue 0 */
 138        Tdbal1 = 0x3900, /* Descriptor Base Low Queue 1 */
 139        Tdbah1 = 0x3904, /* Descriptor Base High Queue 1 */
 140        Tdlen1 = 0x3908, /* Descriptor Length Queue 1 */
 141        Tdh1 = 0x3910,   /* Descriptor Head Queue 1 */
 142        Tdt1 = 0x3918,   /* Descriptor Tail Queue 1 */
 143        Txdctl1 = 0x3928, /* Descriptor Control 1 */
 144        Tarc1 = 0x3940,   /* Arbitration Counter Queue 1 */
 145
 146        /* Statistics */
 147        Statistics = 0x4000, /* Start of Statistics Area */
 148        Gorcl = 0x88 / 4,    /* Good Octets Received Count */
 149        Gotcl = 0x90 / 4,    /* Good Octets Transmitted Count */
 150        Torl = 0xC0 / 4,     /* Total Octets Received */
 151        Totl = 0xC8 / 4,     /* Total Octets Transmitted */
 152        Nstatistics = 0x124 / 4,
 153};
 154
 155enum {                      /* Ctrl */
 156       GIOmd = 1 << 2,      /* BIO master disable */
 157       Lrst = 1 << 3,       /* link reset */
 158       Slu = 1 << 6,        /* Set Link Up */
 159       SspeedMASK = 3 << 8, /* Speed Selection */
 160       SspeedSHIFT = 8,
 161       Sspeed10 = 0x00000000,      /* 10Mb/s */
 162       Sspeed100 = 0x00000100,     /* 100Mb/s */
 163       Sspeed1000 = 0x00000200,    /* 1000Mb/s */
 164       Frcspd = 1 << 11,           /* Force Speed */
 165       Frcdplx = 1 << 12,          /* Force Duplex */
 166       SwdpinsloMASK = 0x003C0000, /* Software Defined Pins - lo nibble */
 167       SwdpinsloSHIFT = 18,
 168       SwdpioloMASK = 0x03C00000, /* Software Defined Pins - I or O */
 169       SwdpioloSHIFT = 22,
 170       Devrst = 1 << 26, /* Device Reset */
 171       Rfce = 1 << 27,   /* Receive Flow Control Enable */
 172       Tfce = 1 << 28,   /* Transmit Flow Control Enable */
 173       Vme = 1 << 30,    /* VLAN Mode Enable */
 174       Phyrst = 1 << 31, /* Phy Reset */
 175};
 176
 177enum {                   /* Status */
 178       Lu = 1 << 1,      /* Link Up */
 179       Lanid = 3 << 2,   /* mask for Lan ID. */
 180       Txoff = 1 << 4,   /* Transmission Paused */
 181       Tbimode = 1 << 5, /* TBI Mode Indication */
 182       Phyra = 1 << 10,  /* PHY Reset Asserted */
 183       GIOme = 1 << 19,  /* GIO Master Enable Status */
 184};
 185
 186enum {
 187        /* Eec */
 188        Nvpres = 1 << 8,   /* nvram present */
 189        Autord = 1 << 9,   /* autoread complete */
 190        Sec1val = 1 << 22, /* sector 1 valid (!sec0) */
 191};
 192
 193enum {                   /* Eerd */
 194       EEstart = 1 << 0, /* Start Read */
 195       EEdone = 1 << 1,  /* Read done */
 196};
 197
 198enum {                    /* Ctrlext */
 199       Asdchk = 1 << 12,  /* ASD Check */
 200       Eerst = 1 << 13,   /* EEPROM Reset */
 201       Spdbyps = 1 << 15, /* Speed Select Bypass */
 202};
 203
 204/*
 205 * TODO(dcross): 'Ea' is 0 elsewhere. Investigate and possibly correct.
 206 */
 207enum {               /* EEPROM content offsets */
 208       OldEa = 0x00, /* Old Ethernet address */
 209       Ea = 0x01,    /* Ethernet Address */
 210       Cf = 0x03,    /* Compatibility Field */
 211       Icw1 = 0x0A,  /* Initialization Control Word 1 */
 212       Sid = 0x0B,   /* Subsystem ID */
 213       Svid = 0x0C,  /* Subsystem Vendor ID */
 214       Did = 0x0D,   /* Device ID */
 215       Vid = 0x0E,   /* Vendor ID */
 216       Icw2 = 0x0F,  /* Initialization Control Word 2 */
 217};
 218
 219enum {                        /* Mdic */
 220       MDIdMASK = 0x0000FFFF, /* Data */
 221       MDIdSHIFT = 0,
 222       MDIrMASK = 0x001F0000, /* PHY Register Address */
 223       MDIrSHIFT = 16,
 224       MDIpMASK = 0x03E00000, /* PHY Address */
 225       MDIpSHIFT = 21,
 226       MDIwop = 0x04000000,   /* Write Operation */
 227       MDIrop = 0x08000000,   /* Read Operation */
 228       MDIready = 0x10000000, /* End of Transaction */
 229       MDIie = 0x20000000,    /* Interrupt Enable */
 230       MDIe = 0x40000000,     /* Error */
 231};
 232
 233enum {                 /* phy interface registers */
 234       Phyctl = 0,     /* phy ctl */
 235       Physsr = 17,    /* phy secondary status */
 236       Phyier = 18,    /* 82573 phy interrupt enable */
 237       Phyisr = 19,    /* 82563 phy interrupt status */
 238       Phylhr = 19,    /* 8257[12] link health */
 239       Phyier218 = 24, /* 218 (phy79?) phy interrupt enable */
 240       Phyisr218 = 25, /* 218 (phy79?) phy interrupt status */
 241       Phystat = 26,   /* 82580 (phy79?) phy status */
 242       Phypage = 31,   /* page number */
 243
 244       Rtlink = 1 << 10, /* realtime link status */
 245       Phyan = 1 << 11,  /* phy has auto-negotiated */
 246
 247       /* Phyctl bits */
 248       Ran = 1 << 9,  /* restart auto-negotiation */
 249       Ean = 1 << 12, /* enable auto-negotiation */
 250
 251       /* 82573 Phyier interrupt enable bits */
 252       Lscie = 1 << 10, /* link status changed */
 253       Ancie = 1 << 11, /* auto-negotiation complete */
 254       Spdie = 1 << 14, /* speed changed */
 255       Panie = 1 << 15, /* phy auto-negotiation error */
 256
 257       /* Phylhr/Phyisr bits */
 258       Anf = 1 << 6,  /* lhr: auto-negotiation fault */
 259       Ane = 1 << 15, /* isr: auto-negotiation error */
 260
 261       /* 82580 Phystat bits */
 262       Ans = 3 << 14, /* 82580 autoneg. status */
 263       Link = 1 << 6, /* 82580 link */
 264
 265       /* 218 Phystat bits */
 266       Anfs = 3 << 13,   /* fault status */
 267       Ans218 = 1 << 12, /* autoneg complete */
 268
 269       /* 218 Phyier218 interrupt enable bits */
 270       Spdie218 = 1 << 1, /* speed changed */
 271       Lscie218 = 1 << 2, /* link status changed */
 272       Ancie218 = 1 << 8, /* auto-negotiation changed */
 273};
 274
 275enum {                      /* Icr, Ics, Ims, Imc */
 276       Txdw = 0x00000001,   /* Transmit Descriptor Written Back */
 277       Txqe = 0x00000002,   /* Transmit Queue Empty */
 278       Lsc = 0x00000004,    /* Link Status Change */
 279       Rxseq = 0x00000008,  /* Receive Sequence Error */
 280       Rxdmt0 = 0x00000010, /* Rdesc Minimum Threshold Reached */
 281       Rxo = 0x00000040,    /* Receiver Overrun */
 282       Rxt0 = 0x00000080,   /* Receiver Timer Interrupt */
 283       Mdac = 0x00000200,   /* MDIO Access Completed */
 284       Rxcfg = 0x00000400,  /* Receiving /C/ ordered sets */
 285       Gpi0 = 0x00000800,   /* General Purpose Interrupts */
 286       Gpi1 = 0x00001000,
 287       Gpi2 = 0x00002000,
 288       Gpi3 = 0x00004000,
 289       Ack = 0x00020000, /* Receive ACK frame */
 290};
 291
 292enum {                             /* Txcw */
 293       TxcwFd = 0x00000020,        /* Full Duplex */
 294       TxcwHd = 0x00000040,        /* Half Duplex */
 295       TxcwPauseMASK = 0x00000180, /* Pause */
 296       TxcwPauseSHIFT = 7,
 297       TxcwPs = 1 << TxcwPauseSHIFT, /* Pause Supported */
 298       TxcwAs = 2 << TxcwPauseSHIFT, /* Asymmetric FC desired */
 299       TxcwRfiMASK = 0x00003000,     /* Remote Fault Indication */
 300       TxcwRfiSHIFT = 12,
 301       TxcwNpr = 0x00008000,    /* Next Page Request */
 302       TxcwConfig = 0x40000000, /* Transmit Config Control */
 303       TxcwAne = 0x80000000,    /* Auto-Negotiation Enable */
 304};
 305
 306enum {                            /* Rctl */
 307       Rrst = 0x00000001,         /* Receiver Software Reset */
 308       Ren = 0x00000002,          /* Receiver Enable */
 309       Sbp = 0x00000004,          /* Store Bad Packets */
 310       Upe = 0x00000008,          /* Unicast Promiscuous Enable */
 311       Mpe = 0x00000010,          /* Multicast Promiscuous Enable */
 312       Lpe = 0x00000020,          /* Long Packet Reception Enable */
 313       LbmMASK = 0x000000C0,      /* Loopback Mode */
 314       LbmOFF = 0x00000000,       /* No Loopback */
 315       LbmTBI = 0x00000040,       /* TBI Loopback */
 316       LbmMII = 0x00000080,       /* GMII/MII Loopback */
 317       LbmXCVR = 0x000000C0,      /* Transceiver Loopback */
 318       RdtmsMASK = 0x00000300,    /* Rdesc Minimum Threshold Size */
 319       RdtmsHALF = 0x00000000,    /* Threshold is 1/2 Rdlen */
 320       RdtmsQUARTER = 0x00000100, /* Threshold is 1/4 Rdlen */
 321       RdtmsEIGHTH = 0x00000200,  /* Threshold is 1/8 Rdlen */
 322       MoMASK = 0x00003000,       /* Multicast Offset */
 323       Bam = 0x00008000,          /* Broadcast Accept Mode */
 324       BsizeMASK = 0x00030000,    /* Receive Buffer Size */
 325       Bsize16384 = 0x00010000,   /* Bsex = 1 */
 326       Bsize8192 = 0x00020000,    /* Bsex = 1 */
 327       Bsize2048 = 0x00000000,
 328       Bsize1024 = 0x00010000,
 329       Bsize512 = 0x00020000,
 330       Bsize256 = 0x00030000,
 331       BsizeFlex = 0x08000000, /* Flexible Bsize in 1KB increments */
 332       Vfe = 0x00040000,       /* VLAN Filter Enable */
 333       Cfien = 0x00080000,     /* Canonical Form Indicator Enable */
 334       Cfi = 0x00100000,       /* Canonical Form Indicator value */
 335       Dpf = 0x00400000,       /* Discard Pause Frames */
 336       Pmcf = 0x00800000,      /* Pass MAC Control Frames */
 337       Bsex = 0x02000000,      /* Buffer Size Extension */
 338       Secrc = 0x04000000,     /* Strip CRC from incoming packet */
 339};
 340
 341enum { /* Srrctl */
 342       Dropen = 1 << 31,
 343};
 344
 345enum {                      /* Tctl */
 346       Trst = 0x00000001,   /* Transmitter Software Reset */
 347       Ten = 0x00000002,    /* Transmit Enable */
 348       Psp = 0x00000008,    /* Pad Short Packets */
 349       Mulr = 0x10000000,   /* Allow multiple concurrent requests */
 350       Ctmask = 0x00000FF0, /* Collision Threshold */
 351       Ctshift = 4,
 352       ColdMASK = 0x003FF000, /* Collision Distance */
 353       ColdSHIFT = 12,
 354       Swxoff = 0x00400000, /* Sofware XOFF Transmission */
 355       Pbe = 0x00800000,    /* Packet Burst Enable */
 356       Rtlc = 0x01000000,   /* Re-transmit on Late Collision */
 357       Nrtu = 0x02000000,   /* No Re-transmit on Underrrun */
 358};
 359
 360enum {                           /* [RT]xdctl */
 361       PthreshMASK = 0x0000003F, /* Prefetch Threshold */
 362       PthreshSHIFT = 0,
 363       HthreshMASK = 0x00003F00, /* Host Threshold */
 364       HthreshSHIFT = 8,
 365       WthreshMASK = 0x003F0000, /* Writeback Threshold */
 366       WthreshSHIFT = 16,
 367       Gran = 0x01000000,    /* Granularity (descriptors, not cls) */
 368       Qenable = 0x02000000, /* Queue Enable (82575) */
 369};
 370
 371enum {                    /* Rxcsum */
 372       PcssMASK = 0x00FF, /* Packet Checksum Start */
 373       PcssSHIFT = 0,
 374       Ipofl = 0x0100, /* IP Checksum Off-load Enable */
 375       Tuofl = 0x0200, /* TCP/UDP Checksum Off-load Enable */
 376};
 377
 378enum {                     /* Receive Delay Timer Ring */
 379       DelayMASK = 0xFFFF, /* delay timer in 1.024nS increments */
 380       DelaySHIFT = 0,
 381       Fpd = 0x80000000, /* Flush partial Descriptor Block */
 382};
 383
 384struct rd { /* Receive Descriptor */
 385        uint32_t addr[2];
 386        uint16_t length;
 387        uint16_t checksum;
 388        uint8_t status;
 389        uint8_t errors;
 390        uint16_t special;
 391};
 392
 393enum {               /* Rd status */
 394       Rdd = 0x01,   /* Descriptor Done */
 395       Reop = 0x02,  /* End of Packet */
 396       Ixsm = 0x04,  /* Ignore Checksum Indication */
 397       Vp = 0x08,    /* Packet is 802.1Q (matched VET) */
 398       Tcpcs = 0x20, /* TCP Checksum Calculated on Packet */
 399       Ipcs = 0x40,  /* IP Checksum Calculated on Packet */
 400       Pif = 0x80,   /* Passed in-exact filter */
 401};
 402
 403enum {              /* Rd errors */
 404       Ce = 0x01,   /* CRC Error or Alignment Error */
 405       Se = 0x02,   /* Symbol Error */
 406       Seq = 0x04,  /* Sequence Error */
 407       Cxe = 0x10,  /* Carrier Extension Error */
 408       Tcpe = 0x20, /* TCP/UDP Checksum Error */
 409       Ipe = 0x40,  /* IP Checksum Error */
 410       Rxe = 0x80,  /* RX Data Error */
 411};
 412
 413struct td {               /* Transmit Descriptor */
 414        uint32_t addr[2]; /* Data */
 415        uint32_t control;
 416        uint32_t status;
 417};
 418
 419enum {                       /* Tdesc control */
 420       LenMASK = 0x000FFFFF, /* Data/Packet Length Field */
 421       LenSHIFT = 0,
 422       DtypeCD = 0x00000000,  /* Data Type 'Context Descriptor' */
 423       DtypeDD = 0x00100000,  /* Data Type 'Data Descriptor' */
 424       PtypeTCP = 0x01000000, /* TCP/UDP Packet Type (CD) */
 425       Teop = 0x01000000,     /* End of Packet (DD) */
 426       PtypeIP = 0x02000000,  /* IP Packet Type (CD) */
 427       Ifcs = 0x02000000,     /* Insert FCS (DD) */
 428       Tse = 0x04000000,      /* TCP Segmentation Enable */
 429       Rs = 0x08000000,       /* Report Status */
 430       Rps = 0x10000000,      /* Report Status Sent */
 431       Dext = 0x20000000,     /* Descriptor Extension */
 432       Vle = 0x40000000,      /* VLAN Packet Enable */
 433       Ide = 0x80000000,      /* Interrupt Delay Enable */
 434};
 435
 436enum {                   /* Tdesc status */
 437       Tdd = 0x0001,     /* Descriptor Done */
 438       Ec = 0x0002,      /* Excess Collisions */
 439       Lc = 0x0004,      /* Late Collision */
 440       Tu = 0x0008,      /* Transmit Underrun */
 441       CssMASK = 0xFF00, /* Checksum Start Field */
 442       CssSHIFT = 8,
 443};
 444
 445struct flash {
 446        uint16_t *reg;
 447        uint32_t *reg32;
 448        uint16_t base;
 449        uint16_t lim;
 450};
 451
 452enum {
 453        /* 16 and 32-bit flash registers for ich flash parts */
 454        Bfpr = 0x00 / 4,  /* flash base 0:12; lim 16:28 */
 455        Fsts = 0x04 / 2,  /* flash status;  Hsfsts */
 456        Fctl = 0x06 / 2,  /* flash control; Hsfctl */
 457        Faddr = 0x08 / 4, /* flash address to r/w */
 458        Fdata = 0x10 / 4, /* data @ address */
 459
 460        /* status register */
 461        Fdone = 1 << 0,   /* flash cycle done */
 462        Fcerr = 1 << 1,   /* cycle error; write 1 to clear */
 463        Ael = 1 << 2,     /* direct access error log; 1 to clear */
 464        Scip = 1 << 5,    /* spi cycle in progress */
 465        Fvalid = 1 << 14, /* flash descriptor valid */
 466
 467        /* control register */
 468        Fgo = 1 << 0,     /* start cycle */
 469        Flcycle = 1 << 1, /* two bits: r=0; w=2 */
 470        Fdbc = 1 << 8,    /* bytes to read; 5 bits */
 471};
 472
 473/*
 474 * the kumeran interface is mac-to-phy for external gigabit ethernet on
 475 * intel's esb2 ich8 (io controller hub), it carries mii bits.  can be used
 476 * to reset the phy.  intel proprietary, see "kumeran specification".
 477 */
 478enum { I217inbandctlpage = 770, /* phy page */
 479       I217inbandctlreg = 18,   /* phy register */
 480       I217inbandctllnkststxtmoutmask = 0x3F00,
 481       I217inbandctllnkststxtmoutshift = 8,
 482
 483       Fextnvm6reqpllclk = 0x100,
 484       Fextnvm6enak1entrycond = 0x200, /* extend K1 entry latency */
 485
 486       Nvmk1cfg = 0x1B,   /* NVM K1 Config Word */
 487       Nvmk1enable = 0x1, /* NVM Enable K1 bit */
 488
 489       Kumctrlstaoff = 0x1F0000,
 490       Kumctrlstaoffshift = 16,
 491       Kumctrlstaren = 0x200000,
 492       Kumctrlstak1cfg = 0x7,
 493       Kumctrlstak1enable = 0x2,
 494};
 495
 496enum {
 497        /*
 498         * these were 512, 1024 & 64, but 52, 253 & 9 are usually ample;
 499         * however cpu servers and terminals can need more receive buffers
 500         * due to bursts of traffic.
 501         *
 502         * Tdlen and Rdlen have to be multiples of 128.  Rd and Td are both
 503         * 16 bytes long, so Nrd and Ntd must be multiples of 8.
 504         */
 505        Ntd = 32,  /* power of two >= 8 */
 506        Nrd = 128, /* power of two >= 8 */
 507        Rbalign = 16,
 508        Slop = 32, /* for vlan headers, crcs, etc. */
 509};
 510
 511enum { Iany = -1,
 512       i82563,
 513       i82566,
 514       i82567,
 515       i82567m,
 516       i82571,
 517       i82572,
 518       i82573,
 519       i82574,
 520       i82575,
 521       i82576,
 522       i82577,
 523       i82577m,
 524       i82578,
 525       i82578m,
 526       i82579,
 527       i82580,
 528       i82583,
 529       i210,
 530       i217,
 531       i218,
 532       i350,
 533       Nctlrtype,
 534};
 535
 536enum { Fload = 1 << 0,
 537       Fert = 1 << 1,
 538       F75 = 1 << 2,
 539       Fpba = 1 << 3,
 540       Fflashea = 1 << 4,
 541       F79phy = 1 << 5,
 542       Fnofct = 1 << 6,
 543};
 544
 545struct ctlrtype {
 546        int type;
 547        int mtu;
 548        int phyno;
 549        char *name;
 550        int flag;
 551};
 552
 553static struct ctlrtype ctlrtab[Nctlrtype] = {
 554    {i82563, 9014, 1, "i82563", Fpba},
 555    {i82566, 1514, 1, "i82566", Fload},
 556    {i82567, 9234, 1, "i82567", Fload},
 557    {i82567m, 1514, 1, "i82567m", 0},
 558    {i82571, 9234, 1, "i82571", Fpba},
 559    {i82572, 9234, 1, "i82572", Fpba},
 560    {i82573, 8192, 1, "i82573", Fert}, /* terrible perf above 8k */
 561    {i82574, 9018, 1, "i82574", 0},
 562    {i82575, 9728, 1, "i82575", F75 | Fflashea},
 563    {i82576, 9728, 1, "i82576", F75},
 564    {i82577, 4096, 2, "i82577", Fload | Fert},
 565    {i82577m, 1514, 2, "i82577", Fload | Fert},
 566    {i82578, 4096, 2, "i82578", Fload | Fert},
 567    {i82578m, 1514, 2, "i82578", Fload | Fert},
 568    {i82579, 9018, 2, "i82579", Fload | Fert | F79phy | Fnofct},
 569    {i82580, 9728, 1, "i82580", F75 | F79phy},
 570    {i82583, 1514, 1, "i82583", 0},
 571    {i210, 9728, 1, "i210", F75 | Fnofct | Fert},
 572    {i217, 9728, 1, "i217", F79phy | Fnofct | Fload | Fert},
 573    {i350, 9728, 1, "i350", F75 | F79phy | Fnofct},
 574};
 575
 576struct ctlr {
 577        struct pci_device *pcidev;
 578        struct ctlr *next;
 579        struct ether *edev;
 580        int active;
 581        int type;
 582        uint16_t eeprom[0x40];
 583
 584        qlock_t alock; /* attach */
 585        void *alloc;
 586        unsigned int rbsz;
 587        int attached;
 588
 589        int *nic;
 590        spinlock_t imlock;
 591        int im; /* interrupt mask */
 592
 593        struct rendez lrendez;
 594        int lim;
 595        int phynum;
 596        int didk1fix;
 597
 598        qlock_t slock;
 599        unsigned int statistics[Nstatistics];
 600        unsigned int lsleep;
 601        unsigned int lintr;
 602        unsigned int rsleep;
 603        unsigned int rintr;
 604        unsigned int txdw;
 605        unsigned int tintr;
 606        unsigned int ixsm;
 607        unsigned int ipcs;
 608        unsigned int tcpcs;
 609        unsigned int speeds[4];
 610
 611        uint8_t ra[Eaddrlen]; /* receive address */
 612        uint32_t mta[128];    /* multicast table array */
 613
 614        struct rendez rrendez;
 615        int rim;
 616        int rdfree;        /* rx descriptors awaiting packets */
 617        struct rd *rdba;   /* receive descriptor base address */
 618        struct block **rb; /* receive buffers */
 619        unsigned int rdh;  /* receive descriptor head */
 620        unsigned int rdt;  /* receive descriptor tail */
 621        int rdtr;          /* receive delay timer ring value */
 622        int radv;          /* receive interrupt absolute delay timer */
 623
 624        struct rendez trendez;
 625        qlock_t tlock;
 626        struct td *tdba;   /* transmit descriptor base address */
 627        struct block **tb; /* transmit buffers */
 628        int tdh;           /* transmit descriptor head */
 629        int tdt;           /* transmit descriptor tail */
 630
 631        int fcrtl;
 632        int fcrth;
 633
 634        unsigned int pbs; /* packet buffer size */
 635        unsigned int pba; /* packet buffer allocation */
 636};
 637
 638static inline uint32_t csr32r(struct ctlr *c, uintptr_t reg)
 639{
 640        return read_mmreg32((uintptr_t)(c->nic + (reg / 4)));
 641}
 642
 643static inline void csr32w(struct ctlr *c, uintptr_t reg, uint32_t val)
 644{
 645        write_mmreg32((uintptr_t)(c->nic + (reg / 4)), val);
 646}
 647
 648static struct ctlr *i82563ctlrhead;
 649static struct ctlr *i82563ctlrtail;
 650
 651static int speedtab[] = {10, 100, 1000, 0};
 652
 653static char *statistics[] = {
 654    "CRC Error",
 655    "Alignment Error",
 656    "Symbol Error",
 657    "RX Error",
 658    "Missed Packets",
 659    "Single Collision",
 660    "Excessive Collisions",
 661    "Multiple Collision",
 662    "Late Collisions",
 663    NULL,
 664    "Collision",
 665    "Transmit Underrun",
 666    "Defer",
 667    "Transmit - No CRS",
 668    "Sequence Error",
 669    "Carrier Extension Error",
 670    "Receive Error Length",
 671    NULL,
 672    "XON Received",
 673    "XON Transmitted",
 674    "XOFF Received",
 675    "XOFF Transmitted",
 676    "FC Received Unsupported",
 677    "Packets Received (64 Bytes)",
 678    "Packets Received (65-127 Bytes)",
 679    "Packets Received (128-255 Bytes)",
 680    "Packets Received (256-511 Bytes)",
 681    "Packets Received (512-1023 Bytes)",
 682    "Packets Received (1024-mtu Bytes)",
 683    "Good Packets Received",
 684    "Broadcast Packets Received",
 685    "Multicast Packets Received",
 686    "Good Packets Transmitted",
 687    NULL,
 688    "Good Octets Received",
 689    NULL,
 690    "Good Octets Transmitted",
 691    NULL,
 692    NULL,
 693    NULL,
 694    "Receive No Buffers",
 695    "Receive Undersize",
 696    "Receive Fragment",
 697    "Receive Oversize",
 698    "Receive Jabber",
 699    "Management Packets Rx",
 700    "Management Packets Drop",
 701    "Management Packets Tx",
 702    "Total Octets Received",
 703    NULL,
 704    "Total Octets Transmitted",
 705    NULL,
 706    "Total Packets Received",
 707    "Total Packets Transmitted",
 708    "Packets Transmitted (64 Bytes)",
 709    "Packets Transmitted (65-127 Bytes)",
 710    "Packets Transmitted (128-255 Bytes)",
 711    "Packets Transmitted (256-511 Bytes)",
 712    "Packets Transmitted (512-1023 Bytes)",
 713    "Packets Transmitted (1024-mtu Bytes)",
 714    "Multicast Packets Transmitted",
 715    "Broadcast Packets Transmitted",
 716    "TCP Segmentation Context Transmitted",
 717    "TCP Segmentation Context Fail",
 718    "Interrupt Assertion",
 719    "Interrupt Rx Pkt Timer",
 720    "Interrupt Rx Abs Timer",
 721    "Interrupt Tx Pkt Timer",
 722    "Interrupt Tx Abs Timer",
 723    "Interrupt Tx Queue Empty",
 724    "Interrupt Tx Desc Low",
 725    "Interrupt Rx Min",
 726    "Interrupt Rx Overrun",
 727};
 728
 729static char *cname(struct ctlr *c)
 730{
 731        return ctlrtab[c->type].name;
 732}
 733
 734static int i82563reset(struct ctlr *);
 735
 736static long i82563ifstat(struct ether *edev, void *a, long n, uint32_t offset)
 737{
 738        struct ctlr *ctlr;
 739        char *s, *p, *e, *stat;
 740        int i, r;
 741        uint64_t tuvl, ruvl;
 742
 743        ctlr = edev->ctlr;
 744        qlock(&ctlr->slock);
 745        p = s = kzmalloc(READSTR, 0);
 746        if (p == NULL) {
 747                qunlock(&ctlr->slock);
 748                error(ENOMEM, "kzmalloc did not panic");
 749        }
 750        e = p + READSTR;
 751
 752        for (i = 0; i < Nstatistics; i++) {
 753                r = csr32r(ctlr, Statistics + i * 4);
 754                stat = statistics[i];
 755                if (stat == NULL)
 756                        continue;
 757                switch (i) {
 758                case Gorcl:
 759                case Gotcl:
 760                case Torl:
 761                case Totl:
 762                        ruvl = r;
 763                        ruvl += (uint64_t)csr32r(ctlr, Statistics + (i + 1) * 4)
 764                                << 32;
 765                        tuvl = ruvl;
 766                        tuvl += ctlr->statistics[i];
 767                        tuvl += (uint64_t)ctlr->statistics[i + 1] << 32;
 768                        if (tuvl == 0)
 769                                continue;
 770                        ctlr->statistics[i] = tuvl;
 771                        ctlr->statistics[i + 1] = tuvl >> 32;
 772                        p = seprintf(p, e, "%s: %llud %llud\n", stat, tuvl,
 773                                     ruvl);
 774                        i++;
 775                        break;
 776
 777                default:
 778                        ctlr->statistics[i] += r;
 779                        if (ctlr->statistics[i] == 0)
 780                                continue;
 781                        p = seprintf(p, e, "%s: %ud %ud\n", stat,
 782                                     ctlr->statistics[i], r);
 783                        break;
 784                }
 785        }
 786
 787        p = seprintf(p, e, "lintr: %ud %ud\n", ctlr->lintr, ctlr->lsleep);
 788        p = seprintf(p, e, "rintr: %ud %ud\n", ctlr->rintr, ctlr->rsleep);
 789        p = seprintf(p, e, "tintr: %ud %ud\n", ctlr->tintr, ctlr->txdw);
 790        p = seprintf(p, e, "ixcs: %ud %ud %ud\n", ctlr->ixsm, ctlr->ipcs,
 791                     ctlr->tcpcs);
 792        p = seprintf(p, e, "ctrl: %.8ux\n", csr32r(ctlr, Ctrl));
 793        p = seprintf(p, e, "ctrlext: %.8ux\n", csr32r(ctlr, Ctrlext));
 794        p = seprintf(p, e, "status: %.8ux\n", csr32r(ctlr, Status));
 795        p = seprintf(p, e, "txcw: %.8ux\n", csr32r(ctlr, Txcw));
 796        p = seprintf(p, e, "txdctl: %.8ux\n", csr32r(ctlr, Txdctl));
 797        p = seprintf(p, e, "pbs: %dKB\n", ctlr->pbs);
 798        p = seprintf(p, e, "pba: %#.8ux\n", ctlr->pba);
 799
 800        p = seprintf(p, e, "speeds: 10:%ud 100:%ud 1000:%ud ?:%ud\n",
 801                     ctlr->speeds[0], ctlr->speeds[1], ctlr->speeds[2],
 802                     ctlr->speeds[3]);
 803        p = seprintf(p, e, "type: %s\n", cname(ctlr));
 804
 805        //  p = seprintf(p, e, "eeprom:");
 806        //  for(i = 0; i < 0x40; i++){
 807        //      if(i && ((i & 7) == 0))
 808        //          p = seprintf(p, e, "\n       ");
 809        //      p = seprintf(p, e, " %4.4ux", ctlr->eeprom[i]);
 810        //  }
 811        //  p = seprintf(p, e, "\n");
 812
 813        n = readstr(offset, a, n, s);
 814        kfree(s);
 815        qunlock(&ctlr->slock);
 816
 817        return n;
 818}
 819
 820enum { CMrdtr,
 821       CMradv,
 822       CMpause,
 823       CMan,
 824};
 825
 826static struct cmdtab i82563ctlmsg[] = {
 827    {CMrdtr, "rdtr", 2},
 828    {CMradv, "radv", 2},
 829    {CMpause, "pause", 1},
 830    {CMan, "an", 1},
 831};
 832
 833static long i82563ctl(struct ether *edev, void *buf, size_t n)
 834{
 835        ERRSTACK(1);
 836        char *p;
 837        uint32_t v;
 838        struct ctlr *ctlr;
 839        struct cmdbuf *cb;
 840        struct cmdtab *ct;
 841
 842        ctlr = edev->ctlr;
 843        if (ctlr == NULL)
 844                error(ENODEV, "i82563ctl: NULL controller");
 845
 846        cb = parsecmd(buf, n);
 847        if (waserror()) {
 848                kfree(cb);
 849                nexterror();
 850        }
 851
 852        ct = lookupcmd(cb, i82563ctlmsg, ARRAY_SIZE(i82563ctlmsg));
 853        switch (ct->index) {
 854        case CMrdtr:
 855                v = strtoul(cb->f[1], &p, 0);
 856                if (*p || v > 0xffff)
 857                        error(EINVAL, ERROR_FIXME);
 858                ctlr->rdtr = v;
 859                csr32w(ctlr, Rdtr, v);
 860                break;
 861        case CMradv:
 862                v = strtoul(cb->f[1], &p, 0);
 863                if (*p || v > 0xffff)
 864                        error(EINVAL, ERROR_FIXME);
 865                ctlr->radv = v;
 866                csr32w(ctlr, Radv, v);
 867                break;
 868        case CMpause:
 869                csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) ^ (Rfce | Tfce));
 870                break;
 871        case CMan:
 872                csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) | Lrst | Phyrst);
 873                break;
 874        }
 875        kfree(cb);
 876        poperror();
 877
 878        return n;
 879}
 880
 881static void i82563promiscuous(void *arg, int on)
 882{
 883        int rctl;
 884        struct ctlr *ctlr;
 885        struct ether *edev;
 886
 887        edev = arg;
 888        ctlr = edev->ctlr;
 889
 890        rctl = csr32r(ctlr, Rctl) & ~MoMASK;
 891        if (on)
 892                rctl |= Upe | Mpe;
 893        else
 894                rctl &= ~(Upe | Mpe);
 895        csr32w(ctlr, Rctl, rctl);
 896}
 897
 898/*
 899 * Returns the number of bits of mac address used in multicast hash,
 900 * thus the number of longs of ctlr->mta (2^(bits-5)).
 901 * This must be right for multicast (thus ipv6) to work reliably.
 902 *
 903 * The default multicast hash for mta is based on 12 bits of MAC address;
 904 * the rightmost bit is a function of Rctl's Multicast Offset: 0=>36,
 905 * 1=>35, 2=>34, 3=>32.  Exceptions include the 578, 579, 217, 218, 219;
 906 * they use only 10 bits, ignoring the rightmost 2 of the 12.
 907 */
 908static int mcastbits(struct ctlr *ctlr)
 909{
 910        switch (ctlr->type) {
 911        /*
 912         * openbsd says all `ich8' versions (ich8, ich9, ich10, pch,
 913         * pch2 and pch_lpt) have 32 longs (use 10 bits of mac address
 914         * for hash).
 915         */
 916        case i82566:
 917        case i82567:
 918                //      case i82578:
 919        case i82579:
 920        case i217:
 921        case i218:
 922                //      case i219:
 923                return 10; /* 32 longs */
 924        case i82563:
 925        case i82571:
 926        case i82572:
 927        case i82573:
 928        case i82574:
 929                //      case i82575:
 930                //      case i82583:
 931        case i210:         /* includes i211 */
 932                return 12; /* 128 longs */
 933        default:
 934                printk("82563: unsure of multicast bits in mac addresses; "
 935                       "enabling promiscuous multicast reception\n");
 936                csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Mpe);
 937                return 10; /* be conservative (for mta size) */
 938        }
 939}
 940
 941static int mcbitstolongs(int nmcbits)
 942{
 943        return 1 << (nmcbits - 5); /* 2^5 = 32 */
 944}
 945
 946static void i82563multicast(void *arg, uint8_t *addr, int on)
 947{
 948        uint32_t nbits, tblsz, hash, word, bit;
 949        struct ctlr *ctlr;
 950        struct ether *edev;
 951
 952        edev = arg;
 953        ctlr = edev->ctlr;
 954
 955        nbits = mcastbits(ctlr);
 956        tblsz = mcbitstolongs(nbits);
 957        /* assume multicast offset in Rctl is 0 (we clear it above) */
 958        hash = addr[5] << 4 | addr[4] >> 4; /* bits 47:36 of mac */
 959        if (nbits == 10)
 960                hash >>= 2; /* discard 37:36 of mac */
 961        word = (hash / 32) & (tblsz - 1);
 962        bit = 1UL << (hash % 32);
 963        /*
 964         * multiple ether addresses can hash to the same filter bit,
 965         * so it's never safe to clear a filter bit.
 966         * if we want to clear filter bits, we need to keep track of
 967         * all the multicast addresses in use, clear all the filter bits,
 968         * then set the ones corresponding to in-use addresses.
 969         */
 970        if (on)
 971                ctlr->mta[word] |= bit;
 972        //      else
 973        //              ctlr->mta[word] &= ~bit;
 974        csr32w(ctlr, Mta + word * 4, ctlr->mta[word]);
 975}
 976
 977static void i82563im(struct ctlr *ctlr, int im)
 978{
 979        spin_lock_irqsave(&ctlr->imlock);
 980        ctlr->im |= im;
 981        csr32w(ctlr, Ims, ctlr->im);
 982        spin_unlock_irqsave(&ctlr->imlock);
 983}
 984
 985static void i82563txinit(struct ctlr *ctlr)
 986{
 987        int i, r, tctl;
 988        struct block *bp;
 989
 990        /*
 991         * TODO(dcross): Figure out how to integrate this table driven
 992         * code into the stanza below.
 993         */
 994        tctl = 0x0F << Ctshift | Psp;
 995        if (0) {
 996                if ((ctlrtab[ctlr->type].flag & F75) == 0)
 997                        tctl |= (66 << ColdSHIFT | Mulr);
 998        }
 999        switch (ctlr->type) {
1000        case i210:
1001                break;
1002        default:
1003                tctl |= Mulr;
1004                /* fall through */
1005        case i217:
1006        case i218:
1007                tctl |= 66 << ColdSHIFT;
1008                break;
1009        }
1010        csr32w(ctlr, Tctl, tctl);
1011        csr32w(ctlr, Tipg, 6 << 20 | 8 << 10 | 8); /* yb sez: 0x702008 */
1012        for (i = 0; i < Ntd; i++) {
1013                bp = ctlr->tb[i];
1014                if (bp != NULL) {
1015                        ctlr->tb[i] = NULL;
1016                        freeb(bp);
1017                }
1018        }
1019        memset(ctlr->tdba, 0, Ntd * sizeof(struct td));
1020        csr32w(ctlr, Tdbal, paddr_low32(ctlr->tdba));
1021        csr32w(ctlr, Tdbah, paddr_high32(ctlr->tdba));
1022        csr32w(ctlr, Tdlen, Ntd * sizeof(struct td));
1023        ctlr->tdh = PREV_RING(0, Ntd);
1024        csr32w(ctlr, Tdh, 0);
1025        ctlr->tdt = 0;
1026        csr32w(ctlr, Tdt, 0);
1027        csr32w(ctlr, Tidv, 0); /* don't coalesce interrupts */
1028        csr32w(ctlr, Tadv, 0);
1029        r = csr32r(ctlr, Txdctl) & ~(WthreshMASK | PthreshMASK);
1030        r |= 4 << WthreshSHIFT | 4 << PthreshSHIFT;
1031        if (ctlrtab[ctlr->type].flag & F75)
1032                r |= Qenable;
1033        csr32w(ctlr, Txdctl, r);
1034        csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) | Ten);
1035}
1036
1037static int i82563cleanup(struct ctlr *ctlr)
1038{
1039        struct block *bp;
1040        int tdh, n;
1041
1042        tdh = ctlr->tdh;
1043        while (ctlr->tdba[n = NEXT_RING(tdh, Ntd)].status & Tdd) {
1044                tdh = n;
1045                bp = ctlr->tb[tdh];
1046                if (bp != NULL) {
1047                        ctlr->tb[tdh] = NULL;
1048                        freeb(bp);
1049                } else
1050                        iprint("82563 tx underrun!\n");
1051                ctlr->tdba[tdh].status = 0;
1052        }
1053        return ctlr->tdh = tdh;
1054}
1055
1056static void i82563transmit(struct ether *edev)
1057{
1058        struct td *td;
1059        struct block *bp;
1060        struct ctlr *ctlr;
1061        int tdh, tdt;
1062
1063        ctlr = edev->ctlr;
1064        qlock(&ctlr->tlock);
1065
1066        /*
1067         * Free any completed packets
1068         */
1069        tdh = i82563cleanup(ctlr);
1070
1071        /* if link down on 218, don't try since we need k1fix to run first */
1072        if (!edev->link && ctlr->type == i218 && !ctlr->didk1fix) {
1073                qunlock(&ctlr->tlock);
1074                return;
1075        }
1076
1077        /*
1078         * Try to fill the ring back up.
1079         */
1080        tdt = ctlr->tdt;
1081        for (;;) {
1082                if (NEXT_RING(tdt, Ntd) == tdh) { /* ring full? */
1083                        ctlr->txdw++;
1084                        i82563im(ctlr, Txdw);
1085                        break;
1086                }
1087                bp = qget(edev->oq);
1088                if (bp == NULL)
1089                        break;
1090                td = &ctlr->tdba[tdt];
1091                td->addr[0] = paddr_low32(bp->rp);
1092                td->addr[1] = paddr_high32(bp->rp);
1093                td->control = Ide | Rs | Ifcs | Teop | BLEN(bp);
1094                ctlr->tb[tdt] = bp;
1095                tdt = NEXT_RING(tdt, Ntd);
1096        }
1097        if (ctlr->tdt != tdt) {
1098                ctlr->tdt = tdt;
1099                wmb_f();
1100                csr32w(ctlr, Tdt, tdt);
1101        }
1102        /* else may not be any new ones, but could be some still in flight */
1103        qunlock(&ctlr->tlock);
1104}
1105
1106static void i82563replenish(struct ctlr *ctlr)
1107{
1108        struct rd *rd;
1109        int rdt;
1110        struct block *bp;
1111
1112        rdt = ctlr->rdt;
1113        while (NEXT_RING(rdt, Nrd) != ctlr->rdh) {
1114                rd = &ctlr->rdba[rdt];
1115                if (ctlr->rb[rdt] != NULL) {
1116                        printd("#l%d: 82563: rx overrun\n", ctlr->edev->ctlrno);
1117                        break;
1118                }
1119                bp = block_alloc(ctlr->rbsz + Slop + Rbalign, MEM_ATOMIC);
1120                if (bp == NULL) {
1121                        warn_once("OOM, trying to survive");
1122                        break;
1123                }
1124                ctlr->rb[rdt] = bp;
1125                rd->addr[0] = paddr_low32(bp->rp);
1126                rd->addr[1] = paddr_high32(bp->rp);
1127                rd->status = 0;
1128                ctlr->rdfree++;
1129                rdt = NEXT_RING(rdt, Nrd);
1130        }
1131        if (ctlr->rdt != rdt) {
1132                ctlr->rdt = rdt;
1133                wmb_f();
1134                csr32w(ctlr, Rdt, rdt);
1135        }
1136}
1137
1138static void i82563rxinit(struct ctlr *ctlr)
1139{
1140        struct block *bp;
1141        int i, r, rctl, type;
1142
1143        type = ctlr->type;
1144
1145        if (ctlr->rbsz <= 2048)
1146                csr32w(ctlr, Rctl, Dpf | Bsize2048 | Bam | RdtmsHALF);
1147        else {
1148                i = ctlr->rbsz / 1024;
1149                if (ctlr->rbsz % 1024)
1150                        i++;
1151                if (ctlrtab[ctlr->type].flag & F75) {
1152                        csr32w(ctlr, Rctl,
1153                               Lpe | Dpf | Bsize2048 | Bam | RdtmsHALF | Secrc);
1154                        if (ctlr->type != i82575)
1155                                i |= (Nrd / 2 >> 4) << 20; /* RdmsHalf */
1156                        csr32w(ctlr, Srrctl, i | Dropen);
1157                        csr32w(ctlr, Rmpl, ctlr->rbsz);
1158                        // csr32w(ctlr, Drxmxod, 0x7ff);
1159                } else
1160                        csr32w(ctlr, Rctl,
1161                               Lpe | Dpf | BsizeFlex * i | Bam | RdtmsHALF |
1162                                   Secrc);
1163        }
1164
1165        /*
1166         * TODO(dcross): Reconcile this with latest above code block.
1167         */
1168        if (0) {
1169                rctl = Dpf | Bsize2048 | Bam | RdtmsHALF;
1170                if (type == i82575 || type == i82576 || type == i210) {
1171                        /*
1172                         * Setting Qenable in Rxdctl does not
1173                         * appear to stick unless Ren is on.
1174                         */
1175                        csr32w(ctlr, Rctl, Ren | rctl);
1176                        csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Qenable);
1177                }
1178                csr32w(ctlr, Rctl, rctl);
1179        }
1180
1181        if (ctlrtab[ctlr->type].flag & Fert)
1182                csr32w(ctlr, Ert, 1024 / 8); /* early rx threshold */
1183
1184        csr32w(ctlr, Rdbal, paddr_low32(ctlr->rdba));
1185        csr32w(ctlr, Rdbah, paddr_high32(ctlr->rdba));
1186        csr32w(ctlr, Rdlen, Nrd * sizeof(struct rd));
1187        ctlr->rdh = ctlr->rdt = 0;
1188        csr32w(ctlr, Rdh, 0);
1189        csr32w(ctlr, Rdt, 0);
1190
1191        /* to hell with interrupt moderation, we want low latency */
1192        csr32w(ctlr, Rdtr, 0);
1193        csr32w(ctlr, Radv, 0);
1194
1195        for (i = 0; i < Nrd; i++) {
1196                bp = ctlr->rb[i];
1197                if (bp != NULL) {
1198                        ctlr->rb[i] = NULL;
1199                        freeb(bp);
1200                }
1201        }
1202        i82563replenish(ctlr);
1203
1204        if (type == i82575 || type == i82576 || type == i210) {
1205                /*
1206                 * See comment above for Qenable.
1207                 * Could shuffle the code?
1208                 */
1209                r = csr32r(ctlr, Rxdctl) & ~(WthreshMASK | PthreshMASK);
1210                csr32w(ctlr, Rxdctl, r | 2 << WthreshSHIFT | 2 << PthreshSHIFT);
1211        }
1212
1213        /*
1214         * Don't enable checksum offload.  In practice, it interferes with
1215         * tftp booting on at least the 82575.
1216         */
1217        csr32w(ctlr, Rxcsum, 0);
1218}
1219
1220static int i82563rim(void *ctlr)
1221{
1222        return ((struct ctlr *)ctlr)->rim != 0;
1223}
1224
1225/*
1226 * With no errors and the Ixsm bit set,
1227 * the descriptor status Tpcs and Ipcs bits give
1228 * an indication of whether the checksums were
1229 * calculated and valid.
1230 *
1231 * Must be called with rd->errors == 0.
1232 */
1233static void ckcksums(struct ctlr *ctlr, struct rd *rd, struct block *bp)
1234{
1235        if (0) {
1236                if (rd->status & Ixsm)
1237                        return;
1238                ctlr->ixsm++;
1239                if (rd->status & Ipcs) {
1240                        /*
1241                         * IP checksum calculated (and valid as errors == 0).
1242                         */
1243                        ctlr->ipcs++;
1244                        bp->flag |= Bipck;
1245                }
1246                if (rd->status & Tcpcs) {
1247                        /*
1248                         * TCP/UDP checksum calculated (and valid as errors ==
1249                         * 0).
1250                         */
1251                        ctlr->tcpcs++;
1252                        bp->flag |= Btcpck | Budpck;
1253                }
1254                bp->flag |= Bpktck;
1255        }
1256}
1257
1258static void i82563rproc(void *arg)
1259{
1260        struct rd *rd;
1261        struct block *bp;
1262        struct ctlr *ctlr;
1263        int rdh, rim, passed;
1264        struct ether *edev;
1265
1266        edev = arg;
1267        ctlr = edev->ctlr;
1268        i82563rxinit(ctlr);
1269        csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren);
1270
1271        /*
1272         * TODO(dcross): Work references to ctlrtab into this code.
1273         */
1274        if (ctlr->type == i210)
1275                csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Qenable);
1276
1277        for (;;) {
1278                i82563replenish(ctlr);
1279                i82563im(ctlr, Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack);
1280                ctlr->rsleep++;
1281                rendez_sleep(&ctlr->rrendez, i82563rim, ctlr);
1282
1283                rdh = ctlr->rdh;
1284                passed = 0;
1285                for (;;) {
1286                        rim = ctlr->rim;
1287                        ctlr->rim = 0;
1288                        rd = &ctlr->rdba[rdh];
1289                        if (!(rd->status & Rdd))
1290                                break;
1291
1292                        /*
1293                         * Accept eop packets with no errors.
1294                         */
1295                        bp = ctlr->rb[rdh];
1296                        if ((rd->status & Reop) && rd->errors == 0) {
1297                                bp->wp += rd->length;
1298                                bp->lim = bp->wp; /* lie like a dog. */
1299                                if (0)
1300                                        ckcksums(ctlr, rd, bp);
1301                                etheriq(edev, bp, 1); /* pass pkt upstream */
1302                                passed++;
1303                        } else {
1304                                if (rd->status & Reop && rd->errors)
1305                                        printd("%s: input packet error %#ux\n",
1306                                               tname[ctlr->type], rd->errors);
1307                                freeb(bp);
1308                        }
1309                        ctlr->rb[rdh] = NULL;
1310
1311                        /* rd needs to be replenished to accept another pkt */
1312                        rd->status = 0;
1313                        ctlr->rdfree--;
1314                        ctlr->rdh = rdh = NEXT_RING(rdh, Nrd);
1315                        /*
1316                         * if number of rds ready for packets is too low,
1317                         * set up the unready ones.
1318                         */
1319                        if (ctlr->rdfree <= Nrd - 32 || (rim & Rxdmt0))
1320                                i82563replenish(ctlr);
1321                }
1322        }
1323}
1324
1325static int i82563lim(void *ctlr)
1326{
1327        return ((struct ctlr *)ctlr)->lim != 0;
1328}
1329
1330static int phynum(struct ctlr *ctlr)
1331{
1332        if (ctlr->phynum < 0)
1333                switch (ctlr->type) {
1334                case i82577:
1335                        //      case i82578:            /* not yet implemented
1336                        //      */
1337                case i82579:
1338                case i217:
1339                case i218:
1340                        ctlr->phynum = 2; /* pcie phy */
1341                        break;
1342                default:
1343                        ctlr->phynum = 1; /* gbe phy */
1344                        break;
1345                }
1346        return ctlr->phynum;
1347}
1348
1349static unsigned int phyread(struct ctlr *ctlr, int reg)
1350{
1351        unsigned int phy, i;
1352
1353        if (reg >= 32)
1354                iprint("phyread: reg %d >= 32\n", reg);
1355        csr32w(ctlr, Mdic,
1356               MDIrop | phynum(ctlr) << MDIpSHIFT | reg << MDIrSHIFT);
1357        phy = 0;
1358        for (i = 0; i < 64; i++) {
1359                phy = csr32r(ctlr, Mdic);
1360                if (phy & (MDIe | MDIready))
1361                        break;
1362                udelay(1);
1363        }
1364        if ((phy & (MDIe | MDIready)) != MDIready)
1365                return ~0;
1366        return phy & 0xffff;
1367}
1368
1369static unsigned int phywrite(struct ctlr *ctlr, int reg, uint16_t val)
1370{
1371        unsigned int phy, i;
1372
1373        if (reg >= 32)
1374                iprint("phyread: reg %d >= 32\n", reg);
1375        csr32w(ctlr, Mdic,
1376               MDIwop | phynum(ctlr) << MDIpSHIFT | reg << MDIrSHIFT | val);
1377        phy = 0;
1378        for (i = 0; i < 64; i++) {
1379                phy = csr32r(ctlr, Mdic);
1380                if (phy & (MDIe | MDIready))
1381                        break;
1382                udelay(1);
1383        }
1384        if ((phy & (MDIe | MDIready)) != MDIready)
1385                return ~0;
1386        return 0;
1387}
1388
1389static uint32_t kmrnread(struct ctlr *ctlr, uint32_t reg_addr)
1390{
1391        /* write register address */
1392        csr32w(ctlr, Kumctrlsta,
1393               ((reg_addr << Kumctrlstaoffshift) & Kumctrlstaoff) |
1394                   Kumctrlstaren);
1395        udelay(2);
1396        /* read data */
1397        return csr32r(ctlr, Kumctrlsta);
1398}
1399
1400static void kmrnwrite(struct ctlr *ctlr, uint32_t reg_addr, uint16_t data)
1401{
1402        csr32w(ctlr, Kumctrlsta,
1403               ((reg_addr << Kumctrlstaoffshift) & Kumctrlstaoff) | data);
1404        udelay(2);
1405}
1406
1407/*
1408 * this is essentially black magic.  we blindly follow the incantations
1409 * prescribed by the god Intel:
1410 *
1411 * On ESB2, the MAC-to-PHY (Kumeran) interface must be configured after
1412 * link is up before any traffic is sent.
1413 *
1414 * workaround DMA unit hang on I218
1415 *
1416 * At 1Gbps link speed, one of the MAC's internal clocks can be stopped
1417 * for up to 4us when entering K1 (a power mode of the MAC-PHY
1418 * interconnect).  If the MAC is waiting for completion indications for 2
1419 * DMA write requests into Host memory (e.g.  descriptor writeback or Rx
1420 * packet writing) and the indications occur while the clock is stopped,
1421 * both indications will be missed by the MAC, causing the MAC to wait
1422 * for the completion indications and be unable to generate further DMA
1423 * write requests.  This results in an apparent hardware hang.
1424 *
1425 * Work-around the bug by disabling the de-assertion of the clock request
1426 * when 1Gbps link is acquired (K1 must be disabled while doing this).
1427 * Also, set appropriate Tx re-transmission timeouts for 10 and 100-half
1428 * link speeds to avoid Tx hangs.
1429 */
1430static void k1fix(struct ctlr *ctlr)
1431{
1432        int txtmout; /* units of 10Ás */
1433        uint32_t fextnvm6, status;
1434        uint16_t reg;
1435        struct ether *edev;
1436
1437        edev = ctlr->edev;
1438        fextnvm6 = csr32r(ctlr, Fextnvm6);
1439        status = csr32r(ctlr, Status);
1440        /* status speed bits are different on 217/8 than earlier ctlrs */
1441        if (edev->link && status & (Sspeed1000 >> 2)) {
1442                reg = kmrnread(ctlr, Kumctrlstak1cfg);
1443                kmrnwrite(ctlr, Kumctrlstak1cfg, reg & ~Kumctrlstak1enable);
1444                udelay(10);
1445                csr32w(ctlr, Fextnvm6, fextnvm6 | Fextnvm6reqpllclk);
1446                kmrnwrite(ctlr, Kumctrlstak1cfg, reg);
1447                ctlr->didk1fix = 1;
1448                return;
1449        }
1450        /* else uncommon cases */
1451
1452        fextnvm6 &= ~Fextnvm6reqpllclk;
1453        /*
1454         * 217 manual claims not to have Frcdplx bit in status;
1455         * 218 manual just omits the non-phy registers.
1456         */
1457        if (!edev->link || (status & (Sspeed100 >> 2 | Frcdplx)) ==
1458                               (Sspeed100 >> 2 | Frcdplx)) {
1459                csr32w(ctlr, Fextnvm6, fextnvm6);
1460                ctlr->didk1fix = 1;
1461                return;
1462        }
1463
1464        /* access other page via phy addr 1 reg 31, then access reg 16-30 */
1465        phywrite(ctlr, Phypage, I217inbandctlpage << 5);
1466        reg = phyread(ctlr, I217inbandctlreg) & ~I217inbandctllnkststxtmoutmask;
1467        if (status & (Sspeed100 >> 2)) { /* 100Mb/s half-duplex? */
1468                txtmout = 5;
1469                fextnvm6 &= ~Fextnvm6enak1entrycond;
1470        } else { /* 10Mb/s */
1471                txtmout = 50;
1472                fextnvm6 |= Fextnvm6enak1entrycond;
1473        }
1474        phywrite(ctlr, I217inbandctlreg,
1475                 reg | txtmout << I217inbandctllnkststxtmoutshift);
1476        csr32w(ctlr, Fextnvm6, fextnvm6);
1477        phywrite(ctlr, Phypage, 0 << 5); /* reset page to usual 0 */
1478        ctlr->didk1fix = 1;
1479}
1480
1481/*
1482 * watch for changes of link state
1483 */
1484static void i82563lproc(void *v)
1485{
1486        unsigned int phy, sp, a, phy79, prevlink;
1487        struct ctlr *ctlr;
1488        struct ether *edev;
1489
1490        edev = v;
1491        ctlr = edev->ctlr;
1492        phy79 = 0;
1493        switch (ctlr->type) {
1494        case i82579:
1495        case i82580:
1496        case i217:
1497        case i218:
1498        case i350:
1499                phy79 = 1;
1500                break;
1501        }
1502        /*
1503         * TODO(dcross): Extract PHY number from ctlrtab.
1504         */
1505        if (ctlr->type == i82573 && phyread(ctlr, Phyier) != ~0) {
1506                phy = phyread(ctlr, Phyier);
1507                phywrite(ctlr, Phyier, phy | Lscie | Ancie | Spdie | Panie);
1508        } else if (phy79 && phyread(ctlr, Phyier218) != ~0) {
1509                phy = phyread(ctlr, Phyier218);
1510                phywrite(ctlr, Phyier218, phy | Lscie218 | Ancie218 | Spdie218);
1511        }
1512        prevlink = 0;
1513        for (;;) {
1514                a = 0;
1515                phy = phyread(ctlr, phy79 ? Phystat : Physsr);
1516                if (phy == ~0)
1517                        goto next;
1518                if (phy79) {
1519                        sp = (phy >> 8) & 3;
1520                        // a = phy & (ctlr->type == i218? Anfs: Ans);
1521                        a = phy & Anfs;
1522                } else {
1523                        sp = (phy >> 14) & 3;
1524                        switch (ctlr->type) {
1525                        case i82563:
1526                        case i210:
1527                                a = phyread(ctlr, Phyisr) & Ane; /* a-n error */
1528                                break;
1529                        case i82571:
1530                        case i82572:
1531                        case i82575:
1532                        case i82576:
1533                                a = phyread(ctlr, Phylhr) & Anf; /* a-n fault */
1534                                sp = (sp - 1) & 3;
1535                                break;
1536                        }
1537                }
1538                if (a) /* enable & restart autoneg */ /* enable & restart
1539                                                         autoneg */
1540                        phywrite(ctlr, Phyctl,
1541                                 phyread(ctlr, Phyctl) | Ran | Ean);
1542                edev->link = (phy & (phy79 ? Link : Rtlink)) != 0;
1543                if (edev->link) {
1544                        ctlr->speeds[sp]++;
1545                        if (speedtab[sp])
1546                                edev->mbps = speedtab[sp];
1547                        if (prevlink == 0 && ctlr->type == i218)
1548                                k1fix(ctlr); /* link newly up: kludge away */
1549                        netif_carrier_on(edev);
1550                } else
1551                        ctlr->didk1fix = 0; /* force fix at next link up */
1552                prevlink = edev->link;
1553        next:
1554                ctlr->lim = 0;
1555                i82563im(ctlr, Lsc);
1556                ctlr->lsleep++;
1557                rendez_sleep(&ctlr->lrendez, i82563lim, ctlr);
1558        }
1559}
1560
/* Rendezvous condition that is never true: ignores its argument. */
static int return0(void *unused_void_p)
{
	(void)unused_void_p;
	return 0;
}
1565
1566static void i82563tproc(void *v)
1567{
1568        struct ether *edev;
1569        struct ctlr *ctlr;
1570
1571        edev = v;
1572        ctlr = edev->ctlr;
1573        for (;;) {
1574                rendez_sleep(&ctlr->trendez, return0, 0);
1575                i82563transmit(edev);
1576        }
1577}
1578
1579/*
1580 * controller is buggered; shock it back to life.
1581 */
1582static void restart(struct ctlr *ctlr)
1583{
1584        if (0) {
1585                static spinlock_t rstlock;
1586
1587                qlock(&ctlr->tlock);
1588                spin_lock_irqsave(&rstlock);
1589                iprint("#l%d: resetting...", ctlr->edev->ctlrno);
1590                i82563reset(ctlr);
1591                /* [rt]xinit reset the ring indices */
1592                i82563txinit(ctlr);
1593                i82563rxinit(ctlr);
1594                csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren);
1595                spin_unlock_irqsave(&rstlock);
1596                qunlock(&ctlr->tlock);
1597                iprint("reset\n");
1598        }
1599}
1600
1601static void freemem(struct ctlr *ctlr)
1602{
1603        kfree(ctlr->tb);
1604        ctlr->tb = NULL;
1605        kfree(ctlr->rb);
1606        ctlr->rb = NULL;
1607        kfree(ctlr->tdba);
1608        ctlr->tdba = NULL;
1609        kfree(ctlr->rdba);
1610        ctlr->rdba = NULL;
1611}
1612
1613static void i82563attach(struct ether *edev)
1614{
1615        ERRSTACK(2);
1616        int i;
1617        struct block *bp;
1618        struct ctlr *ctlr;
1619        char *lname, *rname, *tname;
1620
1621        ctlr = edev->ctlr;
1622        qlock(&ctlr->alock);
1623
1624        if (ctlr->attached) {
1625                qunlock(&ctlr->alock);
1626                return;
1627        }
1628
1629        if (waserror()) {
1630                freemem(ctlr);
1631                qunlock(&ctlr->alock);
1632                nexterror();
1633        }
1634
1635        ctlr->alloc = kzmalloc(
1636            Nrd * sizeof(struct rd) + Ntd * sizeof(struct td) + 255, MEM_WAIT);
1637        if (ctlr->alloc == NULL) {
1638                qunlock(&ctlr->alock);
1639                error(ENOMEM, "i82563attach: error allocating rx/tx rings");
1640        }
1641        ctlr->rdba = (struct rd *)ROUNDUP((uintptr_t)ctlr->alloc, 256);
1642        ctlr->tdba = (struct td *)(ctlr->rdba + Nrd);
1643        ctlr->rb = kzmalloc(Nrd * sizeof(struct block *), 0);
1644        ctlr->tb = kzmalloc(Ntd * sizeof(struct block *), 0);
1645        if (ctlr->rb == NULL || ctlr->tb == NULL) {
1646                qunlock(&ctlr->alock);
1647                error(ENOMEM, "i82563attach: error allocating rx/tx buffers");
1648        }
1649
1650        ctlr->edev = edev; /* point back to Ether* */
1651        ctlr->attached = 1;
1652
1653        lname = kzmalloc(KNAMELEN, MEM_WAIT);
1654        snprintf(lname, KNAMELEN, "#l%dl", edev->ctlrno);
1655        ktask(lname, i82563lproc, edev);
1656
1657        rname = kzmalloc(KNAMELEN, MEM_WAIT);
1658        snprintf(rname, KNAMELEN, "#l%dr", edev->ctlrno);
1659        ktask(rname, i82563rproc, edev);
1660
1661        tname = kzmalloc(KNAMELEN, MEM_WAIT);
1662        snprintf(tname, KNAMELEN, "#l%dt", edev->ctlrno);
1663        ktask(tname, i82563tproc, edev);
1664
1665        i82563txinit(ctlr);
1666
1667        qunlock(&ctlr->alock);
1668        poperror();
1669}
1670
1671static void i82563interrupt(struct hw_trapframe *unused_hw_trapframe, void *arg)
1672{
1673        struct ctlr *ctlr;
1674        struct ether *edev;
1675        int icr, im, i, loops;
1676
1677        edev = arg;
1678        ctlr = edev->ctlr;
1679        spin_lock_irqsave(&ctlr->imlock);
1680        csr32w(ctlr, Imc, ~0);
1681        im = ctlr->im;
1682        loops = 0;
1683        i = Nrd; /* don't livelock */
1684        for (icr = csr32r(ctlr, Icr); icr & ctlr->im && i-- > 0;
1685             icr = csr32r(ctlr, Icr)) {
1686                loops++;
1687                if (icr & Lsc) {
1688                        im &= ~Lsc;
1689                        ctlr->lim = icr & Lsc;
1690                        rendez_wakeup(&ctlr->lrendez);
1691                        ctlr->lintr++;
1692                }
1693                if (icr & (Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack)) {
1694                        ctlr->rim = icr & (Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack);
1695                        im &= ~(Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack);
1696                        rendez_wakeup(&ctlr->rrendez);
1697                        ctlr->rintr++;
1698                }
1699                if (icr & Txdw) {
1700                        im &= ~Txdw;
1701                        ctlr->tintr++;
1702                        rendez_wakeup(&ctlr->trendez);
1703                }
1704        }
1705        ctlr->im = im;
1706        csr32w(ctlr, Ims, im);
1707        spin_unlock_irqsave(&ctlr->imlock);
1708}
1709
1710/* assume misrouted interrupts and check all controllers */
1711static void i82575interrupt(struct hw_trapframe *unused_hw_trapframe,
1712                            void *unused_arg)
1713{
1714        struct ctlr *ctlr;
1715
1716        for (ctlr = i82563ctlrhead; ctlr != NULL && ctlr->edev != NULL;
1717             ctlr = ctlr->next)
1718                i82563interrupt(NULL, ctlr->edev);
1719}
1720
1721static int i82563detach0(struct ctlr *ctlr)
1722{
1723        int r, timeo;
1724
1725        /*
1726         * Perform a device reset to get the chip back to the
1727         * power-on state, followed by an EEPROM reset to read
1728         * the defaults for some internal registers.
1729         */
1730        csr32w(ctlr, Imc, ~0);
1731        csr32w(ctlr, Rctl, 0);
1732        csr32w(ctlr, Tctl, 0);
1733
1734        udelay(1000 * 1000);
1735
1736        /*
1737         * Balance Rx/Tx packet buffer.
1738         * No need to set PBA register unless using jumbo, defaults to 32KB
1739         * for receive. If it is changed, then have to do a MAC reset,
1740         * and need to do that at the the right time as it will wipe stuff.
1741         *
1742         * TODO(dcross): reconcile the following code with the above commentary.
1743         */
1744        if (0) {
1745                if (ctlr->rbsz > 8192 && ctlrtab[ctlr->type].flag & Fpba) {
1746                        ctlr->pba = csr32r(ctlr, Pba);
1747                        r = ctlr->pba >> 16;
1748                        r += ctlr->pba & 0xffff;
1749                        r >>= 1;
1750                        csr32w(ctlr, Pba, r);
1751                } else if (ctlr->type == i82573 && ctlr->rbsz > 1514)
1752                        csr32w(ctlr, Pba, 14);
1753        }
1754        ctlr->pba = csr32r(ctlr, Pba);
1755
1756        /* set packet buffer size if present.  no effect until soft reset. */
1757        switch (ctlr->type) {
1758        case i82566:
1759        case i82567:
1760        case i217:
1761                ctlr->pbs = 16; /* in KB */
1762                csr32w(ctlr, Pbs, ctlr->pbs);
1763                break;
1764        case i218:
1765                // after pxe or 9fat boot, pba is always 0xe0012 on i218 => 32K
1766                ctlr->pbs = (ctlr->pba >> 16) + (uint16_t)ctlr->pba;
1767                csr32w(ctlr, Pbs, ctlr->pbs);
1768                break;
1769        }
1770
1771        r = csr32r(ctlr, Ctrl);
1772        if (ctlr->type == i82566 || ctlr->type == i82567 ||
1773            ctlr->type == i82579)
1774                r |= Phyrst;
1775        csr32w(ctlr, Ctrl, Devrst | r);
1776        udelay(1000);
1777        for (timeo = 0; timeo < 1000; timeo++) {
1778                if (!(csr32r(ctlr, Ctrl) & Devrst))
1779                        break;
1780                udelay(1000);
1781        }
1782        if (csr32r(ctlr, Ctrl) & Devrst)
1783                return -1;
1784
1785        r = csr32r(ctlr, Ctrlext);
1786        csr32w(ctlr, Ctrlext, r | Eerst);
1787        udelay(1000);
1788        for (timeo = 0; timeo < 1000; timeo++) {
1789                if (!(csr32r(ctlr, Ctrlext) & Eerst))
1790                        break;
1791                udelay(1000);
1792        }
1793        if (csr32r(ctlr, Ctrlext) & Eerst)
1794                return -1;
1795
1796        csr32w(ctlr, Imc, ~0);
1797        udelay(1000);
1798        for (timeo = 0; timeo < 1000; timeo++) {
1799                if (!csr32r(ctlr, Icr))
1800                        break;
1801                udelay(1000);
1802        }
1803        if (csr32r(ctlr, Icr))
1804                return -1;
1805
1806        csr32w(ctlr, Ctrl, Slu | csr32r(ctlr, Ctrl));
1807        return 0;
1808}
1809
1810static int i82563detach(struct ctlr *ctlr)
1811{
1812        int r;
1813        static spinlock_t detlck;
1814
1815        spin_lock_irqsave(&detlck);
1816        r = i82563detach0(ctlr);
1817        spin_unlock_irqsave(&detlck);
1818        return r;
1819}
1820
1821static void i82563shutdown(struct ether *ether)
1822{
1823        i82563detach(ether->ctlr);
1824}
1825
1826static uint16_t eeread(struct ctlr *ctlr, int adr)
1827{
1828        uint32_t n;
1829
1830        csr32w(ctlr, Eerd, EEstart | adr << 2);
1831        n = 1000000;
1832        while (n > 0 && (csr32r(ctlr, Eerd) & EEdone) == 0)
1833                n--;
1834        if (n == 0)
1835                panic("i82563: eeread stuck");
1836        return csr32r(ctlr, Eerd) >> 16;
1837}
1838
1839/* load eeprom into ctlr */
1840static int eeload(struct ctlr *ctlr)
1841{
1842        uint16_t sum;
1843        int data, adr;
1844
1845        sum = 0;
1846        for (adr = 0; adr < 0x40; adr++) {
1847                data = eeread(ctlr, adr);
1848                ctlr->eeprom[adr] = data;
1849                sum += data;
1850        }
1851        return sum;
1852}
1853
1854static int fcycle(struct ctlr *unused_ctlr_p, struct flash *f)
1855{
1856        uint16_t s, i;
1857
1858        s = f->reg[Fsts];
1859        if ((s & Fvalid) == 0)
1860                return -1;
1861        f->reg[Fsts] |= Fcerr | Ael;
1862        for (i = 0; i < 10; i++) {
1863                if ((s & Scip) == 0) /* spi cycle done? */
1864                        return 0;
1865                udelay(1000);
1866                s = f->reg[Fsts];
1867        }
1868        return -1;
1869}
1870
1871static int fread(struct ctlr *ctlr, struct flash *f, int ladr)
1872{
1873        uint16_t s;
1874        uint32_t n;
1875
1876        udelay(1000);
1877        if (fcycle(ctlr, f) == -1)
1878                return -1;
1879        f->reg[Fsts] |= Fdone;
1880        f->reg32[Faddr] = ladr;
1881
1882        /* setup flash control register */
1883        s = f->reg[Fctl] & ~(0x1f << 8);
1884        s |= (2 - 1) << 8;   /* 2 bytes */
1885        s &= ~(2 * Flcycle); /* read */
1886        f->reg[Fctl] = s | Fgo;
1887
1888        n = 1000000;
1889        while (n > 0 && (f->reg[Fsts] & Fdone) == 0)
1890                n--;
1891        if (n == 0)
1892                panic("i82563: fread stuck");
1893        if (f->reg[Fsts] & (Fcerr | Ael))
1894                return -1;
1895        return f->reg32[Fdata] & 0xffff;
1896}
1897
1898/* load flash into ctlr */
1899static int fload(struct ctlr *ctlr)
1900{
1901        uint32_t data, r, adr;
1902        uint16_t sum;
1903        struct pci_device *pcidev = ctlr->pcidev;
1904        struct flash f;
1905
1906        f.reg = pci_get_mmio_bar_kva(pcidev, 1);
1907        if (f.reg == NULL)
1908                return -1;
1909        f.reg32 = (void *)f.reg;
1910        f.base = f.reg32[Bfpr] & 0x1fff;
1911        f.lim = (f.reg32[Bfpr] >> 16) & 0x1fff;
1912        if (csr32r(ctlr, Eec) & Sec1val)
1913                f.base += (f.lim + 1 - f.base) >> 1;
1914        r = f.base << 12;
1915
1916        sum = 0;
1917        for (adr = 0; adr < 0x40; adr++) {
1918                data = fread(ctlr, &f, r + adr * 2);
1919                if (data == -1)
1920                        break;
1921                ctlr->eeprom[adr] = data;
1922                sum += data;
1923        }
1924        return sum;
1925}
1926
1927static int i82563reset(struct ctlr *ctlr)
1928{
1929        int i, r, type;
1930
1931        /*
1932         * TODO(dcross): Integrate ctlrtab references into this code.
1933         */
1934        if (i82563detach(ctlr)) {
1935                iprint("82563 reset: detach failed\n");
1936                return -1;
1937        }
1938        type = ctlr->type;
1939        if (ctlr->ra[Eaddrlen - 1] != 0)
1940                goto macset;
1941        switch (type) {
1942        case i82566:
1943        case i82567:
1944        case i82577:
1945                //  case i82578:            /* not yet implemented */
1946        case i82579:
1947        case i217:
1948        case i218:
1949                r = fload(ctlr);
1950                break;
1951        default:
1952                r = eeload(ctlr);
1953                break;
1954        }
1955        if (r != 0 && r != 0xBABA) {
1956                printd("%s: bad EEPROM checksum - %#.4ux\n", tname[type], r);
1957                // return -1;
1958        }
1959
1960        /* set mac addr */
1961        for (i = 0; i < Eaddrlen / 2; i++) {
1962                ctlr->ra[2 * i] = ctlr->eeprom[Ea + i];
1963                ctlr->ra[2 * i + 1] = ctlr->eeprom[Ea + i] >> 8;
1964        }
1965        /* ea ctlr[1] = ea ctlr[0]+1 */
1966        ctlr->ra[5] += (csr32r(ctlr, Status) & Lanid) >> 2;
1967        /*
1968         * zero other mac addresses.`
1969         * AV bits should be zeroed by master reset & there may only be 11
1970         * other registers on e.g., the i217.
1971         */
1972        for (i = 1; i < 12; i++) { /* `12' used to be `16' here */
1973                csr32w(ctlr, Ral + i * 8, 0);
1974                csr32w(ctlr, Rah + i * 8, 0);
1975        }
1976        memset(ctlr->mta, 0, sizeof(ctlr->mta));
1977macset:
1978        /* low mac addr */
1979        csr32w(ctlr, Ral,
1980               ctlr->ra[3] << 24 | ctlr->ra[2] << 16 | ctlr->ra[1] << 8 |
1981                   ctlr->ra[0]);
1982        /* address valid | high mac addr */
1983        csr32w(ctlr, Rah, 0x80000000 | ctlr->ra[5] << 8 | ctlr->ra[4]);
1984
1985        /* populate multicast table */
1986        for (i = 0; i < mcbitstolongs(mcastbits(ctlr)); i++)
1987                csr32w(ctlr, Mta + i * 4, ctlr->mta[i]);
1988
1989        /*
1990         * Does autonegotiation affect this manual setting?
1991         * The correct values here should depend on the PBA value
1992         * and maximum frame length, no?
1993         */
1994        /* fixed flow control ethernet address 0x0180c2000001 */
1995        csr32w(ctlr, Fcal, 0x00C28001);
1996        csr32w(ctlr, Fcah, 0x0100);
1997        if (type != i82579 && type != i210 && type != i217 && type != i218)
1998                /* flow control type, dictated by Intel */
1999                csr32w(ctlr, Fct, 0x8808);
2000        csr32w(ctlr, Fcttv, 0x0100); /* for XOFF frame */
2001        // ctlr->fcrtl = 0x00002000;        /* rcv low water mark: 8KB */
2002        /* rcv high water mark: 16KB, < rcv buffer in PBA & RXA */
2003        // ctlr->fcrth = 0x00004000;
2004        ctlr->fcrtl = ctlr->fcrth = 0;
2005        csr32w(ctlr, Fcrtl, ctlr->fcrtl);
2006        csr32w(ctlr, Fcrth, ctlr->fcrth);
2007        return 0;
2008}
2009
2010static void i82563pci(void)
2011{
2012        int type;
2013        void *mem;
2014        struct pci_device *p;
2015        struct ctlr *ctlr;
2016
2017        p = NULL;
2018        STAILQ_FOREACH (p, &pci_devices, all_dev) {
2019                if (p->ven_id != 0x8086)
2020                        continue;
2021                switch (p->dev_id) {
2022                default:
2023                        continue;
2024                case 0x1096:
2025                case 0x10ba:
2026                case 0x1098: /* serdes; not seen */
2027                case 0x10bb: /* serdes */
2028                        type = i82563;
2029                        break;
2030                case 0x1049: /* mm */
2031                case 0x104a: /* dm */
2032                case 0x104b: /* dc */
2033                case 0x104d: /* mc */
2034                case 0x10bd: /* dm */
2035                case 0x294c: /* dc-2 */
2036                        type = i82566;
2037                        break;
2038                case 0x10de: /* lm-3 */
2039                case 0x10df: /* lf ich10 */
2040                case 0x10e5: /* lm ich9 */
2041                case 0x10f5: /* lm-2 */
2042                        type = i82567;
2043                        break;
2044                case 0x10bf: /* lf ich9m */
2045                case 0x10cb: /* v ich9m */
2046                case 0x10cd: /* lf ich10 */
2047                case 0x10ce: /* v ich10 */
2048                case 0x10cc: /* lm ich10 */
2049                        type = i82567m;
2050                        break;
2051                case 0x105e: /* eb */
2052                case 0x105f: /* eb */
2053                case 0x1060: /* eb */
2054                case 0x10a4: /* eb */
2055                case 0x10a5: /* eb  fiber */
2056                case 0x10bc: /* eb */
2057                case 0x10d9: /* eb serdes */
2058                case 0x10da: /* eb serdes “ophir” */
2059                        type = i82571;
2060                        break;
2061                case 0x107d: /* eb copper */
2062                case 0x107e: /* ei fiber */
2063                case 0x107f: /* ei */
2064                case 0x10b9: /* sic, 82572gi */
2065                        type = i82572;
2066                        break;
2067                case 0x108b: /*  v */
2068                case 0x108c: /*  e (iamt) */
2069                case 0x109a: /*  l */
2070                        type = i82573;
2071                        break;
2072                case 0x10d3: /* l */
2073                        type = i82574;
2074                        break;
2075                case 0x10a7: /* 82575eb: one of a pair of controllers */
2076                case 0x10a9: /* fiber/serdes */
2077                        type = i82575;
2078                        break;
2079                case 0x10c9: /* 82576 copper */
2080                case 0x10e6: /* 82576 fiber */
2081                case 0x10e7: /* 82576 serdes */
2082                case 0x150d: /* backplane */
2083                        type = i82576;
2084                        break;
2085                case 0x10ea: /* 82577lm */
2086                        type = i82577;
2087                        break;
2088                case 0x10eb: /* lm “calpella” */
2089                        type = i82577m;
2090                        break;
2091                case 0x1502: /* 82579lm */
2092                case 0x1503: /* 82579v */
2093                        type = i82579;
2094                        break;
2095                case 0x10f0: /* dm “king's creek” */
2096                        type = i82578m;
2097                        break;
2098                case 0x150e: /* “barton hills” */
2099                case 0x150f: /* fiber */
2100                case 0x1510: /* backplane */
2101                case 0x1511: /* sfp */
2102                case 0x1516:
2103                        type = i82580;
2104                        break;
2105                case 0x1506: /* v */
2106                        type = i82583;
2107                        break;
2108                case 0x1533: /* i210-t1 */
2109                case 0x1534: /* i210 */
2110                case 0x1536: /* i210-fiber */
2111                case 0x1537: /* i210-backplane */
2112                case 0x1538:
2113                case 0x1539: /* i211 */
2114                case 0x157b: /* i210 */
2115                case 0x157c: /* i210 */
2116                        type = i210;
2117                        break;
2118                case 0x153a: /* i217-lm */
2119                case 0x153b: /* i217-v */
2120                        type = i217;
2121                        break;
2122                case 0x15a0: /* i218-lm */
2123                case 0x15a1: /* i218-v */
2124                case 0x15a2: /* i218-lm */
2125                case 0x15a3: /* i218-v */
2126                        type = i218;
2127                        break;
2128                case 0x151f: /* “powerville” eeprom-less */
2129                case 0x1521: /* copper */
2130                case 0x1522: /* fiber */
2131                case 0x1523: /* serdes */
2132                case 0x1524: /* sgmii */
2133                        type = i350;
2134                        break;
2135                }
2136
2137                mem = pci_get_mmio_bar_kva(p, 0);
2138                if (mem == NULL) {
2139                        printd("%s: can't map bar 0!\n", tname[type]);
2140                        continue;
2141                }
2142                ctlr = kzmalloc(sizeof(struct ctlr), 0);
2143                if (ctlr == NULL)
2144                        error(ENOMEM, "i82563pci: alloc for ctlr failed");
2145                ctlr->rbsz = ctlrtab[type].mtu;
2146                ctlr->pcidev = p;
2147                ctlr->type = type;
2148                ctlr->nic = mem;
2149                ctlr->phynum = -1; /* not yet known */
2150
2151                qlock_init(&ctlr->alock);
2152                spinlock_init_irqsave(&ctlr->imlock);
2153                rendez_init(&ctlr->lrendez);
2154                qlock_init(&ctlr->slock);
2155                rendez_init(&ctlr->rrendez);
2156                rendez_init(&ctlr->trendez);
2157                qlock_init(&ctlr->tlock);
2158
2159                pci_set_bus_master(p);
2160                if (i82563reset(ctlr)) {
2161                        kfree(ctlr);
2162                        continue;
2163                }
2164
2165                if (i82563ctlrhead != NULL)
2166                        i82563ctlrtail->next = ctlr;
2167                else
2168                        i82563ctlrhead = ctlr;
2169                i82563ctlrtail = ctlr;
2170        }
2171}
2172
2173static int pnp(struct ether *edev, int type)
2174{
2175        struct ctlr *ctlr;
2176        static int done;
2177
2178        if (!done) {
2179                i82563pci();
2180                done = 1;
2181        }
2182
2183        /*
2184         * Any adapter matches if no edev->port is supplied,
2185         * otherwise the ports must match.  Using the 'NIC', which is BAR0's
2186         * unique KVA, for identification.
2187         */
2188        for (ctlr = i82563ctlrhead; ctlr != NULL; ctlr = ctlr->next) {
2189                if (ctlr->active)
2190                        continue;
2191                if (type != Iany && ctlr->type != type)
2192                        continue;
2193                if (edev->port == 0 || edev->port == (uintptr_t)ctlr->nic) {
2194                        ctlr->active = 1;
2195                        break;
2196                }
2197        }
2198        if (ctlr == NULL)
2199                return -1;
2200
2201        edev->ctlr = ctlr;
2202        strlcpy(edev->drv_name, "i82563", KNAMELEN);
2203        ctlr->edev = edev; /* point back to Ether* */
2204        edev->port = (uintptr_t)ctlr->nic;
2205        edev->irq = ctlr->pcidev->irqline;
2206        edev->tbdf = pci_to_tbdf(ctlr->pcidev);
2207        edev->mbps = 1000;
2208        edev->max_mtu = ctlr->rbsz - ETHERHDRSIZE;
2209        edev->mtu = edev->mtu;
2210        memmove(edev->ea, ctlr->ra, Eaddrlen);
2211        /* Jim or whoever have this turned on already.  We might be capable of
2212         * other features. */
2213        edev->feat = NETF_RXCSUM;
2214
2215        /*
2216         * Linkage to the generic ethernet driver.
2217         */
2218        edev->attach = i82563attach;
2219        edev->transmit = i82563transmit;
2220        edev->ifstat = i82563ifstat;
2221        edev->ctl = i82563ctl;
2222
2223        edev->arg = edev;
2224        edev->promiscuous = i82563promiscuous;
2225        edev->shutdown = i82563shutdown;
2226        edev->multicast = i82563multicast;
2227
2228        register_irq(edev->irq,
2229                     ctlr->type == i82575 ? i82575interrupt : i82563interrupt,
2230                     edev, edev->tbdf);
2231        return 0;
2232}
2233
/*
 * Per-model probe entry points.  Each one forwards to pnp() with the
 * controller type it is willing to bind to and returns pnp()'s result
 * (0 on success, -1 if no matching controller is available).
 */

/* Bind to any supported controller type. */
static int anypnp(struct ether *e)
{
        return pnp(e, Iany);
}

/* Bind only to an i82563. */
static int i82563pnp(struct ether *e)
{
        return pnp(e, i82563);
}

/* Bind only to an i82566. */
static int i82566pnp(struct ether *e)
{
        return pnp(e, i82566);
}

/* Bind only to an i82571. */
static int i82571pnp(struct ether *e)
{
        return pnp(e, i82571);
}

/* Bind only to an i82572. */
static int i82572pnp(struct ether *e)
{
        return pnp(e, i82572);
}

/* Bind only to an i82573. */
static int i82573pnp(struct ether *e)
{
        return pnp(e, i82573);
}

/* Bind only to an i82575. */
static int i82575pnp(struct ether *e)
{
        return pnp(e, i82575);
}

/* Bind only to an i82579. */
static int i82579pnp(struct ether *e)
{
        return pnp(e, i82579);
}

/* Bind only to an i210. */
static int i210pnp(struct ether *e)
{
        return pnp(e, i210);
}

/* Bind only to an i217. */
static int i217pnp(struct ether *e)
{
        return pnp(e, i217);
}

/* Bind only to an i218. */
static int i218pnp(struct ether *e)
{
        return pnp(e, i218);
}
2288
2289static void __init ether82563link(void)
2290{
2291        /* recognise lots of model numbers for debugging assistance */
2292        addethercard("i82563", i82563pnp);
2293        addethercard("i82566", i82566pnp);
2294        addethercard("i82571", i82571pnp);
2295        addethercard("i82572", i82572pnp);
2296        addethercard("i82573", i82573pnp);
2297        addethercard("i82575", i82575pnp);
2298        addethercard("i82579", i82579pnp);
2299        addethercard("i210", i210pnp);
2300        addethercard("i217", i217pnp);
2301        addethercard("i218", i218pnp);
2302        addethercard("igbepcie", anypnp);
2303}
2304init_func_3(ether82563link);
2305