akaros/kern/drivers/net/ether82563.c
<<
>>
Prefs
   1/*
   2 * Copyright 2008-2014
   3 * erik quanstrom
   4 *
   5 * This software is provided `as-is,' without any express or implied
   6 * warranty.  In no event will the author be held liable for any damages
   7 * arising from the use of this software.
   8 *
   9 * Permission is granted to anyone to use this software for any purpose,
  10 * including commercial applications, and to alter it and redistribute it
  11 * freely, subject to the following restrictions:
  12 *
  13 * 1.  The origin of this software must not be misrepresented; you must
  14 * not claim that you wrote the original software.  If you use this
  15 * software in a product, an acknowledgment in the product documentation
  16 * would be appreciated but is not required.
  17 *
  18 * 2.  Altered source versions must be plainly marked as such, and must
  19 * not be misrepresented as being the original software.
  20 *
  21 * 3.  This notice may not be removed or altered from any source
  22 * distribution.
  23 */
  24/* This code has been modified by UC Berkeley and Google to work in Akaros. */
  25/*
  26 * Intel Gigabit Ethernet PCI-Express Controllers.
  27 *      8256[367], 8257[1-79], 21[078]
  28 * Pretty basic, does not use many of the chip smarts.
  29 * The interrupt mitigation tuning for each chip variant
  30 * is probably different. The reset/initialisation
  31 * sequence needs straightened out. Doubt the PHY code
  32 * for the 82575eb is right.
  33 *
  34 * on the assumption that allowing jumbo packets makes the controller
  35 * much slower (as is true of the 82579), never allow jumbos.
  36 */
  37#include <assert.h>
  38#include <cpio.h>
  39#include <error.h>
  40#include <kmalloc.h>
  41#include <kref.h>
  42#include <net/ip.h>
  43#include <pmap.h>
  44#include <slab.h>
  45#include <smp.h>
  46#include <stdio.h>
  47#include <string.h>
  48
  49#define now() TK2MS(MACHP(0)->ticks)
  50
  51/*
  52 * these are in the order they appear in the manual, not numeric order.
  53 * It was too hard to find them in the book. Ref 21489, rev 2.6
  54 */
  55
  56enum {
  57        /* General */
  58        Ctrl = 0x0000,       /* Device Control */
  59        Status = 0x0008,     /* Device Status */
  60        Eec = 0x0010,        /* EEPROM/Flash Control/Data */
  61        Fextnvm6 = 0x0010,   /* Future Extended NVM 6 */
  62        Eerd = 0x0014,       /* EEPROM Read */
  63        Ctrlext = 0x0018,    /* Extended Device Control */
  64        Fla = 0x001c,        /* Flash Access */
  65        Mdic = 0x0020,       /* MDI Control */
  66        Seresctl = 0x0024,   /* Serdes ana */
  67        Fcal = 0x0028,       /* Flow Control Address Low */
  68        Fcah = 0x002C,       /* Flow Control Address High */
  69        Fct = 0x0030,        /* Flow Control Type */
  70        Kumctrlsta = 0x0034, /* MAC-PHY Interface */
  71        Vet = 0x0038,        /* VLAN EtherType */
  72        Fcttv = 0x0170,      /* Flow Control Transmit Timer Value */
  73        Txcw = 0x0178,       /* Transmit Configuration Word */
  74        Rxcw = 0x0180,       /* Receive Configuration Word */
  75        Ledctl = 0x0E00,     /* LED control */
  76        Pba = 0x1000,        /* Packet Buffer Allocation */
  77        Pbs = 0x1008,        /* Packet Buffer Size */
  78
  79        /* Interrupt */
  80        Icr = 0x00C0, /* Interrupt Cause Read */
  81        Itr = 0x00c4, /* Interrupt Throttling Rate */
  82        Ics = 0x00C8, /* Interrupt Cause Set */
  83        Ims = 0x00D0, /* Interrupt Mask Set/Read */
  84        Imc = 0x00D8, /* Interrupt mask Clear */
  85        Iam = 0x00E0, /* Interrupt acknowledge Auto Mask */
  86
  87        /* Receive */
  88        Rctl = 0x0100,    /* Control */
  89        Ert = 0x2008,     /* Early Receive Threshold (573[EVL], 579 only) */
  90        Fcrtl = 0x2160,   /* Flow Control RX Threshold Low */
  91        Fcrth = 0x2168,   /* Flow Control Rx Threshold High */
  92        Psrctl = 0x2170,  /* Packet Split Receive Control */
  93        Rdbal = 0x2800,   /* Rdesc Base Address Low Queue 0 */
  94        Rdbah = 0x2804,   /* Rdesc Base Address High Queue 0 */
  95        Rdlen = 0x2808,   /* Descriptor Length Queue 0 */
  96        Srrctl = 0x280c,  /* split and replication rx control (82575) */
  97        Rdh = 0x2810,     /* Descriptor Head Queue 0 */
  98        Rdt = 0x2818,     /* Descriptor Tail Queue 0 */
  99        Rdtr = 0x2820,    /* Descriptor Timer Ring */
 100        Rxdctl = 0x2828,  /* Descriptor Control */
 101        Radv = 0x282C,    /* Interrupt Absolute Delay Timer */
 102        Rdbal1 = 0x2900,  /* Rdesc Base Address Low Queue 1 */
 103        Rdbah1 = 0x2804,  /* Rdesc Base Address High Queue 1 */
 104        Rdlen1 = 0x2908,  /* Descriptor Length Queue 1 */
 105        Rdh1 = 0x2910,    /* Descriptor Head Queue 1 */
 106        Rdt1 = 0x2918,    /* Descriptor Tail Queue 1 */
 107        Rxdctl1 = 0x2928, /* Descriptor Control Queue 1 */
 108        Rsrpd = 0x2c00,   /* Small Packet Detect */
 109        Raid = 0x2c08,    /* ACK interrupt delay */
 110        Cpuvec = 0x2c10,  /* CPU Vector */
 111        Rxcsum = 0x5000,  /* Checksum Control */
 112        Rmpl = 0x5004,    /* rx maximum packet length (82575) */
 113        Rfctl = 0x5008,   /* Filter Control */
 114        Mta = 0x5200,     /* Multicast Table Array */
 115        Ral = 0x5400,     /* Receive Address Low */
 116        Rah = 0x5404,     /* Receive Address High */
 117        Vfta = 0x5600,    /* VLAN Filter Table Array */
 118        Mrqc = 0x5818,    /* Multiple Receive Queues Command */
 119        Rssim = 0x5864,   /* RSS Interrupt Mask */
 120        Rssir = 0x5868,   /* RSS Interrupt Request */
 121        Reta = 0x5c00,    /* Redirection Table */
 122        Rssrk = 0x5c80,   /* RSS Random Key */
 123
 124        /* Transmit */
 125        Tctl = 0x0400, /* Transmit Control */
 126        Tipg = 0x0410, /* Transmit IPG */
 127        Tkabgtxd =
 128            0x3004,      /* glci afe band gap transmit ref data, or something */
 129        Tdbal = 0x3800,  /* Tdesc Base Address Low */
 130        Tdbah = 0x3804,  /* Tdesc Base Address High */
 131        Tdlen = 0x3808,  /* Descriptor Length */
 132        Tdh = 0x3810,    /* Descriptor Head */
 133        Tdt = 0x3818,    /* Descriptor Tail */
 134        Tidv = 0x3820,   /* Interrupt Delay Value */
 135        Txdctl = 0x3828, /* Descriptor Control */
 136        Tadv = 0x382C,   /* Interrupt Absolute Delay Timer */
 137        Tarc0 = 0x3840,  /* Arbitration Counter Queue 0 */
 138        Tdbal1 = 0x3900, /* Descriptor Base Low Queue 1 */
 139        Tdbah1 = 0x3904, /* Descriptor Base High Queue 1 */
 140        Tdlen1 = 0x3908, /* Descriptor Length Queue 1 */
 141        Tdh1 = 0x3910,   /* Descriptor Head Queue 1 */
 142        Tdt1 = 0x3918,   /* Descriptor Tail Queue 1 */
 143        Txdctl1 = 0x3928, /* Descriptor Control 1 */
 144        Tarc1 = 0x3940,   /* Arbitration Counter Queue 1 */
 145
 146        /* Statistics */
 147        Statistics = 0x4000, /* Start of Statistics Area */
 148        Gorcl = 0x88 / 4,    /* Good Octets Received Count */
 149        Gotcl = 0x90 / 4,    /* Good Octets Transmitted Count */
 150        Torl = 0xC0 / 4,     /* Total Octets Received */
 151        Totl = 0xC8 / 4,     /* Total Octets Transmitted */
 152        Nstatistics = 0x124 / 4,
 153};
 154
 155enum {                      /* Ctrl */
 156       GIOmd = 1 << 2,      /* BIO master disable */
 157       Lrst = 1 << 3,       /* link reset */
 158       Slu = 1 << 6,        /* Set Link Up */
 159       SspeedMASK = 3 << 8, /* Speed Selection */
 160       SspeedSHIFT = 8,
 161       Sspeed10 = 0x00000000,      /* 10Mb/s */
 162       Sspeed100 = 0x00000100,     /* 100Mb/s */
 163       Sspeed1000 = 0x00000200,    /* 1000Mb/s */
 164       Frcspd = 1 << 11,           /* Force Speed */
 165       Frcdplx = 1 << 12,          /* Force Duplex */
 166       SwdpinsloMASK = 0x003C0000, /* Software Defined Pins - lo nibble */
 167       SwdpinsloSHIFT = 18,
 168       SwdpioloMASK = 0x03C00000, /* Software Defined Pins - I or O */
 169       SwdpioloSHIFT = 22,
 170       Devrst = 1 << 26, /* Device Reset */
 171       Rfce = 1 << 27,   /* Receive Flow Control Enable */
 172       Tfce = 1 << 28,   /* Transmit Flow Control Enable */
 173       Vme = 1 << 30,    /* VLAN Mode Enable */
 174       Phyrst = 1 << 31, /* Phy Reset */
 175};
 176
 177enum {                   /* Status */
 178       Lu = 1 << 1,      /* Link Up */
 179       Lanid = 3 << 2,   /* mask for Lan ID. */
 180       Txoff = 1 << 4,   /* Transmission Paused */
 181       Tbimode = 1 << 5, /* TBI Mode Indication */
 182       Phyra = 1 << 10,  /* PHY Reset Asserted */
 183       GIOme = 1 << 19,  /* GIO Master Enable Status */
 184};
 185
 186enum {
 187        /* Eec */
 188        Nvpres = 1 << 8,   /* nvram present */
 189        Autord = 1 << 9,   /* autoread complete */
 190        Sec1val = 1 << 22, /* sector 1 valid (!sec0) */
 191};
 192
 193enum {                   /* Eerd */
 194       EEstart = 1 << 0, /* Start Read */
 195       EEdone = 1 << 1,  /* Read done */
 196};
 197
 198enum {                    /* Ctrlext */
 199       Asdchk = 1 << 12,  /* ASD Check */
 200       Eerst = 1 << 13,   /* EEPROM Reset */
 201       Spdbyps = 1 << 15, /* Speed Select Bypass */
 202};
 203
 204/*
 205 * TODO(dcross): 'Ea' is 0 elsewhere. Investigate and possibly correct.
 206 */
 207enum {               /* EEPROM content offsets */
 208       OldEa = 0x00, /* Old Ethernet address */
 209       Ea = 0x01,    /* Ethernet Address */
 210       Cf = 0x03,    /* Compatibility Field */
 211       Icw1 = 0x0A,  /* Initialization Control Word 1 */
 212       Sid = 0x0B,   /* Subsystem ID */
 213       Svid = 0x0C,  /* Subsystem Vendor ID */
 214       Did = 0x0D,   /* Device ID */
 215       Vid = 0x0E,   /* Vendor ID */
 216       Icw2 = 0x0F,  /* Initialization Control Word 2 */
 217};
 218
 219enum {                        /* Mdic */
 220       MDIdMASK = 0x0000FFFF, /* Data */
 221       MDIdSHIFT = 0,
 222       MDIrMASK = 0x001F0000, /* PHY Register Address */
 223       MDIrSHIFT = 16,
 224       MDIpMASK = 0x03E00000, /* PHY Address */
 225       MDIpSHIFT = 21,
 226       MDIwop = 0x04000000,   /* Write Operation */
 227       MDIrop = 0x08000000,   /* Read Operation */
 228       MDIready = 0x10000000, /* End of Transaction */
 229       MDIie = 0x20000000,    /* Interrupt Enable */
 230       MDIe = 0x40000000,     /* Error */
 231};
 232
 233enum {                 /* phy interface registers */
 234       Phyctl = 0,     /* phy ctl */
 235       Physsr = 17,    /* phy secondary status */
 236       Phyier = 18,    /* 82573 phy interrupt enable */
 237       Phyisr = 19,    /* 82563 phy interrupt status */
 238       Phylhr = 19,    /* 8257[12] link health */
 239       Phyier218 = 24, /* 218 (phy79?) phy interrupt enable */
 240       Phyisr218 = 25, /* 218 (phy79?) phy interrupt status */
 241       Phystat = 26,   /* 82580 (phy79?) phy status */
 242       Phypage = 31,   /* page number */
 243
 244       Rtlink = 1 << 10, /* realtime link status */
 245       Phyan = 1 << 11,  /* phy has auto-negotiated */
 246
 247       /* Phyctl bits */
 248       Ran = 1 << 9,  /* restart auto-negotiation */
 249       Ean = 1 << 12, /* enable auto-negotiation */
 250
 251       /* 82573 Phyier interrupt enable bits */
 252       Lscie = 1 << 10, /* link status changed */
 253       Ancie = 1 << 11, /* auto-negotiation complete */
 254       Spdie = 1 << 14, /* speed changed */
 255       Panie = 1 << 15, /* phy auto-negotiation error */
 256
 257       /* Phylhr/Phyisr bits */
 258       Anf = 1 << 6,  /* lhr: auto-negotiation fault */
 259       Ane = 1 << 15, /* isr: auto-negotiation error */
 260
 261       /* 82580 Phystat bits */
 262       Ans = 3 << 14, /* 82580 autoneg. status */
 263       Link = 1 << 6, /* 82580 link */
 264
 265       /* 218 Phystat bits */
 266       Anfs = 3 << 13,   /* fault status */
 267       Ans218 = 1 << 12, /* autoneg complete */
 268
 269       /* 218 Phyier218 interrupt enable bits */
 270       Spdie218 = 1 << 1, /* speed changed */
 271       Lscie218 = 1 << 2, /* link status changed */
 272       Ancie218 = 1 << 8, /* auto-negotiation changed */
 273};
 274
 275enum {                      /* Icr, Ics, Ims, Imc */
 276       Txdw = 0x00000001,   /* Transmit Descriptor Written Back */
 277       Txqe = 0x00000002,   /* Transmit Queue Empty */
 278       Lsc = 0x00000004,    /* Link Status Change */
 279       Rxseq = 0x00000008,  /* Receive Sequence Error */
 280       Rxdmt0 = 0x00000010, /* Rdesc Minimum Threshold Reached */
 281       Rxo = 0x00000040,    /* Receiver Overrun */
 282       Rxt0 = 0x00000080,   /* Receiver Timer Interrupt */
 283       Mdac = 0x00000200,   /* MDIO Access Completed */
 284       Rxcfg = 0x00000400,  /* Receiving /C/ ordered sets */
 285       Gpi0 = 0x00000800,   /* General Purpose Interrupts */
 286       Gpi1 = 0x00001000,
 287       Gpi2 = 0x00002000,
 288       Gpi3 = 0x00004000,
 289       Ack = 0x00020000, /* Receive ACK frame */
 290};
 291
 292enum {                             /* Txcw */
 293       TxcwFd = 0x00000020,        /* Full Duplex */
 294       TxcwHd = 0x00000040,        /* Half Duplex */
 295       TxcwPauseMASK = 0x00000180, /* Pause */
 296       TxcwPauseSHIFT = 7,
 297       TxcwPs = 1 << TxcwPauseSHIFT, /* Pause Supported */
 298       TxcwAs = 2 << TxcwPauseSHIFT, /* Asymmetric FC desired */
 299       TxcwRfiMASK = 0x00003000,     /* Remote Fault Indication */
 300       TxcwRfiSHIFT = 12,
 301       TxcwNpr = 0x00008000,    /* Next Page Request */
 302       TxcwConfig = 0x40000000, /* Transmit Config Control */
 303       TxcwAne = 0x80000000,    /* Auto-Negotiation Enable */
 304};
 305
 306enum {                            /* Rctl */
 307       Rrst = 0x00000001,         /* Receiver Software Reset */
 308       Ren = 0x00000002,          /* Receiver Enable */
 309       Sbp = 0x00000004,          /* Store Bad Packets */
 310       Upe = 0x00000008,          /* Unicast Promiscuous Enable */
 311       Mpe = 0x00000010,          /* Multicast Promiscuous Enable */
 312       Lpe = 0x00000020,          /* Long Packet Reception Enable */
 313       LbmMASK = 0x000000C0,      /* Loopback Mode */
 314       LbmOFF = 0x00000000,       /* No Loopback */
 315       LbmTBI = 0x00000040,       /* TBI Loopback */
 316       LbmMII = 0x00000080,       /* GMII/MII Loopback */
 317       LbmXCVR = 0x000000C0,      /* Transceiver Loopback */
 318       RdtmsMASK = 0x00000300,    /* Rdesc Minimum Threshold Size */
 319       RdtmsHALF = 0x00000000,    /* Threshold is 1/2 Rdlen */
 320       RdtmsQUARTER = 0x00000100, /* Threshold is 1/4 Rdlen */
 321       RdtmsEIGHTH = 0x00000200,  /* Threshold is 1/8 Rdlen */
 322       MoMASK = 0x00003000,       /* Multicast Offset */
 323       Bam = 0x00008000,          /* Broadcast Accept Mode */
 324       BsizeMASK = 0x00030000,    /* Receive Buffer Size */
 325       Bsize16384 = 0x00010000,   /* Bsex = 1 */
 326       Bsize8192 = 0x00020000,    /* Bsex = 1 */
 327       Bsize2048 = 0x00000000,
 328       Bsize1024 = 0x00010000,
 329       Bsize512 = 0x00020000,
 330       Bsize256 = 0x00030000,
 331       BsizeFlex = 0x08000000, /* Flexible Bsize in 1KB increments */
 332       Vfe = 0x00040000,       /* VLAN Filter Enable */
 333       Cfien = 0x00080000,     /* Canonical Form Indicator Enable */
 334       Cfi = 0x00100000,       /* Canonical Form Indicator value */
 335       Dpf = 0x00400000,       /* Discard Pause Frames */
 336       Pmcf = 0x00800000,      /* Pass MAC Control Frames */
 337       Bsex = 0x02000000,      /* Buffer Size Extension */
 338       Secrc = 0x04000000,     /* Strip CRC from incoming packet */
 339};
 340
 341enum { /* Srrctl */
 342       Dropen = 1 << 31,
 343};
 344
 345enum {                      /* Tctl */
 346       Trst = 0x00000001,   /* Transmitter Software Reset */
 347       Ten = 0x00000002,    /* Transmit Enable */
 348       Psp = 0x00000008,    /* Pad Short Packets */
 349       Mulr = 0x10000000,   /* Allow multiple concurrent requests */
 350       Ctmask = 0x00000FF0, /* Collision Threshold */
 351       Ctshift = 4,
 352       ColdMASK = 0x003FF000, /* Collision Distance */
 353       ColdSHIFT = 12,
 354       Swxoff = 0x00400000, /* Sofware XOFF Transmission */
 355       Pbe = 0x00800000,    /* Packet Burst Enable */
 356       Rtlc = 0x01000000,   /* Re-transmit on Late Collision */
 357       Nrtu = 0x02000000,   /* No Re-transmit on Underrrun */
 358};
 359
 360enum {                           /* [RT]xdctl */
 361       PthreshMASK = 0x0000003F, /* Prefetch Threshold */
 362       PthreshSHIFT = 0,
 363       HthreshMASK = 0x00003F00, /* Host Threshold */
 364       HthreshSHIFT = 8,
 365       WthreshMASK = 0x003F0000, /* Writeback Threshold */
 366       WthreshSHIFT = 16,
 367       Gran = 0x01000000,    /* Granularity (descriptors, not cls) */
 368       Qenable = 0x02000000, /* Queue Enable (82575) */
 369};
 370
 371enum {                    /* Rxcsum */
 372       PcssMASK = 0x00FF, /* Packet Checksum Start */
 373       PcssSHIFT = 0,
 374       Ipofl = 0x0100, /* IP Checksum Off-load Enable */
 375       Tuofl = 0x0200, /* TCP/UDP Checksum Off-load Enable */
 376};
 377
 378enum {                     /* Receive Delay Timer Ring */
 379       DelayMASK = 0xFFFF, /* delay timer in 1.024nS increments */
 380       DelaySHIFT = 0,
 381       Fpd = 0x80000000, /* Flush partial Descriptor Block */
 382};
 383
 384struct rd { /* Receive Descriptor */
 385        uint32_t addr[2];
 386        uint16_t length;
 387        uint16_t checksum;
 388        uint8_t status;
 389        uint8_t errors;
 390        uint16_t special;
 391};
 392
 393enum {               /* Rd status */
 394       Rdd = 0x01,   /* Descriptor Done */
 395       Reop = 0x02,  /* End of Packet */
 396       Ixsm = 0x04,  /* Ignore Checksum Indication */
 397       Vp = 0x08,    /* Packet is 802.1Q (matched VET) */
 398       Tcpcs = 0x20, /* TCP Checksum Calculated on Packet */
 399       Ipcs = 0x40,  /* IP Checksum Calculated on Packet */
 400       Pif = 0x80,   /* Passed in-exact filter */
 401};
 402
 403enum {              /* Rd errors */
 404       Ce = 0x01,   /* CRC Error or Alignment Error */
 405       Se = 0x02,   /* Symbol Error */
 406       Seq = 0x04,  /* Sequence Error */
 407       Cxe = 0x10,  /* Carrier Extension Error */
 408       Tcpe = 0x20, /* TCP/UDP Checksum Error */
 409       Ipe = 0x40,  /* IP Checksum Error */
 410       Rxe = 0x80,  /* RX Data Error */
 411};
 412
 413struct td {               /* Transmit Descriptor */
 414        uint32_t addr[2]; /* Data */
 415        uint32_t control;
 416        uint32_t status;
 417};
 418
 419enum {                       /* Tdesc control */
 420       LenMASK = 0x000FFFFF, /* Data/Packet Length Field */
 421       LenSHIFT = 0,
 422       DtypeCD = 0x00000000,  /* Data Type 'Context Descriptor' */
 423       DtypeDD = 0x00100000,  /* Data Type 'Data Descriptor' */
 424       PtypeTCP = 0x01000000, /* TCP/UDP Packet Type (CD) */
 425       Teop = 0x01000000,     /* End of Packet (DD) */
 426       PtypeIP = 0x02000000,  /* IP Packet Type (CD) */
 427       Ifcs = 0x02000000,     /* Insert FCS (DD) */
 428       Tse = 0x04000000,      /* TCP Segmentation Enable */
 429       Rs = 0x08000000,       /* Report Status */
 430       Rps = 0x10000000,      /* Report Status Sent */
 431       Dext = 0x20000000,     /* Descriptor Extension */
 432       Vle = 0x40000000,      /* VLAN Packet Enable */
 433       Ide = 0x80000000,      /* Interrupt Delay Enable */
 434};
 435
 436enum {                   /* Tdesc status */
 437       Tdd = 0x0001,     /* Descriptor Done */
 438       Ec = 0x0002,      /* Excess Collisions */
 439       Lc = 0x0004,      /* Late Collision */
 440       Tu = 0x0008,      /* Transmit Underrun */
 441       CssMASK = 0xFF00, /* Checksum Start Field */
 442       CssSHIFT = 8,
 443};
 444
 445struct flash {
 446        uint16_t *reg;
 447        uint32_t *reg32;
 448        uint16_t base;
 449        uint16_t lim;
 450};
 451
 452enum {
 453        /* 16 and 32-bit flash registers for ich flash parts */
 454        Bfpr = 0x00 / 4,  /* flash base 0:12; lim 16:28 */
 455        Fsts = 0x04 / 2,  /* flash status;  Hsfsts */
 456        Fctl = 0x06 / 2,  /* flash control; Hsfctl */
 457        Faddr = 0x08 / 4, /* flash address to r/w */
 458        Fdata = 0x10 / 4, /* data @ address */
 459
 460        /* status register */
 461        Fdone = 1 << 0,   /* flash cycle done */
 462        Fcerr = 1 << 1,   /* cycle error; write 1 to clear */
 463        Ael = 1 << 2,     /* direct access error log; 1 to clear */
 464        Scip = 1 << 5,    /* spi cycle in progress */
 465        Fvalid = 1 << 14, /* flash descriptor valid */
 466
 467        /* control register */
 468        Fgo = 1 << 0,     /* start cycle */
 469        Flcycle = 1 << 1, /* two bits: r=0; w=2 */
 470        Fdbc = 1 << 8,    /* bytes to read; 5 bits */
 471};
 472
 473/*
 474 * the kumeran interface is mac-to-phy for external gigabit ethernet on
 475 * intel's esb2 ich8 (io controller hub), it carries mii bits.  can be used
 476 * to reset the phy.  intel proprietary, see "kumeran specification".
 477 */
 478enum { I217inbandctlpage = 770, /* phy page */
 479       I217inbandctlreg = 18,   /* phy register */
 480       I217inbandctllnkststxtmoutmask = 0x3F00,
 481       I217inbandctllnkststxtmoutshift = 8,
 482
 483       Fextnvm6reqpllclk = 0x100,
 484       Fextnvm6enak1entrycond = 0x200, /* extend K1 entry latency */
 485
 486       Nvmk1cfg = 0x1B,   /* NVM K1 Config Word */
 487       Nvmk1enable = 0x1, /* NVM Enable K1 bit */
 488
 489       Kumctrlstaoff = 0x1F0000,
 490       Kumctrlstaoffshift = 16,
 491       Kumctrlstaren = 0x200000,
 492       Kumctrlstak1cfg = 0x7,
 493       Kumctrlstak1enable = 0x2,
 494};
 495
 496enum {
 497        /*
 498         * these were 512, 1024 & 64, but 52, 253 & 9 are usually ample;
 499         * however cpu servers and terminals can need more receive buffers
 500         * due to bursts of traffic.
 501         *
 502         * Tdlen and Rdlen have to be multiples of 128.  Rd and Td are both
 503         * 16 bytes long, so Nrd and Ntd must be multiples of 8.
 504         */
 505        Ntd = 32,  /* power of two >= 8 */
 506        Nrd = 128, /* power of two >= 8 */
 507        Rbalign = 16,
 508        Slop = 32, /* for vlan headers, crcs, etc. */
 509};
 510
 511enum { Iany = -1,
 512       i82563,
 513       i82566,
 514       i82567,
 515       i82567m,
 516       i82571,
 517       i82572,
 518       i82573,
 519       i82574,
 520       i82575,
 521       i82576,
 522       i82577,
 523       i82577m,
 524       i82578,
 525       i82578m,
 526       i82579,
 527       i82580,
 528       i82583,
 529       i210,
 530       i217,
 531       i218,
 532       i350,
 533       Nctlrtype,
 534};
 535
 536enum { Fload = 1 << 0,
 537       Fert = 1 << 1,
 538       F75 = 1 << 2,
 539       Fpba = 1 << 3,
 540       Fflashea = 1 << 4,
 541       F79phy = 1 << 5,
 542       Fnofct = 1 << 6,
 543};
 544
 545struct ctlrtype {
 546        int type;
 547        int mtu;
 548        int phyno;
 549        char *name;
 550        int flag;
 551};
 552
 553static struct ctlrtype ctlrtab[Nctlrtype] = {
 554    {i82563, 9014, 1, "i82563", Fpba},
 555    {i82566, 1514, 1, "i82566", Fload},
 556    {i82567, 9234, 1, "i82567", Fload},
 557    {i82567m, 1514, 1, "i82567m", 0},
 558    {i82571, 9234, 1, "i82571", Fpba},
 559    {i82572, 9234, 1, "i82572", Fpba},
 560    {i82573, 8192, 1, "i82573", Fert}, /* terrible perf above 8k */
 561    {i82574, 9018, 1, "i82574", 0},
 562    {i82575, 9728, 1, "i82575", F75 | Fflashea},
 563    {i82576, 9728, 1, "i82576", F75},
 564    {i82577, 4096, 2, "i82577", Fload | Fert},
 565    {i82577m, 1514, 2, "i82577", Fload | Fert},
 566    {i82578, 4096, 2, "i82578", Fload | Fert},
 567    {i82578m, 1514, 2, "i82578", Fload | Fert},
 568    {i82579, 9018, 2, "i82579", Fload | Fert | F79phy | Fnofct},
 569    {i82580, 9728, 1, "i82580", F75 | F79phy},
 570    {i82583, 1514, 1, "i82583", 0},
 571    {i210, 9728, 1, "i210", F75 | Fnofct | Fert},
 572    {i217, 9728, 1, "i217", F79phy | Fnofct | Fload | Fert},
 573    {i350, 9728, 1, "i350", F75 | F79phy | Fnofct},
 574};
 575
 576struct ctlr {
 577        uintptr_t mmio_paddr;
 578        struct pci_device *pcidev;
 579        struct ctlr *next;
 580        struct ether *edev;
 581        int active;
 582        int type;
 583        uint16_t eeprom[0x40];
 584
 585        qlock_t alock; /* attach */
 586        void *alloc;
 587        unsigned int rbsz;
 588        int attached;
 589
 590        int *nic;
 591        spinlock_t imlock;
 592        int im; /* interrupt mask */
 593
 594        struct rendez lrendez;
 595        int lim;
 596        int phynum;
 597        int didk1fix;
 598
 599        qlock_t slock;
 600        unsigned int statistics[Nstatistics];
 601        unsigned int lsleep;
 602        unsigned int lintr;
 603        unsigned int rsleep;
 604        unsigned int rintr;
 605        unsigned int txdw;
 606        unsigned int tintr;
 607        unsigned int ixsm;
 608        unsigned int ipcs;
 609        unsigned int tcpcs;
 610        unsigned int speeds[4];
 611
 612        uint8_t ra[Eaddrlen]; /* receive address */
 613        uint32_t mta[128];    /* multicast table array */
 614
 615        struct rendez rrendez;
 616        int rim;
 617        int rdfree;        /* rx descriptors awaiting packets */
 618        struct rd *rdba;   /* receive descriptor base address */
 619        struct block **rb; /* receive buffers */
 620        unsigned int rdh;  /* receive descriptor head */
 621        unsigned int rdt;  /* receive descriptor tail */
 622        int rdtr;          /* receive delay timer ring value */
 623        int radv;          /* receive interrupt absolute delay timer */
 624
 625        struct rendez trendez;
 626        qlock_t tlock;
 627        struct td *tdba;   /* transmit descriptor base address */
 628        struct block **tb; /* transmit buffers */
 629        int tdh;           /* transmit descriptor head */
 630        int tdt;           /* transmit descriptor tail */
 631
 632        int fcrtl;
 633        int fcrth;
 634
 635        unsigned int pbs; /* packet buffer size */
 636        unsigned int pba; /* packet buffer allocation */
 637};
 638
 639static inline uint32_t csr32r(struct ctlr *c, uintptr_t reg)
 640{
 641        return read_mmreg32((uintptr_t)(c->nic + (reg / 4)));
 642}
 643
 644static inline void csr32w(struct ctlr *c, uintptr_t reg, uint32_t val)
 645{
 646        write_mmreg32((uintptr_t)(c->nic + (reg / 4)), val);
 647}
 648
 649static struct ctlr *i82563ctlrhead;
 650static struct ctlr *i82563ctlrtail;
 651
 652static int speedtab[] = {10, 100, 1000, 0};
 653
 654static char *statistics[] = {
 655    "CRC Error",
 656    "Alignment Error",
 657    "Symbol Error",
 658    "RX Error",
 659    "Missed Packets",
 660    "Single Collision",
 661    "Excessive Collisions",
 662    "Multiple Collision",
 663    "Late Collisions",
 664    NULL,
 665    "Collision",
 666    "Transmit Underrun",
 667    "Defer",
 668    "Transmit - No CRS",
 669    "Sequence Error",
 670    "Carrier Extension Error",
 671    "Receive Error Length",
 672    NULL,
 673    "XON Received",
 674    "XON Transmitted",
 675    "XOFF Received",
 676    "XOFF Transmitted",
 677    "FC Received Unsupported",
 678    "Packets Received (64 Bytes)",
 679    "Packets Received (65-127 Bytes)",
 680    "Packets Received (128-255 Bytes)",
 681    "Packets Received (256-511 Bytes)",
 682    "Packets Received (512-1023 Bytes)",
 683    "Packets Received (1024-mtu Bytes)",
 684    "Good Packets Received",
 685    "Broadcast Packets Received",
 686    "Multicast Packets Received",
 687    "Good Packets Transmitted",
 688    NULL,
 689    "Good Octets Received",
 690    NULL,
 691    "Good Octets Transmitted",
 692    NULL,
 693    NULL,
 694    NULL,
 695    "Receive No Buffers",
 696    "Receive Undersize",
 697    "Receive Fragment",
 698    "Receive Oversize",
 699    "Receive Jabber",
 700    "Management Packets Rx",
 701    "Management Packets Drop",
 702    "Management Packets Tx",
 703    "Total Octets Received",
 704    NULL,
 705    "Total Octets Transmitted",
 706    NULL,
 707    "Total Packets Received",
 708    "Total Packets Transmitted",
 709    "Packets Transmitted (64 Bytes)",
 710    "Packets Transmitted (65-127 Bytes)",
 711    "Packets Transmitted (128-255 Bytes)",
 712    "Packets Transmitted (256-511 Bytes)",
 713    "Packets Transmitted (512-1023 Bytes)",
 714    "Packets Transmitted (1024-mtu Bytes)",
 715    "Multicast Packets Transmitted",
 716    "Broadcast Packets Transmitted",
 717    "TCP Segmentation Context Transmitted",
 718    "TCP Segmentation Context Fail",
 719    "Interrupt Assertion",
 720    "Interrupt Rx Pkt Timer",
 721    "Interrupt Rx Abs Timer",
 722    "Interrupt Tx Pkt Timer",
 723    "Interrupt Tx Abs Timer",
 724    "Interrupt Tx Queue Empty",
 725    "Interrupt Tx Desc Low",
 726    "Interrupt Rx Min",
 727    "Interrupt Rx Overrun",
 728};
 729
 730static char *cname(struct ctlr *c)
 731{
 732        return ctlrtab[c->type].name;
 733}
 734
 735static int i82563reset(struct ctlr *);
 736
 737static long i82563ifstat(struct ether *edev, void *a, long n, uint32_t offset)
 738{
 739        struct ctlr *ctlr;
 740        char *s, *p, *e, *stat;
 741        int i, r;
 742        uint64_t tuvl, ruvl;
 743
 744        ctlr = edev->ctlr;
 745        qlock(&ctlr->slock);
 746        p = s = kzmalloc(READSTR, 0);
 747        if (p == NULL) {
 748                qunlock(&ctlr->slock);
 749                error(ENOMEM, "kzmalloc did not panic");
 750        }
 751        e = p + READSTR;
 752
 753        for (i = 0; i < Nstatistics; i++) {
 754                r = csr32r(ctlr, Statistics + i * 4);
 755                stat = statistics[i];
 756                if (stat == NULL)
 757                        continue;
 758                switch (i) {
 759                case Gorcl:
 760                case Gotcl:
 761                case Torl:
 762                case Totl:
 763                        ruvl = r;
 764                        ruvl += (uint64_t)csr32r(ctlr, Statistics + (i + 1) * 4)
 765                                << 32;
 766                        tuvl = ruvl;
 767                        tuvl += ctlr->statistics[i];
 768                        tuvl += (uint64_t)ctlr->statistics[i + 1] << 32;
 769                        if (tuvl == 0)
 770                                continue;
 771                        ctlr->statistics[i] = tuvl;
 772                        ctlr->statistics[i + 1] = tuvl >> 32;
 773                        p = seprintf(p, e, "%s: %llud %llud\n", stat, tuvl,
 774                                     ruvl);
 775                        i++;
 776                        break;
 777
 778                default:
 779                        ctlr->statistics[i] += r;
 780                        if (ctlr->statistics[i] == 0)
 781                                continue;
 782                        p = seprintf(p, e, "%s: %ud %ud\n", stat,
 783                                     ctlr->statistics[i], r);
 784                        break;
 785                }
 786        }
 787
 788        p = seprintf(p, e, "lintr: %ud %ud\n", ctlr->lintr, ctlr->lsleep);
 789        p = seprintf(p, e, "rintr: %ud %ud\n", ctlr->rintr, ctlr->rsleep);
 790        p = seprintf(p, e, "tintr: %ud %ud\n", ctlr->tintr, ctlr->txdw);
 791        p = seprintf(p, e, "ixcs: %ud %ud %ud\n", ctlr->ixsm, ctlr->ipcs,
 792                     ctlr->tcpcs);
 793        p = seprintf(p, e, "ctrl: %.8ux\n", csr32r(ctlr, Ctrl));
 794        p = seprintf(p, e, "ctrlext: %.8ux\n", csr32r(ctlr, Ctrlext));
 795        p = seprintf(p, e, "status: %.8ux\n", csr32r(ctlr, Status));
 796        p = seprintf(p, e, "txcw: %.8ux\n", csr32r(ctlr, Txcw));
 797        p = seprintf(p, e, "txdctl: %.8ux\n", csr32r(ctlr, Txdctl));
 798        p = seprintf(p, e, "pbs: %dKB\n", ctlr->pbs);
 799        p = seprintf(p, e, "pba: %#.8ux\n", ctlr->pba);
 800
 801        p = seprintf(p, e, "speeds: 10:%ud 100:%ud 1000:%ud ?:%ud\n",
 802                     ctlr->speeds[0], ctlr->speeds[1], ctlr->speeds[2],
 803                     ctlr->speeds[3]);
 804        p = seprintf(p, e, "type: %s\n", cname(ctlr));
 805
 806        //  p = seprintf(p, e, "eeprom:");
 807        //  for(i = 0; i < 0x40; i++){
 808        //      if(i && ((i & 7) == 0))
 809        //          p = seprintf(p, e, "\n       ");
 810        //      p = seprintf(p, e, " %4.4ux", ctlr->eeprom[i]);
 811        //  }
 812        //  p = seprintf(p, e, "\n");
 813
 814        n = readstr(offset, a, n, s);
 815        kfree(s);
 816        qunlock(&ctlr->slock);
 817
 818        return n;
 819}
 820
 821enum { CMrdtr,
 822       CMradv,
 823       CMpause,
 824       CMan,
 825};
 826
/*
 * Command table handed to lookupcmd() by i82563ctl(): one row per control
 * message, pairing the CM* index with the command word.
 * NOTE(review): the third column is presumably the expected number of
 * fields including the command word -- confirm against struct cmdtab.
 */
static struct cmdtab i82563ctlmsg[] = {
    {CMrdtr, "rdtr", 2},
    {CMradv, "radv", 2},
    {CMpause, "pause", 1},
    {CMan, "an", 1},
};
 833
 834static long i82563ctl(struct ether *edev, void *buf, size_t n)
 835{
 836        ERRSTACK(1);
 837        char *p;
 838        uint32_t v;
 839        struct ctlr *ctlr;
 840        struct cmdbuf *cb;
 841        struct cmdtab *ct;
 842
 843        ctlr = edev->ctlr;
 844        if (ctlr == NULL)
 845                error(ENODEV, "i82563ctl: NULL controller");
 846
 847        cb = parsecmd(buf, n);
 848        if (waserror()) {
 849                kfree(cb);
 850                nexterror();
 851        }
 852
 853        ct = lookupcmd(cb, i82563ctlmsg, ARRAY_SIZE(i82563ctlmsg));
 854        switch (ct->index) {
 855        case CMrdtr:
 856                v = strtoul(cb->f[1], &p, 0);
 857                if (*p || v > 0xffff)
 858                        error(EINVAL, ERROR_FIXME);
 859                ctlr->rdtr = v;
 860                csr32w(ctlr, Rdtr, v);
 861                break;
 862        case CMradv:
 863                v = strtoul(cb->f[1], &p, 0);
 864                if (*p || v > 0xffff)
 865                        error(EINVAL, ERROR_FIXME);
 866                ctlr->radv = v;
 867                csr32w(ctlr, Radv, v);
 868                break;
 869        case CMpause:
 870                csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) ^ (Rfce | Tfce));
 871                break;
 872        case CMan:
 873                csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) | Lrst | Phyrst);
 874                break;
 875        }
 876        kfree(cb);
 877        poperror();
 878
 879        return n;
 880}
 881
 882static void i82563promiscuous(void *arg, int on)
 883{
 884        int rctl;
 885        struct ctlr *ctlr;
 886        struct ether *edev;
 887
 888        edev = arg;
 889        ctlr = edev->ctlr;
 890
 891        rctl = csr32r(ctlr, Rctl) & ~MoMASK;
 892        if (on)
 893                rctl |= Upe | Mpe;
 894        else
 895                rctl &= ~(Upe | Mpe);
 896        csr32w(ctlr, Rctl, rctl);
 897}
 898
 899/*
 900 * Returns the number of bits of mac address used in multicast hash,
 901 * thus the number of longs of ctlr->mta (2^(bits-5)).
 902 * This must be right for multicast (thus ipv6) to work reliably.
 903 *
 904 * The default multicast hash for mta is based on 12 bits of MAC address;
 905 * the rightmost bit is a function of Rctl's Multicast Offset: 0=>36,
 906 * 1=>35, 2=>34, 3=>32.  Exceptions include the 578, 579, 217, 218, 219;
 907 * they use only 10 bits, ignoring the rightmost 2 of the 12.
 908 */
 909static int mcastbits(struct ctlr *ctlr)
 910{
 911        switch (ctlr->type) {
 912        /*
 913         * openbsd says all `ich8' versions (ich8, ich9, ich10, pch,
 914         * pch2 and pch_lpt) have 32 longs (use 10 bits of mac address
 915         * for hash).
 916         */
 917        case i82566:
 918        case i82567:
 919                //      case i82578:
 920        case i82579:
 921        case i217:
 922        case i218:
 923                //      case i219:
 924                return 10; /* 32 longs */
 925        case i82563:
 926        case i82571:
 927        case i82572:
 928        case i82573:
 929        case i82574:
 930                //      case i82575:
 931                //      case i82583:
 932        case i210:         /* includes i211 */
 933                return 12; /* 128 longs */
 934        default:
 935                printk("82563: unsure of multicast bits in mac addresses; "
 936                       "enabling promiscuous multicast reception\n");
 937                csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Mpe);
 938                return 10; /* be conservative (for mta size) */
 939        }
 940}
 941
 942static int mcbitstolongs(int nmcbits)
 943{
 944        return 1 << (nmcbits - 5); /* 2^5 = 32 */
 945}
 946
 947static void i82563multicast(void *arg, uint8_t *addr, int on)
 948{
 949        uint32_t nbits, tblsz, hash, word, bit;
 950        struct ctlr *ctlr;
 951        struct ether *edev;
 952
 953        edev = arg;
 954        ctlr = edev->ctlr;
 955
 956        nbits = mcastbits(ctlr);
 957        tblsz = mcbitstolongs(nbits);
 958        /* assume multicast offset in Rctl is 0 (we clear it above) */
 959        hash = addr[5] << 4 | addr[4] >> 4; /* bits 47:36 of mac */
 960        if (nbits == 10)
 961                hash >>= 2; /* discard 37:36 of mac */
 962        word = (hash / 32) & (tblsz - 1);
 963        bit = 1UL << (hash % 32);
 964        /*
 965         * multiple ether addresses can hash to the same filter bit,
 966         * so it's never safe to clear a filter bit.
 967         * if we want to clear filter bits, we need to keep track of
 968         * all the multicast addresses in use, clear all the filter bits,
 969         * then set the ones corresponding to in-use addresses.
 970         */
 971        if (on)
 972                ctlr->mta[word] |= bit;
 973        //      else
 974        //              ctlr->mta[word] &= ~bit;
 975        csr32w(ctlr, Mta + word * 4, ctlr->mta[word]);
 976}
 977
 978static void i82563im(struct ctlr *ctlr, int im)
 979{
 980        spin_lock_irqsave(&ctlr->imlock);
 981        ctlr->im |= im;
 982        csr32w(ctlr, Ims, ctlr->im);
 983        spin_unlock_irqsave(&ctlr->imlock);
 984}
 985
 986static void i82563txinit(struct ctlr *ctlr)
 987{
 988        int i, r, tctl;
 989        struct block *bp;
 990
 991        /*
 992         * TODO(dcross): Figure out how to integrate this table driven
 993         * code into the stanza below.
 994         */
 995        tctl = 0x0F << Ctshift | Psp;
 996        if (0) {
 997                if ((ctlrtab[ctlr->type].flag & F75) == 0)
 998                        tctl |= (66 << ColdSHIFT | Mulr);
 999        }
1000        switch (ctlr->type) {
1001        case i210:
1002                break;
1003        default:
1004                tctl |= Mulr;
1005                /* fall through */
1006        case i217:
1007        case i218:
1008                tctl |= 66 << ColdSHIFT;
1009                break;
1010        }
1011        csr32w(ctlr, Tctl, tctl);
1012        csr32w(ctlr, Tipg, 6 << 20 | 8 << 10 | 8); /* yb sez: 0x702008 */
1013        for (i = 0; i < Ntd; i++) {
1014                bp = ctlr->tb[i];
1015                if (bp != NULL) {
1016                        ctlr->tb[i] = NULL;
1017                        freeb(bp);
1018                }
1019        }
1020        memset(ctlr->tdba, 0, Ntd * sizeof(struct td));
1021        csr32w(ctlr, Tdbal, paddr_low32(ctlr->tdba));
1022        csr32w(ctlr, Tdbah, paddr_high32(ctlr->tdba));
1023        csr32w(ctlr, Tdlen, Ntd * sizeof(struct td));
1024        ctlr->tdh = PREV_RING(0, Ntd);
1025        csr32w(ctlr, Tdh, 0);
1026        ctlr->tdt = 0;
1027        csr32w(ctlr, Tdt, 0);
1028        csr32w(ctlr, Tidv, 0); /* don't coalesce interrupts */
1029        csr32w(ctlr, Tadv, 0);
1030        r = csr32r(ctlr, Txdctl) & ~(WthreshMASK | PthreshMASK);
1031        r |= 4 << WthreshSHIFT | 4 << PthreshSHIFT;
1032        if (ctlrtab[ctlr->type].flag & F75)
1033                r |= Qenable;
1034        csr32w(ctlr, Txdctl, r);
1035        csr32w(ctlr, Tctl, csr32r(ctlr, Tctl) | Ten);
1036}
1037
1038static int i82563cleanup(struct ctlr *ctlr)
1039{
1040        struct block *bp;
1041        int tdh, n;
1042
1043        tdh = ctlr->tdh;
1044        while (ctlr->tdba[n = NEXT_RING(tdh, Ntd)].status & Tdd) {
1045                tdh = n;
1046                bp = ctlr->tb[tdh];
1047                if (bp != NULL) {
1048                        ctlr->tb[tdh] = NULL;
1049                        freeb(bp);
1050                } else
1051                        iprint("82563 tx underrun!\n");
1052                ctlr->tdba[tdh].status = 0;
1053        }
1054        return ctlr->tdh = tdh;
1055}
1056
1057static void i82563transmit(struct ether *edev)
1058{
1059        struct td *td;
1060        struct block *bp;
1061        struct ctlr *ctlr;
1062        int tdh, tdt;
1063
1064        ctlr = edev->ctlr;
1065        qlock(&ctlr->tlock);
1066
1067        /*
1068         * Free any completed packets
1069         */
1070        tdh = i82563cleanup(ctlr);
1071
1072        /* if link down on 218, don't try since we need k1fix to run first */
1073        if (!edev->link && ctlr->type == i218 && !ctlr->didk1fix) {
1074                qunlock(&ctlr->tlock);
1075                return;
1076        }
1077
1078        /*
1079         * Try to fill the ring back up.
1080         */
1081        tdt = ctlr->tdt;
1082        for (;;) {
1083                if (NEXT_RING(tdt, Ntd) == tdh) { /* ring full? */
1084                        ctlr->txdw++;
1085                        i82563im(ctlr, Txdw);
1086                        break;
1087                }
1088                bp = qget(edev->oq);
1089                if (bp == NULL)
1090                        break;
1091                td = &ctlr->tdba[tdt];
1092                td->addr[0] = paddr_low32(bp->rp);
1093                td->addr[1] = paddr_high32(bp->rp);
1094                td->control = Ide | Rs | Ifcs | Teop | BLEN(bp);
1095                ctlr->tb[tdt] = bp;
1096                tdt = NEXT_RING(tdt, Ntd);
1097        }
1098        if (ctlr->tdt != tdt) {
1099                ctlr->tdt = tdt;
1100                wmb_f();
1101                csr32w(ctlr, Tdt, tdt);
1102        }
1103        /* else may not be any new ones, but could be some still in flight */
1104        qunlock(&ctlr->tlock);
1105}
1106
1107static void i82563replenish(struct ctlr *ctlr)
1108{
1109        struct rd *rd;
1110        int rdt;
1111        struct block *bp;
1112
1113        rdt = ctlr->rdt;
1114        while (NEXT_RING(rdt, Nrd) != ctlr->rdh) {
1115                rd = &ctlr->rdba[rdt];
1116                if (ctlr->rb[rdt] != NULL) {
1117                        printd("#l%d: 82563: rx overrun\n", ctlr->edev->ctlrno);
1118                        break;
1119                }
1120                bp = block_alloc(ctlr->rbsz + Slop + Rbalign, MEM_ATOMIC);
1121                if (bp == NULL) {
1122                        warn_once("OOM, trying to survive");
1123                        break;
1124                }
1125                ctlr->rb[rdt] = bp;
1126                rd->addr[0] = paddr_low32(bp->rp);
1127                rd->addr[1] = paddr_high32(bp->rp);
1128                rd->status = 0;
1129                ctlr->rdfree++;
1130                rdt = NEXT_RING(rdt, Nrd);
1131        }
1132        if (ctlr->rdt != rdt) {
1133                ctlr->rdt = rdt;
1134                wmb_f();
1135                csr32w(ctlr, Rdt, rdt);
1136        }
1137}
1138
/*
 * (Re)initialize the receive side of the controller.
 *
 * Programs Rctl (and, for large buffers on F75-flagged controllers, Srrctl
 * and Rmpl) according to ctlr->rbsz, sets the early receive threshold on
 * controllers flagged Fert, points the hardware at the receive descriptor
 * ring, resets head and tail, disables receive interrupt moderation, frees
 * any blocks still attached to the ring and refills it, adjusts the Rxdctl
 * thresholds on i82575/i82576/i210, and turns receive checksum offload
 * off.  Ren is not set here; callers enable the receiver afterwards.
 */
static void i82563rxinit(struct ctlr *ctlr)
{
        struct block *bp;
        int i, r, rctl, type;

        type = ctlr->type;

        /* buffers of at most 2048 bytes use the fixed 2048 size setting */
        if (ctlr->rbsz <= 2048)
                csr32w(ctlr, Rctl, Dpf | Bsize2048 | Bam | RdtmsHALF);
        else {
                /* i = buffer size in KB, rounded up */
                i = ctlr->rbsz / 1024;
                if (ctlr->rbsz % 1024)
                        i++;
                if (ctlrtab[ctlr->type].flag & F75) {
                        csr32w(ctlr, Rctl,
                               Lpe | Dpf | Bsize2048 | Bam | RdtmsHALF | Secrc);
                        if (ctlr->type != i82575)
                                i |= (Nrd / 2 >> 4) << 20; /* RdmsHalf */
                        csr32w(ctlr, Srrctl, i | Dropen);
                        csr32w(ctlr, Rmpl, ctlr->rbsz);
                        // csr32w(ctlr, Drxmxod, 0x7ff);
                } else
                        csr32w(ctlr, Rctl,
                               Lpe | Dpf | BsizeFlex * i | Bam | RdtmsHALF |
                                   Secrc);
        }

        /*
         * TODO(dcross): Reconcile this with latest above code block.
         * The stanza below is compiled out by the constant-false guard.
         */
        if (0) {
                rctl = Dpf | Bsize2048 | Bam | RdtmsHALF;
                if (type == i82575 || type == i82576 || type == i210) {
                        /*
                         * Setting Qenable in Rxdctl does not
                         * appear to stick unless Ren is on.
                         */
                        csr32w(ctlr, Rctl, Ren | rctl);
                        csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Qenable);
                }
                csr32w(ctlr, Rctl, rctl);
        }

        if (ctlrtab[ctlr->type].flag & Fert)
                csr32w(ctlr, Ert, 1024 / 8); /* early rx threshold */

        /* tell the hardware where the descriptor ring is; start empty */
        csr32w(ctlr, Rdbal, paddr_low32(ctlr->rdba));
        csr32w(ctlr, Rdbah, paddr_high32(ctlr->rdba));
        csr32w(ctlr, Rdlen, Nrd * sizeof(struct rd));
        ctlr->rdh = ctlr->rdt = 0;
        csr32w(ctlr, Rdh, 0);
        csr32w(ctlr, Rdt, 0);

        /* to hell with interrupt moderation, we want low latency */
        csr32w(ctlr, Rdtr, 0);
        csr32w(ctlr, Radv, 0);

        /* release buffers left over from a previous initialization */
        for (i = 0; i < Nrd; i++) {
                bp = ctlr->rb[i];
                if (bp != NULL) {
                        ctlr->rb[i] = NULL;
                        freeb(bp);
                }
        }
        i82563replenish(ctlr);

        if (type == i82575 || type == i82576 || type == i210) {
                /*
                 * See comment above for Qenable.
                 * Could shuffle the code?
                 */
                r = csr32r(ctlr, Rxdctl) & ~(WthreshMASK | PthreshMASK);
                csr32w(ctlr, Rxdctl, r | 2 << WthreshSHIFT | 2 << PthreshSHIFT);
        }

        /*
         * Don't enable checksum offload.  In practice, it interferes with
         * tftp booting on at least the 82575.
         */
        csr32w(ctlr, Rxcsum, 0);
}
1220
1221static int i82563rim(void *ctlr)
1222{
1223        return ((struct ctlr *)ctlr)->rim != 0;
1224}
1225
1226/*
1227 * With no errors and the Ixsm bit set,
1228 * the descriptor status Tpcs and Ipcs bits give
1229 * an indication of whether the checksums were
1230 * calculated and valid.
1231 *
1232 * Must be called with rd->errors == 0.
1233 */
1234static void ckcksums(struct ctlr *ctlr, struct rd *rd, struct block *bp)
1235{
1236        if (0) {
1237                if (rd->status & Ixsm)
1238                        return;
1239                ctlr->ixsm++;
1240                if (rd->status & Ipcs) {
1241                        /*
1242                         * IP checksum calculated (and valid as errors == 0).
1243                         */
1244                        ctlr->ipcs++;
1245                        bp->flag |= Bipck;
1246                }
1247                if (rd->status & Tcpcs) {
1248                        /*
1249                         * TCP/UDP checksum calculated (and valid as errors ==
1250                         * 0).
1251                         */
1252                        ctlr->tcpcs++;
1253                        bp->flag |= Btcpck | Budpck;
1254                }
1255                bp->flag |= Bpktck;
1256        }
1257}
1258
/*
 * Receive task; arg is the struct ether.  Never returns.
 *
 * Initializes the receive side, sets Ren in Rctl (plus Qenable in Rxdctl
 * on the i210), then loops forever: refill the ring, unmask the receive
 * interrupts, sleep until i82563rim() reports pending bits in ctlr->rim,
 * and then drain every descriptor the hardware has marked done (Rdd).
 */
static void i82563rproc(void *arg)
{
        struct rd *rd;
        struct block *bp;
        struct ctlr *ctlr;
        int rdh, rim, passed;
        struct ether *edev;

        edev = arg;
        ctlr = edev->ctlr;
        i82563rxinit(ctlr);
        csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren);

        /*
         * TODO(dcross): Work references to ctlrtab into this code.
         */
        if (ctlr->type == i210)
                csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Qenable);

        for (;;) {
                i82563replenish(ctlr);
                i82563im(ctlr, Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack);
                ctlr->rsleep++;
                rendez_sleep(&ctlr->rrendez, i82563rim, ctlr);

                rdh = ctlr->rdh;
                passed = 0; /* counted below but not otherwise used */
                for (;;) {
                        /* take and clear the pending receive bits */
                        rim = ctlr->rim;
                        ctlr->rim = 0;
                        rd = &ctlr->rdba[rdh];
                        if (!(rd->status & Rdd))
                                break;

                        /*
                         * Accept eop packets with no errors.
                         */
                        bp = ctlr->rb[rdh];
                        if ((rd->status & Reop) && rd->errors == 0) {
                                bp->wp += rd->length;
                                bp->lim = bp->wp; /* lie like a dog. */
                                if (0)
                                        ckcksums(ctlr, rd, bp);
                                etheriq(edev, bp, 1); /* pass pkt upstream */
                                passed++;
                        } else {
                                if (rd->status & Reop && rd->errors)
                                        printd("%s: input packet error %#ux\n",
                                               tname[ctlr->type], rd->errors);
                                freeb(bp);
                        }
                        /* the block was either passed upstream or freed */
                        ctlr->rb[rdh] = NULL;

                        /* rd needs to be replenished to accept another pkt */
                        rd->status = 0;
                        ctlr->rdfree--;
                        ctlr->rdh = rdh = NEXT_RING(rdh, Nrd);
                        /*
                         * if number of rds ready for packets is too low,
                         * set up the unready ones.
                         */
                        if (ctlr->rdfree <= Nrd - 32 || (rim & Rxdmt0))
                                i82563replenish(ctlr);
                }
        }
}
1325
1326static int i82563lim(void *ctlr)
1327{
1328        return ((struct ctlr *)ctlr)->lim != 0;
1329}
1330
1331static int phynum(struct ctlr *ctlr)
1332{
1333        if (ctlr->phynum < 0)
1334                switch (ctlr->type) {
1335                case i82577:
1336                        //      case i82578:            /* not yet implemented
1337                        //      */
1338                case i82579:
1339                case i217:
1340                case i218:
1341                        ctlr->phynum = 2; /* pcie phy */
1342                        break;
1343                default:
1344                        ctlr->phynum = 1; /* gbe phy */
1345                        break;
1346                }
1347        return ctlr->phynum;
1348}
1349
1350static unsigned int phyread(struct ctlr *ctlr, int reg)
1351{
1352        unsigned int phy, i;
1353
1354        if (reg >= 32)
1355                iprint("phyread: reg %d >= 32\n", reg);
1356        csr32w(ctlr, Mdic,
1357               MDIrop | phynum(ctlr) << MDIpSHIFT | reg << MDIrSHIFT);
1358        phy = 0;
1359        for (i = 0; i < 64; i++) {
1360                phy = csr32r(ctlr, Mdic);
1361                if (phy & (MDIe | MDIready))
1362                        break;
1363                udelay(1);
1364        }
1365        if ((phy & (MDIe | MDIready)) != MDIready)
1366                return ~0;
1367        return phy & 0xffff;
1368}
1369
1370static unsigned int phywrite(struct ctlr *ctlr, int reg, uint16_t val)
1371{
1372        unsigned int phy, i;
1373
1374        if (reg >= 32)
1375                iprint("phyread: reg %d >= 32\n", reg);
1376        csr32w(ctlr, Mdic,
1377               MDIwop | phynum(ctlr) << MDIpSHIFT | reg << MDIrSHIFT | val);
1378        phy = 0;
1379        for (i = 0; i < 64; i++) {
1380                phy = csr32r(ctlr, Mdic);
1381                if (phy & (MDIe | MDIready))
1382                        break;
1383                udelay(1);
1384        }
1385        if ((phy & (MDIe | MDIready)) != MDIready)
1386                return ~0;
1387        return 0;
1388}
1389
1390static uint32_t kmrnread(struct ctlr *ctlr, uint32_t reg_addr)
1391{
1392        /* write register address */
1393        csr32w(ctlr, Kumctrlsta,
1394               ((reg_addr << Kumctrlstaoffshift) & Kumctrlstaoff) |
1395                   Kumctrlstaren);
1396        udelay(2);
1397        /* read data */
1398        return csr32r(ctlr, Kumctrlsta);
1399}
1400
1401static void kmrnwrite(struct ctlr *ctlr, uint32_t reg_addr, uint16_t data)
1402{
1403        csr32w(ctlr, Kumctrlsta,
1404               ((reg_addr << Kumctrlstaoffshift) & Kumctrlstaoff) | data);
1405        udelay(2);
1406}
1407
/*
 * this is essentially black magic.  we blindly follow the incantations
 * prescribed by the god Intel:
 *
 * On ESB2, the MAC-to-PHY (Kumeran) interface must be configured after
 * link is up before any traffic is sent.
 *
 * workaround DMA unit hang on I218
 *
 * At 1Gbps link speed, one of the MAC's internal clocks can be stopped
 * for up to 4us when entering K1 (a power mode of the MAC-PHY
 * interconnect).  If the MAC is waiting for completion indications for 2
 * DMA write requests into Host memory (e.g.  descriptor writeback or Rx
 * packet writing) and the indications occur while the clock is stopped,
 * both indications will be missed by the MAC, causing the MAC to wait
 * for the completion indications and be unable to generate further DMA
 * write requests.  This results in an apparent hardware hang.
 *
 * Work-around the bug by disabling the de-assertion of the clock request
 * when 1Gbps link is acquired (K1 must be disabled while doing this).
 * Also, set appropriate Tx re-transmission timeouts for 10 and 100-half
 * link speeds to avoid Tx hangs.
 *
 * Every path through this function sets ctlr->didk1fix before returning.
 */
static void k1fix(struct ctlr *ctlr)
{
        int txtmout; /* units of 10us */
        uint32_t fextnvm6, status;
        uint16_t reg;
        struct ether *edev;

        edev = ctlr->edev;
        fextnvm6 = csr32r(ctlr, Fextnvm6);
        status = csr32r(ctlr, Status);
        /* status speed bits are different on 217/8 than earlier ctlrs */
        if (edev->link && status & (Sspeed1000 >> 2)) {
                /* 1Gbps: disable K1, request the PLL clock, restore K1 */
                reg = kmrnread(ctlr, Kumctrlstak1cfg);
                kmrnwrite(ctlr, Kumctrlstak1cfg, reg & ~Kumctrlstak1enable);
                udelay(10);
                csr32w(ctlr, Fextnvm6, fextnvm6 | Fextnvm6reqpllclk);
                kmrnwrite(ctlr, Kumctrlstak1cfg, reg);
                ctlr->didk1fix = 1;
                return;
        }
        /* else uncommon cases */

        fextnvm6 &= ~Fextnvm6reqpllclk;
        /*
         * 217 manual claims not to have Frcdplx bit in status;
         * 218 manual just omits the non-phy registers.
         */
        if (!edev->link || (status & (Sspeed100 >> 2 | Frcdplx)) ==
                               (Sspeed100 >> 2 | Frcdplx)) {
                /* no link, or 100Mb/s with Frcdplx: only drop the clock req */
                csr32w(ctlr, Fextnvm6, fextnvm6);
                ctlr->didk1fix = 1;
                return;
        }

        /* access other page via phy addr 1 reg 31, then access reg 16-30 */
        phywrite(ctlr, Phypage, I217inbandctlpage << 5);
        reg = phyread(ctlr, I217inbandctlreg) & ~I217inbandctllnkststxtmoutmask;
        if (status & (Sspeed100 >> 2)) { /* 100Mb/s half-duplex? */
                txtmout = 5;
                fextnvm6 &= ~Fextnvm6enak1entrycond;
        } else { /* 10Mb/s */
                txtmout = 50;
                fextnvm6 |= Fextnvm6enak1entrycond;
        }
        phywrite(ctlr, I217inbandctlreg,
                 reg | txtmout << I217inbandctllnkststxtmoutshift);
        csr32w(ctlr, Fextnvm6, fextnvm6);
        phywrite(ctlr, Phypage, 0 << 5); /* reset page to usual 0 */
        ctlr->didk1fix = 1;
}
1481
/*
 * watch for changes of link state
 *
 * Link task; v is the struct ether.  Never returns.  After enabling the
 * PHY's link-related interrupts where the PHY answers, it loops: read the
 * PHY status, restart auto-negotiation if a fault is flagged, update
 * edev->link, edev->mbps and the per-speed counters, run k1fix() when an
 * i218 link newly comes up, then unmask Lsc and sleep until the interrupt
 * handler sets ctlr->lim.
 */
static void i82563lproc(void *v)
{
        unsigned int phy, sp, a, phy79, prevlink;
        struct ctlr *ctlr;
        struct ether *edev;

        edev = v;
        ctlr = edev->ctlr;
        /* phy79 selects the Phystat/Phyier218 register layout below */
        phy79 = 0;
        switch (ctlr->type) {
        case i82579:
        case i82580:
        case i217:
        case i218:
        case i350:
                phy79 = 1;
                break;
        }
        /*
         * TODO(dcross): Extract PHY number from ctlrtab.
         */
        if (ctlr->type == i82573 && phyread(ctlr, Phyier) != ~0) {
                phy = phyread(ctlr, Phyier);
                phywrite(ctlr, Phyier, phy | Lscie | Ancie | Spdie | Panie);
        } else if (phy79 && phyread(ctlr, Phyier218) != ~0) {
                phy = phyread(ctlr, Phyier218);
                phywrite(ctlr, Phyier218, phy | Lscie218 | Ancie218 | Spdie218);
        }
        prevlink = 0;
        for (;;) {
                a = 0;
                phy = phyread(ctlr, phy79 ? Phystat : Physsr);
                if (phy == ~0) /* PHY read failed: just wait for next event */
                        goto next;
                if (phy79) {
                        sp = (phy >> 8) & 3;
                        // a = phy & (ctlr->type == i218? Anfs: Ans);
                        a = phy & Anfs;
                } else {
                        sp = (phy >> 14) & 3;
                        switch (ctlr->type) {
                        case i82563:
                        case i210:
                                a = phyread(ctlr, Phyisr) & Ane; /* a-n error */
                                break;
                        case i82571:
                        case i82572:
                        case i82575:
                        case i82576:
                                a = phyread(ctlr, Phylhr) & Anf; /* a-n fault */
                                sp = (sp - 1) & 3;
                                break;
                        }
                }
                if (a) /* enable & restart autoneg */
                        phywrite(ctlr, Phyctl,
                                 phyread(ctlr, Phyctl) | Ran | Ean);
                edev->link = (phy & (phy79 ? Link : Rtlink)) != 0;
                if (edev->link) {
                        /* sp indexes both the counters and speedtab */
                        ctlr->speeds[sp]++;
                        if (speedtab[sp])
                                edev->mbps = speedtab[sp];
                        if (prevlink == 0 && ctlr->type == i218)
                                k1fix(ctlr); /* link newly up: kludge away */
                        netif_carrier_on(edev);
                } else
                        ctlr->didk1fix = 0; /* force fix at next link up */
                prevlink = edev->link;
        next:
                /* clear the pending flag, unmask Lsc, wait for next change */
                ctlr->lim = 0;
                i82563im(ctlr, Lsc);
                ctlr->lsleep++;
                rendez_sleep(&ctlr->lrendez, i82563lim, ctlr);
        }
}
1561
/*
 * Rendezvous condition that is never true: ignores its argument and
 * always returns 0.
 */
static int return0(void *unused_void_p)
{
        (void)unused_void_p;
        return 0;
}
1566
1567static void i82563tproc(void *v)
1568{
1569        struct ether *edev;
1570        struct ctlr *ctlr;
1571
1572        edev = v;
1573        ctlr = edev->ctlr;
1574        for (;;) {
1575                rendez_sleep(&ctlr->trendez, return0, 0);
1576                i82563transmit(edev);
1577        }
1578}
1579
1580/*
1581 * controller is buggered; shock it back to life.
1582 */
1583static void restart(struct ctlr *ctlr)
1584{
1585        if (0) {
1586                static spinlock_t rstlock;
1587
1588                qlock(&ctlr->tlock);
1589                spin_lock_irqsave(&rstlock);
1590                iprint("#l%d: resetting...", ctlr->edev->ctlrno);
1591                i82563reset(ctlr);
1592                /* [rt]xinit reset the ring indices */
1593                i82563txinit(ctlr);
1594                i82563rxinit(ctlr);
1595                csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren);
1596                spin_unlock_irqsave(&rstlock);
1597                qunlock(&ctlr->tlock);
1598                iprint("reset\n");
1599        }
1600}
1601
1602static void freemem(struct ctlr *ctlr)
1603{
1604        kfree(ctlr->tb);
1605        ctlr->tb = NULL;
1606        kfree(ctlr->rb);
1607        ctlr->rb = NULL;
1608        kfree(ctlr->tdba);
1609        ctlr->tdba = NULL;
1610        kfree(ctlr->rdba);
1611        ctlr->rdba = NULL;
1612}
1613
1614static void i82563attach(struct ether *edev)
1615{
1616        ERRSTACK(2);
1617        int i;
1618        struct block *bp;
1619        struct ctlr *ctlr;
1620        char *lname, *rname, *tname;
1621
1622        ctlr = edev->ctlr;
1623        qlock(&ctlr->alock);
1624
1625        if (ctlr->attached) {
1626                qunlock(&ctlr->alock);
1627                return;
1628        }
1629
1630        if (waserror()) {
1631                freemem(ctlr);
1632                qunlock(&ctlr->alock);
1633                nexterror();
1634        }
1635
1636        ctlr->alloc = kzmalloc(
1637            Nrd * sizeof(struct rd) + Ntd * sizeof(struct td) + 255, MEM_WAIT);
1638        if (ctlr->alloc == NULL) {
1639                qunlock(&ctlr->alock);
1640                error(ENOMEM, "i82563attach: error allocating rx/tx rings");
1641        }
1642        ctlr->rdba = (struct rd *)ROUNDUP((uintptr_t)ctlr->alloc, 256);
1643        ctlr->tdba = (struct td *)(ctlr->rdba + Nrd);
1644        ctlr->rb = kzmalloc(Nrd * sizeof(struct block *), 0);
1645        ctlr->tb = kzmalloc(Ntd * sizeof(struct block *), 0);
1646        if (ctlr->rb == NULL || ctlr->tb == NULL) {
1647                qunlock(&ctlr->alock);
1648                error(ENOMEM, "i82563attach: error allocating rx/tx buffers");
1649        }
1650
1651        ctlr->edev = edev; /* point back to Ether* */
1652        ctlr->attached = 1;
1653
1654        lname = kzmalloc(KNAMELEN, MEM_WAIT);
1655        snprintf(lname, KNAMELEN, "#l%dl", edev->ctlrno);
1656        ktask(lname, i82563lproc, edev);
1657
1658        rname = kzmalloc(KNAMELEN, MEM_WAIT);
1659        snprintf(rname, KNAMELEN, "#l%dr", edev->ctlrno);
1660        ktask(rname, i82563rproc, edev);
1661
1662        tname = kzmalloc(KNAMELEN, MEM_WAIT);
1663        snprintf(tname, KNAMELEN, "#l%dt", edev->ctlrno);
1664        ktask(tname, i82563tproc, edev);
1665
1666        i82563txinit(ctlr);
1667
1668        qunlock(&ctlr->alock);
1669        poperror();
1670}
1671
1672static void i82563interrupt(struct hw_trapframe *unused_hw_trapframe, void *arg)
1673{
1674        struct ctlr *ctlr;
1675        struct ether *edev;
1676        int icr, im, i, loops;
1677
1678        edev = arg;
1679        ctlr = edev->ctlr;
1680        spin_lock_irqsave(&ctlr->imlock);
1681        csr32w(ctlr, Imc, ~0);
1682        im = ctlr->im;
1683        loops = 0;
1684        i = Nrd; /* don't livelock */
1685        for (icr = csr32r(ctlr, Icr); icr & ctlr->im && i-- > 0;
1686             icr = csr32r(ctlr, Icr)) {
1687                loops++;
1688                if (icr & Lsc) {
1689                        im &= ~Lsc;
1690                        ctlr->lim = icr & Lsc;
1691                        rendez_wakeup(&ctlr->lrendez);
1692                        ctlr->lintr++;
1693                }
1694                if (icr & (Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack)) {
1695                        ctlr->rim = icr & (Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack);
1696                        im &= ~(Rxt0 | Rxo | Rxdmt0 | Rxseq | Ack);
1697                        rendez_wakeup(&ctlr->rrendez);
1698                        ctlr->rintr++;
1699                }
1700                if (icr & Txdw) {
1701                        im &= ~Txdw;
1702                        ctlr->tintr++;
1703                        rendez_wakeup(&ctlr->trendez);
1704                }
1705        }
1706        ctlr->im = im;
1707        csr32w(ctlr, Ims, im);
1708        spin_unlock_irqsave(&ctlr->imlock);
1709}
1710
1711/* assume misrouted interrupts and check all controllers */
1712static void i82575interrupt(struct hw_trapframe *unused_hw_trapframe,
1713                            void *unused_arg)
1714{
1715        struct ctlr *ctlr;
1716
1717        for (ctlr = i82563ctlrhead; ctlr != NULL && ctlr->edev != NULL;
1718             ctlr = ctlr->next)
1719                i82563interrupt(NULL, ctlr->edev);
1720}
1721
1722static int i82563detach0(struct ctlr *ctlr)
1723{
1724        int r, timeo;
1725
1726        /*
1727         * Perform a device reset to get the chip back to the
1728         * power-on state, followed by an EEPROM reset to read
1729         * the defaults for some internal registers.
1730         */
1731        csr32w(ctlr, Imc, ~0);
1732        csr32w(ctlr, Rctl, 0);
1733        csr32w(ctlr, Tctl, 0);
1734
1735        udelay(1000 * 1000);
1736
1737        /*
1738         * Balance Rx/Tx packet buffer.
1739         * No need to set PBA register unless using jumbo, defaults to 32KB
1740         * for receive. If it is changed, then have to do a MAC reset,
1741         * and need to do that at the the right time as it will wipe stuff.
1742         *
1743         * TODO(dcross): reconcile the following code with the above commentary.
1744         */
1745        if (0) {
1746                if (ctlr->rbsz > 8192 && ctlrtab[ctlr->type].flag & Fpba) {
1747                        ctlr->pba = csr32r(ctlr, Pba);
1748                        r = ctlr->pba >> 16;
1749                        r += ctlr->pba & 0xffff;
1750                        r >>= 1;
1751                        csr32w(ctlr, Pba, r);
1752                } else if (ctlr->type == i82573 && ctlr->rbsz > 1514)
1753                        csr32w(ctlr, Pba, 14);
1754        }
1755        ctlr->pba = csr32r(ctlr, Pba);
1756
1757        /* set packet buffer size if present.  no effect until soft reset. */
1758        switch (ctlr->type) {
1759        case i82566:
1760        case i82567:
1761        case i217:
1762                ctlr->pbs = 16; /* in KB */
1763                csr32w(ctlr, Pbs, ctlr->pbs);
1764                break;
1765        case i218:
1766                // after pxe or 9fat boot, pba is always 0xe0012 on i218 => 32K
1767                ctlr->pbs = (ctlr->pba >> 16) + (uint16_t)ctlr->pba;
1768                csr32w(ctlr, Pbs, ctlr->pbs);
1769                break;
1770        }
1771
1772        r = csr32r(ctlr, Ctrl);
1773        if (ctlr->type == i82566 || ctlr->type == i82567 ||
1774            ctlr->type == i82579)
1775                r |= Phyrst;
1776        csr32w(ctlr, Ctrl, Devrst | r);
1777        udelay(1000);
1778        for (timeo = 0; timeo < 1000; timeo++) {
1779                if (!(csr32r(ctlr, Ctrl) & Devrst))
1780                        break;
1781                udelay(1000);
1782        }
1783        if (csr32r(ctlr, Ctrl) & Devrst)
1784                return -1;
1785
1786        r = csr32r(ctlr, Ctrlext);
1787        csr32w(ctlr, Ctrlext, r | Eerst);
1788        udelay(1000);
1789        for (timeo = 0; timeo < 1000; timeo++) {
1790                if (!(csr32r(ctlr, Ctrlext) & Eerst))
1791                        break;
1792                udelay(1000);
1793        }
1794        if (csr32r(ctlr, Ctrlext) & Eerst)
1795                return -1;
1796
1797        csr32w(ctlr, Imc, ~0);
1798        udelay(1000);
1799        for (timeo = 0; timeo < 1000; timeo++) {
1800                if (!csr32r(ctlr, Icr))
1801                        break;
1802                udelay(1000);
1803        }
1804        if (csr32r(ctlr, Icr))
1805                return -1;
1806
1807        csr32w(ctlr, Ctrl, Slu | csr32r(ctlr, Ctrl));
1808        return 0;
1809}
1810
1811static int i82563detach(struct ctlr *ctlr)
1812{
1813        int r;
1814        static spinlock_t detlck;
1815
1816        spin_lock_irqsave(&detlck);
1817        r = i82563detach0(ctlr);
1818        spin_unlock_irqsave(&detlck);
1819        return r;
1820}
1821
1822static void i82563shutdown(struct ether *ether)
1823{
1824        i82563detach(ether->ctlr);
1825}
1826
1827static uint16_t eeread(struct ctlr *ctlr, int adr)
1828{
1829        uint32_t n;
1830
1831        csr32w(ctlr, Eerd, EEstart | adr << 2);
1832        n = 1000000;
1833        while (n > 0 && (csr32r(ctlr, Eerd) & EEdone) == 0)
1834                n--;
1835        if (n == 0)
1836                panic("i82563: eeread stuck");
1837        return csr32r(ctlr, Eerd) >> 16;
1838}
1839
1840/* load eeprom into ctlr */
1841static int eeload(struct ctlr *ctlr)
1842{
1843        uint16_t sum;
1844        int data, adr;
1845
1846        sum = 0;
1847        for (adr = 0; adr < 0x40; adr++) {
1848                data = eeread(ctlr, adr);
1849                ctlr->eeprom[adr] = data;
1850                sum += data;
1851        }
1852        return sum;
1853}
1854
1855static int fcycle(struct ctlr *unused_ctlr_p, struct flash *f)
1856{
1857        uint16_t s, i;
1858
1859        s = f->reg[Fsts];
1860        if ((s & Fvalid) == 0)
1861                return -1;
1862        f->reg[Fsts] |= Fcerr | Ael;
1863        for (i = 0; i < 10; i++) {
1864                if ((s & Scip) == 0) /* spi cycle done? */
1865                        return 0;
1866                udelay(1000);
1867                s = f->reg[Fsts];
1868        }
1869        return -1;
1870}
1871
1872static int fread(struct ctlr *ctlr, struct flash *f, int ladr)
1873{
1874        uint16_t s;
1875        uint32_t n;
1876
1877        udelay(1000);
1878        if (fcycle(ctlr, f) == -1)
1879                return -1;
1880        f->reg[Fsts] |= Fdone;
1881        f->reg32[Faddr] = ladr;
1882
1883        /* setup flash control register */
1884        s = f->reg[Fctl] & ~(0x1f << 8);
1885        s |= (2 - 1) << 8;   /* 2 bytes */
1886        s &= ~(2 * Flcycle); /* read */
1887        f->reg[Fctl] = s | Fgo;
1888
1889        n = 1000000;
1890        while (n > 0 && (f->reg[Fsts] & Fdone) == 0)
1891                n--;
1892        if (n == 0)
1893                panic("i82563: fread stuck");
1894        if (f->reg[Fsts] & (Fcerr | Ael))
1895                return -1;
1896        return f->reg32[Fdata] & 0xffff;
1897}
1898
1899/* load flash into ctlr */
1900static int fload(struct ctlr *ctlr)
1901{
1902        uint32_t data, r, adr;
1903        uint16_t sum;
1904        uintptr_t mmio_paddr;
1905        struct pci_device *pcidev = ctlr->pcidev;
1906        struct flash f;
1907
1908        mmio_paddr = pcidev->bar[1].mmio_base32 ? pcidev->bar[1].mmio_base32
1909                                                : pcidev->bar[1].mmio_base64;
1910        f.reg = (void *)vmap_pmem(mmio_paddr, pcidev->bar[1].mmio_sz);
1911        if (f.reg == NULL)
1912                return -1;
1913        f.reg32 = (void *)f.reg;
1914        f.base = f.reg32[Bfpr] & 0x1fff;
1915        f.lim = (f.reg32[Bfpr] >> 16) & 0x1fff;
1916        if (csr32r(ctlr, Eec) & Sec1val)
1917                f.base += (f.lim + 1 - f.base) >> 1;
1918        r = f.base << 12;
1919
1920        sum = 0;
1921        for (adr = 0; adr < 0x40; adr++) {
1922                data = fread(ctlr, &f, r + adr * 2);
1923                if (data == -1)
1924                        break;
1925                ctlr->eeprom[adr] = data;
1926                sum += data;
1927        }
1928        vunmap_vmem((uintptr_t)f.reg, pcidev->bar[1].mmio_sz);
1929        return sum;
1930}
1931
1932static int i82563reset(struct ctlr *ctlr)
1933{
1934        int i, r, type;
1935
1936        /*
1937         * TODO(dcross): Integrate ctlrtab references into this code.
1938         */
1939        if (i82563detach(ctlr)) {
1940                iprint("82563 reset: detach failed\n");
1941                return -1;
1942        }
1943        type = ctlr->type;
1944        if (ctlr->ra[Eaddrlen - 1] != 0)
1945                goto macset;
1946        switch (type) {
1947        case i82566:
1948        case i82567:
1949        case i82577:
1950                //  case i82578:            /* not yet implemented */
1951        case i82579:
1952        case i217:
1953        case i218:
1954                r = fload(ctlr);
1955                break;
1956        default:
1957                r = eeload(ctlr);
1958                break;
1959        }
1960        if (r != 0 && r != 0xBABA) {
1961                printd("%s: bad EEPROM checksum - %#.4ux\n", tname[type], r);
1962                // return -1;
1963        }
1964
1965        /* set mac addr */
1966        for (i = 0; i < Eaddrlen / 2; i++) {
1967                ctlr->ra[2 * i] = ctlr->eeprom[Ea + i];
1968                ctlr->ra[2 * i + 1] = ctlr->eeprom[Ea + i] >> 8;
1969        }
1970        /* ea ctlr[1] = ea ctlr[0]+1 */
1971        ctlr->ra[5] += (csr32r(ctlr, Status) & Lanid) >> 2;
1972        /*
1973         * zero other mac addresses.`
1974         * AV bits should be zeroed by master reset & there may only be 11
1975         * other registers on e.g., the i217.
1976         */
1977        for (i = 1; i < 12; i++) { /* `12' used to be `16' here */
1978                csr32w(ctlr, Ral + i * 8, 0);
1979                csr32w(ctlr, Rah + i * 8, 0);
1980        }
1981        memset(ctlr->mta, 0, sizeof(ctlr->mta));
1982macset:
1983        /* low mac addr */
1984        csr32w(ctlr, Ral,
1985               ctlr->ra[3] << 24 | ctlr->ra[2] << 16 | ctlr->ra[1] << 8 |
1986                   ctlr->ra[0]);
1987        /* address valid | high mac addr */
1988        csr32w(ctlr, Rah, 0x80000000 | ctlr->ra[5] << 8 | ctlr->ra[4]);
1989
1990        /* populate multicast table */
1991        for (i = 0; i < mcbitstolongs(mcastbits(ctlr)); i++)
1992                csr32w(ctlr, Mta + i * 4, ctlr->mta[i]);
1993
1994        /*
1995         * Does autonegotiation affect this manual setting?
1996         * The correct values here should depend on the PBA value
1997         * and maximum frame length, no?
1998         */
1999        /* fixed flow control ethernet address 0x0180c2000001 */
2000        csr32w(ctlr, Fcal, 0x00C28001);
2001        csr32w(ctlr, Fcah, 0x0100);
2002        if (type != i82579 && type != i210 && type != i217 && type != i218)
2003                /* flow control type, dictated by Intel */
2004                csr32w(ctlr, Fct, 0x8808);
2005        csr32w(ctlr, Fcttv, 0x0100); /* for XOFF frame */
2006        // ctlr->fcrtl = 0x00002000;        /* rcv low water mark: 8KB */
2007        /* rcv high water mark: 16KB, < rcv buffer in PBA & RXA */
2008        // ctlr->fcrth = 0x00004000;
2009        ctlr->fcrtl = ctlr->fcrth = 0;
2010        csr32w(ctlr, Fcrtl, ctlr->fcrtl);
2011        csr32w(ctlr, Fcrth, ctlr->fcrth);
2012        return 0;
2013}
2014
2015static void i82563pci(void)
2016{
2017        int type;
2018        uintptr_t io;
2019        void *mem;
2020        struct pci_device *p;
2021        struct ctlr *ctlr;
2022
2023        p = NULL;
2024        STAILQ_FOREACH (p, &pci_devices, all_dev) {
2025                if (p->ven_id != 0x8086)
2026                        continue;
2027                switch (p->dev_id) {
2028                default:
2029                        continue;
2030                case 0x1096:
2031                case 0x10ba:
2032                case 0x1098: /* serdes; not seen */
2033                case 0x10bb: /* serdes */
2034                        type = i82563;
2035                        break;
2036                case 0x1049: /* mm */
2037                case 0x104a: /* dm */
2038                case 0x104b: /* dc */
2039                case 0x104d: /* mc */
2040                case 0x10bd: /* dm */
2041                case 0x294c: /* dc-2 */
2042                        type = i82566;
2043                        break;
2044                case 0x10de: /* lm-3 */
2045                case 0x10df: /* lf ich10 */
2046                case 0x10e5: /* lm ich9 */
2047                case 0x10f5: /* lm-2 */
2048                        type = i82567;
2049                        break;
2050                case 0x10bf: /* lf ich9m */
2051                case 0x10cb: /* v ich9m */
2052                case 0x10cd: /* lf ich10 */
2053                case 0x10ce: /* v ich10 */
2054                case 0x10cc: /* lm ich10 */
2055                        type = i82567m;
2056                        break;
2057                case 0x105e: /* eb */
2058                case 0x105f: /* eb */
2059                case 0x1060: /* eb */
2060                case 0x10a4: /* eb */
2061                case 0x10a5: /* eb  fiber */
2062                case 0x10bc: /* eb */
2063                case 0x10d9: /* eb serdes */
2064                case 0x10da: /* eb serdes “ophir” */
2065                        type = i82571;
2066                        break;
2067                case 0x107d: /* eb copper */
2068                case 0x107e: /* ei fiber */
2069                case 0x107f: /* ei */
2070                case 0x10b9: /* sic, 82572gi */
2071                        type = i82572;
2072                        break;
2073                case 0x108b: /*  v */
2074                case 0x108c: /*  e (iamt) */
2075                case 0x109a: /*  l */
2076                        type = i82573;
2077                        break;
2078                case 0x10d3: /* l */
2079                        type = i82574;
2080                        break;
2081                case 0x10a7: /* 82575eb: one of a pair of controllers */
2082                case 0x10a9: /* fiber/serdes */
2083                        type = i82575;
2084                        break;
2085                case 0x10c9: /* 82576 copper */
2086                case 0x10e6: /* 82576 fiber */
2087                case 0x10e7: /* 82576 serdes */
2088                case 0x150d: /* backplane */
2089                        type = i82576;
2090                        break;
2091                case 0x10ea: /* 82577lm */
2092                        type = i82577;
2093                        break;
2094                case 0x10eb: /* lm “calpella” */
2095                        type = i82577m;
2096                        break;
2097                case 0x1502: /* 82579lm */
2098                case 0x1503: /* 82579v */
2099                        type = i82579;
2100                        break;
2101                case 0x10f0: /* dm “king's creek” */
2102                        type = i82578m;
2103                        break;
2104                case 0x150e: /* “barton hills” */
2105                case 0x150f: /* fiber */
2106                case 0x1510: /* backplane */
2107                case 0x1511: /* sfp */
2108                case 0x1516:
2109                        type = i82580;
2110                        break;
2111                case 0x1506: /* v */
2112                        type = i82583;
2113                        break;
2114                case 0x1533: /* i210-t1 */
2115                case 0x1534: /* i210 */
2116                case 0x1536: /* i210-fiber */
2117                case 0x1537: /* i210-backplane */
2118                case 0x1538:
2119                case 0x1539: /* i211 */
2120                case 0x157b: /* i210 */
2121                case 0x157c: /* i210 */
2122                        type = i210;
2123                        break;
2124                case 0x153a: /* i217-lm */
2125                case 0x153b: /* i217-v */
2126                        type = i217;
2127                        break;
2128                case 0x15a0: /* i218-lm */
2129                case 0x15a1: /* i218-v */
2130                case 0x15a2: /* i218-lm */
2131                case 0x15a3: /* i218-v */
2132                        type = i218;
2133                        break;
2134                case 0x151f: /* “powerville” eeprom-less */
2135                case 0x1521: /* copper */
2136                case 0x1522: /* fiber */
2137                case 0x1523: /* serdes */
2138                case 0x1524: /* sgmii */
2139                        type = i350;
2140                        break;
2141                }
2142
2143                io = p->bar[0].mmio_base32 ? p->bar[0].mmio_base32
2144                                           : p->bar[0].mmio_base64;
2145                mem = (void *)vmap_pmem(io, p->bar[0].mmio_sz);
2146                if (mem == NULL) {
2147                        printd("%s: can't map %.8lux\n", tname[type], io);
2148                        continue;
2149                }
2150                ctlr = kzmalloc(sizeof(struct ctlr), 0);
2151                if (ctlr == NULL) {
2152                        vunmap_vmem((uintptr_t)mem, p->bar[0].mmio_sz);
2153                        error(ENOMEM, "i82563pci: alloc for ctlr failed");
2154                }
2155                ctlr->mmio_paddr = io;
2156                ctlr->rbsz = ctlrtab[type].mtu;
2157                ctlr->pcidev = p;
2158                ctlr->type = type;
2159                ctlr->nic = mem;
2160                ctlr->phynum = -1; /* not yet known */
2161
2162                qlock_init(&ctlr->alock);
2163                spinlock_init_irqsave(&ctlr->imlock);
2164                rendez_init(&ctlr->lrendez);
2165                qlock_init(&ctlr->slock);
2166                rendez_init(&ctlr->rrendez);
2167                rendez_init(&ctlr->trendez);
2168                qlock_init(&ctlr->tlock);
2169
2170                pci_set_bus_master(p);
2171                if (i82563reset(ctlr)) {
2172                        vunmap_vmem((uintptr_t)mem, p->bar[0].mmio_sz);
2173                        kfree(ctlr);
2174                        continue;
2175                }
2176
2177                if (i82563ctlrhead != NULL)
2178                        i82563ctlrtail->next = ctlr;
2179                else
2180                        i82563ctlrhead = ctlr;
2181                i82563ctlrtail = ctlr;
2182        }
2183}
2184
2185static int pnp(struct ether *edev, int type)
2186{
2187        struct ctlr *ctlr;
2188        static int done;
2189
2190        if (!done) {
2191                i82563pci();
2192                done = 1;
2193        }
2194
2195        /*
2196         * Any adapter matches if no edev->port is supplied,
2197         * otherwise the ports must match.
2198         */
2199        for (ctlr = i82563ctlrhead; ctlr != NULL; ctlr = ctlr->next) {
2200                if (ctlr->active)
2201                        continue;
2202                if (type != Iany && ctlr->type != type)
2203                        continue;
2204                if (edev->port == 0 || edev->port == ctlr->mmio_paddr) {
2205                        ctlr->active = 1;
2206                        break;
2207                }
2208        }
2209        if (ctlr == NULL)
2210                return -1;
2211
2212        edev->ctlr = ctlr;
2213        strlcpy(edev->drv_name, "i82563", KNAMELEN);
2214        ctlr->edev = edev; /* point back to Ether* */
2215        edev->port = ctlr->mmio_paddr;
2216        edev->irq = ctlr->pcidev->irqline;
2217        edev->tbdf = pci_to_tbdf(ctlr->pcidev);
2218        edev->mbps = 1000;
2219        edev->max_mtu = ctlr->rbsz - ETHERHDRSIZE;
2220        edev->mtu = edev->mtu;
2221        memmove(edev->ea, ctlr->ra, Eaddrlen);
2222        /* Jim or whoever have this turned on already.  We might be capable of
2223         * other features. */
2224        edev->feat = NETF_RXCSUM;
2225
2226        /*
2227         * Linkage to the generic ethernet driver.
2228         */
2229        edev->attach = i82563attach;
2230        edev->transmit = i82563transmit;
2231        edev->ifstat = i82563ifstat;
2232        edev->ctl = i82563ctl;
2233
2234        edev->arg = edev;
2235        edev->promiscuous = i82563promiscuous;
2236        edev->shutdown = i82563shutdown;
2237        edev->multicast = i82563multicast;
2238
2239        register_irq(edev->irq,
2240                     ctlr->type == i82575 ? i82575interrupt : i82563interrupt,
2241                     edev, edev->tbdf);
2242        return 0;
2243}
2244
2245static int anypnp(struct ether *e)
2246{
2247        return pnp(e, Iany);
2248}
2249
2250static int i82563pnp(struct ether *e)
2251{
2252        return pnp(e, i82563);
2253}
2254
2255static int i82566pnp(struct ether *e)
2256{
2257        return pnp(e, i82566);
2258}
2259
2260static int i82571pnp(struct ether *e)
2261{
2262        return pnp(e, i82571);
2263}
2264
2265static int i82572pnp(struct ether *e)
2266{
2267        return pnp(e, i82572);
2268}
2269
2270static int i82573pnp(struct ether *e)
2271{
2272        return pnp(e, i82573);
2273}
2274
2275static int i82575pnp(struct ether *e)
2276{
2277        return pnp(e, i82575);
2278}
2279
2280static int i82579pnp(struct ether *e)
2281{
2282        return pnp(e, i82579);
2283}
2284
2285static int i210pnp(struct ether *e)
2286{
2287        return pnp(e, i210);
2288}
2289
2290static int i217pnp(struct ether *e)
2291{
2292        return pnp(e, i217);
2293}
2294
2295static int i218pnp(struct ether *e)
2296{
2297        return pnp(e, i218);
2298}
2299
2300linker_func_3(ether82563link)
2301{
2302        /* recognise lots of model numbers for debugging assistance */
2303        addethercard("i82563", i82563pnp);
2304        addethercard("i82566", i82566pnp);
2305        addethercard("i82571", i82571pnp);
2306        addethercard("i82572", i82572pnp);
2307        addethercard("i82573", i82573pnp);
2308        addethercard("i82575", i82575pnp);
2309        addethercard("i82579", i82579pnp);
2310        addethercard("i210", i210pnp);
2311        addethercard("i217", i217pnp);
2312        addethercard("i218", i218pnp);
2313        addethercard("igbepcie", anypnp);
2314}
2315