akaros/kern/arch/x86/msi.c
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <cpio.h>
#include <pmap.h>
#include <smp.h>
#include <net/ip.h>

enum {
        Dpcicap         = 1<<0,
        Dmsicap         = 1<<1,
        Dvec            = 1<<2,
        Debug           = 0,
};

enum {
/* MSI address format
 *
 * +31----------------------20+19----------12+11--------4+--3--+--2--+1---0+
 * |       0xfee              | Dest APIC ID |  Reserved | RH  | DM  |  XX |
 * +--------------------------+--------------+-----------+-----+-----+-----+
 *
 * RH: Redirection Hint
 * DM: Destination Mode
 * XX: Probably reserved, set to 0
 */
        Msiabase        = 0xfee00000u,
        Msiadest        = 1<<12,        /* same as 63:56 of apic vector */
        Msiaedest       = 1<<4,         /* same as 55:48 of apic vector */
        Msialowpri      = 1<<3,         /* redirection hint */
        Msialogical     = 1<<2,

/* MSI data format
 * +63-------------------------------------------------------------------32+
 * |                          Reserved                                     |
 * +-------------------------------+-15-+-14-+--------+10----8+7----------0+
 * |          Reserved             | TM | Lv | Reserv | Dmode |   Vector   |
 * +-------------------------------+----+----+--------+-------+------------+
 *
 * Dmode: delivery mode (like APIC/LVT messages).  Usually 000 (Fixed).
 * TM: Trigger mode (0 Edge, 1 Level)
 * Lv: Level assert (0 Deassert, 1 Assert)
 *
 * For more info, check Intel's SDM Vol. 3 (grep "message signal"). */
        Msidlevel       = 1<<15,
        Msidassert      = 1<<14,
        Msidmode        = 1<<8,         /* 3 bits; delivery mode */
        Msidvector      = 0xff<<0,
};

enum {
        /* msi capabilities */
        Vmask           = 1<<8, /* Vectors can be masked. Optional. */
        Cap64           = 1<<7, /* 64-bit addresses. Optional. */
        Mmesgmsk        = 7<<4, /* Mask for # of messages allowed. See 6.8.1.3 */
        Mmcap           = 7<<1, /* # of messages the function can support. */
        Msienable       = 1<<0, /* Enable. */
        /* msix capabilities */
        Msixenable      = 1<<15,
        Msixmask        = 1<<14,
        Msixtblsize     = 0x7ff,
};

/* Find the offset of the MSI capability in this function's config space.  It
 * is defined in PCI spec section 6.8.1 and is variable-sized.  Returns 0 on
 * failure (no MSI capability). */
static int msicap(struct pci_device *p)
{
        return p->caps[PCI_CAP_ID_MSI];
}

/* Find the offset of the MSI-X capability in this function's config space.  It
 * is defined in PCI spec section 6.8.1 and is variable-sized.  Returns 0 on
 * failure (no MSI-X capability). */
static int msixcap(struct pci_device *p)
{
        return p->caps[PCI_CAP_ID_MSIX];
}

static int msi_blacklist(struct pci_device *p)
{
        switch (p->ven_id << 16 | p->dev_id) {
                case 0x11ab << 16 | 0x6485:
                case 0x8086 << 16 | 0x100f:
                        return -1;
        }
        return 0;
}

static int msix_blacklist(struct pci_device *p)
{
        switch (p->ven_id << 16 | p->dev_id) {
//      case 0x11ab << 16 | 0x6485:     /* placeholder */
                return -1;
        }
        return 0;
}

static uint32_t msi_make_addr_lo(uint64_t vec)
{
        unsigned int dest, lopri, logical;

        /* The destination is the traditional 8-bit APIC ID, which lives in
         * bits 63:56 of the vector.  Later we may need to deal with extra
         * destination bits (Msiaedest, in this code).  I haven't seen anything
         * in the Intel SDM about using Msiaedest (the bits are reserved). */
        dest = vec >> 56;
        /* lopri is rarely set, and Intel doesn't recommend using it.  With
         * MSI, the lopri field is actually a redirection hint, and it must
         * also be set when sending logical messages. */
        lopri = (vec & 0x700) == MTlp;
        logical = (vec & Lm) != 0;
        if (logical)
                lopri = 1;
        return Msiabase | Msiadest * dest | Msialowpri * lopri |
               Msialogical * logical;
}

static uint32_t msi_make_data(uint64_t vec)
{
        unsigned int deliv_mode;

        deliv_mode = (vec >> 8) & 7;
        /* We can only specify the lower 16 bits of the MSI message; the rest
         * gets forced to 0 by the device.  MSI-X can use the full 32 bits.
         * We're assuming edge triggered here. */
        return Msidmode * deliv_mode | ((unsigned int)vec & 0xff);
}
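
/* Worked example (illustrative values, assuming physical destination mode and
 * fixed delivery): for a vec with APIC ID 1 in bits 63:56 and IDT vector 0x41
 * in the low byte, msi_make_addr_lo() yields 0xfee00000 | (1 << 12) =
 * 0xfee01000, and msi_make_data() yields (0 << 8) | 0x41 = 0x0041. */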

/* See section 6.8.1 of the PCI spec.  Set up a single function on a single
 * device.  We need to take the vec, bust it up into bits, and put parts of it
 * in the MSI address and parts in the MSI data. */
int pci_msi_enable(struct pci_device *p, uint64_t vec)
{
        unsigned int c, f, datao;

        spin_lock_irqsave(&p->lock);
        if (p->msix_ready) {
                printk("MSI: MSI-X is already enabled, aborting\n");
                spin_unlock_irqsave(&p->lock);
                return -1;
        }
        if (p->msi_ready) {
                /* only allowing one enable of MSI per device (not supporting
                 * multiple vectors) */
                printk("MSI: MSI is already enabled, aborting\n");
                spin_unlock_irqsave(&p->lock);
                return -1;
        }
        p->msi_ready = TRUE;

        /* Get the offset of the MSI capability in the function's config
         * space. */
        c = msicap(p);
        if (!c) {
                spin_unlock_irqsave(&p->lock);
                return -1;
        }

        /* Read the control word and clear out the Mmesgmsk bits, so multiple
         * messages will not be enabled. */
        f = pcidev_read16(p, c + 2) & ~Mmesgmsk;

        if (msi_blacklist(p) != 0) {
                spin_unlock_irqsave(&p->lock);
                return -1;
        }

        /* Data begins at 8 bytes in. */
        datao = 8;
        p->msi_msg_addr_lo = msi_make_addr_lo(vec);
        printd("Write to %d %08lx\n", c + 4, p->msi_msg_addr_lo);
        pcidev_write32(p, c + 4, p->msi_msg_addr_lo);

        /* Even if it's 64-bit capable, we do nothing with the high order
         * bits.  If it is 64-bit, we need to offset datao (data offset) by 4
         * (i.e. another 32 bits). */
        if (f & Cap64) {
                datao += 4;
                pcidev_write32(p, c + 8, 0);
        }
        p->msi_msg_addr_hi = 0;

        p->msi_msg_data = msi_make_data(vec);
        printd("Write data %d %04x\n", c + datao, p->msi_msg_data);
        pcidev_write16(p, c + datao, p->msi_msg_data);

        /* If we have the option of masking the vectors, blow all the masks to
         * 0.  It's a 32-bit mask. */
        if (f & Vmask)
                pcidev_write32(p, c + datao + 4, 0);

        /* Now write the control bits back, with the Mmesg mask (which is a
         * power of 2) set to 0 (meaning one vector only).  Note we still
         * haven't enabled MSI.  We'll do that when we unmask.  According to
         * the spec, we're not supposed to use the Msienable bit to mask the
         * IRQ, though I don't see how we can mask on non-Vmask-supported HW.
         */
        printd("write @ %d %04lx\n", c + 2, f);
        pcidev_write16(p, c + 2, f);
        spin_unlock_irqsave(&p->lock);
        return 0;
}
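
/* Typical usage sketch (hypothetical driver code; identifiers like pcidev,
 * vec, and dest_apic_id are placeholders).  A driver builds a vec with the
 * IDT vector in the low byte and the destination APIC ID in bits 63:56, then:
 *
 *      if (pci_msi_enable(pcidev, vec) == 0) {
 *              pci_msi_route(pcidev, dest_apic_id);    // optional retarget
 *              pci_msi_unmask(pcidev);                 // sets Msienable
 *      }
 */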

/* Sets the per-vector mask bit (bit 0 of the entry's Vector Control dword),
 * so the device won't send messages for this entry. */
static void __msix_mask_entry(struct msix_entry *entry)
{
        uintptr_t reg = (uintptr_t)&entry->vector;
        write_mmreg32(reg, read_mmreg32(reg) | 0x1);
}

/* Clears the per-vector mask bit, allowing the device to send messages for
 * this entry. */
static void __msix_unmask_entry(struct msix_entry *entry)
{
        uintptr_t reg = (uintptr_t)&entry->vector;
        write_mmreg32(reg, read_mmreg32(reg) & ~0x1);
}
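
/* Assumption (the struct is defined elsewhere in the PCI/MSI headers): struct
 * msix_entry mirrors the 16-byte MSI-X table entry layout from the PCI spec,
 * roughly:
 *
 *      struct msix_entry {
 *              uint32_t addr_lo;       // message address, low 32 bits
 *              uint32_t addr_hi;       // message address, high 32 bits
 *              uint32_t data;          // message data
 *              uint32_t vector;        // vector control; bit 0 is the mask
 *      };
 *
 * which is why masking/unmasking is a read-modify-write of bit 0 of
 * entry->vector. */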

/* Decodes an MSI-X Table/PBA offset register at 'offset' in config space: the
 * low 3 bits are the BIR (which BAR holds the structure), and the remaining
 * bits are the offset within that BAR.  Returns the physical address of the
 * structure, or 0 on error. */
static uintptr_t msix_get_capbar_paddr(struct pci_device *p, int offset)
{
        uint32_t bir, capbar_off;
        uintptr_t membar;

        bir = pcidev_read32(p, offset);
        capbar_off = bir & ~0x7;
        bir &= 0x7;
        membar = pci_get_membar(p, bir);

        if (!membar) {
                printk("MSI-X: no cap membar, bir %d\n", bir);
                return 0;
        }
        membar += capbar_off;
        return membar;
}
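
/* Worked example (illustrative register value): if the dword at the Table
 * offset register reads 0x00002003, then bir = 3 and capbar_off = 0x2000, so
 * the table lives 0x2000 bytes into whatever BAR 3 maps. */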

/* One-time initialization of MSI-X for a PCI device.  Returns -1 on error.
 * Otherwise, the device will be ready to assign/route MSI-X entries/vectors.
 * All vectors are masked, but the overall MSI-X function is unmasked.
 *
 * Hold the pci_device lock. */
static int __pci_msix_init(struct pci_device *p)
{
        unsigned int c;
        uint16_t f;
        struct msix_entry *entry;

        if (p->msix_ready)
                return 0;
        if (p->msi_ready) {
                printk("MSI-X: MSI is already on, aborting\n");
                return -1;
        }
        if (msix_blacklist(p) != 0)
                return -1;
        c = msixcap(p);
        if (c == 0)
                return -1;
        f = pcidev_read16(p, c + 2);
        /* enable and mask the entire function/all vectors */
        f |= Msixenable | Msixmask;
        pcidev_write16(p, c + 2, f);

        p->msix_tbl_paddr = msix_get_capbar_paddr(p, c + 4);
        p->msix_pba_paddr = msix_get_capbar_paddr(p, c + 8);
        if (!p->msix_tbl_paddr || !p->msix_pba_paddr) {
                /* disable msix, so we can possibly use msi */
                pcidev_write16(p, c + 2, f & ~Msixenable);
                printk("MSI-X: Missing a tbl (%p) or PBA (%p) paddr!\n",
                       p->msix_tbl_paddr, p->msix_pba_paddr);
                return -1;
        }
        p->msix_nr_vec = (f & Msixtblsize) + 1;
        p->msix_tbl_vaddr = vmap_pmem_nocache(p->msix_tbl_paddr,
                                              p->msix_nr_vec *
                                              sizeof(struct msix_entry));
        if (!p->msix_tbl_vaddr) {
                pcidev_write16(p, c + 2, f & ~Msixenable);
                printk("MSI-X: unable to vmap the Table!\n");
                return -1;
        }
        p->msix_pba_vaddr = vmap_pmem_nocache(p->msix_pba_paddr,
                                              ROUNDUP(p->msix_nr_vec, 8) / 8);
        if (!p->msix_pba_vaddr) {
                pcidev_write16(p, c + 2, f & ~Msixenable);
                printk("MSI-X: unable to vmap the PBA!\n");
                vunmap_vmem(p->msix_tbl_vaddr,
                            p->msix_nr_vec * sizeof(struct msix_entry));
                return -1;
        }
        /* They should all be masked already, but remask just in case.
         * Likewise, we need to zero the data, since we'll use the lower byte
         * later when determining if an MSI-X vector is free or not. */
        entry = (struct msix_entry*)p->msix_tbl_vaddr;
        for (int i = 0; i < p->msix_nr_vec; i++, entry++) {
                __msix_mask_entry(entry);
                write_mmreg32((uintptr_t)&entry->data, 0);
        }
        /* unmask the device, now that all the vectors are masked */
        f &= ~Msixmask;
        pcidev_write16(p, c + 2, f);
        p->msix_ready = TRUE;
        return 0;
}

/* Some parts of MSI-X init need to happen during boot.  Devices can call this
 * during their reset methods, and then later register their IRQs during
 * attach.  Other OSes also allocate the vector around this time, though we'll
 * hold off on that for now. */
int pci_msix_init(struct pci_device *p)
{
        int ret;

        spin_lock_irqsave(&p->lock);
        ret = __pci_msix_init(p);
        spin_unlock_irqsave(&p->lock);
        return ret;
}

/* Enables an MSI-X vector for a PCI device.  vec is formatted like an ioapic
 * route.  This should be able to handle multiple vectors for a device.
 * Returns a msix_irq_vector linkage struct on success (the connection between
 * an irq_h and the specific {pcidev, entry}), and 0 on failure. */
struct msix_irq_vector *pci_msix_enable(struct pci_device *p, uint64_t vec)
{
        int i;
        struct msix_entry *entry;
        struct msix_irq_vector *linkage;

        spin_lock_irqsave(&p->lock);
        /* Ensure we're init'd.  We could remove this in the future, though not
         * everyone calls the extern pci_msix_init. */
        if (__pci_msix_init(p) < 0) {
                spin_unlock_irqsave(&p->lock);
                return 0;
        }
        /* Find an unused slot (no apic_vector assigned).  Later, we might want
         * to point back to the irq_hs for each entry.  Not a big deal now. */
        entry = (struct msix_entry*)p->msix_tbl_vaddr;
        for (i = 0; i < p->msix_nr_vec; i++, entry++)
                if (!(read_mmreg32((uintptr_t)&entry->data) & 0xff))
                        break;
        if (i == p->msix_nr_vec) {
                printk("[kernel] unable to alloc an MSI-X vector (bug?)\n");
                spin_unlock_irqsave(&p->lock);
                return 0;
        }
        linkage = kmalloc(sizeof(struct msix_irq_vector), MEM_WAIT);
        linkage->pcidev = p;
        linkage->entry = entry;
        linkage->addr_lo = msi_make_addr_lo(vec);
        linkage->addr_hi = 0;
        linkage->data = msi_make_data(vec);
        write_mmreg32((uintptr_t)&entry->data, linkage->data);
        write_mmreg32((uintptr_t)&entry->addr_lo, linkage->addr_lo);
        write_mmreg32((uintptr_t)&entry->addr_hi, linkage->addr_hi);
        spin_unlock_irqsave(&p->lock);
        return linkage;
}
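
/* Typical usage sketch (hypothetical driver code; pcidev, vec, and
 * dest_apic_id are placeholders): enable a vector, keep the returned linkage,
 * and unmask it once the handler is in place:
 *
 *      struct msix_irq_vector *lnk = pci_msix_enable(pcidev, vec);
 *
 *      if (lnk) {
 *              pci_msix_route_vector(lnk, dest_apic_id);  // optional retarget
 *              pci_msix_unmask_vector(lnk);               // vector can fire
 *      }
 */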

void pci_dump_msix_table(struct pci_device *p)
{
        struct msix_entry *entry;
        void *tbl = (void*)p->msix_tbl_vaddr;

        hexdump(tbl, p->msix_nr_vec * sizeof(struct msix_entry));
        entry = (struct msix_entry*)p->msix_tbl_vaddr;
        for (int i = 0; i < p->msix_nr_vec; i++, entry++)
                printk("Entry %d, addr hi:lo 0x%08x:%08x data 0x%08x\n", i,
                       entry->addr_hi, entry->addr_lo, entry->data);
}

/* Masks MSI delivery for the whole function by clearing Msienable.  Note the
 * comment in pci_msi_enable(): the spec discourages using Msienable as a
 * mask, but we don't have a per-vector option on non-Vmask hardware. */
void pci_msi_mask(struct pci_device *p)
{
        unsigned int c, f;

        c = msicap(p);
        assert(c);

        spin_lock_irqsave(&p->lock);
        f = pcidev_read16(p, c + 2);
        pcidev_write16(p, c + 2, f & ~Msienable);
        spin_unlock_irqsave(&p->lock);
}

/* Enables MSI delivery by setting Msienable; pci_msi_enable() leaves the
 * function disabled until this is called. */
void pci_msi_unmask(struct pci_device *p)
{
        unsigned int c, f;

        c = msicap(p);
        assert(c);

        spin_lock_irqsave(&p->lock);
        f = pcidev_read16(p, c + 2);
        pcidev_write16(p, c + 2, f | Msienable);
        spin_unlock_irqsave(&p->lock);
}

/* Retargets the function's MSI to a new destination APIC ID (bits 19:12 of
 * the message address) and rewrites the address register. */
void pci_msi_route(struct pci_device *p, int dest)
{
        unsigned int c;

        c = msicap(p);
        assert(c);

        spin_lock_irqsave(&p->lock);
        /* mask out the old destination, replace with new */
        p->msi_msg_addr_lo &= ~(((1 << 8) - 1) << 12);
        p->msi_msg_addr_lo |= (dest & 0xff) << 12;
        pcidev_write32(p, c + 4, p->msi_msg_addr_lo);
        spin_unlock_irqsave(&p->lock);
}
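
/* Worked example (illustrative values): if msi_msg_addr_lo was 0xfee01000
 * (dest APIC ID 1) and we route to dest 3, masking out the old destination
 * gives 0xfee00000, and the new value is 0xfee00000 | (3 << 12) = 0xfee03000.
 */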

/* Masks a single MSI-X vector via its table entry's vector control bit. */
void pci_msix_mask_vector(struct msix_irq_vector *linkage)
{
        spin_lock_irqsave(&linkage->pcidev->lock);
        __msix_mask_entry(linkage->entry);
        spin_unlock_irqsave(&linkage->pcidev->lock);
}

/* Unmasks a single MSI-X vector. */
void pci_msix_unmask_vector(struct msix_irq_vector *linkage)
{
        spin_lock_irqsave(&linkage->pcidev->lock);
        __msix_unmask_entry(linkage->entry);
        spin_unlock_irqsave(&linkage->pcidev->lock);
}

/* Retargets a single MSI-X vector to a new destination APIC ID (bits 19:12 of
 * the entry's message address). */
void pci_msix_route_vector(struct msix_irq_vector *linkage, int dest)
{
        spin_lock_irqsave(&linkage->pcidev->lock);
        /* mask out the old destination, replace with new */
        linkage->addr_lo &= ~(((1 << 8) - 1) << 12);
        linkage->addr_lo |= (dest & 0xff) << 12;
        write_mmreg32((uintptr_t)&linkage->entry->addr_lo, linkage->addr_lo);
        spin_unlock_irqsave(&linkage->pcidev->lock);
}