akaros/kern/arch/x86/msi.c
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <cpio.h>
#include <pmap.h>
#include <smp.h>
#include <net/ip.h>

enum {
        Dpcicap         = 1<<0,
        Dmsicap         = 1<<1,
        Dvec            = 1<<2,
        Debug           = 0,
};

enum {
/* MSI address format
 *
 * +31----------------------20+19----------12+11--------4+--3--+--2--+1---0+
 * |       0xfee              | Dest APIC ID |  Reserved | RH  | DM  |  XX |
 * +--------------------------+--------------+-----------+-----+-----+-----+
 *
 * RH: Redirection Hint
 * DM: Destination Mode
 * XX: Probably reserved, set to 0
 */
        Msiabase        = 0xfee00000u,
        Msiadest        = 1<<12,        /* same as 63:56 of apic vector */
        Msiaedest       = 1<<4,         /* same as 55:48 of apic vector */
        Msialowpri      = 1<<3,         /* redirection hint */
        Msialogical     = 1<<2,

/* MSI data format
 * +63-------------------------------------------------------------------32+
 * |                          Reserved                                     |
 * +-------------------------------+-15-+-14-+--------+10----8+7----------0+
 * |          Reserved             | TM | Lv | Reserv | Dmode |   Vector   |
 * +-------------------------------+----+----+--------+-------+------------+
 *
 * Dmode: delivery mode (like APIC/LVT messages).  Usually 000 (Fixed).
 * TM: Trigger mode (0 Edge, 1 Level)
 * Lv: Level assert (0 Deassert, 1 Assert)
 *
 * For more info, check Intel's SDM Vol 3 (grep message signal). */
        Msidlevel       = 1<<15,
        Msidassert      = 1<<14,
        Msidmode        = 1<<8,         /* 3 bits; delivery mode */
        Msidvector      = 0xff<<0,
};

enum {
        /* msi capabilities */
        Vmask           = 1<<8, /* Vectors can be masked. Optional. */
        Cap64           = 1<<7, /* 64-bit addresses. Optional. */
        Mmesgmsk        = 7<<4, /* Mask for # of messages allowed. See 6.8.1.3 */
        Mmcap           = 7<<1, /* # of messages the function can support. */
        Msienable       = 1<<0, /* Enable. */
        /* msix capabilities */
        Msixenable      = 1<<15,
        Msixmask        = 1<<14,
        Msixtblsize     = 0x7ff,
};

/* Find the offset in config space of this function of the msi capability.
 * It is defined in 6.8.1 and is variable-sized.  Returns 0 on failure. */
static int msicap(struct pci_device *p)
{
        return p->caps[PCI_CAP_ID_MSI];
}

/* Find the offset in config space of this function of the msi-x capability.
 * It is defined in 6.8.1 and is variable-sized.  Returns 0 on failure. */
static int msixcap(struct pci_device *p)
{
        return p->caps[PCI_CAP_ID_MSIX];
}

static int msi_blacklist(struct pci_device *p)
{
        switch (p->ven_id << 16 | p->dev_id) {
                case 0x11ab << 16 | 0x6485:
                case 0x8086 << 16 | 0x100f:
                        return -1;
        }
        return 0;
}

static int msix_blacklist(struct pci_device *p)
{
        switch (p->ven_id << 16 | p->dev_id) {
//      case 0x11ab << 16 | 0x6485:     /* placeholder */
                return -1;
        }
        return 0;
}

static uint32_t msi_make_addr_lo(uint64_t vec)
{
        unsigned int dest, lopri, logical;

        /* The destination is the traditional 8-bit APIC ID, which is in 63:56
         * of the vector.  Later we may need to deal with extra destination
         * bits (Msiaedest, in this code).  I haven't seen anything in the
         * Intel SDM about using Msiaedest (the bits are reserved). */
        dest = vec >> 56;
        /* Lopri is rarely set, and Intel doesn't recommend using it.  With
         * MSI, the lopri field is actually a redirection hint, and it also
         * must be set when sending logical messages. */
        lopri = (vec & 0x700) == MTlp;
        logical = (vec & Lm) != 0;
        if (logical)
                lopri = 1;
        return Msiabase | Msiadest * dest | Msialowpri * lopri |
               Msialogical * logical;
}

static uint32_t msi_make_data(uint64_t vec)
{
        unsigned int deliv_mode;

        deliv_mode = (vec >> 8) & 7;
        /* We can only specify the lower 16 bits of the MSI message, the rest
         * gets forced to 0 by the device.  MSI-X can use the full 32 bits.
         * We're assuming edge triggered here. */
        return Msidmode * deliv_mode | ((unsigned int)vec & 0xff);
}
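
/* For illustration only (a sketch; these values are not computed anywhere in
 * this file): an ioapic-style route aimed at APIC ID 1 with vector 0x41,
 * fixed delivery, physical destination (vec = 1ULL << 56 | 0x41) encodes as:
 *
 *      msi_make_addr_lo(vec) == 0xfee01000     (Msiabase | 1 * Msiadest)
 *      msi_make_data(vec)    == 0x00000041     (fixed delivery, vector 0x41)
 */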

/* TODO: do we need to be careful of reserved bits?  SDM says to preserve those
 * fields on write. */
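/* A sketch of the MSI capability layout used below (PCI spec 6.8.1):
 *      cap + 0:  capability ID and next-capability pointer
 *      cap + 2:  message control (Msienable, Mmcap, Mmesgmsk, Cap64, Vmask)
 *      cap + 4:  message address, low 32 bits
 *      cap + 8:  message address, high 32 bits (only if Cap64)
 *      cap + datao:     16-bit message data (datao is 8, or 12 if Cap64)
 *      cap + datao + 4: per-vector mask bits (only if Vmask) */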
static void __msi_set_addr_data(struct pci_device *p, int cap)
{
        unsigned int f, datao;

        /* Read the control word and clear out the Mmesgmsk bits, so that no
         * multiple messages are enabled. */
        f = pcidev_read16(p, cap + 2) & ~Mmesgmsk;

        /* Data begins at 8 bytes in. */
        datao = 8;
        pcidev_write32(p, cap + 4, p->msi_msg_addr_lo);

        /* Even if it's 64-bit capable, we do nothing with the high order
         * bits.  If it is 64-bit, we need to offset datao (data offset) by 4
         * (i.e. another 32 bits). */
        if (f & Cap64) {
                datao += 4;
                pcidev_write32(p, cap + 8, 0);
        }

        pcidev_write16(p, cap + datao, p->msi_msg_data);

        /* If we have the option of masking the vectors, blow all the masks to
         * 0.  It's a 32-bit mask. */
        if (f & Vmask)
                pcidev_write32(p, cap + datao + 4, 0);

        /* Now write the control bits back, with the Mmesgmsk field (which
         * encodes a power of 2) set to 0 (meaning one vector only).  Note we
         * still haven't enabled MSI.  Will do that when we unmask.  According
         * to the spec, we're not supposed to use the Msienable bit to mask
         * the IRQ, though I don't see how we can mask on non-Vmask-supported
         * HW. */
        pcidev_write16(p, cap + 2, f);
}

/* Set up a single function on a single device (see section 6.8.1 of the PCI
 * spec).  We need to take the vec, bust it up into bits, and put parts of it
 * in the msi address and parts in the msi data. */
int pci_msi_enable(struct pci_device *p, uint64_t vec)
{
        unsigned int c;

        spin_lock_irqsave(&p->lock);
        if (p->msix_ready) {
                printk("MSI: MSI-X is already enabled, aborting\n");
                spin_unlock_irqsave(&p->lock);
                return -1;
        }
        /* msi_ready means "has an IRQ vector assigned, loaded, and masked".
         * We're only allowing one MSI vector per device.  In comparison,
         * msix_ready means "has all the stuff set up for MSI-X so you can get
         * some IRQ vector, load the msix_entry, and go." */
        if (p->msi_ready) {
                printk("MSI: MSI is already enabled, aborting\n");
                spin_unlock_irqsave(&p->lock);
                return -1;
        }
        /* Get the offset of the MSI capability in the function's config
         * space. */
        c = msicap(p);
        if (!c) {
                spin_unlock_irqsave(&p->lock);
                return -1;
        }
        if (msi_blacklist(p) != 0) {
                spin_unlock_irqsave(&p->lock);
                return -1;
        }
        p->msi_msg_addr_lo = msi_make_addr_lo(vec);
        p->msi_msg_addr_hi = 0;
        p->msi_msg_data = msi_make_data(vec);
        __msi_set_addr_data(p, c);
        p->msi_ready = true;
        spin_unlock_irqsave(&p->lock);
        return 0;
}
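
/* Each MSI-X table entry is 16 bytes: message address low, message address
 * high, message data, and vector control, where bit 0 of vector control masks
 * the vector (PCI spec 6.8.2).  struct msix_entry is assumed to mirror that
 * layout, with 'vector' naming the vector control dword. */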

static void __msix_mask_entry(struct msix_entry *entry)
{
        uintptr_t reg = (uintptr_t)&entry->vector;

        write_mmreg32(reg, read_mmreg32(reg) | 0x1);
}

static void __msix_unmask_entry(struct msix_entry *entry)
{
        uintptr_t reg = (uintptr_t)&entry->vector;

        write_mmreg32(reg, read_mmreg32(reg) & ~0x1);
}
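
/* The MSI-X table and PBA locations are each given by a 32-bit register
 * holding a BAR index (BIR) in bits 2:0 and an offset into that BAR in the
 * remaining bits (PCI spec 6.8.2). */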

static uintptr_t msix_get_capbar_paddr(struct pci_device *p, int offset)
{
        uint32_t bir, capbar_off;
        uintptr_t membar;

        bir = pcidev_read32(p, offset);
        capbar_off = bir & ~0x7;
        bir &= 0x7;
        membar = pci_get_membar(p, bir);

        if (!membar) {
                printk("MSI-X: no cap membar, bir %d\n", bir);
                return 0;
        }
        membar += capbar_off;
        return membar;
}

static void __msix_reset_entry(struct msix_entry *entry)
{
        __msix_mask_entry(entry);
        write_mmreg32((uintptr_t)&entry->data, 0);
}

/* One time initialization of MSI-X for a PCI device.  -1 on error.  Otherwise,
 * the device will be ready to assign/route MSI-X entries/vectors.  All vectors
 * are masked, but the overall MSI-X function is unmasked.
 *
 * Hold the pci_device lock. */
static int __pci_msix_init(struct pci_device *p)
{
        unsigned int c;
        uint16_t f;
        int tbl_bir, tbl_off, pba_bir, pba_off;
        struct msix_entry *entry;

        if (p->msix_ready)
                return 0;
        if (p->msi_ready) {
                printk("MSI-X: MSI is already on, aborting\n");
                return -1;
        }
        if (msix_blacklist(p) != 0)
                return -1;
        c = msixcap(p);
        if (c == 0)
                return -1;
        f = pcidev_read16(p, c + 2);
        /* enable and mask the entire function/all vectors */
        f |= Msixenable | Msixmask;
        pcidev_write16(p, c + 2, f);

        p->msix_tbl_paddr = msix_get_capbar_paddr(p, c + 4);
        p->msix_pba_paddr = msix_get_capbar_paddr(p, c + 8);
        if (!p->msix_tbl_paddr || !p->msix_pba_paddr) {
                /* disable msix, so we can possibly use msi */
                pcidev_write16(p, c + 2, f & ~Msixenable);
                printk("MSI-X: Missing a tbl (%p) or PBA (%p) paddr!\n",
                       p->msix_tbl_paddr, p->msix_pba_paddr);
                return -1;
        }
        p->msix_nr_vec = (f & Msixtblsize) + 1;
        p->msix_tbl_vaddr = vmap_pmem_nocache(p->msix_tbl_paddr,
                                              p->msix_nr_vec *
                                              sizeof(struct msix_entry));
        if (!p->msix_tbl_vaddr) {
                pcidev_write16(p, c + 2, f & ~Msixenable);
                printk("MSI-X: unable to vmap the Table!\n");
                return -1;
        }
        p->msix_pba_vaddr = vmap_pmem_nocache(p->msix_pba_paddr,
                                              ROUNDUP(p->msix_nr_vec, 8) / 8);
        if (!p->msix_pba_vaddr) {
                pcidev_write16(p, c + 2, f & ~Msixenable);
                printk("MSI-X: unable to vmap the PBA!\n");
                vunmap_vmem(p->msix_tbl_vaddr,
                            p->msix_nr_vec * sizeof(struct msix_entry));
                return -1;
        }
        /* they should all be masked already, but remasking just in case.
         * likewise, we need to 0 out the data, since we'll use the lower byte
         * later when determining if an msix vector is free or not. */
        entry = (struct msix_entry*)p->msix_tbl_vaddr;
        for (int i = 0; i < p->msix_nr_vec; i++, entry++)
                __msix_reset_entry(entry);
        /* unmask the device, now that all the vectors are masked */
        f &= ~Msixmask;
        pcidev_write16(p, c + 2, f);
        p->msix_ready = true;
        return 0;
}

/* Some parts of msix init need to happen during boot.  Devices can call this
 * during their reset methods, and then later register their IRQs during
 * attach.  Other OS's also alloc the vector around this time, though we'll
 * hold off on that for now. */
int pci_msix_init(struct pci_device *p)
{
        int ret;

        spin_lock_irqsave(&p->lock);
        ret = __pci_msix_init(p);
        spin_unlock_irqsave(&p->lock);
        return ret;
}

/* Enables an MSI-X vector for a PCI device.  vec is formatted like an ioapic
 * route.  This should be able to handle multiple vectors for a device.
 * Returns a msix_irq_vector linkage struct on success (the connection btw an
 * irq_h and the specific {pcidev, entry}), and 0 on failure. */
struct msix_irq_vector *pci_msix_enable(struct pci_device *p, uint64_t vec)
{
        int i;
        struct msix_entry *entry;
        struct msix_irq_vector *linkage;
        unsigned int c, datao;

        spin_lock_irqsave(&p->lock);
        /* Ensure we're init'd.  We could remove this in the future, though not
         * everyone calls the extern pci_msix_init. */
        if (__pci_msix_init(p) < 0) {
                spin_unlock_irqsave(&p->lock);
                return 0;
        }
        /* find an unused slot (no apic_vector assigned).  later, we might want
         * to point back to the irq_hs for each entry.  not a big deal now. */
        entry = (struct msix_entry*)p->msix_tbl_vaddr;
        for (i = 0; i < p->msix_nr_vec; i++, entry++)
                if (!(read_mmreg32((uintptr_t)&entry->data) & 0xff))
                        break;
        if (i == p->msix_nr_vec) {
                printk("[kernel] unable to alloc an MSI-X vector (bug?)\n");
                spin_unlock_irqsave(&p->lock);
                return 0;
        }
        linkage = kmalloc(sizeof(struct msix_irq_vector), MEM_WAIT);
        linkage->pcidev = p;
        linkage->entry = entry;
        linkage->addr_lo = msi_make_addr_lo(vec);
        linkage->addr_hi = 0;
        linkage->data = msi_make_data(vec);
        write_mmreg32((uintptr_t)&entry->data, linkage->data);
        write_mmreg32((uintptr_t)&entry->addr_lo, linkage->addr_lo);
        write_mmreg32((uintptr_t)&entry->addr_hi, linkage->addr_hi);
        spin_unlock_irqsave(&p->lock);
        return linkage;
}
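
/* A minimal usage sketch (hypothetical driver code; apic_id and vno are
 * illustrative names, not kernel symbols):
 *
 *      struct msix_irq_vector *v;
 *
 *      v = pci_msix_enable(pcidev, ((uint64_t)apic_id << 56) | vno);
 *      if (v)
 *              pci_msix_unmask_vector(v);
 */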

void pci_dump_msix_table(struct pci_device *p)
{
        struct msix_entry *entry;
        void *tbl = (void*)p->msix_tbl_vaddr;

        hexdump(tbl, p->msix_nr_vec * sizeof(struct msix_entry));
        entry = (struct msix_entry*)p->msix_tbl_vaddr;
        for (int i = 0; i < p->msix_nr_vec; i++, entry++)
                printk("Entry %d, addr hi:lo 0x%08x:%08x data 0x%08x\n", i,
                       entry->addr_hi, entry->addr_lo, entry->data);
}

void pci_msi_mask(struct pci_device *p)
{
        unsigned int c, f;

        c = msicap(p);
        assert(c);

        spin_lock_irqsave(&p->lock);
        f = pcidev_read16(p, c + 2);
        pcidev_write16(p, c + 2, f & ~Msienable);
        spin_unlock_irqsave(&p->lock);
}

void pci_msi_unmask(struct pci_device *p)
{
        unsigned int c, f;

        c = msicap(p);
        assert(c);

        spin_lock_irqsave(&p->lock);
        f = pcidev_read16(p, c + 2);
        pcidev_write16(p, c + 2, f | Msienable);
        spin_unlock_irqsave(&p->lock);
}

void pci_msi_route(struct pci_device *p, int dest)
{
        unsigned int c, f;

        c = msicap(p);
        assert(c);

        spin_lock_irqsave(&p->lock);
        /* mask out the old destination, replace with new */
        p->msi_msg_addr_lo &= ~(((1 << 8) - 1) << 12);
        p->msi_msg_addr_lo |= (dest & 0xff) << 12;
        pcidev_write32(p, c + 4, p->msi_msg_addr_lo);
        spin_unlock_irqsave(&p->lock);
}

void pci_msi_reset_vector(struct pci_device *p)
{
        /* Might be overly paranoid.  We're clearing out any old vector set in
         * the device. */
        spin_lock_irqsave(&p->lock);
        p->msi_msg_addr_lo = 0;
        p->msi_msg_addr_hi = 0;
        p->msi_msg_data = 0;
        __msi_set_addr_data(p, msicap(p));
        p->msi_ready = false;
        spin_unlock_irqsave(&p->lock);
}

void pci_msix_mask_vector(struct msix_irq_vector *linkage)
{
        spin_lock_irqsave(&linkage->pcidev->lock);
        __msix_mask_entry(linkage->entry);
        spin_unlock_irqsave(&linkage->pcidev->lock);
}

void pci_msix_unmask_vector(struct msix_irq_vector *linkage)
{
        spin_lock_irqsave(&linkage->pcidev->lock);
        __msix_unmask_entry(linkage->entry);
        spin_unlock_irqsave(&linkage->pcidev->lock);
}

void pci_msix_route_vector(struct msix_irq_vector *linkage, int dest)
{
        spin_lock_irqsave(&linkage->pcidev->lock);
        /* mask out the old destination, replace with new */
        linkage->addr_lo &= ~(((1 << 8) - 1) << 12);
        linkage->addr_lo |= (dest & 0xff) << 12;
        write_mmreg32((uintptr_t)&linkage->entry->addr_lo, linkage->addr_lo);
        spin_unlock_irqsave(&linkage->pcidev->lock);
}

void pci_msix_reset_vector(struct msix_irq_vector *linkage)
{
        spin_lock_irqsave(&linkage->pcidev->lock);
        __msix_reset_entry(linkage->entry);
        spin_unlock_irqsave(&linkage->pcidev->lock);
}