akaros/kern/arch/x86/mp.c
<<
>>
Prefs
   1/* This file is part of the UCB release of Plan 9. It is subject to the license
   2 * terms in the LICENSE file found in the top-level directory of this
   3 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
   4 * part of the UCB release of Plan 9, including this file, may be copied,
   5 * modified, propagated, or distributed except according to the terms contained
   6 * in the LICENSE file. */
   7
   8#include <slab.h>
   9#include <kmalloc.h>
  10#include <kref.h>
  11#include <string.h>
  12#include <stdio.h>
  13#include <assert.h>
  14#include <error.h>
  15#include <cpio.h>
  16#include <pmap.h>
  17#include <smp.h>
  18#include <net/ip.h>
  19#include <arch/mptables.h>
  20#include <arch/ioapic.h>
  21
  22/*
  23 * MultiProcessor Specification Version 1.[14].
  24 */
  25typedef struct {                                /* MP Floating Pointer */
  26        uint8_t signature[4];                   /* "_MP_" */
  27        uint8_t addr[4];                        /* PCMP */
  28        uint8_t length;                         /* 1 */
  29        uint8_t revision;                       /* [14] */
  30        uint8_t checksum;
  31        uint8_t feature[5];
  32} _MP_;
  33
  34typedef struct {                                /* MP Configuration Table */
  35        uint8_t signature[4];                   /* "PCMP" */
  36        uint8_t length[2];
  37        uint8_t revision;                       /* [14] */
  38        uint8_t checksum;
  39        uint8_t string[20];                     /* OEM + Product ID */
  40        uint8_t oaddr[4];                       /* OEM table pointer */
  41        uint8_t olength[2];                     /* OEM table length */
  42        uint8_t entry[2];                       /* entry count */
  43        uint8_t apicpa[4];                      /* local APIC address */
  44        uint8_t xlength[2];                     /* extended table length */
  45        uint8_t xchecksum;                      /* extended table checksum */
  46        uint8_t reserved;
  47
  48        uint8_t entries[];
  49} PCMP;
  50
  51typedef struct {
  52        char type[6];
  53        int polarity;                           /* default for this bus */
  54        int trigger;                            /* default for this bus */
  55} Mpbus;
  56
  57static Mpbus mpbusdef[] = {
  58        {"PCI   ", IPlow, TMlevel,},
  59        {"ISA   ", IPhigh, TMedge,},
  60};
  61
  62/* Editable version of the MP tables so we can fix botched entries.  Kmalloced,
  63 * never freed.  Might be NULL if pcmp checks failed.*/
  64static PCMP *pcmp;
  65
  66static Mpbus *mpbus[Nbus];
  67int mpisabusno = -1;
  68#define MP_VERBOSE_DEBUG 0
  69
  70static void mpintrprint(char *s, uint8_t * p)
  71{
  72        char buf[128], *b, *e;
  73        char format[] = " type %d flags %p bus %d IRQ %d APIC %d INTIN %d\n";
  74
  75        b = buf;
  76        e = b + sizeof(buf);
  77/* can't use seprintf yet!
  78        b = seprintf(b, e, "mpparse: intr:");
  79        if(s != NULL)
  80                b = seprintf(b, e, " %s:", s);
  81        seprintf(b, e, format, p[1], l16get(p+2), p[4], p[5], p[6], p[7]);
  82        printd(buf);
  83*/
  84        printk("mpparse: intr:");
  85        if (s != NULL)
  86                printk(" %s:", s);
  87        printk(format, p[1], l16get(p + 2), p[4], p[5], p[6], p[7]);
  88}
  89
  90/* I've seen busted MP tables routes with invalid IOAPIC ids and INTINs that are
  91 * out of range.  We can look at the INTINs to try to figure out which IOAPIC
  92 * they meant, and then adjust the INTINs too.
  93 *
  94 * Specifically, the machine I saw had two IOAPICs, neither of which had good
  95 * iointr APIC IDs.  ACPI and the MP tables said I had IOAPICS 8 and 9.  The
  96 * IOINTRs APIC IDs were 0 and 2.  Additionally, 2's INTINs were all beyond the
  97 * range of the 24 nrtds for that IOAPIC.  However, that IOAPIC's ibase was 24
  98 * too.
  99 *
 100 * Combined, these two clues mean the INTINs are in the global ibase/route
 101 * space, and we can tell which IOAPIC to use based on the INTIN.  This works at
 102 * least for the IOAPIC 0 (8) on my hardware (IRQ routing works).  I haven't
 103 * been able to test on devices on the upper APIC (9). */
 104static int repair_iointr(uint8_t *iointr)
 105{
 106        struct apic *ioapic;
 107        int ioapic_id;
 108        int intin = iointr[7];
 109
 110        for (int i = 0; i < Napic; i++) {
 111                ioapic = &xioapic[i];
 112                if (!ioapic->useable)
 113                        continue;
 114                if (ioapic->ibase <= intin &&
 115                    intin < ioapic->ibase + ioapic->nrdt) {
 116                        iointr[6] = i;
 117                        iointr[7] = intin - ioapic->ibase;
 118                        return 0;
 119                }
 120        }
 121        return -1;
 122}
 123
 124static uint32_t mpmkintr(uint8_t * p)
 125{
 126        uint32_t v;
 127        struct apic *apic;
 128        int n, polarity, trigger;
 129
 130        /*
 131         * Check valid bus, interrupt input pin polarity
 132         * and trigger mode. If the APIC ID is 0xff it means
 133         * all APICs of this type so those checks for useable
 134         * APIC and valid INTIN must also be done later in
 135         * the appropriate init routine in that case. It's hard
 136         * to imagine routing a signal to all IOAPICs, the
 137         * usual case is routing NMI and ExtINT to all LAPICs.
 138         */
 139        if (mpbus[p[4]] == NULL) {
 140                mpintrprint("no source bus", p);
 141                return 0;
 142        }
 143        if (p[6] != 0xff) {
 144                if (Napic < 256 && p[6] >= Napic) {
 145                        mpintrprint("APIC ID out of range", p);
 146                        return 0;
 147                }
 148                switch (p[0]) {
 149                default:
 150                        mpintrprint("INTIN botch", p);
 151                        return 0;
 152                case 3: /* IOINTR */
 153                        apic = &xioapic[p[6]];
 154                        if (!apic->useable) {
 155                                mpintrprint("unuseable ioapic", p);
 156                                if (repair_iointr(p)) {
 157                                        mpintrprint("unrepairable iointr", p);
 158                                        return 0;
 159                                }
 160                                mpintrprint("repaired iointr", p);
 161                                /* Repair found a usable apic */
 162                                apic = &xioapic[p[6]];
 163                        }
 164                        if (p[7] >= apic->nrdt) {
 165                                mpintrprint("IO INTIN out of range", p);
 166                                return 0;
 167                        }
 168                        break;
 169                case 4: /* LINTR */
 170                        apic = &xlapic[p[6]];
 171                        if (!apic->useable) {
 172                                mpintrprint("unuseable lapic", p);
 173                                return 0;
 174                        }
 175                        if (p[7] >= ARRAY_SIZE(apic->lvt)) {
 176                                mpintrprint("LOCAL INTIN out of range", p);
 177                                return 0;
 178                        }
 179                        break;
 180                }
 181        }
 182        n = l16get(p + 2);
 183        if ((polarity = (n & 0x03)) == 2 || (trigger = ((n >> 2) & 0x03)) == 2)
 184        {
 185                mpintrprint("invalid polarity/trigger", p);
 186                return 0;
 187        }
 188
 189        /*
 190         * Create the low half of the vector table entry (LVT or RDT).
 191         * For the NMI, SMI and ExtINT cases, the polarity and trigger
 192         * are fixed (but are not always consistent over IA-32 generations).
 193         * For the INT case, either the polarity/trigger are given or
 194         * it defaults to that of the source bus;
 195         * whether INT is Fixed or Lowest Priority is left until later.
 196         */
 197        v = Im;
 198        switch (p[1]) {
 199        default:
 200                mpintrprint("invalid type", p);
 201                return 0;
 202        case 0: /* INT */
 203                switch (polarity) {
 204                case 0:
 205                        v |= mpbus[p[4]]->polarity;
 206                        break;
 207                case 1:
 208                        v |= IPhigh;
 209                        break;
 210                case 3:
 211                        v |= IPlow;
 212                        break;
 213                }
 214                switch (trigger) {
 215                case 0:
 216                        v |= mpbus[p[4]]->trigger;
 217                        break;
 218                case 1:
 219                        v |= TMedge;
 220                        break;
 221                case 3:
 222                        v |= TMlevel;
 223                        break;
 224                }
 225                break;
 226        case 1: /* NMI */
 227                v |= TMedge | IPhigh | MTnmi;
 228                break;
 229        case 2: /* SMI */
 230                v |= TMedge | IPhigh | MTsmi;
 231                break;
 232        case 3: /* ExtINT */
 233                v |= TMedge | IPhigh | MTei;
 234                break;
 235        }
 236
 237        return v;
 238}
 239
 240static int mpparse(PCMP * pcmp, int maxcores)
 241{
 242        uint32_t lo;
 243        uint8_t *e, *p;
 244        int devno, i, n;
 245
 246        p = pcmp->entries;
 247        e = ((uint8_t *) pcmp) + l16get(pcmp->length);
 248        while (p < e)
 249                switch (*p) {
 250                default:
 251                        printd("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p,
 252                               e - p);
 253                        for (i = 0; p < e; i++) {
 254                                if (i && ((i & 0x0f) == 0))
 255                                        printd("\n");
 256                                printd(" 0x%#2.2x", *p);
 257                                p++;
 258                        }
 259                        printd("\n");
 260                        break;
 261                case 0: /* processor */
 262                        /*
 263                         * Initialise the APIC if it is enabled (p[3] & 0x01).
 264                         * p[1] is the APIC ID, the memory mapped address comes
 265                         * from the PCMP structure as the addess is local to the
 266                         * CPU and identical for all. Indicate whether this is
 267                         * the bootstrap processor (p[3] & 0x02).
 268                         */
 269                        printd("mpparse: cpu %d pa %p bp %d\n",
 270                                   p[1], l32get(pcmp->apicpa), p[3] & 0x02);
 271                        if ((p[3] & 0x01) != 0 && maxcores > 0) {
 272                                maxcores--;
 273                                apicinit(p[1], l32get(pcmp->apicpa), p[3] &
 274                                         0x02);
 275                        }
 276                        p += 20;
 277                        break;
 278                case 1: /* bus */
 279                        printd("mpparse: bus: %d type %6.6s\n", p[1], (char *)p
 280                               + 2);
 281                        if (p[1] >= Nbus) {
 282                                printk("mpparse: bus %d out of range\n", p[1]);
 283                                p += 8;
 284                                break;
 285                        }
 286                        if (mpbus[p[1]] != NULL) {
 287                                printk("mpparse: bus %d already allocated\n",
 288                                       p[1]);
 289                                p += 8;
 290                                break;
 291                        }
 292                        for (i = 0; i < ARRAY_SIZE(mpbusdef); i++) {
 293                                if (memcmp(p + 2, mpbusdef[i].type, 6) != 0)
 294                                        continue;
 295                                if (memcmp(p + 2, "ISA   ", 6) == 0) {
 296                                        if (mpisabusno != -1) {
 297                                                printk("mpparse: bus %d already have ISA bus %d\n",
 298                                                       p[1], mpisabusno);
 299                                                continue;
 300                                        }
 301                                        mpisabusno = p[1];
 302                                }
 303                                mpbus[p[1]] = &mpbusdef[i];
 304                                break;
 305                        }
 306                        if (mpbus[p[1]] == NULL)
 307                                printk("mpparse: bus %d type %6.6s unknown\n",
 308                                           p[1], (char *)p + 2);
 309
 310                        p += 8;
 311                        break;
 312                case 2: /* IOAPIC */
 313                        /*
 314                         * Initialise the IOAPIC if it is enabled (p[3] & 0x01).
 315                         * p[1] is the APIC ID, p[4-7] is the memory mapped
 316                         * address.
 317                         */
 318                        if (p[3] & 0x01)
 319                                ioapicinit(p[1], -1, l32get(p + 4));
 320
 321                        p += 8;
 322                        break;
 323                case 3: /* IOINTR */
 324                        /*
 325                         * p[1] is the interrupt type;
 326                         * p[2-3] contains the polarity and trigger mode;
 327                         * p[4] is the source bus;
 328                         * p[5] is the IRQ on the source bus;
 329                         * p[6] is the destination IOAPIC;
 330                         * p[7] is the INITIN pin on the destination IOAPIC.
 331                         */
 332                        if (p[6] == 0xff) {
 333                                mpintrprint("routed to all IOAPICs", p);
 334                                p += 8;
 335                                break;
 336                        }
 337                        if ((lo = mpmkintr(p)) == 0) {
 338                                if (MP_VERBOSE_DEBUG)
 339                                        mpintrprint("iointr skipped", p);
 340                                p += 8;
 341                                break;
 342                        }
 343                        if (MP_VERBOSE_DEBUG)
 344                                mpintrprint("iointr", p);
 345
 346                        /*
 347                         * Always present the device number in the style
 348                         * of a PCI Interrupt Assignment Entry. For the ISA
 349                         * bus the IRQ is the device number but unencoded.
 350                         * May need to handle other buses here in the future
 351                         * (but unlikely).
 352                         *
 353                         * For PCI devices, this field's lowest two bits are
 354                         * INT#A == 0, INT#B == 1, etc.  Bits 2-6 are the PCI
 355                         * device number.
 356                         */
 357                        devno = p[5];
 358                        if (memcmp(mpbus[p[4]]->type, "PCI   ", 6) != 0)
 359                                devno <<= 2;
 360                        ioapicintrinit(p[4], p[6], p[7], devno, lo);
 361
 362                        p += 8;
 363                        break;
 364                case 4: /* LINTR */
 365                        /*
 366                         * Format is the same as IOINTR above.
 367                         */
 368                        if ((lo = mpmkintr(p)) == 0) {
 369                                p += 8;
 370                                break;
 371                        }
 372                        if (MP_VERBOSE_DEBUG)
 373                                mpintrprint("LINTR", p);
 374
 375                        /*
 376                         * Everything was checked in mpmkintr above.
 377                         */
 378                        if (p[6] == 0xff) {
 379                                for (i = 0; i < Napic; i++) {
 380                                        if (!xlapic[i].useable ||
 381                                            xlapic[i].addr)
 382                                                continue;
 383                                        xlapic[i].lvt[p[7]] = lo;
 384                                }
 385                        } else
 386                                xlapic[p[6]].lvt[p[7]] = lo;
 387                        p += 8;
 388                        break;
 389                }
 390
 391        /*
 392         * There's nothing of interest in the extended table,
 393         * but check it for consistency.
 394         */
 395        p = e;
 396        e = p + l16get(pcmp->xlength);
 397        while (p < e)
 398                switch (*p) {
 399                default:
 400                        n = p[1];
 401                        printd("mpparse: unknown extended entry %d length %d\n",
 402                               *p, n);
 403                        for (i = 0; i < n; i++) {
 404                                if (i && ((i & 0x0f) == 0))
 405                                        printd("\n");
 406                                printd(" %#2.2ux", *p);
 407                                p++;
 408                        }
 409                        printd("\n");
 410                        break;
 411                case 128:
 412                        printd("address space mapping\n");
 413                        printd(" bus %d type %d base %p length %p\n",
 414                                   p[2], p[3], l64get(p + 4), l64get(p + 12));
 415                        p += p[1];
 416                        break;
 417                case 129:
 418                        printd("bus hierarchy descriptor\n");
 419                        printd(" bus %d sd %d parent bus %d\n", p[2], p[3],
 420                               p[4]);
 421                        p += p[1];
 422                        break;
 423                case 130:
 424                        printd("compatibility bus address space modifier\n");
 425                        printd(" bus %d pr %d range list %d\n",
 426                                   p[2], p[3], l32get(p + 4));
 427                        p += p[1];
 428                        break;
 429                }
 430        return maxcores;
 431}
 432
 433static void *sigsearch(char *signature)
 434{
 435        uintptr_t p;
 436        uint8_t *bda;
 437        void *r;
 438#if 0
 439        /*
 440         * Search for the data structure:
 441         * 1) in the first KB of the EBDA;
 442         * 2) in the last KB of system base memory;
 443         * 3) in the BIOS ROM between 0xe0000 and 0xfffff.
 444         */
 445        bda = BIOSSEG(0x40);
 446        if (memcmp(KADDR(0xfffd9), "EISA", 4) == 0) {
 447                if ((p = (bda[0x0f] << 8) | bda[0x0e])) {
 448                        if ((r = sigscan(BIOSSEG(p), 1024, signature)) != NULL)
 449                                return r;
 450                }
 451        }
 452
 453        p = ((bda[0x14] << 8) | bda[0x13]) * 1024;
 454        if ((r = sigscan(KADDR(p - 1024), 1024, signature)) != NULL)
 455                return r;
 456#endif
 457        r = sigscan(KADDR(0xe0000), 0x20000, signature);
 458        printk("Found MP table at %p\n", r);
 459        if (r != NULL)
 460                return r;
 461
 462        return NULL;
 463        /* and virtualbox hidden mp tables... */
 464//  return sigscan(KADDR(0xa0000 - 1024), 1024, signature);
 465}
 466
 467static PCMP *copy_pcmp(PCMP *pcmp)
 468{
 469        PCMP *new_pcmp;
 470        size_t n = l16get(pcmp->length) + l16get(pcmp->xlength);
 471
 472        new_pcmp = kmalloc(n, MEM_ATOMIC);
 473        assert(new_pcmp);
 474        memcpy(new_pcmp, pcmp, n);
 475        return new_pcmp;
 476}
 477
 478int mpsinit(int maxcores)
 479{
 480        uint8_t *p;
 481        int i;
 482        _MP_ *mp;
 483
 484        if ((mp = sigsearch("_MP_")) == NULL) {
 485                printk("No mp tables found, might have issues!\n");
 486                return maxcores;
 487        }
 488        /* TODO: if an IMCR exists, we should set it to 1, though i've heard
 489         * that ACPI-capable HW doesn't have the IMCR anymore. */
 490
 491        if (MP_VERBOSE_DEBUG) {
 492                printk("_MP_ @ %#p, addr %p length %ud rev %d",
 493                           mp, l32get(mp->addr), mp->length, mp->revision);
 494                for (i = 0; i < sizeof(mp->feature); i++)
 495                        printk(" %2.2p", mp->feature[i]);
 496                printk("\n");
 497        }
 498        if (mp->revision != 1 && mp->revision != 4)
 499                return maxcores;
 500        if (sigchecksum(mp, mp->length * 16) != 0)
 501                return maxcores;
 502        if ((pcmp = KADDR_NOCHECK(l32get(mp->addr))) == NULL)
 503                return maxcores;
 504        if (pcmp->revision != 1 && pcmp->revision != 4) {
 505                pcmp = NULL;
 506                return maxcores;
 507        }
 508        if (sigchecksum(pcmp, l16get(pcmp->length)) != 0) {
 509                pcmp = NULL;
 510                return maxcores;
 511        }
 512
 513        pcmp = copy_pcmp(pcmp);
 514
 515        if (MP_VERBOSE_DEBUG) {
 516                printk("PCMP @ %#p length %p revision %d\n",
 517                           pcmp, l16get(pcmp->length), pcmp->revision);
 518                printk(" %20.20s oaddr %p olength %p\n",
 519                           (char *)pcmp->string, l32get(pcmp->oaddr),
 520                           l16get(pcmp->olength));
 521                printk(" entry %d apicpa %p\n",
 522                           l16get(pcmp->entry), l32get(pcmp->apicpa));
 523
 524                printk(" xlength %p xchecksum %p\n",
 525                           l16get(pcmp->xlength), pcmp->xchecksum);
 526        }
 527        if (pcmp->xchecksum != 0) {
 528                p = ((uint8_t *) pcmp) + l16get(pcmp->length);
 529                i = sigchecksum(p, l16get(pcmp->xlength));
 530                if (((i + pcmp->xchecksum) & 0xff) != 0) {
 531                        printd("extended table checksums to %p\n", i);
 532                        return maxcores;
 533                }
 534        }
 535
 536        /*
 537         * Parse the PCMP table and set up the datastructures
 538         * for later interrupt enabling and application processor
 539         * startup.
 540         */
 541        return mpparse(pcmp, maxcores);
 542}
 543