x86: MP table cleanup, multiple IOAPICs
[akaros.git] / kern / arch / x86 / mp.c
1 /* This file is part of the UCB release of Plan 9. It is subject to the license
2  * terms in the LICENSE file found in the top-level directory of this
3  * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
4  * part of the UCB release of Plan 9, including this file, may be copied,
5  * modified, propagated, or distributed except according to the terms contained
6  * in the LICENSE file. */
7
8 #include <vfs.h>
9 #include <kfs.h>
10 #include <slab.h>
11 #include <kmalloc.h>
12 #include <kref.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <assert.h>
16 #include <error.h>
17 #include <cpio.h>
18 #include <pmap.h>
19 #include <smp.h>
20 #include <ip.h>
21
22 /*
23  * MultiProcessor Specification Version 1.[14].
24  */
/*
 * MP Floating Pointer structure.
 *
 * Every member is a uint8_t (or an array of them); multi-byte quantities
 * are stored as byte arrays and decoded with l32get() by the code below.
 */
typedef struct {
        /* "_MP_" */
        uint8_t signature[4];
        /* address of the PCMP configuration table (decoded with l32get) */
        uint8_t addr[4];
        /* normally 1; mpsinit checksums length * 16 bytes */
        uint8_t length;
        /* [14] */
        uint8_t revision;
        uint8_t checksum;
        uint8_t feature[5];
} _MP_;
33
/*
 * MP Configuration Table header, followed directly by the table entries.
 *
 * All members are uint8_t arrays; the multi-byte ones are decoded with
 * l16get()/l32get() by the parsing code.
 */
typedef struct {
        /* "PCMP" */
        uint8_t signature[4];
        /* length of the base table (header included, see mpparse) */
        uint8_t length[2];
        /* [14] */
        uint8_t revision;
        uint8_t checksum;
        /* OEM + Product ID */
        uint8_t string[20];
        /* OEM table pointer */
        uint8_t oaddr[4];
        /* OEM table length */
        uint8_t olength[2];
        /* entry count */
        uint8_t entry[2];
        /* local APIC address */
        uint8_t apicpa[4];
        /* extended table length */
        uint8_t xlength[2];
        /* extended table checksum */
        uint8_t xchecksum;
        uint8_t reserved;

        /* first base-table entry; the entries run up to 'length' bytes */
        uint8_t entries[];
} PCMP;
50
/*
 * Description of a bus type known to the MP table parser, together with
 * the interrupt defaults applied when an entry does not specify them.
 */
typedef struct {
        /* six-character, space-padded bus type string (no NUL terminator) */
        char type[6];
        /* default polarity for this bus */
        int polarity;
        /* default trigger mode for this bus */
        int trigger;
} Mpbus;
56
57 static Mpbus mpbusdef[] = {
58         {"PCI   ", IPlow, TMlevel,},
59         {"ISA   ", IPhigh, TMedge,},
60 };
61
62 static Mpbus *mpbus[Nbus];
63 int mpisabusno = -1;
64 #define MP_VERBOSE_DEBUG 0
65
/*
 * Print one MP table interrupt entry (IOINTR or LINTR) on the console.
 *
 * s: optional tag explaining why the entry is printed; may be NULL.
 * p: start of the 8-byte entry: p[1] is the interrupt type, p[2-3] the
 *    polarity/trigger flags, p[4] the source bus, p[5] the source IRQ,
 *    p[6] the destination APIC and p[7] the destination INTIN.
 */
static void mpintrprint(char *s, uint8_t * p)
{
        printk("mpparse: intr:");
        if (s != NULL)
                printk(" %s:", s);
        /*
         * The flags are a 16-bit integer, not a pointer: print them through
         * an integer conversion so the vararg type matches the format.
         */
        printk(" type %d flags 0x%x bus %d IRQ %d APIC %d INTIN %d\n",
               p[1], (unsigned int)l16get(p + 2), p[4], p[5], p[6], p[7]);
}
85
86 static uint32_t mpmkintr(uint8_t * p)
87 {
88         uint32_t v;
89         struct apic *apic;
90         int n, polarity, trigger;
91
92         /*
93          * Check valid bus, interrupt input pin polarity
94          * and trigger mode. If the APIC ID is 0xff it means
95          * all APICs of this type so those checks for useable
96          * APIC and valid INTIN must also be done later in
97          * the appropriate init routine in that case. It's hard
98          * to imagine routing a signal to all IOAPICs, the
99          * usual case is routing NMI and ExtINT to all LAPICs.
100          */
101         if (mpbus[p[4]] == NULL) {
102                 mpintrprint("no source bus", p);
103                 return 0;
104         }
105         if (p[6] != 0xff) {
106                 if (Napic < 256 && p[6] >= Napic) {
107                         mpintrprint("APIC ID out of range", p);
108                         return 0;
109                 }
110                 switch (p[0]) {
111                         default:
112                                 mpintrprint("INTIN botch", p);
113                                 return 0;
114                         case 3: /* IOINTR */
115                                 apic = &xioapic[p[6]];
116                                 if (!apic->useable) {
117                                         mpintrprint("unuseable ioapic", p);
118                                         return 0;
119                                 }
120                                 if (p[7] >= apic->nrdt) {
121                                         mpintrprint("IO INTIN out of range", p);
122                                         return 0;
123                                 }
124                                 break;
125                         case 4: /* LINTR */
126                                 apic = &xlapic[p[6]];
127                                 if (!apic->useable) {
128                                         mpintrprint("unuseable lapic", p);
129                                         return 0;
130                                 }
131                                 if (p[7] >= ARRAY_SIZE(apic->lvt)) {
132                                         mpintrprint("LOCAL INTIN out of range", p);
133                                         return 0;
134                                 }
135                                 break;
136                 }
137         }
138         n = l16get(p + 2);
139         if ((polarity = (n & 0x03)) == 2 || (trigger = ((n >> 2) & 0x03)) == 2) {
140                 mpintrprint("invalid polarity/trigger", p);
141                 return 0;
142         }
143
144         /*
145          * Create the low half of the vector table entry (LVT or RDT).
146          * For the NMI, SMI and ExtINT cases, the polarity and trigger
147          * are fixed (but are not always consistent over IA-32 generations).
148          * For the INT case, either the polarity/trigger are given or
149          * it defaults to that of the source bus;
150          * whether INT is Fixed or Lowest Priority is left until later.
151          */
152         v = Im;
153         switch (p[1]) {
154                 default:
155                         mpintrprint("invalid type", p);
156                         return 0;
157                 case 0: /* INT */
158                         switch (polarity) {
159                                 case 0:
160                                         v |= mpbus[p[4]]->polarity;
161                                         break;
162                                 case 1:
163                                         v |= IPhigh;
164                                         break;
165                                 case 3:
166                                         v |= IPlow;
167                                         break;
168                         }
169                         switch (trigger) {
170                                 case 0:
171                                         v |= mpbus[p[4]]->trigger;
172                                         break;
173                                 case 1:
174                                         v |= TMedge;
175                                         break;
176                                 case 3:
177                                         v |= TMlevel;
178                                         break;
179                         }
180                         break;
181                 case 1: /* NMI */
182                         v |= TMedge | IPhigh | MTnmi;
183                         break;
184                 case 2: /* SMI */
185                         v |= TMedge | IPhigh | MTsmi;
186                         break;
187                 case 3: /* ExtINT */
188                         v |= TMedge | IPhigh | MTei;
189                         break;
190         }
191
192         return v;
193 }
194
195 static int mpparse(PCMP * pcmp, int maxcores)
196 {
197         uint32_t lo;
198         uint8_t *e, *p;
199         int devno, i, n;
200
201         p = pcmp->entries;
202         e = ((uint8_t *) pcmp) + l16get(pcmp->length);
203         while (p < e)
204                 switch (*p) {
205                         default:
206                                 printd("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p, e - p);
207                                 for (i = 0; p < e; i++) {
208                                         if (i && ((i & 0x0f) == 0))
209                                                 printd("\n");
210                                         printd(" 0x%#2.2x", *p);
211                                         p++;
212                                 }
213                                 printd("\n");
214                                 break;
215                         case 0: /* processor */
216                                 /*
217                                  * Initialise the APIC if it is enabled (p[3] & 0x01).
218                                  * p[1] is the APIC ID, the memory mapped address comes
219                                  * from the PCMP structure as the addess is local to the
220                                  * CPU and identical for all. Indicate whether this is
221                                  * the bootstrap processor (p[3] & 0x02).
222                                  */
223                                 printk("mpparse: cpu %d pa %p bp %d\n",
224                                            p[1], l32get(pcmp->apicpa), p[3] & 0x02);
225                                 if ((p[3] & 0x01) != 0 && maxcores > 0) {
226                                         maxcores--;
227                                         apicinit(p[1], l32get(pcmp->apicpa), p[3] & 0x02);
228                                 }
229                                 p += 20;
230                                 break;
231                         case 1: /* bus */
232                                 printk("mpparse: bus: %d type %6.6s\n", p[1], (char *)p + 2);
233                                 if (p[1] >= Nbus) {
234                                         printd("mpparse: bus %d out of range\n", p[1]);
235                                         p += 8;
236                                         break;
237                                 }
238                                 if (mpbus[p[1]] != NULL) {
239                                         printd("mpparse: bus %d already allocated\n", p[1]);
240                                         p += 8;
241                                         break;
242                                 }
243                                 for (i = 0; i < ARRAY_SIZE(mpbusdef); i++) {
244                                         if (memcmp(p + 2, mpbusdef[i].type, 6) != 0)
245                                                 continue;
246                                         if (memcmp(p + 2, "ISA   ", 6) == 0) {
247                                                 if (mpisabusno != -1) {
248                                                         printd("mpparse: bus %d already have ISA bus %d\n",
249                                                                    p[1], mpisabusno);
250                                                         continue;
251                                                 }
252                                                 mpisabusno = p[1];
253                                         }
254                                         mpbus[p[1]] = &mpbusdef[i];
255                                         break;
256                                 }
257                                 if (mpbus[p[1]] == NULL)
258                                         printd("mpparse: bus %d type %6.6s unknown\n",
259                                                    p[1], (char *unused_char_p_t)p + 2);
260
261                                 p += 8;
262                                 break;
263                         case 2: /* IOAPIC */
264                                 /*
265                                  * Initialise the IOAPIC if it is enabled (p[3] & 0x01).
266                                  * p[1] is the APIC ID, p[4-7] is the memory mapped address.
267                                  */
268                                 if (p[3] & 0x01)
269                                         ioapicinit(p[1], -1, l32get(p + 4));
270
271                                 p += 8;
272                                 break;
273                         case 3: /* IOINTR */
274                                 /*
275                                  * p[1] is the interrupt type;
276                                  * p[2-3] contains the polarity and trigger mode;
277                                  * p[4] is the source bus;
278                                  * p[5] is the IRQ on the source bus;
279                                  * p[6] is the destination IOAPIC;
280                                  * p[7] is the INITIN pin on the destination IOAPIC.
281                                  */
282                                 if (p[6] == 0xff) {
283                                         mpintrprint("routed to all IOAPICs", p);
284                                         p += 8;
285                                         break;
286                                 }
287                                 if ((lo = mpmkintr(p)) == 0) {
288                                         p += 8;
289                                         break;
290                                 }
291                                 if (MP_VERBOSE_DEBUG)
292                                         mpintrprint(NULL, p);
293
294                                 /*
295                                  * Always present the device number in the style
296                                  * of a PCI Interrupt Assignment Entry. For the ISA
297                                  * bus the IRQ is the device number but unencoded.
298                                  * May need to handle other buses here in the future
299                                  * (but unlikely).
300                                  *
301                                  * For PCI devices, this field's lowest two bits are INT#A == 0,
302                                  * INT#B == 1, etc.  Bits 2-6 are the PCI device number.
303                                  */
304                                 devno = p[5];
305                                 if (memcmp(mpbus[p[4]]->type, "PCI   ", 6) != 0)
306                                         devno <<= 2;
307                                 void ioapicintrinit(int busno, int apicno, int intin, int devno,
308                                                                         int lo);
309                                 ioapicintrinit(p[4], p[6], p[7], devno, lo);
310
311                                 p += 8;
312                                 break;
313                         case 4: /* LINTR */
314                                 /*
315                                  * Format is the same as IOINTR above.
316                                  */
317                                 if ((lo = mpmkintr(p)) == 0) {
318                                         p += 8;
319                                         break;
320                                 }
321                                 if (MP_VERBOSE_DEBUG)
322                                         mpintrprint("LINTR", p);
323
324                                 /*
325                                  * Everything was checked in mpmkintr above.
326                                  */
327                                 if (p[6] == 0xff) {
328                                         for (i = 0; i < Napic; i++) {
329                                                 if (!xlapic[i].useable || xlapic[i].addr)
330                                                         continue;
331                                                 xlapic[i].lvt[p[7]] = lo;
332                                         }
333                                 } else
334                                         xlapic[p[6]].lvt[p[7]] = lo;
335                                 p += 8;
336                                 break;
337                 }
338
339         /*
340          * There's nothing of interest in the extended table,
341          * but check it for consistency.
342          */
343         p = e;
344         e = p + l16get(pcmp->xlength);
345         while (p < e)
346                 switch (*p) {
347                         default:
348                                 n = p[1];
349                                 printd("mpparse: unknown extended entry %d length %d\n", *p, n);
350                                 for (i = 0; i < n; i++) {
351                                         if (i && ((i & 0x0f) == 0))
352                                                 printd("\n");
353                                         printd(" %#2.2ux", *p);
354                                         p++;
355                                 }
356                                 printd("\n");
357                                 break;
358                         case 128:
359                                 printk("address space mapping\n");
360                                 printk(" bus %d type %d base %#llux length %#llux\n",
361                                            p[2], p[3], l64get(p + 4), l64get(p + 12));
362                                 p += p[1];
363                                 break;
364                         case 129:
365                                 printk("bus hierarchy descriptor\n");
366                                 printk(" bus %d sd %d parent bus %d\n", p[2], p[3], p[4]);
367                                 p += p[1];
368                                 break;
369                         case 130:
370                                 printk("compatibility bus address space modifier\n");
371                                 printk(" bus %d pr %d range list %d\n",
372                                            p[2], p[3], l32get(p + 4));
373                                 p += p[1];
374                                 break;
375                 }
376         return maxcores;
377 }
378
/*
 * Look for a firmware table by its signature.
 *
 * Only the BIOS ROM area (0x20000 bytes starting at physical 0xe0000) is
 * scanned; the EBDA and base-memory searches are compiled out.
 *
 * signature: signature string handed to sigscan.
 *
 * Returns whatever sigscan found, or NULL if the signature is absent.
 */
static void *sigsearch(char *signature)
{
        void *r;
#if 0
        uintptr_t p;
        uint8_t *bda;

        /*
         * Search for the data structure:
         * 1) in the first KB of the EBDA;
         * 2) in the last KB of system base memory;
         * 3) in the BIOS ROM between 0xe0000 and 0xfffff.
         */
        bda = BIOSSEG(0x40);
        if (memcmp(KADDR(0xfffd9), "EISA", 4) == 0) {
                if ((p = (bda[0x0f] << 8) | bda[0x0e])) {
                        if ((r = sigscan(BIOSSEG(p), 1024, signature)) != NULL)
                                return r;
                }
        }

        p = ((bda[0x14] << 8) | bda[0x13]) * 1024;
        if ((r = sigscan(KADDR(p - 1024), 1024, signature)) != NULL)
                return r;
#endif
        r = sigscan(KADDR(0xe0000), 0x20000, signature);
        if (r == NULL) {
                /* and virtualbox hidden mp tables... */
//              return sigscan(KADDR(0xa0000 - 1024), 1024, signature);
                return NULL;
        }
        /* Only claim success when something was actually found. */
        printk("Found mp table at %p\n", r);
        return r;
}
412
413 int mpsinit(int maxcores)
414 {
415         uint8_t *p;
416         int i, n, ncleft = 254;
417         _MP_ *mp;
418         PCMP *pcmp;
419
420         if ((mp = sigsearch("_MP_")) == NULL) {
421                 printk("No mp tables found, might have issues!\n");
422                 return ncleft;
423         }
424         /* TODO: if an IMCR exists, we should set it to 1, though i've heard that
425          * ACPI-capable HW doesn't have the IMCR anymore. */
426
427         if (MP_VERBOSE_DEBUG) {
428                 printk("_MP_ @ %#p, addr %p length %ud rev %d",
429                            mp, l32get(mp->addr), mp->length, mp->revision);
430                 for (i = 0; i < sizeof(mp->feature); i++)
431                         printk(" %2.2p", mp->feature[i]);
432                 printk("\n");
433         }
434         if (mp->revision != 1 && mp->revision != 4)
435                 return ncleft;
436         if (sigchecksum(mp, mp->length * 16) != 0)
437                 return ncleft;
438 #define vmap(x,y) KADDR((x))
439 #define vunmap(x,y)
440
441         if ((pcmp = vmap(l32get(mp->addr), sizeof(PCMP))) == NULL)
442                 return ncleft;
443         if (pcmp->revision != 1 && pcmp->revision != 4) {
444                 return ncleft;
445         }
446         n = l16get(pcmp->length) + l16get(pcmp->xlength);
447         vunmap(pcmp, sizeof(PCMP));
448         if ((pcmp = vmap(l32get(mp->addr), n)) == NULL)
449                 return ncleft;
450         if (sigchecksum(pcmp, l16get(pcmp->length)) != 0) {
451                 vunmap(pcmp, n);
452                 return ncleft;
453         }
454         if (MP_VERBOSE_DEBUG) {
455                 printk("PCMP @ %#p length %p revision %d\n",
456                            pcmp, l16get(pcmp->length), pcmp->revision);
457                 printk(" %20.20s oaddr %p olength %p\n",
458                            (char *)pcmp->string, l32get(pcmp->oaddr),
459                            l16get(pcmp->olength));
460                 printk(" entry %d apicpa %p\n",
461                            l16get(pcmp->entry), l32get(pcmp->apicpa));
462
463                 printk(" xlength %p xchecksum %p\n",
464                            l16get(pcmp->xlength), pcmp->xchecksum);
465         }
466         if (pcmp->xchecksum != 0) {
467                 p = ((uint8_t *) pcmp) + l16get(pcmp->length);
468                 i = sigchecksum(p, l16get(pcmp->xlength));
469                 if (((i + pcmp->xchecksum) & 0xff) != 0) {
470                         printd("extended table checksums to %p\n", i);
471                         vunmap(pcmp, n);
472                         return ncleft;
473                 }
474         }
475
476         /*
477          * Parse the PCMP table and set up the datastructures
478          * for later interrupt enabling and application processor
479          * startup.
480          */
481         ncleft = mpparse(pcmp, maxcores);
482         return ncleft;
483 //  mpacpi(ncleft);
484
485 //  apicdump();
486 //  ioapicdump();
487 }