Cleans up vmap()
[akaros.git] / kern / arch / x86 / mp.c
1 /* This file is part of the UCB release of Plan 9. It is subject to the license
2  * terms in the LICENSE file found in the top-level directory of this
3  * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
4  * part of the UCB release of Plan 9, including this file, may be copied,
5  * modified, propagated, or distributed except according to the terms contained
6  * in the LICENSE file. */
7
8 #include <vfs.h>
9 #include <kfs.h>
10 #include <slab.h>
11 #include <kmalloc.h>
12 #include <kref.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <assert.h>
16 #include <error.h>
17 #include <cpio.h>
18 #include <pmap.h>
19 #include <smp.h>
20 #include <ip.h>
21 #include <arch/mptables.h>
22 #include <arch/ioapic.h>
23
24 /*
25  * MultiProcessor Specification Version 1.[14].
26  */
/*
 * MP Floating Pointer structure, located by scanning for its signature.
 * Every field is a raw byte array so the layout has no padding.
 */
typedef struct {
	/* the four characters "_MP_" */
	uint8_t signature[4];
	/* 32-bit address of the PCMP configuration table */
	uint8_t addr[4];
	/* structure length; normally 1 (mpsinit checksums length * 16 bytes) */
	uint8_t length;
	/* specification revision: 1 or 4 */
	uint8_t revision;
	uint8_t checksum;
	uint8_t feature[5];
} _MP_;
35
/*
 * MP Configuration Table header, followed in memory by the base table
 * entries.  Multi-byte fields are kept as byte arrays and decoded with
 * the l16get/l32get helpers.
 */
typedef struct {
	/* the four characters "PCMP" */
	uint8_t signature[4];
	/* length of header plus base entries */
	uint8_t length[2];
	/* specification revision: 1 or 4 */
	uint8_t revision;
	uint8_t checksum;
	/* OEM + Product ID */
	uint8_t string[20];
	/* OEM table pointer */
	uint8_t oaddr[4];
	/* OEM table length */
	uint8_t olength[2];
	/* entry count */
	uint8_t entry[2];
	/* local APIC address */
	uint8_t apicpa[4];
	/* extended table length */
	uint8_t xlength[2];
	/* extended table checksum */
	uint8_t xchecksum;
	uint8_t reserved;

	/* first base table entry */
	uint8_t entries[];
} PCMP;
52
/*
 * Description of one bus type that the MP table parser understands.
 */
typedef struct {
	/* bus type string, blank padded to 6 characters, no terminator */
	char type[6];
	/* default polarity for interrupts on this bus */
	int polarity;
	/* default trigger mode for interrupts on this bus */
	int trigger;
} Mpbus;
58
59 static Mpbus mpbusdef[] = {
60         {"PCI   ", IPlow, TMlevel,},
61         {"ISA   ", IPhigh, TMedge,},
62 };
63
64 static Mpbus *mpbus[Nbus];
65 int mpisabusno = -1;
66 #define MP_VERBOSE_DEBUG 0
67
/*
 * Print one MP table interrupt entry (IOINTR or LINTR) on the console.
 *
 * s: optional tag printed after the "mpparse: intr:" prefix; NULL omits it.
 * p: start of the entry.  p[1] is the interrupt type, p[2-3] the
 *    polarity/trigger flags, p[4] the source bus, p[5] the IRQ on that bus,
 *    p[6] the destination APIC and p[7] the destination INTIN pin.
 */
static void mpintrprint(char *s, uint8_t * p)
{
	printk("mpparse: intr:");
	if (s != NULL)
		printk(" %s:", s);
	/*
	 * %p consumes a pointer-sized argument, so widen the 16-bit flags
	 * explicitly instead of passing a plain integer through varargs.
	 */
	printk(" type %d flags %p bus %d IRQ %d APIC %d INTIN %d\n",
	       p[1], (void *)(uintptr_t)l16get(p + 2), p[4], p[5], p[6], p[7]);
}
87
88 static uint32_t mpmkintr(uint8_t * p)
89 {
90         uint32_t v;
91         struct apic *apic;
92         int n, polarity, trigger;
93
94         /*
95          * Check valid bus, interrupt input pin polarity
96          * and trigger mode. If the APIC ID is 0xff it means
97          * all APICs of this type so those checks for useable
98          * APIC and valid INTIN must also be done later in
99          * the appropriate init routine in that case. It's hard
100          * to imagine routing a signal to all IOAPICs, the
101          * usual case is routing NMI and ExtINT to all LAPICs.
102          */
103         if (mpbus[p[4]] == NULL) {
104                 mpintrprint("no source bus", p);
105                 return 0;
106         }
107         if (p[6] != 0xff) {
108                 if (Napic < 256 && p[6] >= Napic) {
109                         mpintrprint("APIC ID out of range", p);
110                         return 0;
111                 }
112                 switch (p[0]) {
113                         default:
114                                 mpintrprint("INTIN botch", p);
115                                 return 0;
116                         case 3: /* IOINTR */
117                                 apic = &xioapic[p[6]];
118                                 if (!apic->useable) {
119                                         mpintrprint("unuseable ioapic", p);
120                                         return 0;
121                                 }
122                                 if (p[7] >= apic->nrdt) {
123                                         mpintrprint("IO INTIN out of range", p);
124                                         return 0;
125                                 }
126                                 break;
127                         case 4: /* LINTR */
128                                 apic = &xlapic[p[6]];
129                                 if (!apic->useable) {
130                                         mpintrprint("unuseable lapic", p);
131                                         return 0;
132                                 }
133                                 if (p[7] >= ARRAY_SIZE(apic->lvt)) {
134                                         mpintrprint("LOCAL INTIN out of range", p);
135                                         return 0;
136                                 }
137                                 break;
138                 }
139         }
140         n = l16get(p + 2);
141         if ((polarity = (n & 0x03)) == 2 || (trigger = ((n >> 2) & 0x03)) == 2) {
142                 mpintrprint("invalid polarity/trigger", p);
143                 return 0;
144         }
145
146         /*
147          * Create the low half of the vector table entry (LVT or RDT).
148          * For the NMI, SMI and ExtINT cases, the polarity and trigger
149          * are fixed (but are not always consistent over IA-32 generations).
150          * For the INT case, either the polarity/trigger are given or
151          * it defaults to that of the source bus;
152          * whether INT is Fixed or Lowest Priority is left until later.
153          */
154         v = Im;
155         switch (p[1]) {
156                 default:
157                         mpintrprint("invalid type", p);
158                         return 0;
159                 case 0: /* INT */
160                         switch (polarity) {
161                                 case 0:
162                                         v |= mpbus[p[4]]->polarity;
163                                         break;
164                                 case 1:
165                                         v |= IPhigh;
166                                         break;
167                                 case 3:
168                                         v |= IPlow;
169                                         break;
170                         }
171                         switch (trigger) {
172                                 case 0:
173                                         v |= mpbus[p[4]]->trigger;
174                                         break;
175                                 case 1:
176                                         v |= TMedge;
177                                         break;
178                                 case 3:
179                                         v |= TMlevel;
180                                         break;
181                         }
182                         break;
183                 case 1: /* NMI */
184                         v |= TMedge | IPhigh | MTnmi;
185                         break;
186                 case 2: /* SMI */
187                         v |= TMedge | IPhigh | MTsmi;
188                         break;
189                 case 3: /* ExtINT */
190                         v |= TMedge | IPhigh | MTei;
191                         break;
192         }
193
194         return v;
195 }
196
197 static int mpparse(PCMP * pcmp, int maxcores)
198 {
199         uint32_t lo;
200         uint8_t *e, *p;
201         int devno, i, n;
202
203         p = pcmp->entries;
204         e = ((uint8_t *) pcmp) + l16get(pcmp->length);
205         while (p < e)
206                 switch (*p) {
207                         default:
208                                 printd("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p, e - p);
209                                 for (i = 0; p < e; i++) {
210                                         if (i && ((i & 0x0f) == 0))
211                                                 printd("\n");
212                                         printd(" 0x%#2.2x", *p);
213                                         p++;
214                                 }
215                                 printd("\n");
216                                 break;
217                         case 0: /* processor */
218                                 /*
219                                  * Initialise the APIC if it is enabled (p[3] & 0x01).
220                                  * p[1] is the APIC ID, the memory mapped address comes
221                                  * from the PCMP structure as the addess is local to the
222                                  * CPU and identical for all. Indicate whether this is
223                                  * the bootstrap processor (p[3] & 0x02).
224                                  */
225                                 printd("mpparse: cpu %d pa %p bp %d\n",
226                                            p[1], l32get(pcmp->apicpa), p[3] & 0x02);
227                                 if ((p[3] & 0x01) != 0 && maxcores > 0) {
228                                         maxcores--;
229                                         apicinit(p[1], l32get(pcmp->apicpa), p[3] & 0x02);
230                                 }
231                                 p += 20;
232                                 break;
233                         case 1: /* bus */
234                                 printd("mpparse: bus: %d type %6.6s\n", p[1], (char *)p + 2);
235                                 if (p[1] >= Nbus) {
236                                         printk("mpparse: bus %d out of range\n", p[1]);
237                                         p += 8;
238                                         break;
239                                 }
240                                 if (mpbus[p[1]] != NULL) {
241                                         printk("mpparse: bus %d already allocated\n", p[1]);
242                                         p += 8;
243                                         break;
244                                 }
245                                 for (i = 0; i < ARRAY_SIZE(mpbusdef); i++) {
246                                         if (memcmp(p + 2, mpbusdef[i].type, 6) != 0)
247                                                 continue;
248                                         if (memcmp(p + 2, "ISA   ", 6) == 0) {
249                                                 if (mpisabusno != -1) {
250                                                         printk("mpparse: bus %d already have ISA bus %d\n",
251                                                                    p[1], mpisabusno);
252                                                         continue;
253                                                 }
254                                                 mpisabusno = p[1];
255                                         }
256                                         mpbus[p[1]] = &mpbusdef[i];
257                                         break;
258                                 }
259                                 if (mpbus[p[1]] == NULL)
260                                         printk("mpparse: bus %d type %6.6s unknown\n",
261                                                    p[1], (char *)p + 2);
262
263                                 p += 8;
264                                 break;
265                         case 2: /* IOAPIC */
266                                 /*
267                                  * Initialise the IOAPIC if it is enabled (p[3] & 0x01).
268                                  * p[1] is the APIC ID, p[4-7] is the memory mapped address.
269                                  */
270                                 if (p[3] & 0x01)
271                                         ioapicinit(p[1], -1, l32get(p + 4));
272
273                                 p += 8;
274                                 break;
275                         case 3: /* IOINTR */
276                                 /*
277                                  * p[1] is the interrupt type;
278                                  * p[2-3] contains the polarity and trigger mode;
279                                  * p[4] is the source bus;
280                                  * p[5] is the IRQ on the source bus;
281                                  * p[6] is the destination IOAPIC;
282                                  * p[7] is the INITIN pin on the destination IOAPIC.
283                                  */
284                                 if (p[6] == 0xff) {
285                                         mpintrprint("routed to all IOAPICs", p);
286                                         p += 8;
287                                         break;
288                                 }
289                                 if ((lo = mpmkintr(p)) == 0) {
290                                         if (MP_VERBOSE_DEBUG)
291                                                 mpintrprint("iointr skipped", p);
292                                         p += 8;
293                                         break;
294                                 }
295                                 if (MP_VERBOSE_DEBUG)
296                                         mpintrprint("iointr", p);
297
298                                 /*
299                                  * Always present the device number in the style
300                                  * of a PCI Interrupt Assignment Entry. For the ISA
301                                  * bus the IRQ is the device number but unencoded.
302                                  * May need to handle other buses here in the future
303                                  * (but unlikely).
304                                  *
305                                  * For PCI devices, this field's lowest two bits are INT#A == 0,
306                                  * INT#B == 1, etc.  Bits 2-6 are the PCI device number.
307                                  */
308                                 devno = p[5];
309                                 if (memcmp(mpbus[p[4]]->type, "PCI   ", 6) != 0)
310                                         devno <<= 2;
311                                 ioapicintrinit(p[4], p[6], p[7], devno, lo);
312
313                                 p += 8;
314                                 break;
315                         case 4: /* LINTR */
316                                 /*
317                                  * Format is the same as IOINTR above.
318                                  */
319                                 if ((lo = mpmkintr(p)) == 0) {
320                                         p += 8;
321                                         break;
322                                 }
323                                 if (MP_VERBOSE_DEBUG)
324                                         mpintrprint("LINTR", p);
325
326                                 /*
327                                  * Everything was checked in mpmkintr above.
328                                  */
329                                 if (p[6] == 0xff) {
330                                         for (i = 0; i < Napic; i++) {
331                                                 if (!xlapic[i].useable || xlapic[i].addr)
332                                                         continue;
333                                                 xlapic[i].lvt[p[7]] = lo;
334                                         }
335                                 } else
336                                         xlapic[p[6]].lvt[p[7]] = lo;
337                                 p += 8;
338                                 break;
339                 }
340
341         /*
342          * There's nothing of interest in the extended table,
343          * but check it for consistency.
344          */
345         p = e;
346         e = p + l16get(pcmp->xlength);
347         while (p < e)
348                 switch (*p) {
349                         default:
350                                 n = p[1];
351                                 printd("mpparse: unknown extended entry %d length %d\n", *p, n);
352                                 for (i = 0; i < n; i++) {
353                                         if (i && ((i & 0x0f) == 0))
354                                                 printd("\n");
355                                         printd(" %#2.2ux", *p);
356                                         p++;
357                                 }
358                                 printd("\n");
359                                 break;
360                         case 128:
361                                 printd("address space mapping\n");
362                                 printd(" bus %d type %d base %#llux length %#llux\n",
363                                            p[2], p[3], l64get(p + 4), l64get(p + 12));
364                                 p += p[1];
365                                 break;
366                         case 129:
367                                 printd("bus hierarchy descriptor\n");
368                                 printd(" bus %d sd %d parent bus %d\n", p[2], p[3], p[4]);
369                                 p += p[1];
370                                 break;
371                         case 130:
372                                 printd("compatibility bus address space modifier\n");
373                                 printd(" bus %d pr %d range list %d\n",
374                                            p[2], p[3], l32get(p + 4));
375                                 p += p[1];
376                                 break;
377                 }
378         return maxcores;
379 }
380
/*
 * Look for a firmware table identified by 'signature'.
 *
 * The structure may live in any of:
 * 1) the first KB of the EBDA;
 * 2) the last KB of system base memory;
 * 3) the BIOS ROM between 0xe0000 and 0xfffff.
 * Only (3) is scanned here.  The 1 KB just below 0xa0000, where VirtualBox
 * is said to hide its MP tables, is not scanned either.
 *
 * Returns whatever sigscan() found, or NULL if the signature was not
 * present in the scanned range.
 */
static void *sigsearch(char *signature)
{
	void *r;

	r = sigscan(KADDR(0xe0000), 0x20000, signature);
	/* Only announce a table when one was actually found. */
	if (r != NULL)
		printk("Found MP table at %p\n", r);
	return r;
}
414
415 int mpsinit(int maxcores)
416 {
417         uint8_t *p;
418         int i, n, ncleft = 254;
419         _MP_ *mp;
420         PCMP *pcmp;
421
422         if ((mp = sigsearch("_MP_")) == NULL) {
423                 printk("No mp tables found, might have issues!\n");
424                 return ncleft;
425         }
426         /* TODO: if an IMCR exists, we should set it to 1, though i've heard that
427          * ACPI-capable HW doesn't have the IMCR anymore. */
428
429         if (MP_VERBOSE_DEBUG) {
430                 printk("_MP_ @ %#p, addr %p length %ud rev %d",
431                            mp, l32get(mp->addr), mp->length, mp->revision);
432                 for (i = 0; i < sizeof(mp->feature); i++)
433                         printk(" %2.2p", mp->feature[i]);
434                 printk("\n");
435         }
436         if (mp->revision != 1 && mp->revision != 4)
437                 return ncleft;
438         if (sigchecksum(mp, mp->length * 16) != 0)
439                 return ncleft;
440         if ((pcmp = KADDR_NOCHECK(l32get(mp->addr))) == NULL)
441                 return ncleft;
442         if (pcmp->revision != 1 && pcmp->revision != 4) {
443                 return ncleft;
444         }
445         n = l16get(pcmp->length) + l16get(pcmp->xlength);
446         if ((pcmp = KADDR_NOCHECK(l32get(mp->addr))) == NULL)
447                 return ncleft;
448         if (sigchecksum(pcmp, l16get(pcmp->length)) != 0) {
449                 return ncleft;
450         }
451         if (MP_VERBOSE_DEBUG) {
452                 printk("PCMP @ %#p length %p revision %d\n",
453                            pcmp, l16get(pcmp->length), pcmp->revision);
454                 printk(" %20.20s oaddr %p olength %p\n",
455                            (char *)pcmp->string, l32get(pcmp->oaddr),
456                            l16get(pcmp->olength));
457                 printk(" entry %d apicpa %p\n",
458                            l16get(pcmp->entry), l32get(pcmp->apicpa));
459
460                 printk(" xlength %p xchecksum %p\n",
461                            l16get(pcmp->xlength), pcmp->xchecksum);
462         }
463         if (pcmp->xchecksum != 0) {
464                 p = ((uint8_t *) pcmp) + l16get(pcmp->length);
465                 i = sigchecksum(p, l16get(pcmp->xlength));
466                 if (((i + pcmp->xchecksum) & 0xff) != 0) {
467                         printd("extended table checksums to %p\n", i);
468                         return ncleft;
469                 }
470         }
471
472         /*
473          * Parse the PCMP table and set up the datastructures
474          * for later interrupt enabling and application processor
475          * startup.
476          */
477         ncleft = mpparse(pcmp, maxcores);
478         return ncleft;
479 //  mpacpi(ncleft);
480
481 //  apicdump();
482 //  ioapicdump();
483 }