One final round of fixup/cleanup.
[akaros.git] / kern / arch / x86 / mp.c
1 #include <vfs.h>
2 #include <kfs.h>
3 #include <slab.h>
4 #include <kmalloc.h>
5 #include <kref.h>
6 #include <string.h>
7 #include <stdio.h>
8 #include <assert.h>
9 #include <error.h>
10 #include <cpio.h>
11 #include <pmap.h>
12 #include <smp.h>
13 #include <ip.h>
14
15 /*
16  * MultiProcessor Specification Version 1.[14].
17  */
/*
 * MP Floating Pointer structure.  Every field is a byte or a byte array;
 * multi-byte values are decoded with l32get() by the code that uses them.
 */
typedef struct {
	uint8_t signature[4];	/* "_MP_" */
	uint8_t addr[4];	/* PCMP: address of the configuration table */
	uint8_t length;		/* 1; mpsinit checksums length*16 bytes */
	uint8_t revision;	/* [14] */
	uint8_t checksum;
	uint8_t feature[5];
} _MP_;
26
/*
 * MP Configuration Table header, immediately followed by the variable
 * length list of base table entries.  Multi-byte fields are stored as byte
 * arrays and decoded with l16get()/l32get().
 */
typedef struct {
	uint8_t signature[4];	/* "PCMP" */
	uint8_t length[2];	/* base table length; end of entries[] */
	uint8_t revision;	/* [14] */
	uint8_t checksum;
	uint8_t string[20];	/* OEM + Product ID */
	uint8_t oaddr[4];	/* OEM table pointer */
	uint8_t olength[2];	/* OEM table length */
	uint8_t entry[2];	/* entry count */
	uint8_t apicpa[4];	/* local APIC address */
	uint8_t xlength[2];	/* extended table length */
	uint8_t xchecksum;	/* extended table checksum */
	uint8_t reserved;

	uint8_t entries[];	/* first base table entry */
} PCMP;
43
/*
 * Description of one bus type: the 6-character type string as it appears
 * in a bus entry (compared with memcmp, so no terminating NUL is stored)
 * and the polarity/trigger used when an interrupt entry asks for the
 * bus default.
 */
typedef struct {
	char type[6];
	int polarity;		/* default for this bus */
	int trigger;		/* default for this bus */
} Mpbus;
49
50 static Mpbus mpbusdef[] = {
51         { "PCI   ", IPlow, TMlevel, },
52         { "ISA   ", IPhigh, TMedge, },
53 };
54 static Mpbus* mpbus[Nbus];
55 int mpisabusno = -1;
56
/*
 * Print one interrupt assignment entry (IOINTR or LINTR) with printk.
 *
 * s: optional note printed before the fields, or NULL for none.
 * p: start of the 8-byte entry; p[1] is the interrupt type, p[2-3] the
 *    flags, p[4] the source bus, p[5] the source IRQ, p[6] the destination
 *    APIC and p[7] the destination INTIN.
 *
 * The output is produced by several printk calls, so it is not assembled
 * into a single buffer first.
 */
static void
mpintrprint(char* s, uint8_t* p)
{
	static const char format[] =
		" type %d flags %p bus %d IRQ %d APIC %d INTIN %d\n";

	printk("mpparse: intr:");
	if(s != NULL)
		printk(" %s:", s);
	printk(format, p[1], l16get(p+2), p[4], p[5], p[6], p[7]);
}
77
78 static uint32_t
79 mpmkintr(uint8_t* p)
80 {
81         uint32_t v;
82         struct apic *apic;
83         int n, polarity, trigger;
84
85         /*
86          * Check valid bus, interrupt input pin polarity
87          * and trigger mode. If the APIC ID is 0xff it means
88          * all APICs of this type so those checks for useable
89          * APIC and valid INTIN must also be done later in
90          * the appropriate init routine in that case. It's hard
91          * to imagine routing a signal to all IOAPICs, the
92          * usual case is routing NMI and ExtINT to all LAPICs.
93          */
94         if(mpbus[p[4]] == NULL){
95                 mpintrprint("no source bus", p);
96                 return 0;
97         }
98         if(p[6] != 0xff){
99                 if(Napic < 256 && p[6] >= Napic){
100                         mpintrprint("APIC ID out of range", p);
101                         return 0;
102                 }
103                 switch(p[0]){
104                 default:
105                         mpintrprint("INTIN botch", p);
106                         return 0;
107                 case 3:                         /* IOINTR */
108                         apic = &xioapic[p[6]];
109                         if(!apic->useable){
110                                 mpintrprint("unuseable ioapic", p);
111                                 return 0;
112                         }
113                         if(p[7] >= apic->nrdt){
114                                 mpintrprint("IO INTIN out of range", p);
115                                 return 0;
116                         }
117                         break;
118                 case 4:                         /* LINTR */
119                         apic = &xlapic[p[6]];
120                         if(!apic->useable){
121                                 mpintrprint("unuseable lapic", p);
122                                 return 0;
123                         }
124                         if(p[7] >= ARRAY_SIZE(apic->lvt)){
125                                 mpintrprint("LOCAL INTIN out of range", p);
126                                 return 0;
127                         }
128                         break;
129                 }
130         }
131         n = l16get(p+2);
132         if((polarity = (n & 0x03)) == 2 || (trigger = ((n>>2) & 0x03)) == 2){
133                 mpintrprint("invalid polarity/trigger", p);
134                 return 0;
135         }
136
137         /*
138          * Create the low half of the vector table entry (LVT or RDT).
139          * For the NMI, SMI and ExtINT cases, the polarity and trigger
140          * are fixed (but are not always consistent over IA-32 generations).
141          * For the INT case, either the polarity/trigger are given or
142          * it defaults to that of the source bus;
143          * whether INT is Fixed or Lowest Priority is left until later.
144          */
145         v = Im;
146         switch(p[1]){
147         default:
148                 mpintrprint("invalid type", p);
149                 return 0;
150         case 0:                                 /* INT */
151                 switch(polarity){
152                 case 0:
153                         v |= mpbus[p[4]]->polarity;
154                         break;
155                 case 1:
156                         v |= IPhigh;
157                         break;
158                 case 3:
159                         v |= IPlow;
160                         break;
161                 }
162                 switch(trigger){
163                 case 0:
164                         v |= mpbus[p[4]]->trigger;
165                         break;
166                 case 1:
167                         v |= TMedge;
168                         break;
169                 case 3:
170                         v |= TMlevel;
171                         break;
172                 }
173                 break;
174         case 1:                                 /* NMI */
175                 v |= TMedge|IPhigh|MTnmi;
176                 break;
177         case 2:                                 /* SMI */
178                 v |= TMedge|IPhigh|MTsmi;
179                 break;
180         case 3:                                 /* ExtINT */
181                 v |= TMedge|IPhigh|MTei;
182                 break;
183         }
184
185         return v;
186 }
187
188 static int
189 mpparse(PCMP* pcmp, int maxcores)
190 {
191         uint32_t lo;
192         uint8_t *e, *p;
193         int devno, i, n;
194
195         p = pcmp->entries;
196         e = (( uint8_t *)pcmp)+l16get(pcmp->length);
197         while(p < e) switch(*p){
198         default:
199                 printd("mpparse: unknown PCMP type %d (e-p %#ld)\n", *p, e-p);
200                 for(i = 0; p < e; i++){
201                         if(i && ((i & 0x0f) == 0))
202                                 printd("\n");
203                         printd(" %#2.2ux", *p);
204                         p++;
205                 }
206                 printd("\n");
207                 break;
208         case 0:                                 /* processor */
209                 /*
210                  * Initialise the APIC if it is enabled (p[3] & 0x01).
211                  * p[1] is the APIC ID, the memory mapped address comes
212                  * from the PCMP structure as the addess is local to the
213                  * CPU and identical for all. Indicate whether this is
214                  * the bootstrap processor (p[3] & 0x02).
215                  */
216                 printk("mpparse: cpu %d pa %p bp %d\n",
217                         p[1], l32get(pcmp->apicpa), p[3] & 0x02);
218                 if((p[3] & 0x01) != 0 && maxcores > 0){
219                                 maxcores--;
220                                 apicinit(p[1], l32get(pcmp->apicpa), p[3] & 0x02);
221                 }
222                 p += 20;
223                 break;
224         case 1:                                 /* bus */
225                 printk("mpparse: bus: %d type %6.6s\n", p[1], ( char *)p+2);
226                 if(p[1] >= Nbus){
227                         printd("mpparse: bus %d out of range\n", p[1]);
228                         p += 8;
229                         break;
230                 }
231                 if(mpbus[p[1]] != NULL){
232                         printd("mpparse: bus %d already allocated\n", p[1]);
233                         p += 8;
234                         break;
235                 }
236                 for(i = 0; i < ARRAY_SIZE(mpbusdef); i++){
237                         if(memcmp(p+2, mpbusdef[i].type, 6) != 0)
238                                 continue;
239                         if(memcmp(p+2, "ISA   ", 6) == 0){
240                                 if(mpisabusno != -1){
241                                         printd("mpparse: bus %d already have ISA bus %d\n",
242                                                 p[1], mpisabusno);
243                                         continue;
244                                 }
245                                 mpisabusno = p[1];
246                         }
247                         mpbus[p[1]] = &mpbusdef[i];
248                         break;
249                 }
250                 if(mpbus[p[1]] == NULL)
251                         printd("mpparse: bus %d type %6.6s unknown\n",
252                                 p[1], ( char *unused_char_p_t)p+2);
253
254                 p += 8;
255                 break;
256         case 2:                                 /* IOAPIC */
257                 /*
258                  * Initialise the IOAPIC if it is enabled (p[3] & 0x01).
259                  * p[1] is the APIC ID, p[4-7] is the memory mapped address.
260                  */
261                 if(p[3] & 0x01)
262                         ioapicinit(p[1], -1, l32get(p+4));
263
264                 p += 8;
265                 break;
266         case 3:                                 /* IOINTR */
267                 /*
268                  * p[1] is the interrupt type;
269                  * p[2-3] contains the polarity and trigger mode;
270                  * p[4] is the source bus;
271                  * p[5] is the IRQ on the source bus;
272                  * p[6] is the destination APIC;
273                  * p[7] is the INITIN pin on the destination APIC.
274                  */
275                 if(p[6] == 0xff){
276                         mpintrprint("routed to all IOAPICs", p);
277                         p += 8;
278                         break;
279                 }
280                 if((lo = mpmkintr(p)) == 0){
281                         p += 8;
282                         break;
283                 }
284                 if(2)
285                         mpintrprint(NULL, p);
286
287                 /*
288                  * Always present the device number in the style
289                  * of a PCI Interrupt Assignment Entry. For the ISA
290                  * bus the IRQ is the device number but unencoded.
291                  * May need to handle other buses here in the future
292                  * (but unlikely).
293                  */
294                 devno = p[5];
295                 if(memcmp(mpbus[p[4]]->type, "PCI   ", 6) != 0)
296                         devno <<= 2;
297         void
298                 ioapicintrinit(int busno, int apicno, int intin, int devno, int lo);
299         ioapicintrinit(p[4], p[6], p[7], devno, lo);
300
301                 p += 8;
302                 break;
303         case 4:                                 /* LINTR */
304                 /*
305                  * Format is the same as IOINTR above.
306                  */
307                 if((lo = mpmkintr(p)) == 0){
308                         p += 8;
309                         break;
310                 }
311                 if(2)
312                         mpintrprint(NULL, p);
313
314                 /*
315                  * Everything was checked in mpmkintr above.
316                  */
317                 if(p[6] == 0xff){
318                         for(i = 0; i < Napic; i++){
319                                 if(!xlapic[i].useable || xlapic[i].addr != NULL)
320                                         continue;
321                                 xlapic[i].lvt[p[7]] = lo;
322                         }
323                 }
324                 else
325                         xlapic[p[6]].lvt[p[7]] = lo;
326                 p += 8;
327                 break;
328         }
329
330         /*
331          * There's nothing of interest in the extended table,
332          * but check it for consistency.
333          */
334         p = e;
335         e = p + l16get(pcmp->xlength);
336         while(p < e) switch(*p){
337         default:
338                 n = p[1];
339                 printd("mpparse: unknown extended entry %d length %d\n", *p, n);
340                 for(i = 0; i < n; i++){
341                         if(i && ((i & 0x0f) == 0))
342                                 printd("\n");
343                         printd(" %#2.2ux", *p);
344                         p++;
345                 }
346                 printd("\n");
347                 break;
348         case 128:
349                 printk("address space mapping\n");
350                 printk(" bus %d type %d base %#llux length %#llux\n",
351                         p[2], p[3], l64get(p+4), l64get(p+12));
352                 p += p[1];
353                 break;
354         case 129:
355                 printk("bus hierarchy descriptor\n");
356                 printk(" bus %d sd %d parent bus %d\n",
357                         p[2], p[3], p[4]);
358                 p += p[1];
359                 break;
360         case 130:
361                 printk("compatibility bus address space modifier\n");
362                 printk(" bus %d pr %d range list %d\n",
363                         p[2], p[3], l32get(p+4));
364                 p += p[1];
365                 break;
366         }
367         return maxcores;
368 }
369
/*
 * Sum `length` bytes starting at `address` in 8-bit arithmetic.
 *
 * Returns the sum modulo 256 (0..255).  The callers in this file treat a
 * result of 0 as a valid checksum.  A length of zero or less sums nothing
 * and yields 0.
 */
static int
sigchecksum(void* address, int length)
{
	uint8_t *bytes = address;
	uint8_t total = 0;
	int i;

	for(i = 0; i < length; i++)
		total += bytes[i];

	return total;
}
381
/*
 * Look for `signature` at each 16-byte step of a memory region.
 *
 * address:   start of the region.
 * length:    size of the region in bytes.
 * signature: NUL-terminated string; the terminator is not compared.
 *
 * Returns a pointer to the first candidate position (address, address+16,
 * ...) at which the signature matches and fits entirely inside the region,
 * or NULL if there is none.  A signature that ends exactly on the last byte
 * of the region is accepted.
 */
static void*
sigscan(uint8_t* address, int length, char* signature)
{
	int off, siglength;

	siglength = strlen(signature);
	/* Offsets rather than pointers: never forms a pointer past the region. */
	for(off = 0; off <= length - siglength; off += 16){
		if(memcmp(address + off, signature, siglength) == 0)
			return address + off;
	}

	return NULL;
}
398
/*
 * Search for a table identified by `signature`.
 *
 * Only the BIOS ROM area, 0x20000 bytes from physical address 0xe0000, is
 * scanned; the EBDA and top-of-base-memory searches are compiled out.
 *
 * Returns a pointer to the match (a KADDR mapping), or NULL if the
 * signature was not found.
 */
static void*
sigsearch(char* signature)
{
	void *r;
#if 0
	uintptr_t p;
	uint8_t *bda;

	/*
	 * Search for the data structure:
	 * 1) in the first KB of the EBDA;
	 * 2) in the last KB of system base memory;
	 * 3) in the BIOS ROM between 0xe0000 and 0xfffff.
	 */
	bda = BIOSSEG(0x40);
	if(memcmp(KADDR(0xfffd9), "EISA", 4) == 0){
		if((p = (bda[0x0f]<<8)|bda[0x0e])){
			if((r = sigscan(BIOSSEG(p), 1024, signature)) != NULL)
				return r;
		}
	}

	p = ((bda[0x14]<<8)|bda[0x13])*1024;
	if((r = sigscan(KADDR(p-1024), 1024, signature)) != NULL)
		return r;
#endif
	r = sigscan(KADDR(0xe0000), 0x20000, signature);
	/* Only announce a table when one was actually found. */
	if(r != NULL)
		printk("Found mp table at %p\n", r);

	return r;
	/* and virtualbox hidden mp tables... */
//	return sigscan(KADDR(0xa0000 - 1024), 1024, signature);
}
433
434 int
435 mpsinit(int maxcores)
436 {
437         uint8_t *p;
438         int i, n, ncleft = 254;
439         _MP_ *mp;
440         PCMP *pcmp;
441
442         if((mp = sigsearch("_MP_")) == NULL){
443                 printd("no mp tables\n");
444                 return ncleft;
445         }
446
447         if(2){
448                 printk("_MP_ @ %#p, addr %p length %ud rev %d",
449                         mp, l32get(mp->addr), mp->length, mp->revision);
450                 for(i = 0; i < sizeof(mp->feature); i++)
451                         printk(" %2.2p", mp->feature[i]);
452                 printk("\n");
453         }
454         if(mp->revision != 1 && mp->revision != 4)
455                 return ncleft;
456         if(sigchecksum(mp, mp->length*16) != 0)
457                 return ncleft;
458 #define vmap(x,y) KADDR((x))
459 #define vunmap(x,y) 
460
461         if((pcmp = vmap(l32get(mp->addr), sizeof(PCMP))) == NULL)
462                 return ncleft;
463         if(pcmp->revision != 1 && pcmp->revision != 4){
464                 return ncleft;
465         }
466         n = l16get(pcmp->length) + l16get(pcmp->xlength);
467         vunmap(pcmp, sizeof(PCMP));
468         if((pcmp = vmap(l32get(mp->addr), n)) == NULL)
469                 return ncleft;
470         if(sigchecksum(pcmp, l16get(pcmp->length)) != 0){
471                 vunmap(pcmp, n);
472                 return ncleft;
473         }
474         if(2){
475                 printk("PCMP @ %#p length %p revision %d\n",
476                         pcmp, l16get(pcmp->length), pcmp->revision);
477                 printk(" %20.20s oaddr %p olength %p\n",
478                         ( char *)pcmp->string, l32get(pcmp->oaddr),
479                         l16get(pcmp->olength));
480                 printk(" entry %d apicpa %p\n",
481                         l16get(pcmp->entry), l32get(pcmp->apicpa));
482
483                 printk(" xlength %p xchecksum %p\n",
484                         l16get(pcmp->xlength), pcmp->xchecksum);
485         }
486         if(pcmp->xchecksum != 0){
487                 p = (( uint8_t *)pcmp) + l16get(pcmp->length);
488                 i = sigchecksum(p, l16get(pcmp->xlength));
489                 if(((i+pcmp->xchecksum) & 0xff) != 0){
490                         printd("extended table checksums to %p\n", i);
491                         vunmap(pcmp, n);
492                         return ncleft;
493                 }
494         }
495
496         /*
497          * Parse the PCMP table and set up the datastructures
498          * for later interrupt enabling and application processor
499          * startup.
500          */
501         ncleft = mpparse(pcmp, maxcores);
502         return ncleft;
503 //      mpacpi(ncleft);
504
505 //      apicdump();
506 //      ioapicdump();
507 }