2 * Copyright (c) 2009 The Regents of the University of California
3 * Barret Rhoden <brho@cs.berkeley.edu>
4 * See LICENSE for details.
13 #include <arch/arch.h>
15 #include <arch/console.h>
16 #include <arch/apic.h>
17 #include <arch/perfmon.h>
/* Table of handler wrappers. Only declared here (extern); its entries are
 * filled in by INIT_HANDLER_WRAPPER. */
31 extern handler_wrapper_t (RO handler_wrappers)[NUM_HANDLER_WRAPPERS];
/* Core count. Starts at the placeholder 0xee and is read to size the handler
 * checklists and the generic barrier.
 * NOTE(review): no write to it is visible in this part of the file;
 * presumably it is updated while the other cores boot -- confirm. */
32 volatile uint32_t num_cpus = 0xee;
/* Set to the top (page address + PGSIZE) of the single stack page that all
 * cores share while starting up. */
33 uintptr_t RO smp_stack_top;
/* Barrier initialized with num_cpus and waited on in smp_final_core_init(). */
34 barrier_t generic_barrier;
/* Expands to INIT_CHECKLIST(f<vector>_cpu_list, MAX_NUM_CPUS). The pasted
 * name f<vector>_cpu_list is the same one INIT_HANDLER_WRAPPER builds to find
 * the checklist, so the two macros must be used with matching arguments. */
36 #define DECLARE_HANDLER_CHECKLISTS(vector) \
37 INIT_CHECKLIST(f##vector##_cpu_list, MAX_NUM_CPUS);
/* Fills in handler_wrappers[v]:
 *  - vector becomes the literal 0xf<v> (v is token-pasted, so it has to be a
 *    literal, not a variable or expression);
 *  - cpu_list points at the checklist named f<v>_cpu_list;
 *  - that checklist's mask size is set to the value num_cpus holds when the
 *    macro body runs. */
39 #define INIT_HANDLER_WRAPPER(v) \
41 handler_wrappers[(v)].vector = 0xf##v; \
42 handler_wrappers[(v)].cpu_list = &f##v##_cpu_list; \
43 handler_wrappers[(v)].cpu_list->mask.size = num_cpus; \
46 DECLARE_HANDLER_CHECKLISTS(0); /* one checklist per handler wrapper slot, 0 through 4 */
47 DECLARE_HANDLER_CHECKLISTS(1);
48 DECLARE_HANDLER_CHECKLISTS(2);
49 DECLARE_HANDLER_CHECKLISTS(3);
50 DECLARE_HANDLER_CHECKLISTS(4);
/* Initializes handler wrapper slots 0 through 4 (vector, checklist pointer,
 * checklist size). The checklist sizes are taken from whatever num_cpus holds
 * at the time of the call. */
52 static void init_smp_call_function(void)
54 INIT_HANDLER_WRAPPER(0);
55 INIT_HANDLER_WRAPPER(1);
56 INIT_HANDLER_WRAPPER(2);
57 INIT_HANDLER_WRAPPER(3);
58 INIT_HANDLER_WRAPPER(4);
61 /******************************************************************************/
/* Final per-core initialization step. When CONFIG_FAST_COREID is defined it
 * writes this core's OS coreid into MSR_TSC_AUX and checks that rdtscp hands
 * the same value back in ecx. It then sets up the default MTRRs and waits on
 * generic_barrier. The boot path in this file also calls it for the booting
 * core itself, right after sending the broadcast IPI. */
63 void smp_final_core_init(void)
65 #ifdef CONFIG_FAST_COREID
66 /* Need to bootstrap the rdtscp MSR with our OS coreid */
67 int coreid = get_os_coreid(hw_core_id());
68 write_msr(MSR_TSC_AUX, coreid);
70 /* Busted versions of qemu bug out here (32 bit) */
/* Only the ecx output of rdtscp is captured; eax and edx are listed as
 * clobbers and their values are discarded. */
72 asm volatile ("rdtscp" : "=c"(rdtscp_ecx) : : "eax", "edx");
73 if (read_msr(MSR_TSC_AUX) != rdtscp_ecx) {
74 printk("Broken rdtscp detected! Rebuild without CONFIG_FAST_COREID\n");
77 /* note this panic may think it is not core 0, and core 0 might not have
78 * an issue (seems random) */
/* The same barrier is handed to the MTRR setup and then waited on here. */
82 setup_default_mtrrs(&generic_barrier);
84 waiton_barrier(&generic_barrier);
87 // this needs to be set in smp_entry too...
/* Physical address of the page the AP boot code is copied into (it is passed
 * to KADDR() and pa2page() below). */
88 #define trampoline_pg 0x00001000UL
/* Symbols belonging to the AP boot code. The bytes from smp_entry up to
 * smp_entry_end are copied to trampoline_pg; the boot lock and the semaphore
 * are then addressed by their offset from smp_entry inside that copy. */
89 extern char (SNT SREADONLY smp_entry)[];
90 extern char (SNT SREADONLY smp_entry_end)[];
91 extern char (SNT SREADONLY smp_boot_lock)[];
92 extern char (SNT SREADONLY smp_semaphore)[];
/* Returns the address of the smp_semaphore word inside the trampoline copy:
 * its offset from smp_entry, rebased onto trampoline_pg. The result is used
 * as a 16-bit counter by the caller. */
94 static inline uint16_t *get_smp_semaphore()
96 return (uint16_t *)(smp_semaphore - smp_entry + trampoline_pg);
/* Spins on the boot lock. The lock word is smp_boot_lock inside the
 * trampoline copy, located by its offset from smp_entry rebased onto
 * trampoline_pg (same scheme as get_smp_semaphore). */
99 static void __spin_bootlock_raw(void)
101 uint16_t *bootlock = (uint16_t*)(smp_boot_lock - smp_entry + trampoline_pg);
102 /* Same lock code as in smp_entry */
/* The asm loops back to local label 1 while the comparison fails (jne 1b).
 * eax, the flags and memory are declared as clobbered. */
103 asm volatile ("movw $1, %%ax; "
107 "jne 1b;" : : "m"(*bootlock) : "eax", "cc", "memory");
110 /* hw_coreid_lookup will get packed, but keep its hw values.
111 * os_coreid_lookup will remain sparse, but its values will be consecutive.
112 * for both arrays, -1 means an empty slot. hw_step tracks the next valid entry
113 * in hw_coreid_lookup, jumping over gaps of -1's. */
114 static void smp_remap_coreids(void)
116 for (int i = 0, hw_step = 0; i < num_cpus; i++, hw_step++) {
/* Slot i is empty: find the next populated entry and pull it down here. */
117 if (hw_coreid_lookup[i] == -1) {
118 while (hw_coreid_lookup[hw_step] == -1) {
/* Reaching the end of the table means fewer populated entries exist than
 * num_cpus claims. */
120 if (hw_step == MAX_NUM_CPUS)
121 panic("Mismatch in num_cpus and hw_step");
/* Move the hw id into slot i, vacate its old slot, and record that the old
 * index (the hw id) now maps to OS coreid i. */
123 hw_coreid_lookup[i] = hw_coreid_lookup[hw_step];
124 hw_coreid_lookup[hw_step] = -1;
125 os_coreid_lookup[hw_step] = i;
132 /* set core0's mappings */
133 assert(lapic_get_id() == 0);
134 os_coreid_lookup[0] = 0;
135 hw_coreid_lookup[0] = 0;
138 // NEED TO GRAB A LOWMEM FREE PAGE FOR AP BOOTUP CODE
139 // page1 (2nd page) is reserved, hardcoded in pmap.c
140 memset(KADDR(trampoline_pg), 0, PGSIZE);
141 memcpy(KADDR(trampoline_pg), (void *COUNT(PGSIZE))TC(smp_entry),
142 smp_entry_end - smp_entry);
144 /* 64 bit already has the tramp pg mapped (1 GB of lowmem) */
145 #ifndef CONFIG_X86_64
146 // This mapping allows access to the trampoline with paging on and off
148 page_insert(boot_pgdir, pa2page(trampoline_pg), (void*SNT)trampoline_pg, PTE_W);
151 // Allocate a stack for the cores starting up. One for all, must share
152 if (kpage_alloc(&smp_stack))
153 panic("No memory for SMP boot stack!");
154 smp_stack_top = SINIT((uintptr_t)(page2kva(smp_stack) + PGSIZE));
156 // Start the IPI process (INIT, wait, SIPI, wait, SIPI, wait)
158 // SDM 3A is a little wonky wrt the proper delays. These are my best guess.
161 send_startup_ipi(0x01);
162 /* BOCHS does not like this second SIPI.
165 send_startup_ipi(0x01);
169 // Each core will also increment smp_semaphore, and decrement when it is done,
170 // all in smp_entry. Its purpose is to keep Core0 from competing for the
171 // smp_boot_lock. So long as one AP increments the sem before the final
172 // LAPIC timer goes off, all available cores will be initialized.
173 while (*get_smp_semaphore())
176 // From here on, no other cores are coming up. Grab the lock to ensure it.
177 // Another core could be in its prelock phase and be trying to grab the lock
179 // The lock exists on the trampoline, so it can be grabbed right away in
180 // real mode. If core0 wins the race and blocks other CPUs from coming up
181 // it can crash the machine if the other cores are allowed to proceed with
182 // booting. Specifically, it's when they turn on paging and have that temp
183 // mapping pulled out from under them. Now, if a core loses, it will spin
184 // on the trampoline (which we must be careful to not deallocate)
185 __spin_bootlock_raw();
186 printk("Number of Cores Detected: %d\n", num_cpus);
187 #ifdef CONFIG_DISABLE_SMT
188 assert(!(num_cpus % 2));
189 printk("Using only %d Idlecores (SMT Disabled)\n", num_cpus >> 1);
190 #endif /* CONFIG_DISABLE_SMT */
193 /* cleans up the trampoline page, and any other low boot mem mappings */
194 x86_cleanup_bootmem();
195 // It had a refcount of 2 earlier, so we need to dec once more to free it
196 // but only if all cores are in (or we reset / reinit those that failed)
197 // TODO after we parse ACPI tables
198 if (num_cpus == 8) // TODO - ghetto coded for our 8 way SMPs
199 page_decref(pa2page(trampoline_pg));
200 // Dealloc the temp shared stack
201 page_decref(smp_stack);
203 // Set up the generic remote function call facility
204 init_smp_call_function();
206 /* Final core initialization */
207 init_barrier(&generic_barrier, num_cpus);
208 /* This will break the cores out of their hlt in smp_entry.S */
209 send_broadcast_ipi(254);
210 smp_final_core_init(); /* need to init ourselves as well */
213 /* This is called from smp_entry by each core to finish the core bootstrapping.
214 * There is a spinlock around this entire function in smp_entry, for a few
215 * reasons, the most important being that all cores use the same stack when
 * they start up.
218 * Do not use per_cpu_info in here. Do whatever you need in smp_percpu_init().
// Per-core bring-up, entered from smp_entry. Records identity coreid
// mappings for this core, gets a kernel stack, builds and loads a private
// GDT and TSS, loads the shared IDT, configures LINT0 and the logical APIC
// id. Returns the top of the new kernel stack for smp_entry.S to load.
220 uintptr_t smp_main(void)
223 // Print some diagnostics. Uncomment if there're issues.
// NOTE(review): the line above implies these prints are normally disabled,
// but no comment markers are visible around them here -- confirm.
224 cprintf("Good morning Vietnam!\n");
225 cprintf("This core's Default APIC ID: 0x%08x\n", lapic_get_default_id());
226 cprintf("This core's Current APIC ID: 0x%08x\n", lapic_get_id());
227 if (read_msr(IA32_APIC_BASE) & 0x00000100)
228 cprintf("I am the Boot Strap Processor\n");
230 cprintf("I am an Application Processor\n");
231 cprintf("Num_Cpus: %d\n\n", num_cpus);
233 /* set up initial mappings. core0 will adjust it later */
// Both tables are indexed by the LAPIC id and start out as identity maps.
// NOTE(review): my_hw_id is used as an index with no visible bounds check.
234 unsigned long my_hw_id = lapic_get_id();
235 os_coreid_lookup[my_hw_id] = my_hw_id;
236 hw_coreid_lookup[my_hw_id] = my_hw_id;
238 // Get a per-core kernel stack
239 uintptr_t my_stack_top = get_kstack();
241 /* This blob is the GDT, the GDT PD, and the TSS. */
// In memory the order is: taskstate_t first, then the pseudodesc_t, then
// SEG_COUNT segdesc_t entries (see the pointer setup just below).
242 unsigned int blob_size = sizeof(segdesc_t) * SEG_COUNT +
243 sizeof(pseudodesc_t) + sizeof(taskstate_t);
244 /* TODO: don't use kmalloc - might have issues in the future */
// NOTE(review): the kmalloc result is used without a NULL check.
245 void *gdt_etc = kmalloc(blob_size, 0); /* we'll never free this btw */
246 taskstate_t *my_ts = gdt_etc;
// Arithmetic on void pointers (GNU C extension) steps in bytes.
247 pseudodesc_t *my_gdt_pd = (void*)my_ts + sizeof(taskstate_t);
248 segdesc_t *my_gdt = (void*)my_gdt_pd + sizeof(pseudodesc_t);
249 /* This is a bit ghetto: we need to communicate our GDT and TSS's location
250 * to smp_percpu_init(), but we can't trust our coreid (since they haven't
251 * been remapped yet (so we can't write it directly to per_cpu_info)). So
252 * we use the bottom of the stack page... */
// The blob address is stored in the word kstack_bottom_addr() returns for
// this stack; __arch_pcpu_init reads it back from the same place.
253 *kstack_bottom_addr(my_stack_top) = (uintptr_t)gdt_etc;
255 // Build and load the gdt / gdt_pd
// Start from a copy of the global gdt; the pseudo-descriptor holds the
// table size minus one and the address of the private copy.
256 memcpy(my_gdt, gdt, sizeof(segdesc_t)*SEG_COUNT);
257 *my_gdt_pd = (pseudodesc_t) {
258 sizeof(segdesc_t)*SEG_COUNT - 1, (uintptr_t) my_gdt };
259 asm volatile("lgdt %0" : : "m"(*my_gdt_pd));
261 /* Set up our kernel stack when changing rings */
262 x86_set_stacktop_tss(my_ts, my_stack_top);
263 // Initialize the TSS field of my_gdt.
// GD_TSS >> 3 converts the selector value into an index into the GDT.
264 syssegdesc_t *ts_slot = (syssegdesc_t*)&my_gdt[GD_TSS >> 3];
265 *ts_slot = (syssegdesc_t)SEG_SYS_SMALL(STS_T32A, (uintptr_t)my_ts,
266 sizeof(taskstate_t), 0);
270 // Loads the same IDT used by the other cores
271 asm volatile("lidt %0" : : "m"(idt_pd));
274 // set LINT0 to receive ExtINTs (KVM's default). At reset they are 0x1000.
275 write_mmreg32(LAPIC_LVT_LINT0, 0x700);
276 // mask it to shut it up for now. Doesn't seem to matter yet, since both
277 // KVM and Bochs seem to only route the PIC to core0.
278 mask_lapic_lvt(LAPIC_LVT_LINT0);
279 // and then turn it on
282 // set a default logical id for now
283 lapic_set_logid(lapic_get_id());
285 return my_stack_top; // will be loaded in smp_entry.S
288 /* Perform any initialization needed by per_cpu_info. Make sure every core
289 * calls this at some point in the smp_boot process. If you don't smp_boot, you
290 * must still call this for core 0. This must NOT be called from smp_main,
291 * since it relies on the kernel stack pointer to find the gdt. Be careful not
292 * to call it on too deep of a stack frame. */
293 void __arch_pcpu_init(uint32_t coreid)
295 uintptr_t *my_stack_bot;
296 struct per_cpu_info *pcpui = &per_cpu_info[coreid];
298 /* Flushes any potentially old mappings from smp_boot() (note the page table
301 /* Ensure the FPU units are initialized */
302 asm volatile ("fninit");
304 /* Enable SSE instructions. We might have to do more, like masking certain
305 * flags or exceptions in the MXCSR, or at least handle the SIMD exceptions.
306 * We don't do it for FP yet either, so YMMV. */
307 lcr4(rcr4() | CR4_OSFXSR | CR4_OSXMME);
309 /* core 0 sets up via the global gdt symbol */
314 my_stack_bot = kstack_bottom_addr(ROUNDUP(read_sp() - 1, PGSIZE));
315 pcpui->tss = (taskstate_t*)(*my_stack_bot);
316 pcpui->gdt = (segdesc_t*)(*my_stack_bot +
317 sizeof(taskstate_t) + sizeof(pseudodesc_t));
320 /* Core 0 set up the base MSRs in entry64 */
322 assert(read_msr(MSR_GS_BASE) == (uint64_t)pcpui);
323 assert(read_msr(MSR_KERN_GS_BASE) == (uint64_t)pcpui);
325 write_msr(MSR_GS_BASE, (uint64_t)pcpui);
326 write_msr(MSR_KERN_GS_BASE, (uint64_t)pcpui);
329 /* Don't try setting up til after setting GS */
330 x86_sysenter_init(x86_get_stacktop_tss(pcpui->tss));
331 /* need to init perfctr before potentially using it in timer handler */