#include <arch/console.h>
#include <arch/apic.h>
#include <ros/common.h>
/* Interrupt descriptor table. (Must be built at run time because
 * shifted function addresses can't be represented in relocation records.) */
// Aligned on an 8 byte boundary (SDM V3A 5-13)
gatedesc_t __attribute__ ((aligned (8))) (RO idt)[256] = { { 0 } };
pseudodesc_t RO idt_pd = {
	sizeof(idt) - 1, (uint32_t) idt
/* Global handler table, used by core0 (for now). Allows the registration
 * of functions to be called when servicing an interrupt. Other cores
 * can set up their own later. */
#pragma cilnoremove("iht_lock")
handler_t TP(TV(t)) LCKD(&iht_lock) (RO interrupt_handlers)[NUM_INTERRUPT_HANDLERS];
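
/* Example (a sketch, not part of the build): hooking a vector in the global
 * table. The vector I_TESTING and the handler below are hypothetical, and the
 * isr signature is inferred from how irq_handler() invokes it; registration
 * goes through register_interrupt_handler(), defined later in this file.
 *
 *	static void test_isr(trapframe_t *tf, void *data)
 *	{
 *		cprintf("Test interrupt on core %d\n", core_id());
 *	}
 *
 *	register_interrupt_handler(interrupt_handlers, I_TESTING, test_isr, 0);
 */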
static const char *NTS trapname(int trapno)
	// zra: excnames is SREADONLY because Ivy doesn't trust const
	static const char *NT const (RO excnames)[] = {
		"Non-Maskable Interrupt",
		"BOUND Range Exceeded",
		"Device Not Available",
		"Coprocessor Segment Overrun",
		"Segment Not Present",
		"x87 FPU Floating-Point Error",
		"SIMD Floating-Point Exception"
	if (trapno < sizeof(excnames)/sizeof(excnames[0]))
		return excnames[trapno];
	if (trapno == T_SYSCALL)
	return "(unknown trap)";
	extern segdesc_t (RO gdt)[];

	// This table is made in trapentry.S by each macro in that file.
	// It is laid out such that the ith entry is the ith trap handler's
	// (uint32_t) trap addr, then its (uint32_t) trap number.
	struct trapinfo { uint32_t trapaddr; uint32_t trapnumber; };
	extern struct trapinfo (BND(__this,trap_tbl_end) RO trap_tbl)[];
	extern struct trapinfo (SNT RO trap_tbl_end)[];
	int i, trap_tbl_size = trap_tbl_end - trap_tbl;
	extern void ISR_default(void);
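
	/* Conceptually, the table assembled in trapentry.S looks like the sketch
	 * below. The ISR_* names and vectors are illustrative only; the layout
	 * (handler address, then trap number, with a final catch-all entry whose
	 * fake number 500 is out of range) comes from the comments above and the
	 * loops below.
	 *
	 *	struct trapinfo trap_tbl[] = {
	 *		{ (uint32_t)&ISR_divide_error, T_DIVIDE },
	 *		{ (uint32_t)&ISR_debug_exception, T_DEBUG },
	 *		...
	 *		{ (uint32_t)&ISR_default, 500 },	// sentinel, skipped below
	 *	};
	 */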
	// Set all entries to the default handler, to catch everything
	for(i = 0; i < 256; i++)
		ROSETGATE(idt[i], 0, GD_KT, &ISR_default, 0);

	// Set all entries that have real trap handlers.
	// We need to stop short of the last one, since the last is the default
	// handler with a fake interrupt number (500) that is out of bounds of
	// the IDT.
	// If we set these to trap gates, be sure to handle the IRQs separately,
	// and we might need to break our pretty tables.
	for(i = 0; i < trap_tbl_size - 1; i++)
		ROSETGATE(idt[trap_tbl[i].trapnumber], 0, GD_KT, trap_tbl[i].trapaddr, 0);
	// turn on syscall handling and other user-accessible ints
	// DPL 3 means this can be triggered by the int instruction
	// STS_TG32 sets the IDT type to a Trap Gate (interrupts enabled)
	idt[T_SYSCALL].gd_dpl = SINIT(3);
	idt[T_SYSCALL].gd_type = SINIT(STS_TG32);
	idt[T_BRKPT].gd_dpl = SINIT(3);
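
	/* For reference (a sketch of the user side, not kernel code): with DPL 3
	 * set above, a user program can reach this gate with the int instruction.
	 * The register convention matches what trap_dispatch() below unpacks
	 * (eax = syscall number, then edx, ecx, ebx, edi, esi). Here num and
	 * a1..a5 stand for the syscall number and arguments; the surrounding
	 * userspace library code is assumed, not shown.
	 *
	 *	intreg_t ret;
	 *	asm volatile("int %1"
	 *	             : "=a" (ret)
	 *	             : "i" (T_SYSCALL), "a" (num), "d" (a1), "c" (a2),
	 *	               "b" (a3), "D" (a4), "S" (a5)
	 *	             : "cc", "memory");
	 */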
	// Set up a TSS so that we get the right stack
	// when we trap to the kernel.
	ts.ts_esp0 = SINIT(KSTACKTOP);
	ts.ts_ss0 = SINIT(GD_KD);

	// Initialize the TSS field of the GDT.
	SEG16ROINIT(gdt[GD_TSS >> 3], STS_T32A, (uint32_t)(&ts), sizeof(taskstate_t), 0);
	//gdt[GD_TSS >> 3] = (segdesc_t)SEG16(STS_T32A, (uint32_t) (&ts),
	//                                    sizeof(taskstate_t), 0);
	gdt[GD_TSS >> 3].sd_s = SINIT(0);
	asm volatile("lidt idt_pd");

	// This will go away when we start using the IOAPIC properly
	// Set LINT0 to receive ExtINTs (KVM's default). At reset, LVTs are 0x10000 (masked).
	write_mmreg32(LAPIC_LVT_LINT0, 0x700);
	// mask it to shut it up for now
	mask_lapic_lvt(LAPIC_LVT_LINT0);
print_regs(push_regs_t *regs)
	cprintf("  edi  0x%08x\n", regs->reg_edi);
	cprintf("  esi  0x%08x\n", regs->reg_esi);
	cprintf("  ebp  0x%08x\n", regs->reg_ebp);
	cprintf("  oesp 0x%08x\n", regs->reg_oesp);
	cprintf("  ebx  0x%08x\n", regs->reg_ebx);
	cprintf("  edx  0x%08x\n", regs->reg_edx);
	cprintf("  ecx  0x%08x\n", regs->reg_ecx);
	cprintf("  eax  0x%08x\n", regs->reg_eax);
print_trapframe(trapframe_t *tf)
	static spinlock_t ptf_lock;

	spin_lock_irqsave(&ptf_lock);
	printk("TRAP frame at %p on core %d\n", tf, core_id());
	print_regs(&tf->tf_regs);
	printk("  gs   0x----%04x\n", tf->tf_gs);
	printk("  fs   0x----%04x\n", tf->tf_fs);
	printk("  es   0x----%04x\n", tf->tf_es);
	printk("  ds   0x----%04x\n", tf->tf_ds);
	printk("  trap 0x%08x %s\n", tf->tf_trapno, trapname(tf->tf_trapno));
	printk("  err  0x%08x\n", tf->tf_err);
	printk("  eip  0x%08x\n", tf->tf_eip);
	printk("  cs   0x----%04x\n", tf->tf_cs);
	printk("  flag 0x%08x\n", tf->tf_eflags);
	/* Prevents us from thinking these mean something for nested interrupts. */
	if (tf->tf_cs != GD_KT) {
		printk("  esp  0x%08x\n", tf->tf_esp);
		printk("  ss   0x----%04x\n", tf->tf_ss);
	spin_unlock_irqsave(&ptf_lock);
trap_dispatch(trapframe_t *tf)
	// Handle processor exceptions.
	switch(tf->tf_trapno) {
			page_fault_handler(tf);
			// check for userspace, for now
			assert(tf->tf_cs != GD_KT);

			// syscall code wants an edible reference for current
			proc_incref(current, 1);
			tf->tf_regs.reg_eax =
				syscall(current, tf->tf_regs.reg_eax, tf->tf_regs.reg_edx,
				        tf->tf_regs.reg_ecx, tf->tf_regs.reg_ebx,
				        tf->tf_regs.reg_edi, tf->tf_regs.reg_esi);
			proc_decref(current, 1);
			// Unexpected trap: The user process or the kernel has a bug.
			if (tf->tf_cs == GD_KT)
				panic("Damn Damn! Unhandled trap in the kernel!");
			warn("Unexpected trap from userspace");
			proc_incref(current, 1);
			proc_destroy(current);
env_push_ancillary_state(env_t* e)
	// TODO: (HSS) handle silly state (don't really want this per-process)
	// Here's where you'll save FP/MMX/XMM regs

env_pop_ancillary_state(env_t* e)
	// Here's where you'll restore FP/MMX/XMM regs
trap(trapframe_t *tf)
	//printk("Incoming TRAP frame on core %d at %p\n", core_id(), tf);

	/* Note we are not preemptively saving the TF in the env_tf. We do maintain
	 * a reference to it in current_tf (a per-cpu pointer).
	 * In general, only save the tf and any silly state once you know it
	 * is necessary (blocking). And only save it in env_tf when you know you
	 * are single core (PROC_RUNNING_S) */

	if ((tf->tf_cs & ~3) != GD_UT && (tf->tf_cs & ~3) != GD_KT) {
		panic("Trapframe with invalid CS!");

	// Dispatch based on what type of trap occurred

	// Return to the current process, which should be runnable.
	proc_startcore(current, tf); // Note the comment in syscall.c
irq_handler(trapframe_t *tf)
	// save a per-core reference to the tf
	// cprintf("Incoming IRQ, ISR: %d on core %d\n", tf->tf_trapno, core_id());
	// merge this with alltraps? other than the EOI... or do the same in all traps

	extern handler_wrapper_t (RO handler_wrappers)[NUM_HANDLER_WRAPPERS];

	// determine the interrupt handler table to use. for now, pick the global
	handler_t TP(TV(t)) LCKD(&iht_lock) * handler_tbl = interrupt_handlers;

	if (handler_tbl[tf->tf_trapno].isr != 0)
		handler_tbl[tf->tf_trapno].isr(tf, handler_tbl[tf->tf_trapno].data);
	// if we're a general-purpose IPI function call, down the cpu_list
	if ((I_SMP_CALL0 <= tf->tf_trapno) && (tf->tf_trapno <= I_SMP_CALL_LAST))
		down_checklist(handler_wrappers[tf->tf_trapno & 0x0f].cpu_list);

	// Send EOI. might want to do this in assembly, and possibly earlier
	// This is set up to work with an old PIC for now
	// Convention is that all IRQs between 32 and 47 are for the PIC.
	// All others are LAPIC (timer, IPIs, perf, non-ExtINT LINTs, etc)
	// For now, only 235-255 are available
	assert(tf->tf_trapno >= 32); // slows us down, but we should never have this

	// Old PIC related code. Should be gone for good, but leaving it just in case.
	if (tf->tf_trapno < 48)
		pic_send_eoi(tf->tf_trapno - PIC1_OFFSET);
register_interrupt_handler(handler_t TP(TV(t)) table[],
                           uint8_t int_num, poly_isr_t handler, TV(t) data)
	table[int_num].isr = handler;
	table[int_num].data = data;
page_fault_handler(trapframe_t *tf)
	// Read processor's CR2 register to find the faulting address

	// Handle kernel-mode page faults.

	// TODO - one day, we'll want to handle this.
	if ((tf->tf_cs & 3) == 0) {
		panic("Page Fault in the Kernel at 0x%08x!", fault_va);

	// We've already handled kernel-mode exceptions, so if we get here,
	// the page fault happened in user mode.
	// Call the environment's page fault upcall, if one exists. Set up a
	// page fault stack frame on the user exception stack (below
	// UXSTACKTOP), then branch to current->env_pgfault_upcall.

	// The page fault upcall might cause another page fault, in which case
	// we branch to the page fault upcall recursively, pushing another
	// page fault stack frame on top of the user exception stack.

	// The trap handler needs one word of scratch space at the top of the
	// trap-time stack in order to return. In the non-recursive case, we
	// don't have to worry about this because the top of the regular user
	// stack is free. In the recursive case, this means we have to leave
	// an extra word between the current top of the exception stack and
	// the new stack frame because the exception stack _is_ the trap-time
	// stack.
	// If there's no page fault upcall, the environment didn't allocate a
	// page for its exception stack, or the exception stack overflows,
	// then destroy the environment that caused the fault.

	// user_mem_assert() and env_run() are useful here.
	// To change what the user environment runs, modify 'current->env_tf'
	// (the 'tf' variable points at 'current->env_tf').

	// LAB 4: Your code here.

	// TODO: compute correct access type
	if (handle_page_fault(current, fault_va, PROT_READ))
		// Destroy the environment that caused the fault.
		cprintf("[%08x] user fault va %08x ip %08x from core %d\n",
		        current->pid, fault_va, tf->tf_eip, core_id());
		proc_incref(current, 1);
		proc_destroy(current);
void sysenter_init(void)
	write_msr(MSR_IA32_SYSENTER_CS, GD_KT);
	write_msr(MSR_IA32_SYSENTER_ESP, ts.ts_esp0);
	write_msr(MSR_IA32_SYSENTER_EIP, (uint32_t) &sysenter_handler);
/* This is called from sysenter's asm, with the tf on the kernel stack. */
void sysenter_callwrapper(struct trapframe *tf)
	// save a per-core reference to the tf

	// syscall code wants an edible reference for current
	proc_incref(current, 1);
	tf->tf_regs.reg_eax = (intreg_t) syscall(current,
	proc_decref(current, 1);
	/* Careful here - we need to make sure that this current is the right
	 * process, which could be weird if the syscall blocked. It would need to
	 * restore the proper value in current before returning to here.
	 * Likewise, tf could be pointing to random gibberish. */
	proc_startcore(current, tf);
struct kmem_cache *kernel_msg_cache;
void kernel_msg_init(void)
	kernel_msg_cache = kmem_cache_create("kernel_msgs",
	                   sizeof(struct kernel_message), HW_CACHE_ALIGN, 0, 0, 0);
uint32_t send_kernel_message(uint32_t dst, amr_t pc, TV(a0t) arg0, TV(a1t) arg1,
                             TV(a2t) arg2, int type)
	kernel_message_t *k_msg;

	// note this will be freed on the destination core
	k_msg = (kernel_message_t *CT(1))TC(kmem_cache_alloc(kernel_msg_cache, 0));
	k_msg->srcid = core_id();

			spin_lock_irqsave(&per_cpu_info[dst].immed_amsg_lock);
			STAILQ_INSERT_TAIL(&per_cpu_info[dst].immed_amsgs, k_msg, link);
			spin_unlock_irqsave(&per_cpu_info[dst].immed_amsg_lock);

			spin_lock_irqsave(&per_cpu_info[dst].routine_amsg_lock);
			STAILQ_INSERT_TAIL(&per_cpu_info[dst].routine_amsgs, k_msg, link);
			spin_unlock_irqsave(&per_cpu_info[dst].routine_amsg_lock);

			panic("Unknown type of kernel message!");

	// since we touched memory the other core will touch (the lock), we don't
	send_ipi(get_hw_coreid(dst), I_KERNEL_MSG);
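
/* Example usage (a sketch): sending a routine kernel message to another core.
 * The handler name, the arguments, and the KMSG_ROUTINE type constant are
 * assumptions for illustration; the handler signature matches how pc is
 * invoked in __kernel_message() below (tf, srcid, arg0, arg1, arg2).
 *
 *	static void __test_kmsg(trapframe_t *tf, uint32_t srcid, TV(a0t) a0,
 *	                        TV(a1t) a1, TV(a2t) a2)
 *	{
 *		printk("kmsg from core %d, running on core %d\n", srcid, core_id());
 *	}
 *
 *	// Queued on core 3's routine list; it runs when core 3 handles
 *	// I_KERNEL_MSG or calls process_routine_kmsg().
 *	send_kernel_message(3, __test_kmsg, 0, 0, 0, KMSG_ROUTINE);
 */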
/* Helper function. Returns 0 if the list was empty. */
static kernel_message_t *get_next_amsg(struct kernel_msg_list *list_head,
                                       spinlock_t *list_lock)
	kernel_message_t *k_msg;
	spin_lock_irqsave(list_lock);
	k_msg = STAILQ_FIRST(list_head);
		STAILQ_REMOVE_HEAD(list_head, link);
	spin_unlock_irqsave(list_lock);
/* Kernel message handler. Extensive documentation is in
 * Documentation/kernel_messages.txt.
 *
 * In general: this processes immediate messages, then routine messages.
 * Routine messages might not return (__startcore, etc), so we need to be
 * careful about a few things.
 *
 * Note that all of this happens from interrupt context, and interrupts are
 * currently disabled for this gate. Interrupts need to be disabled so that the
 * self-ipi doesn't preempt the execution of this kernel message. */
void __kernel_message(struct trapframe *tf)
	per_cpu_info_t *myinfo = &per_cpu_info[core_id()];
	kernel_message_t msg_cp, *k_msg;

	while (1) { // will break out when there are no more messages
		/* Try to get an immediate message. Exec and free it. */
		k_msg = get_next_amsg(&myinfo->immed_amsgs, &myinfo->immed_amsg_lock);
			k_msg->pc(tf, k_msg->srcid, k_msg->arg0, k_msg->arg1, k_msg->arg2);
			kmem_cache_free(kernel_msg_cache, (void*)k_msg);
		} else { // no immediate, might be a routine
				return; // don't execute routine msgs if we were in the kernel
			k_msg = get_next_amsg(&myinfo->routine_amsgs,
			                      &myinfo->routine_amsg_lock);
			if (!k_msg) // no routines either
			/* copy in, and then free, in case we don't return */
			kmem_cache_free(kernel_msg_cache, (void*)k_msg);
			/* make sure an IPI is pending if we have more work */
			/* technically, we don't need to lock when checking */
			if (!STAILQ_EMPTY(&myinfo->routine_amsgs) &&
			    !ipi_is_pending(I_KERNEL_MSG))
				send_self_ipi(I_KERNEL_MSG);
			/* Execute the kernel message */
			msg_cp.pc(tf, msg_cp.srcid, msg_cp.arg0, msg_cp.arg1, msg_cp.arg2);
/* Runs any outstanding routine kernel messages from within the kernel. Will
 * make sure immediates still run first (or when they arrive, if processing a
 * bunch of these messages). This will disable interrupts, and restore them to
 * whatever state you left them in. */
void process_routine_kmsg(void)
	per_cpu_info_t *myinfo = &per_cpu_info[core_id()];
	kernel_message_t msg_cp, *k_msg;
	int8_t irq_state = 0;

	disable_irqsave(&irq_state);
		/* Normally, we want ints disabled, so we don't have an empty self-ipi
		 * for every routine message (imagine a long list of routines). But we
		 * do want immediates to run ahead of routines. This enabling should
		 * work (might not in some shitty VMs). Also note we can receive an
		 * extra self-ipi for routine messages before we turn off irqs again.
		 * Not a big deal, since we will process it right away.
		 * TODO: consider calling __kernel_message() here. */
		if (!STAILQ_EMPTY(&myinfo->immed_amsgs)) {
		k_msg = get_next_amsg(&myinfo->routine_amsgs,
		                      &myinfo->routine_amsg_lock);
			enable_irqsave(&irq_state);
		/* copy in, and then free, in case we don't return */
		kmem_cache_free(kernel_msg_cache, (void*)k_msg);
		/* make sure an IPI is pending if we have more work */
		if (!STAILQ_EMPTY(&myinfo->routine_amsgs) &&
		    !ipi_is_pending(I_KERNEL_MSG))
			send_self_ipi(I_KERNEL_MSG);
		/* Execute the kernel message */
		msg_cp.pc(0, msg_cp.srcid, msg_cp.arg0, msg_cp.arg1, msg_cp.arg2);