return "(unknown trap)";
}
+/* Set stacktop for the current core to be the stack the kernel will start on
+ * when trapping/interrupting from userspace. Don't use this until after
+ * smp_percpu_init(). We can probably get the TSS by reading the task register
+ * and then the GDT. Still, it's a pain. */
+void set_stack_top(uintptr_t stacktop)
+{
+ struct per_cpu_info *pcpu = &per_cpu_info[core_id()];
+ /* No need to reload the task register, this takes effect immediately */
+ pcpu->tss->ts_esp0 = stacktop;
+ /* Also need to make sure sysenters come in correctly */
+ write_msr(MSR_IA32_SYSENTER_ESP, stacktop);
+}
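+
+/* Usage sketch (illustrative, not from this patch): once a core is running on
+ * its per-core kernel stack, it can record the top of that stack so traps and
+ * sysenters from userspace land on it.  new_stack and KSTKSIZE are assumed
+ * names for the stack's base and size in this tree.
+ *
+ *	set_stack_top((uintptr_t)new_stack + KSTKSIZE);
+ */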
-void
-idt_init(void)
+/* Note the check implies we are only on a one-page stack (or the first page) */
+uintptr_t get_stack_top(void)
+{
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ uintptr_t stacktop;
+ /* so we can check this in interrupt handlers (before smp_boot()) */
+ if (!pcpui->tss)
+ return ROUNDUP(read_esp(), PGSIZE);
+ stacktop = pcpui->tss->ts_esp0;
+ if (stacktop != ROUNDUP(read_esp(), PGSIZE))
+ panic("Bad stacktop: %08p esp one is %08p\n", stacktop,
+ ROUNDUP(read_esp(), PGSIZE));
+ return stacktop;
+}
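+
+/* Sanity-check sketch: given the one-page assumption above, kernel code on the
+ * per-core stack could assert it agrees with the TSS:
+ *
+ *	assert(get_stack_top() == ROUNDUP(read_esp(), PGSIZE));
+ */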
+
+/* Starts running the current TF, just using ret. */
+void pop_kernel_tf(struct trapframe *tf)
+{
+ asm volatile ("movl %1,%%esp; " /* move to future stack */
+ "pushl %2; " /* push cs */
+ "movl %0,%%esp; " /* move to TF */
+ "addl $0x20,%%esp; " /* move to tf_gs slot */
+ "movl %1,(%%esp); " /* write future esp */
+ "subl $0x20,%%esp; " /* move back to tf start */
+ "popal; " /* restore regs */
+ "popl %%esp; " /* set stack ptr */
+ "subl $0x4,%%esp; " /* jump down past CS */
+ "ret " /* return to the EIP */
+ :
+ : "g"(tf), "r"(tf->tf_esp), "r"(tf->tf_eip) : "memory");
+ panic("ret failed"); /* mostly to placate your mom */
+}
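+
+/* Usage sketch (hypothetical): to send the kernel off to a function that never
+ * returns, on a chosen stack, build a minimal TF and pop it.  some_func and
+ * some_stacktop are placeholder names.
+ *
+ *	struct trapframe tf = {0};
+ *	tf.tf_eip = (uintptr_t)some_func;
+ *	tf.tf_esp = some_stacktop;
+ *	pop_kernel_tf(&tf);	// never returns
+ */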
+
+void idt_init(void)
{
extern segdesc_t (RO gdt)[];
idt[T_SYSCALL].gd_type = SINIT(STS_TG32);
idt[T_BRKPT].gd_dpl = SINIT(3);
- // Setup a TSS so that we get the right stack
- // when we trap to the kernel.
- ts.ts_esp0 = SINIT(KSTACKTOP);
+ /* Set up a TSS so that we get the right stack when we trap to the kernel.
+ * We need to use the KVA for stacktop, and not the memlayout virtual
+ * address, so we can free it later (and check for other bugs). */
+ pte_t *pte = pgdir_walk(boot_pgdir, (void*)KSTACKTOP - PGSIZE, 0);
+ uintptr_t stacktop_kva = (uintptr_t)ppn2kva(PTE2PPN(*pte)) + PGSIZE;
+ ts.ts_esp0 = stacktop_kva;
ts.ts_ss0 = SINIT(GD_KD);
// Initialize the TSS field of the gdt.
mask_lapic_lvt(LAPIC_LVT_LINT0);
// and turn it on
lapic_enable();
+ /* register the generic timer_interrupt() handler for the per-core timers */
+ register_interrupt_handler(interrupt_handlers, LAPIC_TIMER_DEFAULT_VECTOR,
+ timer_interrupt, NULL);
}
void
case T_SYSCALL:
// check for userspace, for now
assert(tf->tf_cs != GD_KT);
-
- // syscall code wants an edible reference for current
- proc_incref(current, 1);
- tf->tf_regs.reg_eax =
- syscall(current, tf->tf_regs.reg_eax, tf->tf_regs.reg_edx,
- tf->tf_regs.reg_ecx, tf->tf_regs.reg_ebx,
- tf->tf_regs.reg_edi, tf->tf_regs.reg_esi);
- proc_decref(current, 1);
+ /* Set up and run the async calls */
+ prep_syscalls(current, (struct syscall*)tf->tf_regs.reg_eax,
+ tf->tf_regs.reg_edx);
break;
default:
// Unexpected trap: The user process or the kernel has a bug.
warn("Unexpected trap from userspace");
proc_incref(current, 1);
proc_destroy(current);
+ assert(0);
return;
}
}
// Here's where you'll restore FP/MMX/XMM regs
}
-void
-trap(trapframe_t *tf)
+/* Helper. For now, this copies out the TF to pcpui, and sets the tf to use it.
+ * Eventually, we ought to do this in trapentry.S */
+static void set_current_tf(struct per_cpu_info *pcpui, struct trapframe **tf)
{
- //printk("Incoming TRAP frame on core %d at %p\n", core_id(), tf);
+ pcpui->actual_tf = **tf;
+ pcpui->cur_tf = &pcpui->actual_tf;
+ *tf = &pcpui->actual_tf;
+}
- /* Note we are not preemptively saving the TF in the env_tf. We do maintain
- * a reference to it in current_tf (a per-cpu pointer).
- * In general, only save the tf and any silly state once you know it
- * is necessary (blocking). And only save it in env_tf when you know you
- * are single core (PROC_RUNNING_S) */
- set_current_tf(tf);
+void trap(struct trapframe *tf)
+{
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ /* Copy out the TF for now, set tf to point to it. */
+ if (!in_kernel(tf))
+ set_current_tf(pcpui, &tf);
+ printd("Incoming TRAP %d on core %d, TF at %p\n", tf->tf_trapno, core_id(),
+ tf);
if ((tf->tf_cs & ~3) != GD_UT && (tf->tf_cs & ~3) != GD_KT) {
print_trapframe(tf);
panic("Trapframe with invalid CS!");
}
-
- // Dispatch based on what type of trap occurred
trap_dispatch(tf);
-
- // Return to the current process, which should be runnable.
- proc_startcore(current, tf); // Note the comment in syscall.c
+ /* Return to the current process, which should be runnable. If we're the
+ * kernel, we should just return naturally. Note that current and tf need
+ * to still be okay (might not be after blocking) */
+ if (in_kernel(tf))
+ return; /* TODO: think about this, might want a helper instead. */
+ proc_restartcore();
+ assert(0);
}
-void
-irq_handler(trapframe_t *tf)
+void irq_handler(struct trapframe *tf)
{
- // save a per-core reference to the tf
- set_current_tf(tf);
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ /* Copy out the TF for now, set tf to point to it. */
+ if (!in_kernel(tf))
+ set_current_tf(pcpui, &tf);
+
//if (core_id())
- // cprintf("Incoming IRQ, ISR: %d on core %d\n", tf->tf_trapno, core_id());
- // merge this with alltraps? other than the EOI... or do the same in all traps
+ printd("Incoming IRQ, ISR: %d on core %d\n", tf->tf_trapno, core_id());
extern handler_wrapper_t (RO handler_wrappers)[NUM_HANDLER_WRAPPERS];
// All others are LAPIC (timer, IPIs, perf, non-ExtINT LINTS, etc)
// For now, only 235-255 are available
assert(tf->tf_trapno >= 32); // slows us down, but we should never have this
-
+
+#ifdef __CONFIG_ENABLE_MPTABLES__
+ /* TODO: this should be for any IOAPIC EOI, not just MPTABLES */
lapic_send_eoi();
-
- /*
+#else
//Old PIC related code. Should be gone for good, but leaving it just in case.
if (tf->tf_trapno < 48)
pic_send_eoi(tf->tf_trapno - PIC1_OFFSET);
else
lapic_send_eoi();
- */
-
+#endif
+ /* Return to the current process, which should be runnable. If we're the
+ * kernel, we should just return naturally. Note that current and tf need
+ * to still be okay (might not be after blocking) */
+ if (in_kernel(tf))
+ return; /* TODO: think about this, might want a helper instead. */
+ proc_restartcore();
+ assert(0);
}
void
table[int_num].data = data;
}
-void
-page_fault_handler(trapframe_t *tf)
+void page_fault_handler(struct trapframe *tf)
{
- uint32_t fault_va;
-
- // Read processor's CR2 register to find the faulting address
- fault_va = rcr2();
-
- // Handle kernel-mode page faults.
+ uint32_t fault_va = rcr2();
+ int prot = tf->tf_err & PF_ERROR_WRITE ? PROT_WRITE : PROT_READ;
+ int err;
- // TODO - one day, we'll want to handle this.
+ /* TODO - handle kernel page faults */
if ((tf->tf_cs & 3) == 0) {
print_trapframe(tf);
panic("Page Fault in the Kernel at 0x%08x!", fault_va);
}
-
- // We've already handled kernel-mode exceptions, so if we get here,
- // the page fault happened in user mode.
-
- // Call the environment's page fault upcall, if one exists. Set up a
- // page fault stack frame on the user exception stack (below
- // UXSTACKTOP), then branch to current->env_pgfault_upcall.
- //
- // The page fault upcall might cause another page fault, in which case
- // we branch to the page fault upcall recursively, pushing another
- // page fault stack frame on top of the user exception stack.
- //
- // The trap handler needs one word of scratch space at the top of the
- // trap-time stack in order to return. In the non-recursive case, we
- // don't have to worry about this because the top of the regular user
- // stack is free. In the recursive case, this means we have to leave
- // an extra word between the current top of the exception stack and
- // the new stack frame because the exception stack _is_ the trap-time
- // stack.
- //
- // If there's no page fault upcall, the environment didn't allocate a
- // page for its exception stack, or the exception stack overflows,
- // then destroy the environment that caused the fault.
- //
- // Hints:
- // user_mem_assert() and env_run() are useful here.
- // To change what the user environment runs, modify 'current->env_tf'
- // (the 'tf' variable points at 'current->env_tf').
-
- // LAB 4: Your code here.
-
- // TODO: compute correct access type
- if(handle_page_fault(current,fault_va,PROT_READ))
- {
- // Destroy the environment that caused the fault.
- cprintf("[%08x] user fault va %08x ip %08x from core %d\n",
- current->pid, fault_va, tf->tf_eip, core_id());
+ if ((err = handle_page_fault(current, fault_va, prot))) {
+ /* Destroy the faulting process */
+ printk("[%08x] user %s fault va %08x ip %08x on core %d with err %d\n",
+ current->pid, prot & PROT_READ ? "READ" : "WRITE", fault_va,
+ tf->tf_eip, core_id(), err);
print_trapframe(tf);
proc_incref(current, 1);
proc_destroy(current);
+ assert(0);
}
}
/* This is called from sysenter's asm, with the tf on the kernel stack. */
void sysenter_callwrapper(struct trapframe *tf)
{
- // save a per-core reference to the tf
- set_current_tf(tf);
-
- // syscall code wants an edible reference for current
- proc_incref(current, 1);
- tf->tf_regs.reg_eax = (intreg_t) syscall(current,
- tf->tf_regs.reg_eax,
- tf->tf_regs.reg_esi,
- tf->tf_regs.reg_ecx,
- tf->tf_regs.reg_ebx,
- tf->tf_regs.reg_edi,
- 0);
- proc_decref(current, 1);
- /*
- * careful here - we need to make sure that this current is the right
- * process, which could be weird if the syscall blocked. it would need to
- * restore the proper value in current before returning to here.
- * likewise, tf could be pointing to random gibberish.
- */
- proc_startcore(current, tf);
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ /* Copy out the TF for now, set tf to point to it. */
+ if (!in_kernel(tf))
+ set_current_tf(pcpui, &tf);
+
+ if (in_kernel(tf))
+ panic("sysenter from a kernel TF!!");
+ /* Set up and run the async calls */
+ prep_syscalls(current, (struct syscall*)tf->tf_regs.reg_eax,
+ tf->tf_regs.reg_esi);
+ proc_restartcore();
}
-struct kmem_cache *active_msg_cache;
-void active_msg_init(void)
+struct kmem_cache *kernel_msg_cache;
+void kernel_msg_init(void)
{
- active_msg_cache = kmem_cache_create("active_msgs",
- sizeof(struct active_message), HW_CACHE_ALIGN, 0, 0, 0);
+ kernel_msg_cache = kmem_cache_create("kernel_msgs",
+ sizeof(struct kernel_message), HW_CACHE_ALIGN, 0, 0, 0);
}
-uint32_t send_active_message(uint32_t dst, amr_t pc,
- TV(a0t) arg0, TV(a1t) arg1, TV(a2t) arg2)
+uint32_t send_kernel_message(uint32_t dst, amr_t pc, TV(a0t) arg0, TV(a1t) arg1,
+ TV(a2t) arg2, int type)
{
- active_message_t *a_msg;
+ kernel_message_t *k_msg;
assert(pc);
// note this will be freed on the destination core
- a_msg = (active_message_t *CT(1))TC(kmem_cache_alloc(active_msg_cache, 0));
- a_msg->srcid = core_id();
- a_msg->pc = pc;
- a_msg->arg0 = arg0;
- a_msg->arg1 = arg1;
- a_msg->arg2 = arg2;
- spin_lock_irqsave(&per_cpu_info[dst].amsg_lock);
- STAILQ_INSERT_TAIL(&per_cpu_info[dst].active_msgs, a_msg, link);
- spin_unlock_irqsave(&per_cpu_info[dst].amsg_lock);
- // since we touched memory the other core will touch (the lock), we don't
- // need an wmb_f()
- send_ipi(get_hw_coreid(dst), I_ACTIVE_MSG);
+ k_msg = (kernel_message_t *CT(1))TC(kmem_cache_alloc(kernel_msg_cache, 0));
+ k_msg->srcid = core_id();
+ k_msg->pc = pc;
+ k_msg->arg0 = arg0;
+ k_msg->arg1 = arg1;
+ k_msg->arg2 = arg2;
+ switch (type) {
+ case KMSG_IMMEDIATE:
+ spin_lock_irqsave(&per_cpu_info[dst].immed_amsg_lock);
+ STAILQ_INSERT_TAIL(&per_cpu_info[dst].immed_amsgs, k_msg, link);
+ spin_unlock_irqsave(&per_cpu_info[dst].immed_amsg_lock);
+ break;
+ case KMSG_ROUTINE:
+ spin_lock_irqsave(&per_cpu_info[dst].routine_amsg_lock);
+ STAILQ_INSERT_TAIL(&per_cpu_info[dst].routine_amsgs, k_msg, link);
+ spin_unlock_irqsave(&per_cpu_info[dst].routine_amsg_lock);
+ break;
+ default:
+ panic("Unknown type of kernel message!");
+ }
+ /* since we touched memory the other core will touch (the lock), we don't
+ * need a wmb_f() */
+ /* if we're sending a routine message locally, we don't want/need an IPI */
+ if ((dst != k_msg->srcid) || (type == KMSG_IMMEDIATE))
+ send_ipi(get_hw_coreid(dst), I_KERNEL_MSG);
return 0;
}
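+
+/* Usage sketch: 'pc' must match the amr_t signature the dispatcher below calls
+ * with (tf, srcid, arg0, arg1, arg2).  For a hypothetical handler __ping,
+ * sending it to core 2 as a routine message looks like:
+ *
+ *	send_kernel_message(2, __ping, 0, 0, 0, KMSG_ROUTINE);
+ */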
-/* Active message handler. We don't want to block other AMs from coming in, so
- * we'll copy out the message and let go of the lock. This won't return until
- * all pending AMs are executed. If the PC is 0, then this was an extra IPI and
- * we already handled the message (or someone is sending IPIs without loading
- * the active message...)
+/* Helper function.  Returns the next message off the list, or 0 if the list
+ * was empty. */
+static kernel_message_t *get_next_amsg(struct kernel_msg_list *list_head,
+ spinlock_t *list_lock)
+{
+ kernel_message_t *k_msg;
+ spin_lock_irqsave(list_lock);
+ k_msg = STAILQ_FIRST(list_head);
+ if (k_msg)
+ STAILQ_REMOVE_HEAD(list_head, link);
+ spin_unlock_irqsave(list_lock);
+ return k_msg;
+}
+
+/* Kernel message handler. Extensive documentation is in
+ * Documentation/kernel_messages.txt.
+ *
+ * In general: this processes immediate messages, then routine messages.
+ * Routine messages might not return (__startcore, etc), so we need to be
+ * careful about a few things.
+ *
* Note that all of this happens from interrupt context, and interrupts are
* currently disabled for this gate. Interrupts need to be disabled so that the
- * self-ipi doesn't preempt the execution of this active message. */
-void __active_message(trapframe_t *tf)
+ * self-ipi doesn't preempt the execution of this kernel message. */
+void __kernel_message(struct trapframe *tf)
{
- per_cpu_info_t RO*myinfo = &per_cpu_info[core_id()];
- active_message_t my_msg, *a_msg;
+ per_cpu_info_t *myinfo = &per_cpu_info[core_id()];
+ kernel_message_t msg_cp, *k_msg;
+
+ /* Copy out the TF for now, set tf to point to it. */
+ if (!in_kernel(tf))
+ set_current_tf(myinfo, &tf);
lapic_send_eoi();
while (1) { // will break out when there are no more messages
- /* Get the message */
- spin_lock_irqsave(&myinfo->amsg_lock);
- a_msg = STAILQ_FIRST(&myinfo->active_msgs);
- /* No messages to execute, so break out, etc. */
- if (!a_msg) {
- spin_unlock_irqsave(&myinfo->amsg_lock);
+ /* Try to get an immediate message. Exec and free it. */
+ k_msg = get_next_amsg(&myinfo->immed_amsgs, &myinfo->immed_amsg_lock);
+ if (k_msg) {
+ assert(k_msg->pc);
+ k_msg->pc(tf, k_msg->srcid, k_msg->arg0, k_msg->arg1, k_msg->arg2);
+ kmem_cache_free(kernel_msg_cache, (void*)k_msg);
+ } else { // no immediate, might be a routine
+ if (in_kernel(tf))
+ return; // don't execute routine msgs if we were in the kernel
+ k_msg = get_next_amsg(&myinfo->routine_amsgs,
+ &myinfo->routine_amsg_lock);
+ if (!k_msg) // no routines either
+ return;
+ /* copy in, and then free, in case we don't return */
+ msg_cp = *k_msg;
+ kmem_cache_free(kernel_msg_cache, (void*)k_msg);
+ /* make sure an IPI is pending if we have more work */
+ /* technically, we don't need to lock when checking */
+ if (!STAILQ_EMPTY(&myinfo->routine_amsgs) &&
+ !ipi_is_pending(I_KERNEL_MSG))
+ send_self_ipi(I_KERNEL_MSG);
+ /* Execute the kernel message */
+ assert(msg_cp.pc);
+ /* TODO: when batching syscalls, this should be reread from cur_tf */
+ msg_cp.pc(tf, msg_cp.srcid, msg_cp.arg0, msg_cp.arg1, msg_cp.arg2);
+ }
+ }
+}
+
+/* Runs any outstanding routine kernel messages from within the kernel. Will
+ * make sure immediates still run first (or when they arrive, if processing a
+ * bunch of these messages). This will disable interrupts, and restore them to
+ * whatever state you left them in. */
+void process_routine_kmsg(struct trapframe *tf)
+{
+ per_cpu_info_t *myinfo = &per_cpu_info[core_id()];
+ kernel_message_t msg_cp, *k_msg;
+ int8_t irq_state = 0;
+
+ disable_irqsave(&irq_state);
+ /* If we were told what our TF was, use that. o/w, go with current_tf. */
+ tf = tf ? tf : current_tf;
+ while (1) {
+ /* normally, we want ints disabled, so we don't have an empty self-ipi
+ * for every routine message. (imagine a long list of routines). But we
+ * do want immediates to run ahead of routines. This enabling should
+ * work (might not in some shitty VMs). Also note we can receive an
+ * extra self-ipi for routine messages before we turn off irqs again.
+ * Not a big deal, since we will process it right away.
+ * TODO: consider calling __kernel_message() here. */
+ if (!STAILQ_EMPTY(&myinfo->immed_amsgs)) {
+ enable_irq();
+ cpu_relax();
+ disable_irq();
+ }
+ k_msg = get_next_amsg(&myinfo->routine_amsgs,
+ &myinfo->routine_amsg_lock);
+ if (!k_msg) {
+ enable_irqsave(&irq_state);
return;
}
- STAILQ_REMOVE_HEAD(&myinfo->active_msgs, link);
- spin_unlock_irqsave(&myinfo->amsg_lock);
- // copy in, and then free, in case we don't return
- my_msg = *a_msg;
- kmem_cache_free(active_msg_cache, (void *CT(1))TC(a_msg));
- assert(my_msg.pc);
- /* In case the function doesn't return (which is common: __startcore,
- * __death, etc), there is a chance we could lose an amsg. We can only
- * have up to two interrupts outstanding, and if we never return, we
- * never deal with any other amsgs. This extra IPI hurts performance
- * but is only necessary if there is another outstanding message in the
- * buffer, but makes sure we never miss out on an amsg. */
- spin_lock_irqsave(&myinfo->amsg_lock);
- if (!STAILQ_EMPTY(&myinfo->active_msgs))
- send_self_ipi(I_ACTIVE_MSG);
- spin_unlock_irqsave(&myinfo->amsg_lock);
- /* Execute the active message */
- my_msg.pc(tf, my_msg.srcid, my_msg.arg0, my_msg.arg1, my_msg.arg2);
+ /* copy in, and then free, in case we don't return */
+ msg_cp = *k_msg;
+ kmem_cache_free(kernel_msg_cache, (void*)k_msg);
+ /* make sure an IPI is pending if we have more work */
+ if (!STAILQ_EMPTY(&myinfo->routine_amsgs) &&
+ !ipi_is_pending(I_KERNEL_MSG))
+ send_self_ipi(I_KERNEL_MSG);
+ /* Execute the kernel message */
+ assert(msg_cp.pc);
+ msg_cp.pc(tf, msg_cp.srcid, msg_cp.arg0, msg_cp.arg1, msg_cp.arg2);
}
}
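+
+/* Usage sketch: a kernel path that wants to drain its routine messages (an
+ * idle loop, for instance) can pass 0 and let this fall back to current_tf:
+ *
+ *	process_routine_kmsg(0);
+ */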