Proc data structure management, env gutting
diff --git a/kern/arch/i386/trap.c b/kern/arch/i386/trap.c
index 35a458d..b865fe7 100644
--- a/kern/arch/i386/trap.c
+++ b/kern/arch/i386/trap.c
@@ -1,5 +1,6 @@
 #ifdef __SHARC__
-//#pragma nosharc
+#pragma nosharc
+#define SINIT(x) x
 #endif
 
 #include <arch/mmu.h>
@@ -15,7 +16,7 @@
 #include <monitor.h>
 #include <process.h>
 #include <stdio.h>
-
+#include <slab.h>
 #include <syscall.h>
 
 taskstate_t RO ts;
@@ -183,9 +184,8 @@ trap_dispatch(trapframe_t *tf)
                case T_SYSCALL:
                        // check for userspace, for now
                        assert(tf->tf_cs != GD_KT);
-                       // Note we pass the tf ptr along, in case syscall needs to block
                        tf->tf_regs.reg_eax =
-                               syscall(current, tf, tf->tf_regs.reg_eax, tf->tf_regs.reg_edx,
+                               syscall(current, tf->tf_regs.reg_eax, tf->tf_regs.reg_edx,
                                        tf->tf_regs.reg_ecx, tf->tf_regs.reg_ebx,
                                        tf->tf_regs.reg_edi, tf->tf_regs.reg_esi);
                        proc_startcore(current, tf); // Note the comment in syscall.c
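
Note on this hunk: syscall() loses its trapframe parameter because the frame is now reachable through the per-cpu current_tf reference set at trap entry (see the trap() and sysenter hunks below). A minimal sketch of how a blocking path could recover the frame, assuming current_tf is (or expands to) that per-cpu pointer; sys_example_block is hypothetical, not from this commit:

	/* hypothetical sketch, not real Akaros code */
	intreg_t sys_example_block(struct proc *p)
	{
		trapframe_t *tf = current_tf; /* stashed by trap()/sysenter */
		/* only copy into p->env_tf once we know we must block, and
		 * only when the process is single-core (PROC_RUNNING_S) */
		p->env_tf = *tf;
		return 0;
	}
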
@@ -207,6 +207,7 @@ trap_dispatch(trapframe_t *tf)
 void
 env_push_ancillary_state(env_t* e)
 {
+       // TODO: (HSS) handle silly state (don't really want this per-process)
        // Here's where you'll save FP/MMX/XMM regs
 }
 
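
The TODO above concerns the FP/MMX/XMM ("silly") state, which this commit still leaves unsaved. A minimal sketch of what the save could look like on i386, assuming a 16-byte-aligned buffer is added somewhere (both names below are hypothetical, not from this commit):

	/* hypothetical; fxsave needs a 16-byte-aligned 512-byte area */
	struct ancillary_state {
		uint8_t fp_save_area[512] __attribute__((aligned(16)));
	};

	static inline void save_fp_state(struct ancillary_state *silly)
	{
		asm volatile("fxsave %0" : "=m"(*silly));
	}
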
@@ -221,22 +222,18 @@ trap(trapframe_t *tf)
 {
        //printk("Incoming TRAP frame on core %d at %p\n", core_id(), tf);
 
-       // TODO: do this once we know we are are not returning to the current
-       // context.  doing it now is safe. (HSS)
-       // we also need to sort this wrt multiple contexts
-       env_push_ancillary_state(current);
+       /* Note we are not preemptively saving the TF in the env_tf.  We do maintain
+        * a reference to it in current_tf (a per-cpu pointer).
+        * In general, only save the tf and any silly state once you know it
+        * is necessary (blocking).  And only save it in env_tf when you know you
+        * are single core (PROC_RUNNING_S) */
+       set_current_tf(tf);
 
        if ((tf->tf_cs & ~3) != GD_UT && (tf->tf_cs & ~3) != GD_KT) {
                print_trapframe(tf);
                panic("Trapframe with invalid CS!");
        }
 
-       /* If we're vcore0, save the trapframe in the proc's env_tf.  make sure
-        * silly state is sorted (HSS). This applies to any RUNNING_* state. */
-       if (current->vcoremap[0] == core_id()) {
-               current->env_tf = *tf;
-               tf = &current->env_tf;
-       }
        // Dispatch based on what type of trap occurred
        trap_dispatch(tf);
 
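
The eager vcore0 save removed here is replaced by the per-cpu current_tf reference. set_current_tf() is not shown in this diff; a sketch of what it presumably does, assuming per_cpu_info grows a cur_tf field (the field name is a guess):

	/* sketch; the real definition lives elsewhere in this commit */
	void set_current_tf(trapframe_t *tf)
	{
		per_cpu_info[core_id()].cur_tf = tf;
	}

This keeps the hot path cheap: the tf (and silly state) is only copied into env_tf on the rare blocking path, and only when the process is single-core (PROC_RUNNING_S).
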
@@ -252,14 +249,12 @@ trap(trapframe_t *tf)
 void
 irq_handler(trapframe_t *tf)
 {
+       // save a per-core reference to the tf
+       set_current_tf(tf);
        //if (core_id())
        //      cprintf("Incoming IRQ, ISR: %d on core %d\n", tf->tf_trapno, core_id());
        // merge this with alltraps?  other than the EOI... or do the same in all traps
 
-       // TODO: do this once we know we are are not returning to the current
-       // context.  doing it now is safe. (HSS)
-       env_push_ancillary_state(current);
-
        extern handler_wrapper_t (RO handler_wrappers)[NUM_HANDLER_WRAPPERS];
 
        // determine the interrupt handler table to use.  for now, pick the global
@@ -346,7 +341,7 @@ page_fault_handler(trapframe_t *tf)
 
        // Destroy the environment that caused the fault.
        cprintf("[%08x] user fault va %08x ip %08x from core %d\n",
-               current->env_id, fault_va, tf->tf_eip, core_id());
+               current->pid, fault_va, tf->tf_eip, core_id());
        print_trapframe(tf);
        proc_destroy(current);
 }
@@ -361,14 +356,10 @@ void sysenter_init(void)
 /* This is called from sysenter's asm, with the tf on the kernel stack. */
 void sysenter_callwrapper(struct Trapframe *tf)
 {
-       /* If we're vcore0, save the trapframe in the proc's env_tf.  make sure
-        * silly state is sorted (HSS). This applies to any RUNNING_* state. */
-       if (current->vcoremap[0] == core_id()) {
-               current->env_tf = *tf;
-               tf = &current->env_tf;
-       }
-       // Note we pass the tf ptr along, in case syscall needs to block
-       tf->tf_regs.reg_eax = (intreg_t) syscall(current, tf,
+       // save a per-core reference to the tf
+       set_current_tf(tf);
+
+       tf->tf_regs.reg_eax = (intreg_t) syscall(current,
                                                 tf->tf_regs.reg_eax,
                                                 tf->tf_regs.reg_edx,
                                                 tf->tf_regs.reg_ecx,
@@ -384,31 +375,32 @@ void sysenter_callwrapper(struct Trapframe *tf)
        proc_startcore(current, tf);
 }
 
+struct kmem_cache *active_msg_cache;
+void active_msg_init(void)
+{
+       active_msg_cache = kmem_cache_create("active_msgs",
+                          sizeof(struct active_message), HW_CACHE_ALIGN, 0, 0, 0);
+}
+
 uint32_t send_active_message(uint32_t dst, amr_t pc,
                              TV(a0t) arg0, TV(a1t) arg1, TV(a2t) arg2)
 {
-       error_t retval = -EBUSY;
+       active_message_t *a_msg;
        assert(pc);
+       // note this will be freed on the destination core
+       a_msg = (active_message_t *CT(1))TC(kmem_cache_alloc(active_msg_cache, 0));
+       a_msg->srcid = core_id();
+       a_msg->pc = pc;
+       a_msg->arg0 = arg0;
+       a_msg->arg1 = arg1;
+       a_msg->arg2 = arg2;
        spin_lock_irqsave(&per_cpu_info[dst].amsg_lock);
-       size_t current_amsg = per_cpu_info[dst].amsg_current;
-       // If there's a PC there, then that means it's an outstanding message
-       FOR_CIRC_BUFFER(current_amsg, NUM_ACTIVE_MESSAGES, i) {
-               if (per_cpu_info[dst].active_msgs[i].pc)
-                       continue;
-               per_cpu_info[dst].active_msgs[i].pc = pc;
-               per_cpu_info[dst].active_msgs[i].arg0 = arg0;
-               per_cpu_info[dst].active_msgs[i].arg1 = arg1;
-               per_cpu_info[dst].active_msgs[i].arg2 = arg2;
-               per_cpu_info[dst].active_msgs[i].srcid = core_id();
-               retval = 0;
-               break;
-       }
+       STAILQ_INSERT_TAIL(&per_cpu_info[dst].active_msgs, a_msg, link);
        spin_unlock_irqsave(&per_cpu_info[dst].amsg_lock);
        // since we touched memory the other core will touch (the lock), we don't
        // need an wmb_f()
-       if (!retval)
-               send_ipi(dst, 0, I_ACTIVE_MSG);
-       return retval;
+       send_ipi(dst, 0, I_ACTIVE_MSG);
+       return 0;
 }
 
 /* Active message handler.  We don't want to block other AMs from coming in, so
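
The STAILQ plumbing this hunk relies on is declared outside this file; a sketch of the assumed shape, inferred from the usage above (the exact declarations in the headers may differ):

	struct active_message {
		STAILQ_ENTRY(active_message) link; /* for the per-core queue */
		uint32_t srcid;
		amr_t pc;
		TV(a0t) arg0;
		TV(a1t) arg1;
		TV(a2t) arg2;
	};
	STAILQ_HEAD(active_msg_list, active_message);
	/* per_cpu_info_t then presumably carries:
	 *     struct active_msg_list active_msgs;  (guarded by amsg_lock) */

Moving from the fixed circular buffer to a slab-backed queue means send_active_message() can no longer run out of slots and fail with -EBUSY, which is why it now unconditionally sends the IPI and returns 0.
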
@@ -417,35 +409,40 @@ uint32_t send_active_message(uint32_t dst, amr_t pc,
  * we already handled the message (or someone is sending IPIs without loading
  * the active message...)
  * Note that all of this happens from interrupt context, and interrupts are
- * currently disabled for this gate. */
+ * currently disabled for this gate.  Interrupts need to be disabled so that the
+ * self-ipi doesn't preempt the execution of this active message. */
 void __active_message(trapframe_t *tf)
 {
        per_cpu_info_t RO*myinfo = &per_cpu_info[core_id()];
-       active_message_t amsg;
+       active_message_t my_msg, *a_msg;
 
        lapic_send_eoi();
-       while (1) { // will break out when we find an empty amsg
+       while (1) { // will break out when there are no more messages
                /* Get the message */
                spin_lock_irqsave(&myinfo->amsg_lock);
-               if (myinfo->active_msgs[myinfo->amsg_current].pc) {
-                       amsg = myinfo->active_msgs[myinfo->amsg_current];
-                       myinfo->active_msgs[myinfo->amsg_current].pc = 0;
-                       myinfo->amsg_current = (myinfo->amsg_current + 1) %
-                                              NUM_ACTIVE_MESSAGES;
-               } else { // was no PC in the current active message, meaning we do nothing
+               a_msg = STAILQ_FIRST(&myinfo->active_msgs);
+               /* No messages to execute, so break out, etc. */
+               if (!a_msg) {
                        spin_unlock_irqsave(&myinfo->amsg_lock);
                        return;
                }
+               STAILQ_REMOVE_HEAD(&myinfo->active_msgs, link);
+               spin_unlock_irqsave(&myinfo->amsg_lock);
+               // copy in, and then free, in case we don't return
+               my_msg = *a_msg;
+               kmem_cache_free(active_msg_cache, (void *CT(1))TC(a_msg));
+               assert(my_msg.pc);
                /* In case the function doesn't return (which is common: __startcore,
                 * __death, etc), there is a chance we could lose an amsg.  We can only
                 * have up to two interrupts outstanding, and if we never return, we
                 * never deal with any other amsgs.  This extra IPI hurts performance
                 * but is only necessary if there is another outstanding message in the
                 * buffer, but makes sure we never miss out on an amsg. */
-               if (myinfo->active_msgs[myinfo->amsg_current].pc)
-                       send_ipi(core_id(), 0, I_ACTIVE_MSG);
+               spin_lock_irqsave(&myinfo->amsg_lock);
+               if (!STAILQ_EMPTY(&myinfo->active_msgs))
+                       send_self_ipi(I_ACTIVE_MSG);
                spin_unlock_irqsave(&myinfo->amsg_lock);
                /* Execute the active message */
-               amsg.pc(tf, amsg.srcid, amsg.arg0, amsg.arg1, amsg.arg2);
+               my_msg.pc(tf, my_msg.srcid, my_msg.arg0, my_msg.arg1, my_msg.arg2);
        }
 }
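
Usage sketch: queueing a function to run in interrupt context on another core. The handler __hello and the core number are hypothetical, not from this commit:

	/* hypothetical handler; matches the amr_t call made above */
	static void __hello(trapframe_t *tf, uint32_t srcid,
	                    TV(a0t) a0, TV(a1t) a1, TV(a2t) a2)
	{
		printk("greetings from core %d\n", srcid);
	}

	void kick_core_two(void)
	{
		/* allocates the message from active_msg_cache, queues it on
		 * core 2's active_msgs list, and IPIs core 2 */
		send_active_message(2, __hello, 0, 0, 0);
	}

The copy-then-free in __active_message() matters because handlers like __startcore and __death never return: freeing the slab object before executing the handler avoids leaking it, and the extra self-IPI guarantees that any message still queued behind a non-returning handler is eventually delivered.
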