smp_call backend removed, other async tweaks
authorBarret Rhoden <brho@cs.berkeley.edu>
Mon, 27 Apr 2009 06:47:37 +0000 (23:47 -0700)
committerBarret Rhoden <brho@cs.berkeley.edu>
Mon, 27 Apr 2009 06:47:37 +0000 (23:47 -0700)
Merged the back and frontend to just one checklist, since they both
needed to be waited on.  Added some other checks to prevent trying to
grab a handler when none are available and will never become available
(like if they are all being waited on and the caller could be the
waiter).  Uses a global counter to track the number of outstanding
calls.  Also has support for commit_checklist to preemptively abort if
it knows it will take a while (locked or not clear).

inc/error.h
kern/atomic.c
kern/atomic.h
kern/smp.c
kern/smp.h
kern/testing.c
kern/trap.c

index 58590ab..75d0778 100644 (file)
@@ -4,6 +4,7 @@
 #define ROS_INC_ERROR_H
 
 typedef enum {
+       E_BUSY          =       -2,
        E_FAIL          =       -1,
        E_SUCCESS       =       0,
 } error_t;
index 4ac337c..f7d9e97 100644 (file)
@@ -1,5 +1,6 @@
 #include <inc/string.h>
 #include <inc/assert.h>
+#include <inc/error.h>
 
 #include <kern/atomic.h>
 #include <kern/apic.h>
@@ -8,6 +9,14 @@
 int commit_checklist_wait(checklist_t* list, checklist_mask_t* mask)
 {
        assert(list->mask.size == mask->size);
+       // abort if the list is locked.  this will protect us from trying to commit
+       // and thus spin on a checklist that we are already waiting on.  it is
+       // still possible to not get the lock, but the holder is on another core.
+       // Or, bail out if we can see the list is already in use.  This check is
+       // just an optimization before we try to use the list for real.
+       if ((checklist_is_locked(list)) || !checklist_is_clear(list))
+               return E_BUSY;
+
        // possession of this lock means you can wait on it and set it
        spin_lock_irqsave(&list->lock);
        // wait til the list is available.  could have some adaptive thing here
@@ -56,10 +65,20 @@ int commit_checklist_nowait(checklist_t* list, checklist_mask_t* mask)
 // Assumed we held the lock if we ever call this
 int waiton_checklist(checklist_t* list)
 {
+       extern uint32_t outstanding_calls;
        // can consider breaking out early, like above, and erroring out
        while (!checklist_is_clear(list))
                cpu_relax();
        spin_unlock_irqsave(&list->lock);
+       // global counter of wrappers either waited on or being contended for.
+       atomic_dec(&outstanding_calls);
+       return 0;
+}
+
+// like waiton, but don't bother waiting either
+int release_checklist(checklist_t* list)
+{
+       spin_unlock_irqsave(&list->lock);
        return 0;
 }
 
index 82f95bb..36d7ed6 100644 (file)
@@ -46,6 +46,7 @@ typedef struct checklist {
 int commit_checklist_wait(checklist_t* list, checklist_mask_t* mask);
 int commit_checklist_nowait(checklist_t* list, checklist_mask_t* mask);
 int waiton_checklist(checklist_t* list);
+int release_checklist(checklist_t* list);
 int checklist_is_locked(checklist_t* list);
 int checklist_is_clear(checklist_t* list);
 void reset_checklist(checklist_t* list);
index b718406..0b02646 100644 (file)
@@ -5,6 +5,7 @@
 #include <inc/stdio.h>
 #include <inc/string.h>
 #include <inc/assert.h>
+#include <inc/error.h>
 #include <inc/x86.h>
 
 #include <kern/smp.h>
@@ -22,18 +23,17 @@ barrier_t generic_barrier;
 // checklists to protect the global interrupt_handlers for 0xf0, f1, f2, f3, f4
 // need to be global, since there is no function that will always exist for them
 handler_wrapper_t             handler_wrappers[NUM_HANDLER_WRAPPERS];
+// tracks the number of outstanding smp_calls; must stay <= NUM_HANDLER_WRAPPERS
+uint32_t outstanding_calls = 0; 
 
 #define DECLARE_HANDLER_CHECKLISTS(vector)                          \
-       INIT_CHECKLIST(f##vector##_front_cpu_list, MAX_NUM_CPUS);   \
-       INIT_CHECKLIST(f##vector##_back_cpu_list, MAX_NUM_CPUS);
-
-#define INIT_HANDLER_WRAPPER(v)                                               \
-{                                                                         \
-       handler_wrappers[(v)].vector = 0xf##v;                                    \
-       handler_wrappers[(v)].front_cpu_list = &f##v##_front_cpu_list;            \
-       handler_wrappers[(v)].front_cpu_list->mask.size = num_cpus;               \
-       handler_wrappers[(v)].back_cpu_list = &f##v##_back_cpu_list;              \
-       handler_wrappers[(v)].back_cpu_list->mask.size = num_cpus;                \
+       INIT_CHECKLIST(f##vector##_cpu_list, MAX_NUM_CPUS);
+
+#define INIT_HANDLER_WRAPPER(v)                                     \
+{                                                                   \
+       handler_wrappers[(v)].vector = 0xf##v;                          \
+       handler_wrappers[(v)].cpu_list = &f##v##_cpu_list;              \
+       handler_wrappers[(v)].cpu_list->mask.size = num_cpus;           \
 }
 
 DECLARE_HANDLER_CHECKLISTS(0);
@@ -210,7 +210,7 @@ uint32_t smp_main(void)
 
 // this idles a core, sometimes we need to call it directly.  never returns.
 // the pause is somewhat of a hack, since kvm isn't halting.  not sure what the
-// deal is wit that.
+// deal is with that.
 void smp_idle(void)
 {
        asm volatile("1: hlt; pause; jmp 1b;");
@@ -221,9 +221,18 @@ static int smp_call_function(uint8_t type, uint8_t dest, isr_t handler,
 {
        extern isr_t interrupt_handlers[];
        int8_t state = 0;
-       uint32_t vector;
+       uint32_t wrapper_num;
        handler_wrapper_t* wrapper;
 
+       // prevents us from ever having more than NUM_HANDLER_WRAPPERS callers in
+       // the process of competing for vectors.  not decremented until either
+       // after the while(1) loop (if we aren't waiting) or after the call has
+       // been waited on.
+       atomic_inc(&outstanding_calls);
+       if (outstanding_calls > NUM_HANDLER_WRAPPERS) {
+               atomic_dec(&outstanding_calls);
+               return E_BUSY;
+       }
+       
        // assumes our cores are numbered in order
        if ((type == 4) && (dest >= num_cpus))
                panic("Destination CPU does not exist!");
@@ -257,50 +266,40 @@ static int smp_call_function(uint8_t type, uint8_t dest, isr_t handler,
                        panic("Invalid type for cross-core function call!");
        }
 
-       // Find an available vector.  Starts with this core's vector (mod the
-       // number of wrappers).  Walk through on conflict
+       // Find an available vector/wrapper.  Starts with this core's id (mod the
+       // number of wrappers).  Walk through on conflict.
        // Commit returns an error if it wanted to give up for some reason,
        // like taking too long to acquire the lock or clear the mask, at which
        // point, we try the next one.
        // When we are done, wrapper points to the one we finally got.
-       // Note we need to make sure the backend is unused too, even if we don't
-       // want to wait.  Don't want our backend responses clobbering another
-       // waiters results.  Even if we don't plan on waiting, we need to make sure
-       // the list is done, so that old results don't trick future viewers into
-       // thinking the list is clear.  The main idea is that if a backend is
-       // unlocked, then no one is waiting on it.  If it is clear, then all
-       // previous callees have *finished* the entire handler function.  We don't
-       // want a weird sequence of events to be able to bypass that.
-       vector = lapic_get_id() % NUM_HANDLER_WRAPPERS;
+       // this wrapper_num trick doesn't work as well if you send a bunch in a row
+       // and wait, since you always check your main one (which is currently busy).
+       wrapper_num = lapic_get_id() % NUM_HANDLER_WRAPPERS;
        while(1) {
-               wrapper = &handler_wrappers[vector];
-               if (!commit_checklist_nowait(wrapper->front_cpu_list, &cpu_mask)) {
-                       // if the backend is free (not locked and clear), then we can break.
-                       if ((!checklist_is_locked(wrapper->back_cpu_list)) && 
-                          (checklist_is_clear(wrapper->back_cpu_list)))
-                               break;
-                       else
-                               reset_checklist(wrapper->front_cpu_list);       
-               }
-               vector = (vector + 1) % NUM_HANDLER_WRAPPERS;
+               wrapper = &handler_wrappers[wrapper_num];
+               if (!commit_checklist_wait(wrapper->cpu_list, &cpu_mask))
+                       break;
+               wrapper_num = (wrapper_num + 1) % NUM_HANDLER_WRAPPERS;
+               /*
+               uint32_t count = 0;
+               // instead of deadlock, smp_call can fail with this.  makes it harder
+               // to use (have to check your return value).  consider putting a delay
+               // here too (like if wrapper_num == initial_wrapper_num)
+               if (count++ > NUM_HANDLER_WRAPPERS * 1000) // note 1000 isn't enough...
+                       return E_BUSY;
+               */
        }
 
        // Wanting to wait is expressed by having a non-NULL handler_wrapper_t**
-       // passed in.
-       // Should be no contention here, since this one is protected by the mask
-       // being present in the frontend.  the only way someone would try to commit
-       // on this would be if it holds the front.
-       // Set this mask regardless if we want to wait or not.  the recipients will
-       // all down it anyway, and it protects any future users of this handler from
-       // waiting on a backend that is actually in use.  Note that above we did not
-       // allow the use of a wrapper if the backend still isn't clear - even if we
-       // won the front end.
-       if (wait_wrapper) {
-               commit_checklist_wait(wrapper->back_cpu_list, &cpu_mask);
-               // pass out our reference to wrapper, so waiting can be done later.
+       // passed in.  Pass out our reference to wrapper, to wait later.
+       // If we don't want to wait, release the checklist (though it is still not
+       // clear, so it can't be used til everyone checks in).
+       if (wait_wrapper)
                *wait_wrapper = wrapper;
-       } else
-               commit_checklist_nowait(wrapper->back_cpu_list, &cpu_mask);
+       else {
+               release_checklist(wrapper->cpu_list);
+               atomic_dec(&outstanding_calls);
+       }
 
        // now register our handler to run
        register_interrupt_handler(interrupt_handlers, wrapper->vector, handler);
@@ -355,7 +354,12 @@ int smp_call_function_single(uint8_t dest, isr_t handler,
 // this to do the actual waiting
 int smp_call_wait(handler_wrapper_t* wrapper)
 {
-       waiton_checklist(wrapper->back_cpu_list);
-       return 0;
+       if (wrapper) {
+               waiton_checklist(wrapper->cpu_list);
+               return 0;
+       } else {
+               warn("Attempting to wait on null wrapper!  Check your return values!");
+               return E_FAIL;
+       }
 }
 
index bf3d379..c953580 100644 (file)
 #endif
 
 // be careful changing this, esp if you go over 16
-#define NUM_HANDLER_WRAPPERS  5
+#define NUM_HANDLER_WRAPPERS           5
 
 typedef struct HandlerWrapper {
-       checklist_t* front_cpu_list;
-       checklist_t* back_cpu_list;
+       checklist_t* cpu_list;
        uint8_t vector;
 } handler_wrapper_t;
 
index 05d14be..e0f1375 100644 (file)
@@ -299,22 +299,22 @@ void test_null_handler(struct Trapframe *tf)
 
 void test_smp_call_functions(void)
 {
-       handler_wrapper_t *waiter, *waiter2;
+       handler_wrapper_t *waiter0, *waiter1, *waiter2, *waiter3, *waiter4, *waiter5;
        uint8_t me = lapic_get_id();
        printk("\nCore %d: SMP Call Self (nowait):\n", me);
        printk("---------------------\n");
        smp_call_function_self(test_hello_world_handler, 0);
        printk("\nCore %d: SMP Call Self (wait):\n", me);
        printk("---------------------\n");
-       smp_call_function_self(test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
+       smp_call_function_self(test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
        printk("\nCore %d: SMP Call All (nowait):\n", me);
        printk("---------------------\n");
        smp_call_function_all(test_hello_world_handler, 0);
        printk("\nCore %d: SMP Call All (wait):\n", me);
        printk("---------------------\n");
-       smp_call_function_all(test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
+       smp_call_function_all(test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
        printk("\nCore %d: SMP Call All-Else Individually, in order (nowait):\n", me);
        printk("---------------------\n");
        smp_call_function_single(1, test_hello_world_handler, 0);
@@ -326,24 +326,24 @@ void test_smp_call_functions(void)
        smp_call_function_single(7, test_hello_world_handler, 0);
        printk("\nCore %d: SMP Call Self (wait):\n", me);
        printk("---------------------\n");
-       smp_call_function_self(test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
+       smp_call_function_self(test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
        printk("\nCore %d: SMP Call All-Else Individually, in order (wait):\n", me);
        printk("---------------------\n");
-       smp_call_function_single(1, test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
-       smp_call_function_single(2, test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
-       smp_call_function_single(3, test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
-       smp_call_function_single(4, test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
-       smp_call_function_single(5, test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
-       smp_call_function_single(6, test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
-       smp_call_function_single(7, test_hello_world_handler, &waiter);
-       smp_call_wait(waiter);
+       smp_call_function_single(1, test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
+       smp_call_function_single(2, test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
+       smp_call_function_single(3, test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
+       smp_call_function_single(4, test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
+       smp_call_function_single(5, test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
+       smp_call_function_single(6, test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
+       smp_call_function_single(7, test_hello_world_handler, &waiter0);
+       smp_call_wait(waiter0);
        printk("\nTesting to see if any IPI-functions are dropped when not waiting:\n");
        printk("A: %d, B: %d, C: %d (should be 0,0,0)\n", a, b, c);
        smp_call_function_all(test_A_incrementer_handler, 0);
@@ -365,15 +365,45 @@ void test_smp_call_functions(void)
        // wait, so we're sure the others finish before printing.
        // without this, we could (and did) get 19,18,19, since the B_inc
        // handler didn't finish yet
-       smp_call_function_self(test_null_handler, &waiter);
-       smp_call_wait(waiter);
-       printk("A: %d, B: %d, C: %d (should be 19,19,19)\n", a, b, c);
-       printk("Attempting to deadlock by smp_calling on a current wait: ");
-       smp_call_function_self(test_null_handler, &waiter);
+       smp_call_function_self(test_null_handler, &waiter0);
+       // need to grab all 5 handlers (max), since the code moves to the next free.
+       smp_call_function_self(test_null_handler, &waiter1);
        smp_call_function_self(test_null_handler, &waiter2);
-       smp_call_wait(waiter);
+       smp_call_function_self(test_null_handler, &waiter3);
+       smp_call_function_self(test_null_handler, &waiter4);
+       smp_call_wait(waiter0);
+       smp_call_wait(waiter1);
+       smp_call_wait(waiter2);
+       smp_call_wait(waiter3);
+       smp_call_wait(waiter4);
+       printk("A: %d, B: %d, C: %d (should be 19,19,19)\n", a, b, c);
+       printk("Attempting to deadlock by smp_calling with an outstanding wait:\n");
+       smp_call_function_self(test_null_handler, &waiter0);
+       smp_call_function_self(test_null_handler, &waiter1);
+       smp_call_wait(waiter0);
+       smp_call_wait(waiter1);
+       printk("\tMade it through!\n");
+       printk("Attempting to deadlock by smp_calling more than are available:\n");
+       if (smp_call_function_self(test_null_handler, &waiter0))
+               printk("\tInsufficient handlers to call function (0)\n");
+       if (smp_call_function_self(test_null_handler, &waiter1))
+               printk("\tInsufficient handlers to call function (1)\n");
+       if (smp_call_function_self(test_null_handler, &waiter2))
+               printk("\tInsufficient handlers to call function (2)\n");
+       if (smp_call_function_self(test_null_handler, &waiter3))
+               printk("\tInsufficient handlers to call function (3)\n");
+       if (smp_call_function_self(test_null_handler, &waiter4))
+               printk("\tInsufficient handlers to call function (4)\n");
+       if (smp_call_function_self(test_null_handler, &waiter5))
+               printk("\tInsufficient handlers to call function (5)\n");
+       smp_call_wait(waiter0);
+       smp_call_wait(waiter1);
        smp_call_wait(waiter2);
-       printk("Made it through!\n");
+       smp_call_wait(waiter3);
+       smp_call_wait(waiter4);
+       smp_call_wait(waiter5);
+       printk("\tMade it through!\n");
+
        printk("Done\n");
 }
 
index a9893fa..21b9f44 100644 (file)
@@ -234,19 +234,11 @@ void
        // determine the interrupt handler table to use.  for now, pick the global
        isr_t* handler_table = interrupt_handlers;
 
-       // if we're a general purpose IPI function call, store the function we
-       // will call, down the front, exec the function, and down the backend
-       if ((0xf0 <= tf->tf_trapno) && (tf->tf_trapno < 0xf0+NUM_HANDLER_WRAPPERS)) {
-               handler = handler_table[tf->tf_trapno];
-               down_checklist(handler_wrappers[tf->tf_trapno & 0x0f].front_cpu_list);
-               if (handler) // make sure we weren't called on a null ptr
-                       handler(tf);
-               else
-                       warn("Sent a general purpose IPI with no handler");
-               down_checklist(handler_wrappers[tf->tf_trapno & 0x0f].back_cpu_list);
-       } else // regular IRQ
-               if (handler_table[tf->tf_trapno] != 0)
-                       handler_table[tf->tf_trapno](tf);
+       if (handler_table[tf->tf_trapno] != 0)
+               handler_table[tf->tf_trapno](tf);
+       // if we're a general purpose IPI function call, down the cpu_list
+       if ((0xf0 <= tf->tf_trapno) && (tf->tf_trapno < 0xf0 +NUM_HANDLER_WRAPPERS))
+               down_checklist(handler_wrappers[tf->tf_trapno & 0x0f].cpu_list);
 
        // Send EOI.  might want to do this in assembly, and possibly earlier
        // This is set up to work with an old PIC for now