Optimizes run_once(_safe) (XCC)
author    Barret Rhoden <brho@cs.berkeley.edu>
          Thu, 20 Dec 2012 02:57:52 +0000 (18:57 -0800)
committer Barret Rhoden <brho@cs.berkeley.edu>
          Thu, 20 Dec 2012 02:57:52 +0000 (18:57 -0800)
Takes the old run_once_safe and optimizes it for the common case where
the function has already completed, so we don't hit an atomic_swap (and
a couple of branches) on every call.  Given this, I don't mind using it
in more places, so the non-safe version is now the 'racy' version (same
with init_once()).

Reinstall your kernel headers.
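
For reference, a minimal sketch of the fast-path pattern this commit
adopts, written as a standalone function rather than a macro.  This is
illustrative only: __sync_lock_test_and_set() and __sync_synchronize()
stand in for Akaros's atomic_swap() and wmb(), and do_init() /
ensure_init() are hypothetical names.

#include <stdbool.h>

static void do_init(void)
{
	/* one-time setup would go here */
}

static void ensure_init(void)
{
	static volatile bool ran_once = false;
	static volatile int is_running = 0;

	if (ran_once)
		return;                 /* fast path: plain load, no atomic op */
	if (!__sync_lock_test_and_set(&is_running, 1)) {
		do_init();              /* we won the race */
		__sync_synchronize();   /* order init writes before the flag */
		ran_once = true;
	} else {
		while (!ran_once)       /* losers spin til the winner is done */
			;                   /* cpu_relax() would go here on Akaros */
	}
}

Once ran_once is set, each later call costs one load and a well-predicted
branch, which is the whole point of the optimization.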

kern/include/ros/common.h
user/parlib/slab.c
user/parlib/uthread.c
user/parlib/vcore.c
user/pthread/futex.c
user/pthread/pthread.c

diff --git a/kern/include/ros/common.h b/kern/include/ros/common.h
index ac12bcd..6670875 100644
@@ -121,23 +121,27 @@ static inline bool mult_will_overflow_u64(uint64_t a, uint64_t b)
 
 /* Makes sure func is run exactly once.  Can handle concurrent callers, and
  * other callers spin til the func is complete. */
-/* TODO: look in to optimizing this, with the initialized check first */
-#define run_once_safe(func) \
-{\
-       static atomic_t initializing = FALSE; \
-       static bool initialized = FALSE; \
-       if (!atomic_swap(&initializing, TRUE)) { \
-               func; \
-               initialized = TRUE; \
-       } \
-       else { \
-               while(!initialized) \
-                       cpu_relax(); \
-       } \
+#define run_once(func)                                                         \
+{                                                                              \
+       static bool ran_once = FALSE;                                              \
+       static atomic_t is_running = FALSE;                                        \
+       if (!ran_once) {                                                           \
+               if (!atomic_swap(&is_running, TRUE)) {                                 \
+                       /* we won the race and get to run the func */                      \
+                       func;                                                              \
+                       wmb();  /* don't let the ran_once write pass previous writes */    \
+                       ran_once = TRUE;                                                   \
+               } else {                                                               \
+                       /* someone else won, wait til they are done to break out */        \
+                       while (!ran_once)                                                  \
+                               cpu_relax();                                                   \
+                                                                               \
+               }                                                                      \
+       }                                                                          \
 }
 
 /* Unprotected, single-threaded version, makes sure func is run exactly once */
-#define run_once(func)                                                         \
+#define run_once_racy(func)                                                    \
 {                                                                              \
        static bool ran_once = FALSE;                                              \
        if (!ran_once) {                                                           \
@@ -149,7 +153,7 @@ static inline bool mult_will_overflow_u64(uint64_t a, uint64_t b)
 /* Aborts with 'retcmd' if this function has already been called.  Compared to
  * run_once, this is put at the top of a function that can be called from
  * multiple sources but should only execute once. */
-#define init_once(retcmd)                                                      \
+#define init_once_racy(retcmd)                                                 \
 {                                                                              \
        static bool initialized = FALSE;                                           \
        if (initialized) {                                                         \
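
After the rename, call sites read like the sketch below (hypothetical
subsys_* names, assuming <ros/common.h> is on the include path):
run_once() is safe under concurrent callers, while the _racy variants
are reserved for paths where a duplicate run or an early return is
tolerable, e.g. while we are still an SCP.

#include <ros/common.h>

static void subsys_init(void)
{
	/* one-time setup */
}

void subsys_op(void)
{
	/* concurrent-safe; nearly free once subsys_init() has completed */
	run_once(subsys_init());
	/* ... normal operation ... */
}

void subsys_lib_init(void)
{
	/* callers after the first just return; fine while single-threaded */
	init_once_racy(return);
	/* ... first-call-only setup ... */
}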
diff --git a/user/parlib/slab.c b/user/parlib/slab.c
index 7c20e79..1c4fc99 100644
@@ -89,7 +89,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t obj_size,
                                      void (*ctor)(void *, size_t),
                                      void (*dtor)(void *, size_t))
 {
-       run_once_safe(kmem_cache_init());
+       run_once(kmem_cache_init());
        struct kmem_cache *kc = kmem_cache_alloc(&kmem_cache_cache, 0);
        __kmem_cache_create(kc, name, obj_size, align, flags, ctor, dtor);
        return kc;
diff --git a/user/parlib/uthread.c b/user/parlib/uthread.c
index c7bac00..db9f675 100644
@@ -62,7 +62,7 @@ static void uthread_manage_thread0(struct uthread *uthread)
  * returns, you're in _M mode, still running thread0, on vcore0 */
 void uthread_lib_init(struct uthread *uthread)
 {
-       init_once(return);
+       init_once_racy(return);
        vcore_init();
        uthread_manage_thread0(uthread);
        /* Receive preemption events.  Note that this merely tells the kernel how to
diff --git a/user/parlib/vcore.c b/user/parlib/vcore.c
index 0440852..10b9883 100644
@@ -78,7 +78,7 @@ void vcore_init(void)
 {
        uintptr_t mmap_block;
        /* Note this is racy, but okay.  The first time through, we are _S */
-       init_once(return);
+       init_once_racy(return);
 
        vcore_thread_control_blocks = (void**)calloc(max_vcores(),sizeof(void*));
 
diff --git a/user/pthread/futex.c b/user/pthread/futex.c
index 381a9ef..58b4131 100644
@@ -83,7 +83,7 @@ int futex(int *uaddr, int op, int val, const struct timespec *timeout,
   assert(uaddr2 == NULL);
   assert(val3 == 0);
 
-  run_once_safe(futex_init());
+  run_once(futex_init());
   switch(op) {
     case FUTEX_WAIT:
       return futex_wait(uaddr, val);
diff --git a/user/pthread/pthread.c b/user/pthread/pthread.c
index 20c0cfc..771e133 100644
@@ -20,7 +20,6 @@
 struct pthread_queue ready_queue = TAILQ_HEAD_INITIALIZER(ready_queue);
 struct pthread_queue active_queue = TAILQ_HEAD_INITIALIZER(active_queue);
 struct mcs_pdr_lock queue_lock;
-pthread_once_t init_once = PTHREAD_ONCE_INIT;
 int threads_ready = 0;
 int threads_active = 0;
 bool can_adjust_vcores = TRUE;
@@ -328,7 +327,7 @@ void pthread_lib_init(void)
        /* Some testing code might call this more than once (once for a slimmed down
         * pth 2LS, and another from pthread_create().  Also, this is racy, but the
         * first time through we are an SCP. */
-       init_once(return);
+       init_once_racy(return);
        assert(!in_multi_mode());
        mcs_pdr_init(&queue_lock);
        /* Create a pthread_tcb for the main thread */
@@ -407,7 +406,6 @@ void pthread_lib_init(void)
 int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
                    void *(*start_routine)(void *), void *arg)
 {
-       /* Racy, but the first time through we are an SCP */
        run_once(pthread_lib_init());
        /* Create the actual thread */
        struct pthread_tcb *pthread;
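
As a sanity check of the pattern (not part of this commit), here is a
throwaway host-side test in plain POSIX C.  RUN_ONCE re-implements the
same double-checked dance with GCC builtins, and all names below are
hypothetical test scaffolding:

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

#define RUN_ONCE(func)                                                  \
do {                                                                    \
	static volatile bool ran_once = false;                          \
	static volatile int is_running = 0;                             \
	if (!ran_once) {                                                \
		if (!__sync_lock_test_and_set(&is_running, 1)) {        \
			func;                                           \
			__sync_synchronize();   /* wmb() stand-in */    \
			ran_once = true;                                \
		} else {                                                \
			while (!ran_once)       /* spin til done */     \
				;                                       \
		}                                                       \
	}                                                               \
} while (0)

static int init_count;

static void count_init(void)
{
	__sync_fetch_and_add(&init_count, 1);
}

static void *worker(void *arg)
{
	(void)arg;
	RUN_ONCE(count_init());
	return NULL;
}

int main(void)
{
	pthread_t threads[8];

	for (int i = 0; i < 8; i++)
		pthread_create(&threads[i], NULL, worker, NULL);
	for (int i = 0; i < 8; i++)
		pthread_join(threads[i], NULL);
	assert(init_count == 1);        /* ran exactly once despite the race */
	return 0;
}

Build with something like 'gcc -O2 -pthread once_test.c'; the assert
should never fire.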