Added BCQs to the preempt structs in procdata
author      Barret Rhoden <brho@cs.berkeley.edu>
            Fri, 26 Mar 2010 23:43:18 +0000 (16:43 -0700)
committer   Kevin Klues <klueska@cs.berkeley.edu>
            Thu, 3 Nov 2011 00:35:40 +0000 (17:35 -0700)
This involved duplicating the code for atomic operations (though for
now, only CAS is needed, and that might go away).  On sparc, this meant
copying all of the spinlock stuff too.

Also, bcq_struct.h carries the structure and related macros, with the
actual work done in bcq.h.  This is because libc includes bcq_struct
but doesn't actually do the ops, and we want to limit the amount of
crap in the ros interface (like comp_and_swap).
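
As a rough sketch of that split (the struct and consumer code here are
made up for illustration, not part of this commit): a header that only
needs the queue layout includes bcq_struct.h and can declare queue
types, while code that actually performs the operations includes bcq.h,
which pulls in the arch atomics.

    /* layout-only user, e.g. a header that glibc ends up including */
    #include <ros/bcq_struct.h>

    struct my_event {                       /* hypothetical element type */
        int type;
    };
    DEFINE_BCQ_TYPES(my_evt, struct my_event, 16);

    /* op-performing user (kernel or parlib code) includes the full thing,
     * which brings in arch/atomic.h for comp_and_swap */
    #include <ros/bcq.h>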

kern/include/ros/bcq.h
kern/include/ros/bcq_struct.h [new file with mode: 0644]
kern/include/ros/notification.h
user/include/i686/atomic.h [new file with mode: 0644]
user/include/sparc/atomic.h [new file with mode: 0644]

diff --git a/kern/include/ros/bcq.h b/kern/include/ros/bcq.h
index 45402b8..b5b255b 100644
--- a/kern/include/ros/bcq.h
+++ b/kern/include/ros/bcq.h
@@ -9,7 +9,11 @@
 #ifndef ROS_INC_BCQ_H
 #define ROS_INC_BCQ_H
 
-#include <atomic.h>
+#include <ros/common.h>
+#include <ros/bcq_struct.h>
+/* Each arch has some basic atomic ops.  We need comp_and_swap for now. */
+#include <arch/atomic.h>
+#include <string.h>
 
 /* Bounded Concurrent Queues, untrusted consumer
  *
  * Using uint32_t for now, since that's the comp_and_swap we have.  We'll
  * probably get other sizes once we're sure we like the current one.  */
 
+#if 0 // Defined in the included header
+
 struct bcq_header {
        uint32_t prod_idx;              /* next to be produced in */
        uint32_t cons_pub_idx;  /* last completely consumed */
        uint32_t cons_pvt_idx;  /* last a consumer has dibs on */
 };
 
-#define DEFINE_BCQ_TYPES(__name, __elem_t, __num_elems)                        \
-                                                                               \
-/* Wrapper, per element, with the consumption bool */                          \
-struct __name##_bcq_wrap {                                                     \
-       __elem_t elem;                                                             \
-       bool rdy_for_cons;      /* elem is ready for consumption */                    \
-};                                                                             \
-                                                                               \
-/* The actual BC queue */                                                      \
-struct __name##_bcq {                                                          \
-       struct bcq_header hdr;                                                     \
-       struct __name##_bcq_wrap wraps[__num_elems];                               \
-};                                                                             
-                                                                               
+// This is there too:
+#define DEFINE_BCQ_TYPES(__name, __elem_t, __num_elems)
+
+#endif
+
 /* Functions */                                                                
 #define bcq_init(_bcq, _ele_type, _num_elems)                                  \
        memset((_bcq), 0, sizeof( _ele_type ) * (_num_elems))                                 
diff --git a/kern/include/ros/bcq_struct.h b/kern/include/ros/bcq_struct.h
new file mode 100644
index 0000000..20058f0
--- /dev/null
+++ b/kern/include/ros/bcq_struct.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2010 The Regents of the University of California
+ * Barret Rhoden <brho@cs.berkeley.edu>
+ * See LICENSE for details.
+ *
+ * Struct for the BCQ.  Needs to be in its own file so glibc doesn't try to
+ * include any of the atomics needed for the actual BCQ operations.  */
+
+#ifndef ROS_INC_BCQ_STRUCT_H
+#define ROS_INC_BCQ_STRUCT_H
+
+struct bcq_header {
+       uint32_t prod_idx;              /* next to be produced in */
+       uint32_t cons_pub_idx;  /* last completely consumed */
+       uint32_t cons_pvt_idx;  /* last a consumer has dibs on */
+};
+
+#define DEFINE_BCQ_TYPES(__name, __elem_t, __num_elems)                        \
+                                                                               \
+/* Wrapper, per element, with the consumption bool */                          \
+struct __name##_bcq_wrap {                                                     \
+       __elem_t elem;                                                             \
+       bool rdy_for_cons;      /* elem is ready for consumption */                    \
+};                                                                             \
+                                                                               \
+/* The actual BC queue */                                                      \
+struct __name##_bcq {                                                          \
+       struct bcq_header hdr;                                                     \
+       struct __name##_bcq_wrap wraps[__num_elems];                               \
+};                                                                             
+                                                                               
+#endif /* !ROS_INC_BCQ_STRUCT_H */
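
For reference, the DEFINE_BCQ_TYPES invocation that notification.h makes
below expands, mechanically per the macro above, to roughly this:

    /* expansion of DEFINE_BCQ_TYPES(notif_evt, struct notif_event,
     * NR_PERCORE_EVENTS): a per-element wrapper plus the queue itself */
    struct notif_evt_bcq_wrap {
        struct notif_event elem;
        bool rdy_for_cons;                  /* elem is ready for consumption */
    };

    struct notif_evt_bcq {
        struct bcq_header hdr;
        struct notif_evt_bcq_wrap wraps[NR_PERCORE_EVENTS];
    };
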
diff --git a/kern/include/ros/notification.h b/kern/include/ros/notification.h
index 0491005..3ec0e3e 100644
--- a/kern/include/ros/notification.h
+++ b/kern/include/ros/notification.h
@@ -9,8 +9,8 @@
 
 #include <ros/common.h>
 #include <ros/atomic.h>
+#include <ros/bcq_struct.h>
 #include <ros/arch/trapframe.h>
-// TODO: #include some one-way queue macros for the notif_event queue
 
 /* How/If a process wants to be notified about an event */
 struct notif_method {
@@ -56,21 +56,19 @@ struct notif_event {
 
 #define NR_PERCORE_EVENTS 10 // whatever
 
+DEFINE_BCQ_TYPES(notif_evt, struct notif_event, NR_PERCORE_EVENTS);
+
 /* Per-core data about preemptions and notifications */
 struct preempt_data {
        struct user_trapframe   preempt_tf;
        struct ancillary_state  preempt_anc;
        struct user_trapframe   notif_tf;
        void                                    *transition_stack;      /* advertised by the user */
-       // TODO: move to procinfo!
-       uint64_t                                preempt_pending;
        bool                                    notif_enabled;          /* vcore is willing to receive*/
        bool                                    notif_pending;          /* notif k_msg on the way */
        seq_ctr_t                               preempt_tf_valid;
-       uint8_t                                 notif_bmask[(NR_PERCORE_EVENTS - 1) / 8 + 1];
-       struct notif_event              notif_events[NR_PERCORE_EVENTS];
-       unsigned int                    prod_idx;
-       unsigned int                    cons_idx;
+       uint8_t                                 notif_bmask[(MAX_NR_NOTIF - 1) / 8 + 1];
+       struct notif_evt_bcq    notif_evts;
        unsigned int                    event_overflows;
 };
 
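A hedged sketch of how the resized bitmask would get used (the helper
below is hypothetical, not from this commit): notification number n maps
to bit (n % 8) of byte (n / 8), which is why the array holds
(MAX_NR_NOTIF - 1) / 8 + 1 bytes, and a byte-wide atomic or (like the
atomic_orb in the new i686 atomic.h below) can set a bit without
clobbering its neighbors.

    /* hypothetical helper: mark notification 'notif_num' pending */
    static void set_notif_bit(struct preempt_data *vcpd, unsigned int notif_num)
    {
        atomic_orb(&vcpd->notif_bmask[notif_num / 8], 1 << (notif_num % 8));
    }
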
diff --git a/user/include/i686/atomic.h b/user/include/i686/atomic.h
new file mode 100644
index 0000000..5cc5437
--- /dev/null
+++ b/user/include/i686/atomic.h
@@ -0,0 +1,79 @@
+#ifndef PARLIB_ATOMIC_H
+#define PARLIB_ATOMIC_H
+
+#include <ros/common.h>
+
+typedef void * RACY atomic_t;
+
+static inline void atomic_init(atomic_t *number, int32_t val);
+static inline int32_t atomic_read(atomic_t *number);
+static inline void atomic_set(atomic_t *number, int32_t val);
+static inline void atomic_inc(atomic_t *number);
+static inline void atomic_dec(atomic_t *number);
+static inline uint32_t atomic_swap(uint32_t *addr, uint32_t val);
+static inline bool atomic_comp_swap(uint32_t *addr, uint32_t exp_val,
+                                    uint32_t new_val);
+static inline void atomic_andb(volatile uint8_t RACY* number, uint8_t mask);
+static inline void atomic_orb(volatile uint8_t RACY* number, uint8_t mask);
+
+/* Inlined functions declared above */
+static inline void atomic_init(atomic_t *number, int32_t val)
+{
+       asm volatile("movl %1,%0" : "=m"(*number) : "r"(val));
+}
+
+static inline int32_t atomic_read(atomic_t *number)
+{
+       int32_t val;
+       asm volatile("movl %1,%0" : "=r"(val) : "m"(*number));
+       return val;
+}
+
+static inline void atomic_set(atomic_t *number, int32_t val)
+{
+       asm volatile("movl %1,%0" : "=m"(*number) : "r"(val));
+}
+
+// need to do this with pointers and deref.  %0 needs to be the memory address
+static inline void atomic_inc(atomic_t *number)
+{
+       asm volatile("lock incl %0" : "=m"(*number) : : "cc");
+}
+
+static inline void atomic_dec(atomic_t *number)
+{
+       // for instance, this doesn't work:
+       //asm volatile("lock decl (%0)" : "=r"(number) : : "cc");
+       asm volatile("lock decl %0" : "=m"(*number) : : "cc");
+}
+
+static inline uint32_t atomic_swap(uint32_t *addr, uint32_t val)
+{
+       // this would work, but its code is bigger, and it's not like the others
+       //asm volatile("xchgl %0,(%2)" : "=r"(val) : "0"(val), "r"(addr) : "memory");
+       asm volatile("xchgl %0,%1" : "=r"(val), "=m"(*addr) : "0"(val), "m"(*addr));
+       return val;
+}
+
+/* reusing exp_val for the bool return */
+static inline bool atomic_comp_swap(uint32_t *addr, uint32_t exp_val,
+                                    uint32_t new_val)
+{
+       asm volatile("lock cmpxchgl %4,%1; sete %%al"
+                    : "=a"(exp_val), "=m"(*addr)
+                    : "m"(*addr), "a"(exp_val), "r"(new_val)
+                    : "cc");
+       return exp_val;
+}
+
+static inline void atomic_andb(volatile uint8_t RACY*number, uint8_t mask)
+{
+       asm volatile("lock andb %1,%0" : "=m"(*number) : "r"(mask) : "cc");
+}
+
+static inline void atomic_orb(volatile uint8_t RACY*number, uint8_t mask)
+{
+       asm volatile("lock orb %1,%0" : "=m"(*number) : "r"(mask) : "cc");
+}
+
+#endif /* !PARLIB_ATOMIC_H */
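
The commit message says comp_and_swap is the one op the BCQ needs for
now.  As a hedged sketch of the kind of thing bcq.h will do with it
(this is not the actual bcq.h code), a producer can claim a slot by
CAS-looping on the header's prod_idx:

    /* hypothetical sketch: reserve the next producer slot in a bcq_header */
    static uint32_t reserve_prod_slot(struct bcq_header *hdr)
    {
        uint32_t idx;
        do {
            idx = hdr->prod_idx;
            /* a real implementation would also check for a full queue here */
        } while (!atomic_comp_swap(&hdr->prod_idx, idx, idx + 1));
        return idx;
    }
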
diff --git a/user/include/sparc/atomic.h b/user/include/sparc/atomic.h
new file mode 100644
index 0000000..4e01ec6
--- /dev/null
+++ b/user/include/sparc/atomic.h
@@ -0,0 +1,136 @@
+#ifndef PARLIB_ATOMIC_H
+#define PARLIB_ATOMIC_H
+
+/* Unlike in x86, we need to include spinlocks in the user atomic ops file.
+ * Since compare and swap isn't truly non-blocking, and we can't disable
+ * interrupts in userspace, there is a slight chance of deadlock. */
+
+#include <ros/common.h>
+#include <ros/arch/membar.h>
+
+typedef struct
+{
+       volatile uint32_t rlock;
+} spinlock_t;
+
+#define SPINLOCK_INITIALIZER {0}
+
+// atomic_t is void*, so we can't accidentally dereference it
+typedef void* atomic_t;
+
+static inline void atomic_init(atomic_t* number, int32_t val);
+static inline int32_t atomic_read(atomic_t* number);
+static inline void atomic_set(atomic_t* number, int32_t val);
+static inline void atomic_add(atomic_t* number, int32_t inc);
+static inline void atomic_inc(atomic_t* number);
+static inline void atomic_dec(atomic_t* number);
+static inline uint32_t atomic_swap(uint32_t* addr, uint32_t val);
+static inline bool atomic_comp_swap(uint32_t *addr, uint32_t exp_val,
+                                    uint32_t new_val);
+static inline uint32_t spin_trylock(spinlock_t*SAFE lock);
+static inline uint32_t spin_locked(spinlock_t*SAFE lock);
+static inline void spin_lock(spinlock_t*SAFE lock);
+static inline void spin_unlock(spinlock_t*SAFE lock);
+
+/* Inlined functions declared above */
+
+static inline void atomic_init(atomic_t* number, int32_t val)
+{
+       val <<= 8;
+       __asm__ __volatile__ ("st %0,[%1]" : : "r"(val), "r"(number) : "memory");
+}
+
+static inline int32_t atomic_read(atomic_t* number)
+{
+       int32_t val;
+       __asm__ __volatile__ ("ld [%1],%0" : "=r"(val) : "r"(number));
+       return val >> 8;
+}
+
+static inline void atomic_add(atomic_t* number, int32_t inc)
+{
+       // this is pretty clever.  the lower 8 bits (i.e byte 3)
+       // of the atomic_t serve as a spinlock.  let's acquire it.
+       { TRUSTEDBLOCK spin_lock((spinlock_t*)number); }
+
+       // compute new counter value.
+       inc += atomic_read(number);
+
+       // set the new counter value.  the lock is cleared (for free)
+       atomic_init(number,inc);
+}
+
+static inline void atomic_set(atomic_t* number, int32_t val)
+{
+       // this works basically the same as atomic_add... but without the add
+       spin_lock((spinlock_t*)number);
+       atomic_init(number,val);
+}
+
+static inline void atomic_inc(atomic_t* number)
+{
+       atomic_add(number,1);
+}
+
+static inline void atomic_dec(atomic_t* number)
+{
+       atomic_add(number,-1);
+}
+
+static inline uint32_t atomic_swap(uint32_t* addr, uint32_t val)
+{
+       __asm__ __volatile__ ("swap [%2],%0" : "=r"(val) : "0"(val),"r"(addr) : "memory");
+       return val;
+}
+
+// TODO: make this better! (no global locks, etc)
+static inline bool atomic_comp_swap(uint32_t *addr, uint32_t exp_val,
+                                    uint32_t new_val)
+{
+       bool retval = 0;
+       uint32_t temp;
+       static spinlock_t cas_lock = SPINLOCK_INITIALIZER;
+
+       if (*addr != exp_val)
+               return 0;
+       spin_lock(&cas_lock);
+       if (*addr == exp_val) {
+               atomic_swap(addr, new_val);
+               retval = 1;
+       }
+       spin_unlock(&cas_lock);
+       return retval;
+}
+
+static inline uint32_t spin_trylock(spinlock_t*SAFE lock)
+{
+       uint32_t reg;
+       __asm__ __volatile__ ("ldstub [%1+3],%0" : "=r"(reg) : "r"(&lock->rlock) : "memory");
+       return reg;
+}
+
+static inline uint32_t spin_locked(spinlock_t*SAFE lock)
+{
+       uint32_t reg;
+       __asm__ __volatile__ ("ldub [%1+3],%0" : "=r"(reg) : "r"(&lock->rlock));
+       return reg;
+}
+
+static inline void spin_lock(spinlock_t*SAFE lock)
+{
+       while(spin_trylock(lock))
+               while(spin_locked(lock));
+}
+
+static inline void spin_unlock(spinlock_t*SAFE lock)
+{
+       wmb();
+       __asm__ __volatile__ ("stub %%g0,[%0+3]" : : "r"(&lock->rlock) : "memory");
+}
+
+static inline void spinlock_init(spinlock_t* lock)
+{
+       lock->rlock = 0;
+}
+
+#endif /* !PARLIB_ATOMIC_H */
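
One non-obvious detail in the sparc version: atomic_t packs the counter
into the upper 24 bits of the word and uses the low-order byte (byte 3,
given big-endian addressing) as a ldstub spinlock, so atomic_init()
stores val << 8 with the lock byte clear and atomic_read() shifts it
back out.  A small usage sketch under those assumptions (hypothetical
caller, header above already included):

    void count_events(void)
    {
        atomic_t counter;

        atomic_init(&counter, 0);   /* stores 0 << 8; the low (lock) byte is clear */
        atomic_add(&counter, 5);    /* ldstub-locks the low byte; storing the new
                                     * value as (val << 8) releases the lock for free */
        atomic_inc(&counter);
        /* atomic_read(&counter) now returns 6 */
    }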