seq counters to protect vcoremap changes
author Barret Rhoden <brho@cs.berkeley.edu>
Thu, 18 Mar 2010 00:33:15 +0000 (17:33 -0700)
committer Kevin Klues <klueska@cs.berkeley.edu>
Thu, 3 Nov 2011 00:35:39 +0000 (17:35 -0700)
The kernel bumps the seq counter whenever it updates the vcore-pcore
mappings or the number of vcores.  Userspace may need to check the
counter to tell whether the mapping it is reading is valid or in the
middle of an update.
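
As a rough sketch of the read protocol (not part of this patch), a
userspace lookup of a vcore's physical core could look like the
following.  The function name and the procinfo pointer are hypothetical;
coremap_seqctr, vcoremap, seq_ctr_t, and seqctr_retry() are what this
patch adds:

	uint32_t get_pcoreid(struct procinfo *pi, uint32_t vcoreid)
	{
		seq_ctr_t seq;
		uint32_t pcoreid;

		/* re-read if the kernel changed the map while we were reading */
		do {
			seq = pi->coremap_seqctr;
			pcoreid = pi->vcoremap[vcoreid].pcoreid;
		} while (seqctr_retry(seq, pi->coremap_seqctr));
		return pcoreid;
	}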

Also, this introduces a reference seq lock as an example of how to use
the counter.  It relies on a spinlock, which isn't that flexible.
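
For reference, here is a sketch of how the seqlock wrapper would be
used; struct frob and its fields are made up for illustration, while
write_seqlock(), write_sequnlock(), read_seqbegin(), and read_seqretry()
are the helpers added to kern/include/atomic.h:

	struct frob {
		seqlock_t lock;		/* assume it starts unlocked, ctr = 0 */
		uint32_t a, b;
	};

	/* writer: readers that catch us mid-update will retry */
	static void frob_update(struct frob *f, uint32_t a, uint32_t b)
	{
		write_seqlock(&f->lock);
		f->a = a;
		f->b = b;
		write_sequnlock(&f->lock);
	}

	/* reader: loop until we get a consistent snapshot of a and b */
	static void frob_read(struct frob *f, uint32_t *a, uint32_t *b)
	{
		seq_ctr_t seq;

		do {
			seq = read_seqbegin(&f->lock);
			*a = f->a;
			*b = f->b;
		} while (read_seqretry(&f->lock, seq));
	}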

kern/include/atomic.h
kern/include/ros/atomic.h [new file with mode: 0644]
kern/include/ros/notification.h
kern/include/ros/procinfo.h
kern/src/process.c
kern/src/resource.c

diff --git a/kern/include/atomic.h b/kern/include/atomic.h
index 3786e89..1069b50 100644
@@ -2,9 +2,13 @@
 #define ROS_KERN_ATOMIC_H
 
 #include <ros/common.h>
+#include <ros/atomic.h>
 #include <arch/mmu.h>
 #include <arch/atomic.h>
 #include <arch/arch.h>
+#include <assert.h>
+
+#define SEQLOCK_DEBUG
 
 static inline void
 (SLOCK(0) spin_lock_irqsave)(spinlock_t RACY*SAFE lock);
@@ -12,6 +16,20 @@ static inline void
 (SUNLOCK(0) spin_unlock_irqsave)(spinlock_t RACY*SAFE lock);
 static inline bool spin_lock_irq_enabled(spinlock_t *SAFE lock);
 
+/* An example seq lock, built from the counter.  I don't particularly like this,
+ * since it forces you to use a specific locking type.  */
+typedef struct seq_lock {
+       spinlock_t                      w_lock;
+       seq_ctr_t                       r_ctr;
+} seqlock_t;
+
+static inline void __seq_start_write(seq_ctr_t *seq_ctr);
+static inline void __seq_end_write(seq_ctr_t *seq_ctr);
+static inline void write_seqlock(seqlock_t *lock);
+static inline void write_sequnlock(seqlock_t *lock);
+static inline seq_ctr_t read_seqbegin(seqlock_t *lock);
+static inline bool read_seqretry(seqlock_t *lock, seq_ctr_t ctr);
+
 /*********************** Checklist stuff **********************/
 typedef struct checklist_mask {
        // only need an uint8_t, but we need the bits[] to be word aligned
@@ -103,4 +121,49 @@ static inline bool spin_lock_irq_enabled(spinlock_t *SAFE lock)
        return lock->rlock & SPINLOCK_IRQ_EN;
 }
 
+/* Note, the seq_ctr is not a full seq lock - just the counter guts.  Write
+ * access can be controlled by another lock (like the proc-lock).  It is the
+ * writer's responsibility to call start_ and end_write to signal readers of a
+ * concurrent write. */
+static inline void __seq_start_write(seq_ctr_t *seq_ctr)
+{
+#ifdef SEQLOCK_DEBUG
+       assert(*seq_ctr % 2 == 0);
+#endif
+       (*seq_ctr)++;
+}
+
+static inline void __seq_end_write(seq_ctr_t *seq_ctr)
+{
+#ifdef SEQLOCK_DEBUG
+       assert(*seq_ctr % 2 == 1);
+#endif
+       (*seq_ctr)++;
+}
+
+/* Untested reference implementation of a seq lock.  As mentioned above, we
+ * might need a variety of these (for instance, this doesn't do an irqsave).  Or
+ * there may be other invariants that we need the lock to protect. */
+static inline void write_seqlock(seqlock_t *lock)
+{
+       spin_lock(&lock->w_lock);
+       __seq_start_write(&lock->r_ctr);
+}
+
+static inline void write_sequnlock(seqlock_t *lock)
+{
+       __seq_end_write(&lock->r_ctr);
+       spin_unlock(&lock->w_lock);
+}
+
+static inline seq_ctr_t read_seqbegin(seqlock_t *lock)
+{
+       return lock->r_ctr;
+}
+
+static inline bool read_seqretry(seqlock_t *lock, seq_ctr_t ctr)
+{
+       return seqctr_retry(lock->r_ctr, ctr);
+}
+
 #endif /* !ROS_KERN_ATOMIC_H */
diff --git a/kern/include/ros/atomic.h b/kern/include/ros/atomic.h
new file mode 100644
index 0000000..d471cc2
--- /dev/null
+++ b/kern/include/ros/atomic.h
@@ -0,0 +1,40 @@
+/* Copyright (c) 2010 The Regents of the University of California
+ * Barret Rhoden <brho@cs.berkeley.edu>
+ * See LICENSE for details.
+ *
+ * Locking and atomics that are part of the kernel interface. */
+
+#ifndef ROS_INC_ATOMIC_H
+#define ROS_INC_ATOMIC_H
+
+#include <ros/common.h>
+
+/* The seq counters are used by userspace to see if the kernel is updating
+ * something or if something is valid, such as the vcore->pcore mapping.  The
+ * way a reader can tell nothing has changed is to read the counter before and
+ * after.  If the value has changed, the reader needs to re-read.  If the value
+ * is odd, a write is in progress or it is otherwise invalid/locked. */
+
+typedef uint8_t seq_ctr_t;
+#define SEQCTR_INITIALIZER 0
+
+static inline bool seq_is_locked(seq_ctr_t seq_ctr);
+static inline bool seqctr_retry(seq_ctr_t old_ctr, seq_ctr_t new_ctr);
+
+/* Basic helpers for readers.  Ex:
+ * do {
+ *             seq_ctr_t seq = kernel_maintained_seq_ctr;
+ *             read_data_whatever();
+ * } while (seqctr_retry(seq, kernel_maintained_seq_ctr));
+ */
+static inline bool seq_is_locked(seq_ctr_t seq_ctr)
+{
+       return seq_ctr % 2;
+}
+
+static inline bool seqctr_retry(seq_ctr_t old_ctr, seq_ctr_t new_ctr)
+{
+       return (seq_is_locked(old_ctr)) || (old_ctr != new_ctr);        
+}
+
+#endif /* !ROS_INC_ATOMIC_H */
diff --git a/kern/include/ros/notification.h b/kern/include/ros/notification.h
index 82bd3da..282dfed 100644
@@ -8,13 +8,9 @@
 #define ROS_INC_NOTIFICATION_H
 
 #include <ros/common.h>
+#include <ros/atomic.h>
 #include <ros/arch/trapframe.h>
 // TODO: #include some one-way queue macros for the notif_event queue
-// TODO: move me to an atomic header, and give me some support functions.
-#ifndef __TMP_SEQ_CTR
-#define __TMP_SEQ_CTR
-typedef uint8_t seq_ctr_t;
-#endif
 
 /* How/If a process wants to be notified about an event */
 struct notif_method {
diff --git a/kern/include/ros/procinfo.h b/kern/include/ros/procinfo.h
index 30d9890..f901361 100644
@@ -5,16 +5,12 @@
 
 #include <ros/memlayout.h>
 #include <ros/common.h>
+#include <ros/atomic.h>
 #include <ros/arch/arch.h>
 
 #define PROCINFO_MAX_ARGP 32
 #define PROCINFO_ARGBUF_SIZE 3072
 
-// TODO: move me to an atomic header, and give me some support functions.
-#ifndef __TMP_SEQ_CTR
-#define __TMP_SEQ_CTR
-typedef uint8_t seq_ctr_t;
-#endif
 
 /* Not necessary to expose all of this, but it doesn't hurt, and is convenient
  * for the kernel. */
@@ -44,7 +40,7 @@ typedef struct procinfo {
        struct vcore            vcoremap[MAX_NUM_CPUS];
        uint32_t                        num_vcores;
        struct pcore            pcoremap[MAX_NUM_CPUS];
-       seq_ctr_t                       coremap_edit;
+       seq_ctr_t                       coremap_seqctr;
 } procinfo_t;
 #define PROCINFO_NUM_PAGES  ((sizeof(procinfo_t)-1)/PGSIZE + 1)        
 
diff --git a/kern/src/process.c b/kern/src/process.c
index 185ad06..c1f24ed 100644
@@ -204,6 +204,7 @@ proc_init_procinfo(struct proc* p)
        memset(&p->procinfo->vcoremap, 0, sizeof(p->procinfo->vcoremap));
        memset(&p->procinfo->pcoremap, 0, sizeof(p->procinfo->pcoremap));
        p->procinfo->num_vcores = 0;
+       p->procinfo->coremap_seqctr = SEQCTR_INITIALIZER;
        // TODO: change these too
        p->procinfo->pid = p->pid;
        p->procinfo->ppid = p->ppid;
@@ -385,9 +386,10 @@ void proc_run(struct proc *p)
                         * only in RUNNING_S.  can use the vcoremap, which makes death easy.
                         * Also, this is the signal used in trap.c to know to save the tf in
                         * env_tf. */
-                       // TODO: (VSEQ) signal these vcore changes
+                       __seq_start_write(&p->procinfo->coremap_seqctr);
                        p->procinfo->num_vcores = 0;
                        __map_vcore(p, 0, core_id()); // sort of.  this needs work.
+                       __seq_end_write(&p->procinfo->coremap_seqctr);
                        spin_unlock_irqsave(&p->proc_lock);
                        /* Transferring our reference to startcore, where p will become
                         * current.  If it already is, decref in advance.  This is similar
@@ -588,8 +590,10 @@ void proc_destroy(struct proc *p)
                        #endif
                        send_active_message(p->procinfo->vcoremap[0].pcoreid, __death,
                                           (void *SNT)0, (void *SNT)0, (void *SNT)0);
-                       // TODO: (VSEQ) signal these vcore changes
+                       __seq_start_write(&p->procinfo->coremap_seqctr);
+                       // TODO: might need to sort num_vcores too later (VC#)
                        __unmap_vcore(p, 0);
+                       __seq_end_write(&p->procinfo->coremap_seqctr);
                        #if 0
                        /* right now, RUNNING_S only runs on a mgmt core (0), not cores
                         * managed by the idlecoremap.  so don't do this yet. */
@@ -700,11 +704,12 @@ void proc_yield(struct proc *SAFE p)
                        schedule_proc(p);
                        break;
                case (PROC_RUNNING_M):
-                       // TODO: (VSEQ) signal these vcore changes
+                       __seq_start_write(&p->procinfo->coremap_seqctr);
                        // give up core
                        __unmap_vcore(p, get_vcoreid(p, core_id()));
                        p->resources[RES_CORES].amt_granted = --(p->procinfo->num_vcores);
                        p->resources[RES_CORES].amt_wanted = p->procinfo->num_vcores;
+                       __seq_end_write(&p->procinfo->coremap_seqctr);
                        // add to idle list
                        put_idle_core(core_id());
                        // last vcore?  then we really want 1, and to yield the gang
@@ -792,8 +797,8 @@ bool __proc_give_cores(struct proc *SAFE p, uint32_t *pcorelist, size_t num)
                                for (int i = 0; i < p->procinfo->num_vcores; i++)
                                        assert(p->procinfo->vcoremap[i].valid);
                        }
-                       // TODO: (VSEQ) signal these vcore changes
                        // add new items to the vcoremap
+                       __seq_start_write(&p->procinfo->coremap_seqctr);
                        for (int i = 0; i < num; i++) {
                                // find the next free slot, which should be the next one
                                free_vcoreid = get_free_vcoreid(p, free_vcoreid);
@@ -802,13 +807,14 @@ bool __proc_give_cores(struct proc *SAFE p, uint32_t *pcorelist, size_t num)
                                __map_vcore(p, free_vcoreid, pcorelist[i]);
                                p->procinfo->num_vcores++;
                        }
+                       __seq_end_write(&p->procinfo->coremap_seqctr);
                        break;
                case (PROC_RUNNING_M):
                        /* Up the refcnt, since num cores are going to start using this
                         * process and have it loaded in their 'current'. */
                        // TODO: (REF) use proc_incref once we have atomics
                        p->env_refcnt += num;
-                       // TODO: (VSEQ) signal these vcore changes
+                       __seq_start_write(&p->procinfo->coremap_seqctr);
                        for (int i = 0; i < num; i++) {
                                free_vcoreid = get_free_vcoreid(p, free_vcoreid);
                                //todo
@@ -822,6 +828,7 @@ bool __proc_give_cores(struct proc *SAFE p, uint32_t *pcorelist, size_t num)
                                if (pcorelist[i] == core_id())
                                        self_ipi_pending = TRUE;
                        }
+                       __seq_end_write(&p->procinfo->coremap_seqctr);
                        break;
                default:
                        panic("Weird state(%s) in %s()", procstate2str(p->state),
@@ -874,7 +881,7 @@ bool __proc_take_cores(struct proc *SAFE p, uint32_t *pcorelist,
        assert((num <= p->procinfo->num_vcores) &&
               (num_idlecores + num <= num_cpus));
        spin_unlock(&idle_lock);
-       // TODO: (VSEQ) signal these vcore changes
+       __seq_start_write(&p->procinfo->coremap_seqctr);
        for (int i = 0; i < num; i++) {
                vcoreid = get_vcoreid(p, pcorelist[i]);
                pcoreid = p->procinfo->vcoremap[vcoreid].pcoreid;
@@ -889,6 +896,7 @@ bool __proc_take_cores(struct proc *SAFE p, uint32_t *pcorelist,
                put_idle_core(pcoreid);
        }
        p->procinfo->num_vcores -= num;
+       __seq_end_write(&p->procinfo->coremap_seqctr);
        p->resources[RES_CORES].amt_granted -= num;
        return self_ipi_pending;
 }
@@ -918,7 +926,7 @@ bool __proc_take_allcores(struct proc *SAFE p, amr_t message,
        spin_lock(&idle_lock);
        assert(num_idlecores + p->procinfo->num_vcores <= num_cpus); // sanity
        spin_unlock(&idle_lock);
-       // TODO: (VSEQ) signal these vcore changes
+       __seq_start_write(&p->procinfo->coremap_seqctr);
        for (int i = 0; i < p->procinfo->num_vcores; i++) {
                // find next active vcore
                active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
@@ -933,6 +941,7 @@ bool __proc_take_allcores(struct proc *SAFE p, amr_t message,
                put_idle_core(pcoreid);
        }
        p->procinfo->num_vcores = 0;
+       __seq_end_write(&p->procinfo->coremap_seqctr);
        p->resources[RES_CORES].amt_granted = 0;
        return self_ipi_pending;
 }
diff --git a/kern/src/resource.c b/kern/src/resource.c
index f4e1aab..ad8f97c 100644
@@ -112,8 +112,10 @@ ssize_t core_request(struct proc *p)
                                 * syscall). */
                                /* this process no longer runs on its old location (which is
                                 * this core, for now, since we don't handle async calls) */
-                               // TODO: (VSEQ) signal these vcore changes
+                               __seq_start_write(&p->procinfo->coremap_seqctr);
+                               // TODO: (VC#) might need to adjust num_vcores
                                __unmap_vcore(p, 0);
+                               __seq_end_write(&p->procinfo->coremap_seqctr);
                                // will need to give up this core / idle later (sync)
                                need_to_idle = TRUE;
                                // change to runnable_m (its TF is already saved)