Removes the MCS-PDR lock holder optimization (XCC)
author     Barret Rhoden <brho@cs.berkeley.edu>
           Wed, 20 Mar 2013 22:17:50 +0000 (15:17 -0700)
committer  Barret Rhoden <brho@cs.berkeley.edu>
           Wed, 20 Mar 2013 22:24:49 +0000 (15:24 -0700)
This was an optimization to make preemption recovery fast, but it was
slowing down the common-case locking path.

Reinstall your kernel headers if you want (it'll still build okay).
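
The slowdown comes from cache-line traffic on the lock word itself.  A rough
before/after sketch in C (the _old/_new struct names are made up for the
illustration; the fields mirror the diffs below):

struct mcs_pdr_qnode;                      /* per-vcore queue node (see mcs.h) */

/* Before: every new lock holder stores to lock_holder, while every spinning
 * waiter loads it, so the same cache line bounces on each acquire. */
struct mcs_pdr_lock_old {
	struct mcs_pdr_qnode *lock;        /* tail; swapped by arriving lockers */
	struct mcs_pdr_qnode *lock_holder; /* written per acquire, read by all waiters */
	struct mcs_pdr_qnode *vc_qnodes;   /* per-vcore qnodes, malloc'd at init */
};

/* After: waiters spin on their own qnode->locked and only poke their
 * predecessor, so the common case touches mostly-private lines. */
struct mcs_pdr_lock_new {
	struct mcs_pdr_qnode *lock;
	struct mcs_pdr_qnode *vc_qnodes;
};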

kern/include/ros/ucq.h
user/parlib/include/mcs.h
user/parlib/mcs.c

diff --git a/kern/include/ros/ucq.h b/kern/include/ros/ucq.h
index d1166e4..a125550 100644
@@ -33,7 +33,7 @@ struct ucq {
        atomic_t                                        cons_idx;               /* cons pg and slot nr */
        bool                                            ucq_ready;              /* ucq is ready to be used */
        /* Userspace lock for modifying the UCQ */
-       void                                            *u_lock[3];             /* sizeof an mcs_pdr_lock */
+       void                                            *u_lock[2];             /* sizeof an mcs_pdr_lock */
 };
 
 /* Struct at the beginning of every page/buffer, tracking consumers and
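
This ucq.h hunk is why the commit message suggests reinstalling kernel
headers: struct ucq reserves an opaque blob sized to hold the userspace
mcs_pdr_lock, and with lock_holder gone that lock is two pointers instead of
three.  A hypothetical compile-time check (not part of this commit; the
include paths and having both structs visible in one translation unit are
assumptions):

#include <assert.h>     /* static_assert (C11) */
#include <ros/ucq.h>    /* struct ucq with its u_lock[2] placeholder */
#include <mcs.h>        /* struct mcs_pdr_lock from parlib */

/* The opaque placeholder must stay at least as big as the real lock. */
static_assert(sizeof(((struct ucq *)0)->u_lock) >= sizeof(struct mcs_pdr_lock),
              "struct ucq's u_lock can no longer hold an mcs_pdr_lock");
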
diff --git a/user/parlib/include/mcs.h b/user/parlib/include/mcs.h
index e581333..8d6dfe1 100644
@@ -71,7 +71,6 @@ struct mcs_pdr_qnode
 struct mcs_pdr_lock
 {
        struct mcs_pdr_qnode *lock;
-       struct mcs_pdr_qnode *lock_holder;
        struct mcs_pdr_qnode *vc_qnodes;        /* malloc this at init time */
 };
 
diff --git a/user/parlib/mcs.c b/user/parlib/mcs.c
index ea281ba..02746af 100644
@@ -170,7 +170,6 @@ void mcs_barrier_wait(mcs_barrier_t* b, size_t pid)
 void mcs_pdr_init(struct mcs_pdr_lock *lock)
 {
        lock->lock = 0;
-       lock->lock_holder = 0;
        lock->vc_qnodes = memalign(__alignof(struct mcs_pdr_qnode),
                                   sizeof(struct mcs_pdr_qnode) * max_vcores());
        assert(lock->vc_qnodes);
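
mcs_pdr_init() still allocates one qnode per vcore; only the lock_holder
clear goes away.  For context, a sketch of how a caller-side wrapper can pick
its own qnode (vcore_id() is parlib's current-vcore accessor; the real
wrappers in mcs.c also do more, such as managing notifications, so treat
these as illustrative only):

/* Illustrative wrappers, not the code in this diff: each vcore uses its own
 * preallocated qnode, so the spin loop mostly touches vcore-private memory. */
static inline void example_mcs_pdr_lock(struct mcs_pdr_lock *lock)
{
	struct mcs_pdr_qnode *qnode = &lock->vc_qnodes[vcore_id()];
	__mcs_pdr_lock(lock, qnode);
}

static inline void example_mcs_pdr_unlock(struct mcs_pdr_lock *lock)
{
	struct mcs_pdr_qnode *qnode = &lock->vc_qnodes[vcore_id()];
	__mcs_pdr_unlock(lock, qnode);
}
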
@@ -218,19 +217,14 @@ void __mcs_pdr_lock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode)
                /* no need for a wrmb(), since this will only get unlocked after they
                 * read our previous write */
                while (qnode->locked) {
-                       /* Ideally, we know who the lock holder is, and we'll make sure they
-                        * run.  If not, we'll make sure our pred is running, which trickles
-                        * up to the lock holder, if it isn't them. */
-                       if (lock->lock_holder)
-                               __ensure_qnode_runs(lock->lock_holder);
-                       else
-                               __ensure_qnode_runs(predecessor);
+                       /* We don't know who the lock holder is (it hurts performance via
+                        * 'true' sharing to track it).  Instead we'll make sure our pred is
+                        * running, which trickles up to the lock holder. */
+                       __ensure_qnode_runs(predecessor);
                        cpu_relax();
                }
        }
        cmb();  /* just need a cmb, the swap handles the CPU wmb/wrmb() */
-       /* publish ourselves as the lock holder (optimization) */
-       lock->lock_holder = qnode;      /* mbs() handled by the cmb/swap */
 }
 
 /* Using the CAS style unlocks, since the usurper recovery is a real pain in the
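
The replacement loop leans entirely on __ensure_qnode_runs(predecessor): if
my predecessor's vcore was preempted, I get it running again; it does the
same for its own predecessor, so the recovery request trickles up the queue
until it reaches whichever vcore actually holds the lock.  A hand-wavy sketch
of that helper, assuming a qnode records its owning vcore and that
preemption-check / change-vcore style calls exist (the actual parlib API may
differ):

/* Sketch under assumptions: qnode->vcoreid, vcore_is_preempted(), and
 * sys_change_vcore() are stand-ins for whatever parlib/the kernel provide. */
static void example_ensure_qnode_runs(struct mcs_pdr_qnode *qnode)
{
	uint32_t vcoreid = qnode->vcoreid;
	if (vcore_is_preempted(vcoreid)) {
		/* Hand our core to the preempted vcore so it (and, transitively,
		 * the lock holder) can make progress. */
		sys_change_vcore(vcoreid, TRUE /* enable_my_notif */);
	}
}
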
@@ -238,8 +232,6 @@ void __mcs_pdr_lock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode)
 void __mcs_pdr_unlock(struct mcs_pdr_lock *lock, struct mcs_pdr_qnode *qnode)
 {
        struct mcs_pdr_qnode *a_tail;
-       /* Clear us from being the lock holder */
-       lock->lock_holder = 0;  /* mbs() are covered by the cmb/cas and the wmb */
        /* Check if someone is already waiting on us to unlock */
        if (qnode->next == 0) {
                cmb();  /* no need for CPU mbs, since there's an atomic_cas() */
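
For completeness, a made-up usage example of the lock guarding a critical
section, built on the illustrative wrappers sketched above (the counter and
its functions exist only for this example):

static struct mcs_pdr_lock counter_lock;   /* mcs_pdr_init() must run first */
static unsigned long counter;

void counter_setup(void)
{
	mcs_pdr_init(&counter_lock);
}

void counter_bump(void)
{
	example_mcs_pdr_lock(&counter_lock);
	counter++;
	example_mcs_pdr_unlock(&counter_lock);
}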