akaros/user/pthread/futex.c
<<
>>
Prefs
   1#include <parlib/common.h>
   2#include <futex.h>
   3#include <sys/queue.h>
   4#include <parlib/uthread.h>
   5#include <parlib/parlib.h>
   6#include <parlib/assert.h>
   7#include <parlib/stdio.h>
   8#include <errno.h>
   9#include <parlib/slab.h>
  10#include <parlib/mcs.h>
  11#include <parlib/alarm.h>
  12
  13struct futex_element {
  14        TAILQ_ENTRY(futex_element) link;
  15        int *uaddr;
  16        bool on_list;
  17        bool waker_using;
  18        uth_cond_var_t cv;
  19};
  20TAILQ_HEAD(futex_queue, futex_element);
  21
  22struct futex_data {
  23        struct mcs_pdr_lock lock;
  24        struct futex_queue queue;
  25};
  26static struct futex_data __futex;
  27
  28static inline void futex_init(void *arg)
  29{
  30        mcs_pdr_init(&__futex.lock);
  31        TAILQ_INIT(&__futex.queue);
  32}
  33
  34static inline int futex_wait(int *uaddr, int val,
  35                             const struct timespec *abs_timeout)
  36{
  37        struct futex_element e[1];
  38        bool timed_out;
  39
  40        mcs_pdr_lock(&__futex.lock);
  41        if (*uaddr != val) {
  42                mcs_pdr_unlock(&__futex.lock);
  43                return 0;
  44        }
  45        e->uaddr = uaddr;
  46        uth_cond_var_init(&e->cv);
  47        e->waker_using = false;
  48        e->on_list = true;
  49        TAILQ_INSERT_TAIL(&__futex.queue, e, link);
  50        /* Lock switch.  Any waker will grab the global lock, then grab ours.
  51         * We're downgrading to the CV lock, which still protects us from
  52         * missing the signal (which is someone calling Wake after changing
  53         * *uaddr).  The CV code will atomically block (with timeout) and unlock
  54         * the CV lock.
  55         *
  56         * Ordering is __futex.lock -> CV lock, but you can have the inner lock
  57         * without holding the outer lock. */
  58        uth_cond_var_lock(&e->cv);
  59        mcs_pdr_unlock(&__futex.lock);
  60
  61        timed_out = !uth_cond_var_timed_wait(&e->cv, NULL, abs_timeout);
  62        /* CV wait returns with the lock held, which is unneccessary for
  63         * futexes.  We could use this cv lock and maybe a trylock on the futex
  64         * to sync with futex_wake, instead of the current synchronization
  65         * techniques with the bools. */
  66        uth_cond_var_unlock(&e->cv);
  67
  68        /* In the common case, the waker woke us and already cleared on_list,
  69         * and we'd rather not grab the __futex lock again.  Note the outer
  70         * on_list check is an optimization, and we need the lock to be sure.
  71         * Also note the waker sets waker_using before on_list, so if we happen
  72         * to see !on_list (while the waker is mucking with the list), we'll see
  73         * waker_using and spin below. */
  74        if (e->on_list) {
  75                mcs_pdr_lock(&__futex.lock);
  76                if (e->on_list)
  77                        TAILQ_REMOVE(&__futex.queue, e, link);
  78                mcs_pdr_unlock(&__futex.lock);
  79        }
  80        rmb();  /* read on_list before waker_using */
  81        /* The waker might have yanked us and is about to kick the CV.  Need to
  82         * wait til they are done before freeing e. */
  83        while (e->waker_using)
  84                cpu_relax_any();
  85
  86        if (timed_out) {
  87                errno = ETIMEDOUT;
  88                return -1;
  89        }
  90        return 0;
  91}
  92
  93static inline int futex_wake(int *uaddr, int count)
  94{
  95        int max = count;
  96        struct futex_element *e, *temp;
  97        struct futex_queue q = TAILQ_HEAD_INITIALIZER(q);
  98
  99        /* The waiter spins on us with cpu_relax_any().  That code assumes the
 100         * target of the wait/spin is in vcore context, or at least has notifs
 101         * disabled. */
 102        uth_disable_notifs();
 103        mcs_pdr_lock(&__futex.lock);
 104        TAILQ_FOREACH_SAFE(e, &__futex.queue, link, temp) {
 105                if (count <= 0)
 106                        break;
 107                if (e->uaddr == uaddr) {
 108                        e->waker_using = true;
 109                        /* flag waker_using before saying !on_list */
 110                        wmb();
 111                        e->on_list = false;
 112                        TAILQ_REMOVE(&__futex.queue, e, link);
 113                        TAILQ_INSERT_TAIL(&q, e, link);
 114                        count--;
 115                }
 116        }
 117        mcs_pdr_unlock(&__futex.lock);
 118        TAILQ_FOREACH_SAFE(e, &q, link, temp) {
 119                TAILQ_REMOVE(&q, e, link);
 120                uth_cond_var_signal(&e->cv);
 121                /* Do not touch e after marking it. */
 122                e->waker_using = false;
 123        }
 124        uth_enable_notifs();
 125
 126        return max - count;
 127}
 128
 129int futex(int *uaddr, int op, int val, const struct timespec *timeout,
 130          int *uaddr2, int val3)
 131{
 132        static parlib_once_t once = PARLIB_ONCE_INIT;
 133        struct timespec abs_timeout[1];
 134
 135        parlib_run_once(&once, futex_init, NULL);
 136        assert(uaddr2 == NULL);
 137        assert(val3 == 0);
 138
 139        /* futex timeouts are relative.  Internally, we use absolute timeouts */
 140        if (timeout) {
 141                clock_gettime(CLOCK_MONOTONIC, abs_timeout);
 142                /* timespec_add is available inside glibc, but not out here. */
 143                abs_timeout->tv_sec += timeout->tv_sec;
 144                abs_timeout->tv_nsec += timeout->tv_nsec;
 145                if (abs_timeout->tv_nsec >= 1000000000) {
 146                        abs_timeout->tv_nsec -= 1000000000;
 147                        abs_timeout->tv_sec++;
 148                }
 149        }
 150
 151        switch (op) {
 152        case FUTEX_WAIT:
 153                return futex_wait(uaddr, val, timeout ? abs_timeout : NULL);
 154        case FUTEX_WAKE:
 155                return futex_wake(uaddr, val);
 156        default:
 157                errno = ENOSYS;
 158                return -1;
 159        }
 160        return -1;
 161}
 162