net: Make select() not spurious
diff --git a/user/vmm/sched.c b/user/vmm/sched.c
index de1a75b..32c420e 100644
@@ -6,6 +6,7 @@
 
 #include <vmm/sched.h>
 #include <vmm/vmm.h>
+#include <vmm/vthread.h>
 #include <sys/mman.h>
 #include <stdlib.h>
 #include <assert.h>
@@ -28,6 +29,7 @@ static struct spin_pdr_lock queue_lock = SPINPDR_INITIALIZER;
 /* Runnable queues, broken up by thread type. */
 static struct vmm_thread_tq rnbl_tasks = TAILQ_HEAD_INITIALIZER(rnbl_tasks);
 static struct vmm_thread_tq rnbl_guests = TAILQ_HEAD_INITIALIZER(rnbl_guests);
+static struct vmm_thread **greedy_rnbl_guests;
 /* Counts of *unblocked* threads.  Unblocked = Running + Runnable. */
 static atomic_t nr_unblk_tasks;
 static atomic_t nr_unblk_guests;
@@ -73,6 +75,17 @@ static struct vmm_thread *alloc_vmm_thread(struct virtual_machine *vm,
 static void *__alloc_stack(size_t stacksize);
 static void __free_stack(void *stacktop, size_t stacksize);
 
+static bool sched_is_greedy(void)
+{
+       return parlib_never_yield;
+}
+
+static unsigned int sched_nr_greedy_cores(void)
+{
+       if (!current_vm)
+               return 1;
+       return current_vm->nr_gpcs + 1;
+}
 
 static void restart_thread(struct syscall *sysc)
 {
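
A quick note on the two helpers above: sched_is_greedy() keys off parlib's
never-yield flag, and sched_nr_greedy_cores() sizes the VM at one vcore per
guest physical core plus vcore 0 for task threads.  Below is a minimal sketch
of how a VMM might opt in before bringing up the VM; it is illustrative only
(includes omitted) and assumes the vmm_init() signature introduced later in
this patch.

    /* Sketch only: opting into the greedy scheduler before VM setup.
     * parlib_never_yield is the flag sched_is_greedy() reads; the rest of
     * this setup is a minimal, hypothetical example. */
    static struct virtual_machine sketch_vm = { .nr_gpcs = 1 };
    static struct vmm_gpcore_init sketch_gpci;  /* zeroed; a real VMM fills this in */

    static void sketch_start_greedy_vm(void)
    {
            parlib_never_yield = TRUE;      /* sched_is_greedy() now returns TRUE */
            /* In greedy mode, vmm_init() requests nr_gpcs + 1 vcores up front. */
            if (vmm_init(&sketch_vm, &sketch_gpci, 0))
                    exit(1);
    }
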
@@ -206,9 +219,13 @@ static void yield_current_uth(void)
  * to send events, how to avoid interfering with gpcs, etc. */
 static bool try_to_get_vcores(void)
 {
-       int nr_vcores_wanted = desired_nr_vcores();
-       bool have_enough = nr_vcores_wanted <= num_vcores();
+       int nr_vcores_wanted;
+       bool have_enough;
 
+       if (sched_is_greedy())
+               return num_vcores() == sched_nr_greedy_cores();
+       nr_vcores_wanted = desired_nr_vcores();
+       have_enough = nr_vcores_wanted <= num_vcores();
        if (have_enough) {
                vcore_tick_disable();
                return TRUE;
@@ -227,7 +244,35 @@ static void stats_run_vth(struct vmm_thread *vth)
        }
 }
 
-static void __attribute__((noreturn)) vmm_sched_entry(void)
+/* TODO: This assumes we get all of our vcores. */
+static struct vmm_thread *sched_pick_thread_greedy(void)
+{
+       struct vmm_thread *vth;
+
+       if (current_uthread) {
+               stats_run_vth((struct vmm_thread*)current_uthread);
+               run_current_uthread();
+       }
+       if (vcore_id() == 0) {
+               spin_pdr_lock(&queue_lock);
+               vth = __pop_first(&rnbl_tasks);
+               spin_pdr_unlock(&queue_lock);
+               return vth;
+       }
+       /* This races with enqueue_vmm_thread, which can run on another core.
+        * Here are the rules:
+        * - set when runnable (race free, only one state for the thread at a time)
+        * - cleared when we run it (race free, we're the only runners)
+        * - if we take an interrupt, we'll just run_current_uthread and not check
+        * - if we vmexit, we'll run the buddy directly */
+       assert(vcore_id() <= current_vm->nr_gpcs);
+       vth = greedy_rnbl_guests[vcore_id() - 1];
+       if (vth)
+               greedy_rnbl_guests[vcore_id() - 1] = NULL;
+       return vth;
+}
+
+static struct vmm_thread *sched_pick_thread_nice(void)
 {
        struct vmm_thread *vth;
        bool have_enough;
@@ -245,8 +290,29 @@ static void __attribute__((noreturn)) vmm_sched_entry(void)
                vth = pick_a_thread_plenty();
        else
                vth = pick_a_thread_degraded();
-       if (!vth)
-               vcore_yield_or_restart();
+       return vth;
+}
+
+static void __attribute__((noreturn)) vmm_sched_entry(void)
+{
+       struct vmm_thread *vth;
+
+       if (sched_is_greedy()) {
+               vth = sched_pick_thread_greedy();
+               if (!vth) {
+                       /* sys_halt_core will return, but we need to restart the vcore.  We
+                        * might have woken due to an event, and we'll need to handle_events
+                        * and other things dealt with by uthreads. */
+                       if (vcore_id() == 0)
+                               sys_halt_core(0);
+                       /* In greedy mode, yield will abort and we'll just restart */
+                       vcore_yield_or_restart();
+               }
+       } else {
+               vth = sched_pick_thread_nice();
+               if (!vth)
+                       vcore_yield_or_restart();
+       }
        stats_run_vth(vth);
        run_uthread((struct uthread*)vth);
 }
@@ -408,24 +474,43 @@ static void task_thread_dtor(void *obj, void *priv)
 {
        struct task_thread *tth = (struct task_thread*)obj;
 
+       uthread_cleanup((struct uthread*)tth);
        __free_stack(tth->stacktop, tth->stacksize);
 }
 
-static void vmm_thread_exited(struct uthread *uth)
+static void task_thread_exit(struct task_thread *tth)
 {
-       struct vmm_thread *vth = (struct vmm_thread*)uth;
-       struct task_thread *tth = (struct task_thread*)uth;
-
-       /* Catch bugs.  Right now, only tasks threads can exit. */
-       assert(vth->type == VMM_THREAD_TASK);
+       struct uthread *uth = (struct uthread*)tth;
 
-       acct_thread_blocked((struct vmm_thread*)tth);
-       uthread_cleanup(uth);
        if (uth->flags & UTHREAD_IS_THREAD0)
                return;
        kmem_cache_free(task_thread_cache, tth);
 }
 
+static void ctlr_thread_exit(struct ctlr_thread *cth)
+{
+       __vthread_exited((struct vthread*)cth->buddy);
+}
+
+static void vmm_thread_exited(struct uthread *uth)
+{
+       struct vmm_thread *vth = (struct vmm_thread*)uth;
+
+       assert(vth->type != VMM_THREAD_GUEST);
+
+       acct_thread_blocked(vth);
+       switch (vth->type) {
+       case VMM_THREAD_TASK:
+               task_thread_exit((struct task_thread*)uth);
+               break;
+       case VMM_THREAD_CTLR:
+               ctlr_thread_exit((struct ctlr_thread*)uth);
+               break;
+       case VMM_THREAD_GUEST:
+               panic("Guest threads shouldn't be able to exit");
+       }
+}
+
 static void destroy_guest_thread(struct guest_thread *gth)
 {
        struct ctlr_thread *cth = gth->buddy;
@@ -437,8 +522,9 @@ static void destroy_guest_thread(struct guest_thread *gth)
        free(gth);
 }
 
-static struct guest_thread *create_guest_thread(struct virtual_machine *vm,
-                                                unsigned int gpcoreid)
+struct guest_thread *create_guest_thread(struct virtual_machine *vm,
+                                         unsigned int gpcoreid,
+                                         struct vmm_gpcore_init *gpci)
 {
        struct guest_thread *gth;
        struct ctlr_thread *cth;
@@ -457,6 +543,7 @@ static struct guest_thread *create_guest_thread(struct virtual_machine *vm,
        gth->buddy = cth;
        cth->buddy = gth;
        gth->gpc_id = gpcoreid;
+       gth->gpci = *gpci;
        cth->stacksize = VMM_THR_STACKSIZE;
        cth->stacktop = __alloc_stack(cth->stacksize);
        if (!cth->stacktop) {
@@ -466,12 +553,8 @@ static struct guest_thread *create_guest_thread(struct virtual_machine *vm,
        }
        gth->uthread.u_ctx.type = ROS_VM_CTX;
        gth->uthread.u_ctx.tf.vm_tf.tf_guest_pcoreid = gpcoreid;
-       /* No need to init the ctlr.  It gets re-init'd each time it starts. */
        uthread_init((struct uthread*)gth, &gth_attr);
        uthread_init((struct uthread*)cth, &cth_attr);
-       /* TODO: give it a correct FP state.  Our current one is probably fine */
-       restore_fp_state(&gth->uthread.as);
-       gth->uthread.flags |= UTHREAD_FPSAVED;
        gth->halt_mtx = uth_mutex_alloc();
        gth->halt_cv = uth_cond_var_alloc();
        return gth;
@@ -490,7 +573,7 @@ static void ev_handle_diag(struct event_msg *ev_msg, unsigned int ev_type,
 
        fprintf(stderr, "\nSCHED stats:\n---------------\n");
        for (int i = 0; i < vm->nr_gpcs; i++) {
-               gth = vm->gths[i];
+               gth = gpcid_to_gth(vm, i);
                cth = gth->buddy;
                fprintf(stderr, "\tGPC %2d: %lu resched, %lu gth runs, %lu ctl runs, %lu user-handled vmexits\n",
                                i,
@@ -509,20 +592,34 @@ static void ev_handle_diag(struct event_msg *ev_msg, unsigned int ev_type,
                atomic_read(&nr_unblk_guests), atomic_read(&nr_unblk_tasks));
 }
 
-int vmm_init(struct virtual_machine *vm, int flags)
+int vmm_init(struct virtual_machine *vm, struct vmm_gpcore_init *gpcis,
+             int flags)
 {
        struct guest_thread **gths;
 
        if (current_vm)
                return -1;
        current_vm = vm;
-       if (syscall(SYS_vmm_setup, vm->nr_gpcs, vm->gpcis, flags) != vm->nr_gpcs)
+       /* We should tell the kernel to create all of the GPCs we'll need in
+        * advance.
+        *
+        * CPUID[0x1] will not have the total number of cores.  If we move that
+        * CPUID[0x1] will not have to total number of cores.  If we move that
+        * handler to userspace, we can create the SMP-booted GPCs on the fly.
+        *
+        * We'd also have to deal with gths[] growing dynamically, which would
+        * require synchronization. */
+       if (syscall(SYS_vmm_add_gpcs, vm->nr_gpcs, gpcis) != vm->nr_gpcs)
                return -1;
+       if (flags) {
+               if (syscall(SYS_vmm_ctl, VMM_CTL_SET_FLAGS, flags))
+                       return -1;
+       }
        gths = malloc(vm->nr_gpcs * sizeof(struct guest_thread *));
        if (!gths)
                return -1;
        for (int i = 0; i < vm->nr_gpcs; i++) {
-               gths[i] = create_guest_thread(vm, i);
+               gths[i] = create_guest_thread(vm, i, &gpcis[i]);
                if (!gths[i]) {
                        for (int j = 0; j < i; j++)
                                destroy_guest_thread(gths[j]);
@@ -530,9 +627,18 @@ int vmm_init(struct virtual_machine *vm, int flags)
                        return -1;
                }
        }
-       vm->gths = gths;
+       wmb(); /* All gths posted before advertising. */
+       vm->__gths = gths;
        uthread_mcp_init();
        register_ev_handler(EV_FREE_APPLE_PIE, ev_handle_diag, NULL);
+       if (sched_is_greedy()) {
+               greedy_rnbl_guests = calloc(vm->nr_gpcs, sizeof(struct vmm_thread *));
+               assert(greedy_rnbl_guests);
+               vcore_request_total(sched_nr_greedy_cores());
+               syscall(SYS_vmm_ctl, VMM_CTL_SET_EXITS,
+                       syscall(SYS_vmm_ctl, VMM_CTL_GET_EXITS) &
+                               ~(VMM_CTL_EXIT_HALT | VMM_CTL_EXIT_MWAIT));
+       }
        return 0;
 }
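
When greedy, the tail of vmm_init() does the one-time core setup: it requests
all nr_gpcs + 1 vcores up front (vcore_request_total()) and clears the HALT
and MWAIT exit bits, the idea being that a core dedicated to a guest has
nothing better to do than let the guest halt in place rather than bounce out
to the 2LS.  The read-modify-write over SYS_vmm_ctl is split into a
hypothetical helper below, purely for clarity; error handling is omitted.

    /* Sketch: the SYS_vmm_ctl read-modify-write used above, as a helper. */
    static void vmm_ctl_clear_exits(unsigned long bits)
    {
            unsigned long exits = syscall(SYS_vmm_ctl, VMM_CTL_GET_EXITS);

            syscall(SYS_vmm_ctl, VMM_CTL_SET_EXITS, exits & ~bits);
    }

    /* Equivalent to the greedy branch in vmm_init():
     *      vmm_ctl_clear_exits(VMM_CTL_EXIT_HALT | VMM_CTL_EXIT_MWAIT);
     */
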
 
@@ -629,21 +735,41 @@ static void acct_thread_unblocked(struct vmm_thread *vth)
        }
 }
 
+static void greedy_mark_guest_runnable(struct vmm_thread *vth)
+{
+       int gpcid;
+
+       if (vth->type == VMM_THREAD_GUEST)
+               gpcid = ((struct guest_thread*)vth)->gpc_id;
+       else
+               gpcid = ((struct ctlr_thread*)vth)->buddy->gpc_id;
+       /* Racing with the reader; see the rules in sched_pick_thread_greedy(). */
+       greedy_rnbl_guests[gpcid] = vth;
+}
+
 static void enqueue_vmm_thread(struct vmm_thread *vth)
 {
-       spin_pdr_lock(&queue_lock);
        switch (vth->type) {
        case VMM_THREAD_GUEST:
        case VMM_THREAD_CTLR:
-               TAILQ_INSERT_TAIL(&rnbl_guests, vth, tq_next);
+               if (sched_is_greedy()) {
+                       greedy_mark_guest_runnable(vth);
+               } else {
+                       spin_pdr_lock(&queue_lock);
+                       TAILQ_INSERT_TAIL(&rnbl_guests, vth, tq_next);
+                       spin_pdr_unlock(&queue_lock);
+               }
                break;
        case VMM_THREAD_TASK:
+               spin_pdr_lock(&queue_lock);
                TAILQ_INSERT_TAIL(&rnbl_tasks, vth, tq_next);
+               spin_pdr_unlock(&queue_lock);
+               if (sched_is_greedy())
+                       vcore_wake(0, false);
                break;
        default:
                panic("Bad vmm_thread type %p\n", vth->type);
        }
-       spin_pdr_unlock(&queue_lock);
        try_to_get_vcores();
 }