parlib: vmm: Allow VM uthreads to have no FP state
diff --git a/user/vmm/sched.c b/user/vmm/sched.c
index b44c44f..26d5d17 100644
--- a/user/vmm/sched.c
+++ b/user/vmm/sched.c
@@ -28,6 +28,7 @@ static struct spin_pdr_lock queue_lock = SPINPDR_INITIALIZER;
 /* Runnable queues, broken up by thread type. */
 static struct vmm_thread_tq rnbl_tasks = TAILQ_HEAD_INITIALIZER(rnbl_tasks);
 static struct vmm_thread_tq rnbl_guests = TAILQ_HEAD_INITIALIZER(rnbl_guests);
+static struct vmm_thread **greedy_rnbl_guests;
 /* Counts of *unblocked* threads.  Unblocked = Running + Runnable. */
 static atomic_t nr_unblk_tasks;
 static atomic_t nr_unblk_guests;
@@ -73,6 +74,17 @@ static struct vmm_thread *alloc_vmm_thread(struct virtual_machine *vm,
 static void *__alloc_stack(size_t stacksize);
 static void __free_stack(void *stacktop, size_t stacksize);
 
+static bool sched_is_greedy(void)
+{
+       return parlib_never_yield;
+}
+
+static unsigned int sched_nr_greedy_cores(void)
+{
+       if (!current_vm)
+               return 1;
+       return current_vm->nr_gpcs + 1;
+}
 
 static void restart_thread(struct syscall *sysc)
 {
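Note: greedy mode is keyed entirely off parlib's never-yield flag, and it wants one vcore per guest pcore plus vcore 0 for tasks. A minimal sketch of how a launcher might opt in before bringing up the VM; the direct assignment of parlib_never_yield, the headers, and the init ordering are illustrative assumptions, not part of this patch:

/* Sketch (assumed usage): enable the greedy scheduler before vmm_init(), so
 * sched_is_greedy() is already true when vcores are requested. */
#include <parlib/parlib.h>
#include <vmm/vmm.h>
#include <vmm/sched.h>

static struct virtual_machine vm;

int main(void)
{
        parlib_never_yield = TRUE;      /* sched_is_greedy() now returns true */
        vm.nr_gpcs = 1;                 /* one guest pcore for this sketch */
        if (vmm_init(&vm, 0))           /* requests nr_gpcs + 1 vcores */
                return 1;
        /* ... load the guest and start its cores ... */
        return 0;
}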
@@ -164,26 +176,12 @@ static struct vmm_thread *__pop_first(struct vmm_thread_tq *tq)
 
 static struct vmm_thread *pick_a_thread_degraded(void)
 {
-       struct vmm_thread *vth = 0;
-       static int next_class = VMM_THREAD_GUEST;
+       struct vmm_thread *vth;
 
-       /* We don't have a lot of cores (maybe 0), so we'll alternate which type of
-        * thread we look at first.  Basically, we're RR within a class of threads,
-        * and we'll toggle between those two classes. */
        spin_pdr_lock(&queue_lock);
-       if (next_class == VMM_THREAD_GUEST) {
-               if (!vth)
-                       vth = __pop_first(&rnbl_guests);
-               if (!vth)
-                       vth = __pop_first(&rnbl_tasks);
-               next_class = VMM_THREAD_TASK;
-       } else {
-               if (!vth)
-                       vth = __pop_first(&rnbl_tasks);
-               if (!vth)
-                       vth = __pop_first(&rnbl_guests);
-               next_class = VMM_THREAD_GUEST;
-       };
+       vth = __pop_first(&rnbl_tasks);
+       if (!vth)
+               vth = __pop_first(&rnbl_guests);
        spin_pdr_unlock(&queue_lock);
        return vth;
 }
@@ -220,9 +218,13 @@ static void yield_current_uth(void)
  * to send events, how to avoid interfering with gpcs, etc. */
 static bool try_to_get_vcores(void)
 {
-       int nr_vcores_wanted = desired_nr_vcores();
-       bool have_enough = nr_vcores_wanted <= num_vcores();
+       int nr_vcores_wanted;
+       bool have_enough;
 
+       if (sched_is_greedy())
+               return num_vcores() == sched_nr_greedy_cores();
+       nr_vcores_wanted = desired_nr_vcores();
+       have_enough = nr_vcores_wanted <= num_vcores();
        if (have_enough) {
                vcore_tick_disable();
                return TRUE;
@@ -232,7 +234,44 @@ static bool try_to_get_vcores(void)
        return FALSE;
 }
 
-static void __attribute__((noreturn)) vmm_sched_entry(void)
+static void stats_run_vth(struct vmm_thread *vth)
+{
+       vth->nr_runs++;
+       if (vth->prev_vcoreid != vcore_id()) {
+               vth->prev_vcoreid = vcore_id();
+               vth->nr_resched++;
+       }
+}
+
+/* TODO: This assumes we get all of our vcores. */
+static struct vmm_thread *sched_pick_thread_greedy(void)
+{
+       struct vmm_thread *vth;
+
+       if (current_uthread) {
+               stats_run_vth((struct vmm_thread*)current_uthread);
+               run_current_uthread();
+       }
+       if (vcore_id() == 0) {
+               spin_pdr_lock(&queue_lock);
+               vth = __pop_first(&rnbl_tasks);
+               spin_pdr_unlock(&queue_lock);
+               return vth;
+       }
+       /* This races with enqueue_vmm_thread, which can run on another core.
+        * Here are the rules:
+        * - set when runnable (race free, only one state for the thread at a time)
+        * - cleared when we run it (race free, we're the only runners)
+        * - if we take an interrupt, we'll just run_current_uthread and not check
+        * - if we vmexit, we'll run the buddy directly */
+       assert(vcore_id() <= current_vm->nr_gpcs);
+       vth = greedy_rnbl_guests[vcore_id() - 1];
+       if (vth)
+               greedy_rnbl_guests[vcore_id() - 1] = NULL;
+       return vth;
+}
+
+static struct vmm_thread *sched_pick_thread_nice(void)
 {
        struct vmm_thread *vth;
        bool have_enough;
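The greedy_rnbl_guests[] handshake described in the comment above is a one-element mailbox per guest pcore: whoever makes a guest (or its ctlr buddy) runnable posts it into that gpc's slot, and only the vcore that owns the slot (vcore gpcid + 1) ever consumes it. A stripped-down illustration of the protocol, with hypothetical names and a hypothetical MAX_GPCS bound:

#define MAX_GPCS 64     /* hypothetical bound for this sketch */

/* Hypothetical standalone sketch of the per-gpc slot protocol.
 * Writer: any core, when the thread becomes runnable (at most one post
 * between consumptions, since a thread is in exactly one state at a time).
 * Reader: only vcore gpcid + 1, from its scheduler entry. */
static struct vmm_thread *slot[MAX_GPCS];

static void slot_post(int gpcid, struct vmm_thread *vth)
{
        slot[gpcid] = vth;              /* no other writer can race us */
}

static struct vmm_thread *slot_take(int gpcid)
{
        struct vmm_thread *vth = slot[gpcid];

        if (vth)
                slot[gpcid] = NULL;     /* we are the slot's only reader */
        return vth;
}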
@@ -242,14 +281,28 @@ static void __attribute__((noreturn)) vmm_sched_entry(void)
                /* slightly less than ideal: we grab the queue lock twice */
                yield_current_uth();
        }
-       if (current_uthread)
+       if (current_uthread) {
+               stats_run_vth((struct vmm_thread*)current_uthread);
                run_current_uthread();
+       }
        if (have_enough)
                vth = pick_a_thread_plenty();
        else
                vth = pick_a_thread_degraded();
+       return vth;
+}
+
+static void __attribute__((noreturn)) vmm_sched_entry(void)
+{
+       struct vmm_thread *vth;
+
+       if (sched_is_greedy())
+               vth = sched_pick_thread_greedy();
+       else
+               vth = sched_pick_thread_nice();
        if (!vth)
                vcore_yield_or_restart();
+       stats_run_vth(vth);
        run_uthread((struct uthread*)vth);
 }
 
@@ -336,6 +389,7 @@ static void __swap_to_gth(struct uthread *uth, void *dummy)
        /* We just immediately run our buddy.  The ctlr and the guest are accounted
         * together ("pass the token" back and forth). */
        current_uthread = NULL;
+       stats_run_vth((struct vmm_thread*)cth->buddy);
        run_uthread((struct uthread*)cth->buddy);
        assert(0);
 }
@@ -373,6 +427,7 @@ static void vmm_thread_refl_vm_fault(struct uthread *uth)
        struct guest_thread *gth = (struct guest_thread*)uth;
        struct ctlr_thread *cth = gth->buddy;
 
+       gth->nr_vmexits++;
        /* The ctlr starts from the top every time we get a new fault. */
        cth->uthread.flags |= UTHREAD_SAVED;
        init_user_ctx(&cth->uthread.u_ctx, (uintptr_t)&__ctlr_entry,
@@ -380,6 +435,7 @@ static void vmm_thread_refl_vm_fault(struct uthread *uth)
        /* We just immediately run our buddy.  The ctlr and the guest are accounted
         * together ("pass the token" back and forth). */
        current_uthread = NULL;
+       stats_run_vth((struct vmm_thread*)cth);
        run_uthread((struct uthread*)cth);
        assert(0);
 }
@@ -465,17 +521,45 @@ static struct guest_thread *create_guest_thread(struct virtual_machine *vm,
        }
        gth->uthread.u_ctx.type = ROS_VM_CTX;
        gth->uthread.u_ctx.tf.vm_tf.tf_guest_pcoreid = gpcoreid;
-       /* No need to init the ctlr.  It gets re-init'd each time it starts. */
        uthread_init((struct uthread*)gth, &gth_attr);
        uthread_init((struct uthread*)cth, &cth_attr);
-       /* TODO: give it a correct FP state.  Our current one is probably fine */
-       restore_fp_state(&gth->uthread.as);
-       gth->uthread.flags |= UTHREAD_FPSAVED;
        gth->halt_mtx = uth_mutex_alloc();
        gth->halt_cv = uth_cond_var_alloc();
        return gth;
 }
 
+static void ev_handle_diag(struct event_msg *ev_msg, unsigned int ev_type,
+                           void *data)
+{
+       struct virtual_machine *vm = current_vm;
+       struct guest_thread *gth;
+       struct ctlr_thread *cth;
+       bool reset = FALSE;
+
+       if (ev_msg && (ev_msg->ev_arg1 == 1))
+               reset = TRUE;
+
+       fprintf(stderr, "\nSCHED stats:\n---------------\n");
+       for (int i = 0; i < vm->nr_gpcs; i++) {
+               gth = vm->gths[i];
+               cth = gth->buddy;
+               fprintf(stderr, "\tGPC %2d: %lu resched, %lu gth runs, %lu ctl runs, %lu user-handled vmexits\n",
+                       i,
+                       ((struct vmm_thread*)gth)->nr_resched,
+                       ((struct vmm_thread*)gth)->nr_runs,
+                       ((struct vmm_thread*)cth)->nr_runs,
+                       gth->nr_vmexits);
+               if (reset) {
+                       ((struct vmm_thread*)gth)->nr_resched = 0;
+                       ((struct vmm_thread*)gth)->nr_runs = 0;
+                       ((struct vmm_thread*)cth)->nr_runs = 0;
+                       gth->nr_vmexits = 0;
+               }
+       }
+       fprintf(stderr, "\n\tNr unblocked gpc %lu, Nr unblocked tasks %lu\n",
+               atomic_read(&nr_unblk_guests), atomic_read(&nr_unblk_tasks));
+}
+
 int vmm_init(struct virtual_machine *vm, int flags)
 {
        struct guest_thread **gths;
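The diagnostic handler above is wired to EV_FREE_APPLE_PIE, an event number the VMM repurposes for stats dumps; sending it with ev_arg1 == 1 also zeroes the counters. A sketch of triggering it from another process; the sys_notify() wrapper and the exact headers are assumed to be the usual parlib ones:

/* Sketch (assumed helpers): ask a running VMM to print its scheduler stats. */
#include <parlib/parlib.h>
#include <ros/event.h>

void dump_vmm_sched_stats(int vmm_pid, bool reset)
{
        struct event_msg msg = {0};

        msg.ev_type = EV_FREE_APPLE_PIE;
        msg.ev_arg1 = reset ? 1 : 0;    /* 1: also clear the counters */
        sys_notify(vmm_pid, EV_FREE_APPLE_PIE, &msg);
}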
@@ -499,6 +583,14 @@ int vmm_init(struct virtual_machine *vm, int flags)
        }
        vm->gths = gths;
        uthread_mcp_init();
+       register_ev_handler(EV_FREE_APPLE_PIE, ev_handle_diag, NULL);
+       if (sched_is_greedy()) {
+               greedy_rnbl_guests = calloc(vm->nr_gpcs, sizeof(struct vmm_thread *));
+               assert(greedy_rnbl_guests);
+               vcore_request_total(sched_nr_greedy_cores());
+               syscall(SYS_vmm_ctl, VMM_CTL_SET_EXITS,
+                       syscall(SYS_vmm_ctl, VMM_CTL_GET_EXITS) & ~VMM_CTL_EXIT_HALT);
+       }
        return 0;
 }
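In greedy mode, vmm_init() above also clears VMM_CTL_EXIT_HALT, so a halting guest core no longer exits out to the 2LS; with a dedicated vcore there is nothing else to run anyway. The exit controls are a plain read-modify-write over SYS_vmm_ctl, so a tool that wanted halt exits back could flip the bit the same way; a hypothetical helper:

/* Hypothetical helper: re-enable halt exits, mirroring the read-modify-write
 * that vmm_init() uses to clear them in greedy mode. */
static void vmm_reenable_halt_exits(void)
{
        syscall(SYS_vmm_ctl, VMM_CTL_SET_EXITS,
                syscall(SYS_vmm_ctl, VMM_CTL_GET_EXITS) | VMM_CTL_EXIT_HALT);
}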
 
@@ -595,21 +687,39 @@ static void acct_thread_unblocked(struct vmm_thread *vth)
        }
 }
 
+static void greedy_mark_guest_runnable(struct vmm_thread *vth)
+{
+       int gpcid;
+
+       if (vth->type == VMM_THREAD_GUEST)
+               gpcid = ((struct guest_thread*)vth)->gpc_id;
+       else
+               gpcid = ((struct ctlr_thread*)vth)->buddy->gpc_id;
+       /* racing with the reader */
+       greedy_rnbl_guests[gpcid] = vth;
+}
+
 static void enqueue_vmm_thread(struct vmm_thread *vth)
 {
-       spin_pdr_lock(&queue_lock);
        switch (vth->type) {
        case VMM_THREAD_GUEST:
        case VMM_THREAD_CTLR:
-               TAILQ_INSERT_TAIL(&rnbl_guests, vth, tq_next);
+               if (sched_is_greedy()) {
+                       greedy_mark_guest_runnable(vth);
+               } else {
+                       spin_pdr_lock(&queue_lock);
+                       TAILQ_INSERT_TAIL(&rnbl_guests, vth, tq_next);
+                       spin_pdr_unlock(&queue_lock);
+               }
                break;
        case VMM_THREAD_TASK:
+               spin_pdr_lock(&queue_lock);
                TAILQ_INSERT_TAIL(&rnbl_tasks, vth, tq_next);
+               spin_pdr_unlock(&queue_lock);
                break;
        default:
                panic("Bad vmm_thread type %p\n", vth->type);
        }
-       spin_pdr_unlock(&queue_lock);
        try_to_get_vcores();
 }