vmm: Allow VMMs to change vmexit conditions (XCC)
authorBarret Rhoden <brho@cs.berkeley.edu>
Wed, 23 Aug 2017 18:57:47 +0000 (14:57 -0400)
committerBarret Rhoden <brho@cs.berkeley.edu>
Fri, 25 Aug 2017 18:41:49 +0000 (14:41 -0400)
The 2LS (or any HR3 app) can change certain vmexit conditions.  For VMX,
these are the VMX controls.  For AMD, we'll have to do something similar.

Right now, you can control exit on halt (default yes) and exit on pause
(default no).

The greedy mode scheduler will turn off halt exiting, so that when the
guest wants to halt, the core will actually halt.  This will cut down on
the interference with hyperthreads/caches.  2LSs can actually change this
on the fly, subject to the number of host cores available.

Ideally, we'd allow mwait too, but we'd need to sort out letting the guest
use mwait for power management without also using it for monitor-mwait.  As is, they
actually could monitor-mwait, but once we tell them that mwait (and implied
monitor) is available, we can't renege.  That means we wouldn't be able to
change the exiting status on the fly without the guest potentially sleeping
forever.

Reinstall your kernel headers.

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/arch/x86/vmm/intel/vmx.c
kern/arch/x86/vmm/intel/vmx.h
kern/include/ros/bits/syscall.h
kern/include/ros/vmm.h
kern/src/syscall.c
tests/strace.c
user/vmm/sched.c

index 863fec7..57b64b0 100644 (file)
@@ -606,7 +606,6 @@ static struct vmxec vmexit = {
                                VM_EXIT_HOST_ADDR_SPACE_SIZE),  /* 64 bit */
 
        .must_be_0 = (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
-                               // VM_EXIT_ACK_INTR_ON_EXIT |
                                 VM_EXIT_SAVE_IA32_PAT |
                                 VM_EXIT_LOAD_IA32_PAT |
                                VM_EXIT_SAVE_VMX_PREEMPTION_TIMER),
@@ -1434,3 +1433,39 @@ uint64_t gpc_get_eptp(struct guest_pcore *gpc)
 {
        return gpc->proc->env_pgdir.eptp;
 }
+
+
+/* Reports which VMM_CTL_EXIT_* exit conditions are currently enabled for
+ * @vmx, derived from its CPU-based execution controls.  The caller
+ * serializes against setters (sys_vmm_ctl holds the VMM qlock). */
+int vmx_ctl_get_exits(struct vmx_vmm *vmx)
+{
+       int ret = 0;
+
+       if (vmx->cpu_exec_ctls & CPU_BASED_HLT_EXITING)
+               ret |= VMM_CTL_EXIT_HALT;
+       if (vmx->cpu_exec_ctls & CPU_BASED_PAUSE_EXITING)
+               ret |= VMM_CTL_EXIT_PAUSE;
+       return ret;
+}
+
+
+/* Sets the VMM's exit conditions to exactly @vmm_exits (a mask of
+ * VMM_CTL_EXIT_*), toggling the corresponding CPU-based execution controls.
+ * Returns 0 on success, -1 with the error set if the hardware cannot toggle
+ * a requested control.  The caller (sys_vmm_ctl) holds the VMM qlock,
+ * serializing against other setters and non-atomic getters. */
+int vmx_ctl_set_exits(struct vmx_vmm *vmx, int vmm_exits)
+{
+	int toggle_want;
+	int vmx_toggle_do = 0;
+
+	/* Only the bits that differ from the current settings need changing. */
+	toggle_want = (vmx_ctl_get_exits(vmx) ^ vmm_exits) & VMM_CTL_ALL_EXITS;
+	if (toggle_want & VMM_CTL_EXIT_HALT) {
+		if (!vmx_control_can_be_changed(&cbec, CPU_BASED_HLT_EXITING)) {
+			set_error(ENOSYS, "VMX can't toggle EXIT_HALT");
+			return -1;
+		}
+		vmx_toggle_do |= CPU_BASED_HLT_EXITING;
+	}
+	if (toggle_want & VMM_CTL_EXIT_PAUSE) {
+		if (!vmx_control_can_be_changed(&cbec, CPU_BASED_PAUSE_EXITING)) {
+			set_error(ENOSYS, "VMX can't toggle EXIT_PAUSE");
+			return -1;
+		}
+		vmx_toggle_do |= CPU_BASED_PAUSE_EXITING;
+	}
+	/* This is being read concurrently by load_guest_pcore. */
+	WRITE_ONCE(vmx->cpu_exec_ctls, vmx->cpu_exec_ctls ^ vmx_toggle_do);
+	return 0;
+}
index 2b448f7..b851933 100644 (file)
@@ -403,3 +403,5 @@ void vmx_unload_guest_pcore(struct guest_pcore *gpc);
 uint64_t gpc_get_eptp(struct guest_pcore *gpc);
 void vmx_clear_vmcs(void);
 void vmx_setup_vmx_vmm(struct vmx_vmm *vmx);
+int vmx_ctl_get_exits(struct vmx_vmm *vmx);
+int vmx_ctl_set_exits(struct vmx_vmm *vmx, int vmm_exits);
index 23384c5..f92bc52 100644 (file)
@@ -42,6 +42,7 @@
 #define SYS_pop_ctx                                    37
 #define SYS_vmm_poke_guest                     38
 #define SYS_send_event                         39
+#define SYS_vmm_ctl                                    40
 
 /* FS Syscalls */
 #define SYS_read                               100
index bf7bb8c..ef70f49 100644 (file)
@@ -15,3 +15,9 @@
 #define VMCALL_SMPBOOT         0x2
 
 #define VMM_ALL_FLAGS  (VMM_VMCALL_PRINTF)
+
+#define VMM_CTL_GET_EXITS              1
+#define VMM_CTL_SET_EXITS              2
+#define VMM_CTL_EXIT_HALT              (1 << 0)
+#define VMM_CTL_EXIT_PAUSE             (1 << 1)
+#define VMM_CTL_ALL_EXITS              ((1 << 2) - 1)
index 32cfae8..39425af 100644 (file)
@@ -1512,6 +1512,48 @@ static int sys_vmm_poke_guest(struct proc *p, int guest_pcoreid)
        return vmm_poke_guest(p, guest_pcoreid);
 }
 
+/* Helper for sys_vmm_ctl: flags that AMD VMMs are unsupported.  Sets
+ * ENOTSUP and returns -1 so callers can pass the result straight through. */
+static int no_amd(void)
+{
+       set_error(ENOTSUP, "AMD VMMs unsupported");
+       return -1;
+}
+
+/* Syscall handler for VMM control operations.  @cmd selects the op
+ * (VMM_CTL_GET_EXITS / VMM_CTL_SET_EXITS); for SET, @arg1 carries the
+ * VMM_CTL_EXIT_* mask.  Returns the current exit mask (GET), 0 (SET), or
+ * -1 with the error set. */
+static int sys_vmm_ctl(struct proc *p, int cmd, unsigned long arg1,
+                       unsigned long arg2, unsigned long arg3,
+                       unsigned long arg4)
+{
+	int ret;
+
+	/* Protects against concurrent setters and for gets that are not atomic
+	 * reads (say, multiple exec ctls). */
+	qlock(&p->vmm.qlock);
+	switch (cmd) {
+	case VMM_CTL_GET_EXITS:
+		if (p->vmm.amd)
+			ret = no_amd();
+		else
+			ret = vmx_ctl_get_exits(&p->vmm.vmx);
+		break;
+	case VMM_CTL_SET_EXITS:
+		if (arg1 & ~VMM_CTL_ALL_EXITS) {
+			/* %lx, not %x: arg1 is unsigned long, and %x would be
+			 * a format-specifier mismatch on 64 bit. */
+			set_error(EINVAL, "Bad vmm_ctl_exits %lx (%x)", arg1,
+				  VMM_CTL_ALL_EXITS);
+			ret = -1;
+			break;
+		}
+		if (p->vmm.amd)
+			ret = no_amd();
+		else
+			ret = vmx_ctl_set_exits(&p->vmm.vmx, arg1);
+		break;
+	default:
+		set_error(EINVAL, "Bad vmm_ctl cmd %d", cmd);
+		ret = -1;
+	}
+	qunlock(&p->vmm.qlock);
+	return ret;
+}
+
 /* Pokes the ksched for the given resource for target_pid.  If the target pid
  * == 0, we just poke for the calling process.  The common case is poking for
  * self, so we avoid the lookup.
@@ -2589,6 +2631,7 @@ const struct sys_table_entry syscall_table[] = {
        [SYS_change_to_m] = {(syscall_t)sys_change_to_m, "change_to_m"},
        [SYS_vmm_setup] = {(syscall_t)sys_vmm_setup, "vmm_setup"},
        [SYS_vmm_poke_guest] = {(syscall_t)sys_vmm_poke_guest, "vmm_poke_guest"},
+       [SYS_vmm_ctl] = {(syscall_t)sys_vmm_ctl, "vmm_ctl"},
        [SYS_poke_ksched] = {(syscall_t)sys_poke_ksched, "poke_ksched"},
        [SYS_abort_sysc] = {(syscall_t)sys_abort_sysc, "abort_sysc"},
        [SYS_abort_sysc_fd] = {(syscall_t)sys_abort_sysc_fd, "abort_sysc_fd"},
index 3ff8fc8..407f572 100644 (file)
@@ -222,6 +222,7 @@ static struct trace_set sched_trace_set = { "sched",
 static struct trace_set vmm_trace_set = { "vmm",
        {SYS_vmm_setup,
         SYS_vmm_poke_guest,
+        SYS_vmm_ctl,
         SYS_pop_ctx,
         0}
 };
index 288b8d7..1cc5b19 100644 (file)
@@ -592,6 +592,8 @@ int vmm_init(struct virtual_machine *vm, int flags)
                greedy_rnbl_guests = calloc(vm->nr_gpcs, sizeof(struct vmm_thread *));
                assert(greedy_rnbl_guests);
                vcore_request_total(sched_nr_greedy_cores());
+               syscall(SYS_vmm_ctl, VMM_CTL_SET_EXITS,
+                       syscall(SYS_vmm_ctl, VMM_CTL_GET_EXITS) & ~VMM_CTL_EXIT_HALT);
        }
        return 0;
 }