vmm: Add support for changing VMX controls
author Barret Rhoden <brho@cs.berkeley.edu>
Thu, 17 Aug 2017 19:13:41 +0000 (15:13 -0400)
committer Barret Rhoden <brho@cs.berkeley.edu>
Fri, 25 Aug 2017 18:41:49 +0000 (14:41 -0400)
This adds internal support for changing the pin-based, cpu-based, and
secondary cpu-based VMX controls.  VMMs will want to change some of them on
the fly, such as "vmexit on halt."  Setting them once at startup is not
enough either, since the 2LS's decision may depend on how many cores are
available at any given time.

Later patches will add support for userspace to change the flags in
vmx_vmm.  Once those bits are changed, a GPC will pick up the new controls
the next time it reloads.  At a minimum, a GPC reloads whenever we run a VM
after having been in userspace.  That should be sufficient for 2LSs.  The
alternative is to set those VMCS fields on every pop, which would slightly
slow down kernel vmexit handling.

The VMCS writes are only a few nsec each - basically the minimum cost of
any similar instruction.

Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/arch/x86/vmm/intel/vmx.c
kern/arch/x86/vmm/intel/vmx.h
kern/arch/x86/vmm/vmm.c
kern/arch/x86/vmm/vmm.h

index 6f9cef7..863fec7 100644 (file)
@@ -325,6 +325,11 @@ vmcs_write64(unsigned long field, uint64_t value)
 
 void vapic_status_dump_kernel(void *vapic);
 
+static bool vmx_control_can_be_changed(struct vmxec *v, uint32_t ctl)
+{
+       return v->hw_changeable & v->policy_changeable & ctl;
+}
+
 /*
  * A note on Things You Can't Make Up.
  * or
@@ -401,10 +406,8 @@ void vapic_status_dump_kernel(void *vapic);
  * weirdness in the bits, we don't want to run.
  * The try_set stuff adds particular ugliness but we have to have it.
  */
-
-static bool
-check_vmxec_controls(struct vmxec const *v, bool have_true_msr,
-                                        uint32_t * result)
+static bool check_vmxec_controls(struct vmxec *v, bool have_true_msr,
+                                 uint32_t *result)
 {
        bool err = false;
        uint32_t vmx_msr_low, vmx_msr_high;
@@ -425,6 +428,7 @@ check_vmxec_controls(struct vmxec const *v, bool have_true_msr,
        reserved_0 = (~vmx_msr_low) & (~vmx_msr_high);
        reserved_1 = vmx_msr_low & vmx_msr_high;
        changeable_bits = ~(reserved_0 | reserved_1);
+       v->hw_changeable = changeable_bits;
 
        /*
         * this is very much as follows:
@@ -495,7 +499,7 @@ check_vmxec_controls(struct vmxec const *v, bool have_true_msr,
  * We're trying to make this as readable as possible. Realistically, it will
  * rarely if ever change, if the past is any guide.
  */
-static const struct vmxec pbec = {
+static struct vmxec pbec = {
        .name = "Pin Based Execution Controls",
        .msr = MSR_IA32_VMX_PINBASED_CTLS,
        .truemsr = MSR_IA32_VMX_TRUE_PINBASED_CTLS,
@@ -508,7 +512,7 @@ static const struct vmxec pbec = {
        .must_be_0 = (PIN_BASED_VMX_PREEMPTION_TIMER),
 };
 
-static const struct vmxec cbec = {
+static struct vmxec cbec = {
        .name = "CPU Based Execution Controls",
        .msr = MSR_IA32_VMX_PROCBASED_CTLS,
        .truemsr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
@@ -537,10 +541,13 @@ static const struct vmxec cbec = {
                     CPU_BASED_PAUSE_EXITING |
                     CPU_BASED_UNCOND_IO_EXITING),
 
-       .try_set_0 = (CPU_BASED_MONITOR_EXITING)
+       .try_set_0 = (CPU_BASED_MONITOR_EXITING),
+       .policy_changeable = (
+                CPU_BASED_HLT_EXITING |
+                CPU_BASED_PAUSE_EXITING),
 };
 
-static const struct vmxec cb2ec = {
+static struct vmxec cb2ec = {
        .name = "CPU Based 2nd Execution Controls",
        .msr = MSR_IA32_VMX_PROCBASED_CTLS2,
        .truemsr = MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -552,8 +559,6 @@ static const struct vmxec cb2ec = {
                     SECONDARY_EXEC_WBINVD_EXITING),
 
        .must_be_0 = (
-                    //SECONDARY_EXEC_APIC_REGISTER_VIRT |
-                    //SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                     SECONDARY_EXEC_DESCRIPTOR_EXITING |
                     SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                     SECONDARY_EXEC_ENABLE_VPID |
@@ -573,7 +578,7 @@ static const struct vmxec cb2ec = {
 
 };
 
-static const struct vmxec vmentry = {
+static struct vmxec vmentry = {
        .name = "VMENTRY controls",
        .msr = MSR_IA32_VMX_ENTRY_CTLS,
        .truemsr = MSR_IA32_VMX_TRUE_ENTRY_CTLS,
@@ -589,7 +594,7 @@ static const struct vmxec vmentry = {
                     VM_ENTRY_LOAD_IA32_PAT),
 };
 
-static const struct vmxec vmexit = {
+static struct vmxec vmexit = {
        .name = "VMEXIT controls",
        .msr = MSR_IA32_VMX_EXIT_CTLS,
        .truemsr = MSR_IA32_VMX_TRUE_EXIT_CTLS,
@@ -655,6 +660,7 @@ setup_vmcs_config(void *p)
                printk("vmxexec controls is no good.\n");
                return;
        }
+       assert(cpu_has_secondary_exec_ctrls());
 
        /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
        if ((vmx_msr_high & 0x1fff) > PGSIZE) {
@@ -767,12 +773,18 @@ static void vmx_setup_constant_host_state(void)
 static void __vmx_setup_pcpu(struct guest_pcore *gpc)
 {
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+       struct vmx_vmm *vmx = &gpc->proc->vmm.vmx;
 
        vmcs_write(HOST_TR_BASE, (uintptr_t)pcpui->tss);
        vmcs_writel(HOST_GDTR_BASE, (uintptr_t)pcpui->gdt);
        vmcs_write(HOST_GS_BASE, (uintptr_t)pcpui);
        /* TODO: we might need to also set HOST_IA32_PERF_GLOBAL_CTRL.  Need to
         * think about how perf will work with VMs */
+       /* Userspace can request changes to the ctls.  They take effect when we
+        * reload the GPC, which occurs after a transition from userspace to VM. */
+       vmcs_write(PIN_BASED_VM_EXEC_CONTROL, vmx->pin_exec_ctls);
+       vmcs_write(CPU_BASED_VM_EXEC_CONTROL, vmx->cpu_exec_ctls);
+       vmcs_write(SECONDARY_VM_EXEC_CONTROL, vmx->cpu2_exec_ctls);
 }
 
 uint64_t
@@ -1004,6 +1016,13 @@ static void setup_msr(struct guest_pcore *gpc)
        vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
 }
 
+void vmx_setup_vmx_vmm(struct vmx_vmm *vmx)
+{
+       vmx->pin_exec_ctls = vmcs_config.pin_based_exec_ctrl;
+       vmx->cpu_exec_ctls = vmcs_config.cpu_based_exec_ctrl;
+       vmx->cpu2_exec_ctls = vmcs_config.cpu_based_2nd_exec_ctrl;
+}
+
 /**
  *  vmx_setup_vmcs - configures the vmcs with starting parameters
  */
@@ -1012,18 +1031,6 @@ static void vmx_setup_vmcs(struct guest_pcore *gpc)
        vmcs_write16(VIRTUAL_PROCESSOR_ID, 0);
        vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
-       /* Control */
-       vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
-                    vmcs_config.pin_based_exec_ctrl);
-
-       vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
-                    vmcs_config.cpu_based_exec_ctrl);
-
-       if (cpu_has_secondary_exec_ctrls()) {
-               vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
-                            vmcs_config.cpu_based_2nd_exec_ctrl);
-       }
-
        vmcs_write64(EPT_POINTER, gpc_get_eptp(gpc));
 
        vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
index e6b50ac..2b448f7 100644 (file)
@@ -385,6 +385,15 @@ struct vmxec {
        uint32_t must_be_0;
        uint32_t try_set_1;
        uint32_t try_set_0;
+       uint32_t hw_changeable;
+       uint32_t policy_changeable;
+};
+
+/* Per-VM VMX info */
+struct vmx_vmm {
+       uint32_t                                        pin_exec_ctls;
+       uint32_t                                        cpu_exec_ctls;
+       uint32_t                                        cpu2_exec_ctls;
 };
 
 int intel_vmm_init(void);
@@ -393,3 +402,4 @@ void vmx_load_guest_pcore(struct guest_pcore *gpc);
 void vmx_unload_guest_pcore(struct guest_pcore *gpc);
 uint64_t gpc_get_eptp(struct guest_pcore *gpc);
 void vmx_clear_vmcs(void);
+void vmx_setup_vmx_vmm(struct vmx_vmm *vmx);
index 8f0acc8..f40625d 100644 (file)
@@ -91,8 +91,9 @@ int vmm_struct_init(struct proc *p, unsigned int nr_guest_pcores,
                error(EAGAIN, "We're already running a vmmcp?");
        /* Set this early, so cleanup checks the gpc array */
        vmm->vmmcp = TRUE;
-       nr_guest_pcores = MIN(nr_guest_pcores, num_cores);
        vmm->amd = 0;
+       vmx_setup_vmx_vmm(&vmm->vmx);
+       nr_guest_pcores = MIN(nr_guest_pcores, num_cores);
        vmm->guest_pcores = kzmalloc(sizeof(void *) * nr_guest_pcores, MEM_WAIT);
        if (!vmm->guest_pcores)
                error(ENOMEM, "Allocation of vmm->guest_pcores failed");
index 9ae840d..da62fe9 100644 (file)
@@ -40,8 +40,9 @@ struct vmm {
        // installed would GPF on a K7.
        union {
                void *svm;
-               struct guest_pcore **guest_pcores;
+               struct vmx_vmm vmx;
        };
+       struct guest_pcore **guest_pcores;
        unsigned long vmexits[VMM_VMEXIT_NR_TYPES];
 };