Restart with just vmm rebased on master.
authorRon Minnich <rminnich@gmail.com>
Tue, 17 Feb 2015 18:13:20 +0000 (10:13 -0800)
committerRon Minnich <rminnich@gmail.com>
Tue, 17 Feb 2015 18:13:20 +0000 (10:13 -0800)
Much cleaner, the bhyve stuff was interfering.

Next step: fix vm to handle the vmm bits.

Signed-off-by: Ron Minnich <rminnich@gmail.com>
kern/arch/x86/Kbuild
kern/arch/x86/vmm/Kbuild [new file with mode: 0644]
kern/arch/x86/vmm/func.h [new file with mode: 0644]
kern/arch/x86/vmm/intel/vmcs.h [new file with mode: 0644]
kern/arch/x86/vmm/intel/vmx.h [new file with mode: 0644]
kern/arch/x86/vmm/intel/vmx_cpufunc.h [new file with mode: 0644]
kern/arch/x86/vmm/vmm.c [new file with mode: 0644]
kern/arch/x86/vmm/vmm.h [new file with mode: 0644]
kern/arch/x86/vmm/x86.h [new file with mode: 0644]
kern/include/env.h

index ee17438..d16eaac 100644 (file)
@@ -31,7 +31,6 @@ obj-y                                         += trap.o trap64.o
 obj-y                                          += trapentry64.o
 obj-y                                          += usb.o
 
-# Virtual machine support. Optional.
-obj-$(CONFIG_VM)                               += emulate.o
-obj-$(CONFIG_VM)                               += vmx.o
-obj-$(CONFIG_VM)                               += vmx_mmu.o
+# VMM support. Always leave this in to ensure we don't break anything.
+obj-y                                          += vmm/
+
diff --git a/kern/arch/x86/vmm/Kbuild b/kern/arch/x86/vmm/Kbuild
new file mode 100644 (file)
index 0000000..6f46965
--- /dev/null
@@ -0,0 +1 @@
+obj-y                                          += vmm.o
diff --git a/kern/arch/x86/vmm/func.h b/kern/arch/x86/vmm/func.h
new file mode 100644 (file)
index 0000000..02ad4f8
--- /dev/null
@@ -0,0 +1,64 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_FUNC_H_
+#define        _VMM_FUNC_H_
+
+/* APIs to inject faults into the guest */
+void vm_inject_fault(void *vm, int vcpuid, int vector, int errcode_valid,
+    int errcode);
+
+static __inline void
+vm_inject_ud(void *vm, int vcpuid)
+{
+       vm_inject_fault(vm, vcpuid, T_ILLOP, 0, 0);
+}
+
+static __inline void
+vm_inject_gp(void *vm, int vcpuid)
+{
+       vm_inject_fault(vm, vcpuid, T_GPFLT, 1, 0);
+}
+
+static __inline void
+vm_inject_ac(void *vm, int vcpuid, int errcode)
+{
+       vm_inject_fault(vm, vcpuid, T_ALIGN, 1, errcode);
+}
+
+static __inline void
+vm_inject_ss(void *vm, int vcpuid, int errcode)
+{
+       vm_inject_fault(vm, vcpuid, T_STACK, 1, errcode);
+}
+
+void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
+
+int vm_restart_instruction(void *vm, int vcpuid);
+
+#endif /* _VMM_FUNC_H_ */
diff --git a/kern/arch/x86/vmm/intel/vmcs.h b/kern/arch/x86/vmm/intel/vmcs.h
new file mode 100644 (file)
index 0000000..19f42e0
--- /dev/null
@@ -0,0 +1,402 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMCS_H_
+#define        _VMCS_H_
+
+#ifdef ROS_KERNEL
+struct vmcs {
+       uint32_t identifier;
+       uint32_t abort_code;
+       char _impl_specific[PAGE_SIZE - sizeof(uint32_t) * 2];
+};
+
+/* MSR save region is composed of an array of 'struct msr_entry' */
+struct msr_entry {
+       uint32_t index;
+       uint32_t reserved;
+       uint64_t val;
+
+};
+
+int vmcs_set_msr_save(struct vmcs *vmcs, unsigned long g_area,
+                     unsigned int g_count);
+int vmcs_init(struct vmcs *vmcs);
+int vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t * rv);
+int vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val);
+int vmcs_getdesc(struct vmcs *vmcs, int running, int ident,
+                                syssegdesc_t*desc);
+int vmcs_setdesc(struct vmcs *vmcs, int running, int ident,
+                                syssegdesc_t*desc);
+
+static __inline uint64_t vmcs_read(uint32_t encoding)
+{
+       int error;
+       uint64_t val;
+       static_assert(sizeof(struct vmcs) == PAGE_SIZE);
+
+       error = vmread(encoding, &val);
+       // well, huh, what do we do?
+       if (error) {
+               printk("vmcs_read(%u) error %d", encoding, error);
+               assert(0);
+       }
+       return (val);
+}
+
+static __inline void vmcs_write(uint32_t encoding, uint64_t val)
+{
+       int error;
+
+       error = vmwrite(encoding, val);
+       if (error) {
+               printk("vmcs_write(%u) error %d", encoding, error);
+               assert(0);
+       }
+}
+
+#define        vmexit_instruction_length()     vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH)
+#define        vmcs_guest_rip()                vmcs_read(VMCS_GUEST_RIP)
+#define        vmcs_instruction_error()        vmcs_read(VMCS_INSTRUCTION_ERROR)
+#define        vmcs_exit_reason()              (vmcs_read(VMCS_EXIT_REASON) & 0xffff)
+#define        vmcs_exit_qualification()       vmcs_read(VMCS_EXIT_QUALIFICATION)
+#define        vmcs_guest_cr3()                vmcs_read(VMCS_GUEST_CR3)
+#define        vmcs_gpa()                      vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS)
+#define        vmcs_gla()                      vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)
+#define        vmcs_idt_vectoring_info()       vmcs_read(VMCS_IDT_VECTORING_INFO)
+#define        vmcs_idt_vectoring_err()        vmcs_read(VMCS_IDT_VECTORING_ERROR)
+
+#endif /* ROS_KERNEL */
+
+#define        VMCS_INITIAL                    0xffffffffffffffff
+
+#define        VMCS_IDENT(encoding)            ((encoding) | 0x80000000)
+/*
+ * VMCS field encodings from Appendix H, Intel Architecture Manual Vol3B.
+ */
+#define        VMCS_INVALID_ENCODING           0xffffffff
+
+/* 16-bit control fields */
+#define        VMCS_VPID                       0x00000000
+#define        VMCS_PIR_VECTOR                 0x00000002
+
+/* 16-bit guest-state fields */
+#define        VMCS_GUEST_ES_SELECTOR          0x00000800
+#define        VMCS_GUEST_CS_SELECTOR          0x00000802
+#define        VMCS_GUEST_SS_SELECTOR          0x00000804
+#define        VMCS_GUEST_DS_SELECTOR          0x00000806
+#define        VMCS_GUEST_FS_SELECTOR          0x00000808
+#define        VMCS_GUEST_GS_SELECTOR          0x0000080A
+#define        VMCS_GUEST_LDTR_SELECTOR        0x0000080C
+#define        VMCS_GUEST_TR_SELECTOR          0x0000080E
+#define        VMCS_GUEST_INTR_STATUS          0x00000810
+
+/* 16-bit host-state fields */
+#define        VMCS_HOST_ES_SELECTOR           0x00000C00
+#define        VMCS_HOST_CS_SELECTOR           0x00000C02
+#define        VMCS_HOST_SS_SELECTOR           0x00000C04
+#define        VMCS_HOST_DS_SELECTOR           0x00000C06
+#define        VMCS_HOST_FS_SELECTOR           0x00000C08
+#define        VMCS_HOST_GS_SELECTOR           0x00000C0A
+#define        VMCS_HOST_TR_SELECTOR           0x00000C0C
+
+/* 64-bit control fields */
+#define        VMCS_IO_BITMAP_A                0x00002000
+#define        VMCS_IO_BITMAP_B                0x00002002
+#define        VMCS_MSR_BITMAP                 0x00002004
+#define        VMCS_EXIT_MSR_STORE             0x00002006
+#define        VMCS_EXIT_MSR_LOAD              0x00002008
+#define        VMCS_ENTRY_MSR_LOAD             0x0000200A
+#define        VMCS_EXECUTIVE_VMCS             0x0000200C
+#define        VMCS_TSC_OFFSET                 0x00002010
+#define        VMCS_VIRTUAL_APIC               0x00002012
+#define        VMCS_APIC_ACCESS                0x00002014
+#define        VMCS_PIR_DESC                   0x00002016
+#define        VMCS_EPTP                       0x0000201A
+#define        VMCS_EOI_EXIT0                  0x0000201C
+#define        VMCS_EOI_EXIT1                  0x0000201E
+#define        VMCS_EOI_EXIT2                  0x00002020
+#define        VMCS_EOI_EXIT3                  0x00002022
+#define        VMCS_EOI_EXIT(vector)           (VMCS_EOI_EXIT0 + ((vector) / 64) * 2)
+
+/* 64-bit read-only fields */
+#define        VMCS_GUEST_PHYSICAL_ADDRESS     0x00002400
+
+/* 64-bit guest-state fields */
+#define        VMCS_LINK_POINTER               0x00002800
+#define        VMCS_GUEST_IA32_DEBUGCTL        0x00002802
+#define        VMCS_GUEST_IA32_PAT             0x00002804
+#define        VMCS_GUEST_IA32_EFER            0x00002806
+#define        VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808
+#define        VMCS_GUEST_PDPTE0               0x0000280A
+#define        VMCS_GUEST_PDPTE1               0x0000280C
+#define        VMCS_GUEST_PDPTE2               0x0000280E
+#define        VMCS_GUEST_PDPTE3               0x00002810
+
+/* 64-bit host-state fields */
+#define        VMCS_HOST_IA32_PAT              0x00002C00
+#define        VMCS_HOST_IA32_EFER             0x00002C02
+#define        VMCS_HOST_IA32_PERF_GLOBAL_CTRL 0x00002C04
+
+/* 32-bit control fields */
+#define        VMCS_PIN_BASED_CTLS             0x00004000
+#define        VMCS_PRI_PROC_BASED_CTLS        0x00004002
+#define        VMCS_EXCEPTION_BITMAP           0x00004004
+#define        VMCS_PF_ERROR_MASK              0x00004006
+#define        VMCS_PF_ERROR_MATCH             0x00004008
+#define        VMCS_CR3_TARGET_COUNT           0x0000400A
+#define        VMCS_EXIT_CTLS                  0x0000400C
+#define        VMCS_EXIT_MSR_STORE_COUNT       0x0000400E
+#define        VMCS_EXIT_MSR_LOAD_COUNT        0x00004010
+#define        VMCS_ENTRY_CTLS                 0x00004012
+#define        VMCS_ENTRY_MSR_LOAD_COUNT       0x00004014
+#define        VMCS_ENTRY_INTR_INFO            0x00004016
+#define        VMCS_ENTRY_EXCEPTION_ERROR      0x00004018
+#define        VMCS_ENTRY_INST_LENGTH          0x0000401A
+#define        VMCS_TPR_THRESHOLD              0x0000401C
+#define        VMCS_SEC_PROC_BASED_CTLS        0x0000401E
+#define        VMCS_PLE_GAP                    0x00004020
+#define        VMCS_PLE_WINDOW                 0x00004022
+
+/* 32-bit read-only data fields */
+#define        VMCS_INSTRUCTION_ERROR          0x00004400
+#define        VMCS_EXIT_REASON                0x00004402
+#define        VMCS_EXIT_INTR_INFO             0x00004404
+#define        VMCS_EXIT_INTR_ERRCODE          0x00004406
+#define        VMCS_IDT_VECTORING_INFO         0x00004408
+#define        VMCS_IDT_VECTORING_ERROR        0x0000440A
+#define        VMCS_EXIT_INSTRUCTION_LENGTH    0x0000440C
+#define        VMCS_EXIT_INSTRUCTION_INFO      0x0000440E
+
+/* 32-bit guest-state fields */
+#define        VMCS_GUEST_ES_LIMIT             0x00004800
+#define        VMCS_GUEST_CS_LIMIT             0x00004802
+#define        VMCS_GUEST_SS_LIMIT             0x00004804
+#define        VMCS_GUEST_DS_LIMIT             0x00004806
+#define        VMCS_GUEST_FS_LIMIT             0x00004808
+#define        VMCS_GUEST_GS_LIMIT             0x0000480A
+#define        VMCS_GUEST_LDTR_LIMIT           0x0000480C
+#define        VMCS_GUEST_TR_LIMIT             0x0000480E
+#define        VMCS_GUEST_GDTR_LIMIT           0x00004810
+#define        VMCS_GUEST_IDTR_LIMIT           0x00004812
+#define        VMCS_GUEST_ES_ACCESS_RIGHTS     0x00004814
+#define        VMCS_GUEST_CS_ACCESS_RIGHTS     0x00004816
+#define        VMCS_GUEST_SS_ACCESS_RIGHTS     0x00004818
+#define        VMCS_GUEST_DS_ACCESS_RIGHTS     0x0000481A
+#define        VMCS_GUEST_FS_ACCESS_RIGHTS     0x0000481C
+#define        VMCS_GUEST_GS_ACCESS_RIGHTS     0x0000481E
+#define        VMCS_GUEST_LDTR_ACCESS_RIGHTS   0x00004820
+#define        VMCS_GUEST_TR_ACCESS_RIGHTS     0x00004822
+#define        VMCS_GUEST_INTERRUPTIBILITY     0x00004824
+#define        VMCS_GUEST_ACTIVITY             0x00004826
+#define VMCS_GUEST_SMBASE              0x00004828
+#define        VMCS_GUEST_IA32_SYSENTER_CS     0x0000482A
+#define        VMCS_PREEMPTION_TIMER_VALUE     0x0000482E
+
+/* 32-bit host state fields */
+#define        VMCS_HOST_IA32_SYSENTER_CS      0x00004C00
+
+/* Natural Width control fields */
+#define        VMCS_CR0_MASK                   0x00006000
+#define        VMCS_CR4_MASK                   0x00006002
+#define        VMCS_CR0_SHADOW                 0x00006004
+#define        VMCS_CR4_SHADOW                 0x00006006
+#define        VMCS_CR3_TARGET0                0x00006008
+#define        VMCS_CR3_TARGET1                0x0000600A
+#define        VMCS_CR3_TARGET2                0x0000600C
+#define        VMCS_CR3_TARGET3                0x0000600E
+
+/* Natural Width read-only fields */
+#define        VMCS_EXIT_QUALIFICATION         0x00006400
+#define        VMCS_IO_RCX                     0x00006402
+#define        VMCS_IO_RSI                     0x00006404
+#define        VMCS_IO_RDI                     0x00006406
+#define        VMCS_IO_RIP                     0x00006408
+#define        VMCS_GUEST_LINEAR_ADDRESS       0x0000640A
+
+/* Natural Width guest-state fields */
+#define        VMCS_GUEST_CR0                  0x00006800
+#define        VMCS_GUEST_CR3                  0x00006802
+#define        VMCS_GUEST_CR4                  0x00006804
+#define        VMCS_GUEST_ES_BASE              0x00006806
+#define        VMCS_GUEST_CS_BASE              0x00006808
+#define        VMCS_GUEST_SS_BASE              0x0000680A
+#define        VMCS_GUEST_DS_BASE              0x0000680C
+#define        VMCS_GUEST_FS_BASE              0x0000680E
+#define        VMCS_GUEST_GS_BASE              0x00006810
+#define        VMCS_GUEST_LDTR_BASE            0x00006812
+#define        VMCS_GUEST_TR_BASE              0x00006814
+#define        VMCS_GUEST_GDTR_BASE            0x00006816
+#define        VMCS_GUEST_IDTR_BASE            0x00006818
+#define        VMCS_GUEST_DR7                  0x0000681A
+#define        VMCS_GUEST_RSP                  0x0000681C
+#define        VMCS_GUEST_RIP                  0x0000681E
+#define        VMCS_GUEST_RFLAGS               0x00006820
+#define        VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822
+#define        VMCS_GUEST_IA32_SYSENTER_ESP    0x00006824
+#define        VMCS_GUEST_IA32_SYSENTER_EIP    0x00006826
+
+/* Natural Width host-state fields */
+#define        VMCS_HOST_CR0                   0x00006C00
+#define        VMCS_HOST_CR3                   0x00006C02
+#define        VMCS_HOST_CR4                   0x00006C04
+#define        VMCS_HOST_FS_BASE               0x00006C06
+#define        VMCS_HOST_GS_BASE               0x00006C08
+#define        VMCS_HOST_TR_BASE               0x00006C0A
+#define        VMCS_HOST_GDTR_BASE             0x00006C0C
+#define        VMCS_HOST_IDTR_BASE             0x00006C0E
+#define        VMCS_HOST_IA32_SYSENTER_ESP     0x00006C10
+#define        VMCS_HOST_IA32_SYSENTER_EIP     0x00006C12
+#define        VMCS_HOST_RSP                   0x00006C14
+#define        VMCS_HOST_RIP                   0x00006c16
+
+/*
+ * VM instruction error numbers
+ */
+#define        VMRESUME_WITH_NON_LAUNCHED_VMCS 5
+
+/*
+ * VMCS exit reasons
+ */
+#define EXIT_REASON_EXCEPTION          0
+#define EXIT_REASON_EXT_INTR           1
+#define EXIT_REASON_TRIPLE_FAULT       2
+#define EXIT_REASON_INIT               3
+#define EXIT_REASON_SIPI               4
+#define EXIT_REASON_IO_SMI             5
+#define EXIT_REASON_SMI                        6
+#define EXIT_REASON_INTR_WINDOW                7
+#define EXIT_REASON_NMI_WINDOW         8
+#define EXIT_REASON_TASK_SWITCH                9
+#define EXIT_REASON_CPUID              10
+#define EXIT_REASON_GETSEC             11
+#define EXIT_REASON_HLT                        12
+#define EXIT_REASON_INVD               13
+#define EXIT_REASON_INVLPG             14
+#define EXIT_REASON_RDPMC              15
+#define EXIT_REASON_RDTSC              16
+#define EXIT_REASON_RSM                        17
+#define EXIT_REASON_VMCALL             18
+#define EXIT_REASON_VMCLEAR            19
+#define EXIT_REASON_VMLAUNCH           20
+#define EXIT_REASON_VMPTRLD            21
+#define EXIT_REASON_VMPTRST            22
+#define EXIT_REASON_VMREAD             23
+#define EXIT_REASON_VMRESUME           24
+#define EXIT_REASON_VMWRITE            25
+#define EXIT_REASON_VMXOFF             26
+#define EXIT_REASON_VMXON              27
+#define EXIT_REASON_CR_ACCESS          28
+#define EXIT_REASON_DR_ACCESS          29
+#define EXIT_REASON_INOUT              30
+#define EXIT_REASON_RDMSR              31
+#define EXIT_REASON_WRMSR              32
+#define EXIT_REASON_INVAL_VMCS         33
+#define EXIT_REASON_INVAL_MSR          34
+#define EXIT_REASON_MWAIT              36
+#define EXIT_REASON_MTF                        37
+#define EXIT_REASON_MONITOR            39
+#define EXIT_REASON_PAUSE              40
+#define EXIT_REASON_MCE_DURING_ENTRY   41
+#define EXIT_REASON_TPR                        43
+#define EXIT_REASON_APIC_ACCESS                44
+#define        EXIT_REASON_VIRTUALIZED_EOI     45
+#define EXIT_REASON_GDTR_IDTR          46
+#define EXIT_REASON_LDTR_TR            47
+#define EXIT_REASON_EPT_FAULT          48
+#define EXIT_REASON_EPT_MISCONFIG      49
+#define EXIT_REASON_INVEPT             50
+#define EXIT_REASON_RDTSCP             51
+#define EXIT_REASON_VMX_PREEMPT                52
+#define EXIT_REASON_INVVPID            53
+#define EXIT_REASON_WBINVD             54
+#define EXIT_REASON_XSETBV             55
+#define        EXIT_REASON_APIC_WRITE          56
+
+/*
+ * NMI unblocking due to IRET.
+ *
+ * Applies to VM-exits due to hardware exception or EPT fault.
+ */
+#define        EXIT_QUAL_NMIUDTI       (1 << 12)
+/*
+ * VMCS interrupt information fields
+ */
+#define        VMCS_INTR_VALID         (1U << 31)
+#define        VMCS_INTR_T_MASK        0x700   /* Interruption-info type */
+#define        VMCS_INTR_T_HWINTR      (0 << 8)
+#define        VMCS_INTR_T_NMI         (2 << 8)
+#define        VMCS_INTR_T_HWEXCEPTION (3 << 8)
+#define        VMCS_INTR_T_SWINTR      (4 << 8)
+#define        VMCS_INTR_T_PRIV_SWEXCEPTION (5 << 8)
+#define        VMCS_INTR_T_SWEXCEPTION (6 << 8)
+#define        VMCS_INTR_DEL_ERRCODE   (1 << 11)
+
+/*
+ * VMCS IDT-Vectoring information fields
+ */
+#define        VMCS_IDT_VEC_VALID              (1U << 31)
+#define        VMCS_IDT_VEC_ERRCODE_VALID      (1 << 11)
+
+/*
+ * VMCS Guest interruptibility field
+ */
+#define        VMCS_INTERRUPTIBILITY_STI_BLOCKING      (1 << 0)
+#define        VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING    (1 << 1)
+#define        VMCS_INTERRUPTIBILITY_SMI_BLOCKING      (1 << 2)
+#define        VMCS_INTERRUPTIBILITY_NMI_BLOCKING      (1 << 3)
+
+/*
+ * Exit qualification for EXIT_REASON_INVAL_VMCS
+ */
+#define        EXIT_QUAL_NMI_WHILE_STI_BLOCKING        3
+
+/*
+ * Exit qualification for EPT violation
+ */
+#define        EPT_VIOLATION_DATA_READ         (1UL << 0)
+#define        EPT_VIOLATION_DATA_WRITE        (1UL << 1)
+#define        EPT_VIOLATION_INST_FETCH        (1UL << 2)
+#define        EPT_VIOLATION_GPA_READABLE      (1UL << 3)
+#define        EPT_VIOLATION_GPA_WRITEABLE     (1UL << 4)
+#define        EPT_VIOLATION_GPA_EXECUTABLE    (1UL << 5)
+#define        EPT_VIOLATION_GLA_VALID         (1UL << 7)
+#define        EPT_VIOLATION_XLAT_VALID        (1UL << 8)
+
+/*
+ * Exit qualification for APIC-access VM exit
+ */
+#define        APIC_ACCESS_OFFSET(qual)        ((qual) & 0xFFF)
+#define        APIC_ACCESS_TYPE(qual)          (((qual) >> 12) & 0xF)
+
+/*
+ * Exit qualification for APIC-write VM exit
+ */
+#define        APIC_WRITE_OFFSET(qual)         ((qual) & 0xFFF)
+
+#endif
diff --git a/kern/arch/x86/vmm/intel/vmx.h b/kern/arch/x86/vmm/intel/vmx.h
new file mode 100644 (file)
index 0000000..a87fbe6
--- /dev/null
@@ -0,0 +1,150 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMX_H_
+#define        _VMX_H_
+
+struct vmxctx {
+       uint64_t guest_rdi;             /* Guest state */
+       uint64_t guest_rsi;
+       uint64_t guest_rdx;
+       uint64_t guest_rcx;
+       uint64_t guest_r8;
+       uint64_t guest_r9;
+       uint64_t guest_rax;
+       uint64_t guest_rbx;
+       uint64_t guest_rbp;
+       uint64_t guest_r10;
+       uint64_t guest_r11;
+       uint64_t guest_r12;
+       uint64_t guest_r13;
+       uint64_t guest_r14;
+       uint64_t guest_r15;
+       uint64_t guest_cr2;
+
+       uint64_t host_r15;              /* Host state */
+       uint64_t host_r14;
+       uint64_t host_r13;
+       uint64_t host_r12;
+       uint64_t host_rbp;
+       uint64_t host_rsp;
+       uint64_t host_rbx;
+       /*
+        * XXX todo debug registers and fpu state
+        */
+
+       int inst_fail_status;
+
+       /*
+        * The pmap needs to be deactivated in vmx_enter_guest()
+        * so keep a copy of the 'pmap' in each vmxctx.
+       struct pmap *pmap;
+        */
+       // For Akaros. The pmap did not apply directly, but struct proc * is right.
+       struct proc *p;
+};
+
+struct vmxcap {
+       int set;
+       uint32_t proc_ctls;
+       uint32_t proc_ctls2;
+};
+
+struct vmxstate {
+       uint64_t nextrip;                       /* next instruction to be executed by guest */
+       int lastcpu;                            /* host cpu that this 'vcpu' last ran on */
+       uint16_t vpid;
+};
+
+// TODO: akaros: merge all our various apic structs. 
+struct apic_page {
+       uint32_t reg[PAGE_SIZE / 4];
+};
+
+/* Posted Interrupt Descriptor (described in section 29.6 of the Intel SDM) */
+struct pir_desc {
+       atomic_t pir[4];
+       atomic_t pending;
+       uint64_t unused[3];
+} __attribute__((aligned(64)));
+
+/* Index into the 'guest_msrs[]' array */
+enum {
+       IDX_MSR_LSTAR,
+       IDX_MSR_CSTAR,
+       IDX_MSR_STAR,
+       IDX_MSR_SYSCALL_MASK,
+       IDX_MSR_KERNEL_GS_BASE,
+       GUEST_MSR_NUM                           /* must be the last enumeration */
+};
+
+struct msr_bitmap {
+       char bitmap[PAGE_SIZE]; 
+} __attribute__ ((aligned(PAGE_SIZE)));
+/* virtual machine softc */
+// TODO: this has to go somewhere if we make VMs a flavor of an MCP, as we hope to do.
+struct vmx {
+       struct vmcs vmcs[MAX_NUM_CPUS]; /* one vmcs per virtual cpu */
+       struct apic_page apic_page[MAX_NUM_CPUS];       /* one apic page per vcpu */
+       struct msr_bitmap msr_bitmap;
+       struct pir_desc pir_desc[MAX_NUM_CPUS];
+       uint64_t guest_msrs[MAX_NUM_CPUS][GUEST_MSR_NUM];
+       struct vmxctx ctx[MAX_NUM_CPUS];
+       struct vmxcap cap[MAX_NUM_CPUS];
+       struct vmxstate state[MAX_NUM_CPUS];
+       uint64_t eptp;
+       struct vm *vm;
+       long eptgen[MAX_NUM_CPUS];      /* cached pmap->pm_eptgen */
+};
+
+#define        VMX_GUEST_VMEXIT        0
+#define        VMX_VMRESUME_ERROR      1
+#define        VMX_VMLAUNCH_ERROR      2
+#define        VMX_INVEPT_ERROR        3
+
+// This is here solely to make all the static asserts work. Hack. But those
+// are very useful functions. 
+// TODO: there HAS to be a better way ...
+static void __1(void) {
+       static_assert((offsetof(struct vmx, pir_desc[0]) & 63) == 0);
+       // should not fail  but does ... TODO Akaros
+       //static_assert((offsetof(struct vmx, msr_bitmap) & PAGE_MASK) == 0);
+       static_assert((offsetof(struct vmx, vmcs) & PAGE_MASK) == 0);
+       static_assert(sizeof(struct pir_desc) == 64);
+       static_assert(sizeof(struct apic_page) == PAGE_SIZE);
+}
+
+int vmx_enter_guest(struct vmxctx *ctx, struct vmx *vmx, int launched);
+void vmx_call_isr(uintptr_t entry);
+
+unsigned long vmx_fix_cr0(unsigned long cr0);
+unsigned long vmx_fix_cr4(unsigned long cr4);
+
+extern char vmx_exit_guest[];
+
+#endif
diff --git a/kern/arch/x86/vmm/intel/vmx_cpufunc.h b/kern/arch/x86/vmm/intel/vmx_cpufunc.h
new file mode 100644 (file)
index 0000000..eafb7b3
--- /dev/null
@@ -0,0 +1,196 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef        _VMX_CPUFUNC_H_
+#define        _VMX_CPUFUNC_H_
+
+struct vmcs;
+
+/*
+ * Section 5.2 "Conventions" from Intel Architecture Manual 2B.
+ *
+ *                     error
+ * VMsucceed             0
+ * VMFailInvalid         1
+ * VMFailValid           2     see also VMCS VM-Instruction Error Field
+ */
+#define        VM_SUCCESS              0
+#define        VM_FAIL_INVALID         1
+#define        VM_FAIL_VALID           2
+#define        VMX_SET_ERROR_CODE \
+       "       jnc 1f;"                                                \
+       "       mov $1, %[error];"      /* CF: error = 1 */             \
+       "       jmp 3f;"                                                \
+       "1:     jnz 2f;"                                                \
+       "       mov $2, %[error];"      /* ZF: error = 2 */             \
+       "       jmp 3f;"                                                \
+       "2:     mov $0, %[error];"                                      \
+       "3:"
+
+/* returns 0 on success and non-zero on failure */
+static __inline int vmxon(char *region)
+{
+       int error;
+       uint64_t addr;
+
+       addr = PADDR(region);
+       __asm __volatile("vmxon %[addr];" VMX_SET_ERROR_CODE:[error] "=r"(error)
+                                        :[addr] "m"(*(uint64_t *) & addr)
+                                        :"memory");
+
+       return (error);
+}
+
+/* returns 0 on success and non-zero on failure */
+static __inline int vmclear(struct vmcs *vmcs)
+{
+       int error;
+       uint64_t addr;
+
+       addr = PADDR(vmcs);
+       __asm __volatile("vmclear %[addr];" VMX_SET_ERROR_CODE:[error] "=r"(error)
+                                        :[addr] "m"(*(uint64_t *) & addr)
+                                        :"memory");
+       return (error);
+}
+
+static __inline void vmxoff(void)
+{
+
+       __asm __volatile("vmxoff");
+}
+
+static __inline void vmptrst(uint64_t * addr)
+{
+
+       __asm __volatile("vmptrst %[addr]"::[addr] "m"(*addr):"memory");
+}
+
+static __inline int vmptrld(struct vmcs *vmcs)
+{
+       int error;
+       uint64_t addr;
+
+       addr = PADDR(vmcs);
+       __asm __volatile("vmptrld %[addr];" VMX_SET_ERROR_CODE:[error] "=r"(error)
+                                        :[addr] "m"(*(uint64_t *) & addr)
+                                        :"memory");
+       return (error);
+}
+
+static __inline int vmwrite(uint64_t reg, uint64_t val)
+{
+       int error;
+
+       __asm __volatile("vmwrite %[val], %[reg];"
+                                        VMX_SET_ERROR_CODE:[error] "=r"(error)
+                                        :[val] "r"(val),[reg] "r"(reg)
+                                        :"memory");
+
+       return (error);
+}
+
+static __inline int vmread(uint64_t r, uint64_t * addr)
+{
+       int error;
+
+       __asm __volatile("vmread %[r], %[addr];"
+                                        VMX_SET_ERROR_CODE:[error] "=r"(error)
+                                        :[r] "r"(r),[addr] "m"(*addr)
+                                        :"memory");
+
+       return (error);
+}
+
+static void __inline VMCLEAR(int8_t *irq, struct vmcs *vmcs)
+{
+       int err;
+
+       err = vmclear(vmcs);
+       if (err != 0)
+               panic("%s: vmclear(%p) error %d", __func__, vmcs, err);
+
+       enable_irqsave(irq);
+}
+
+static void __inline VMPTRLD(int8_t *irq, struct vmcs *vmcs)
+{
+       int err;
+
+       disable_irqsave(irq);
+
+       err = vmptrld(vmcs);
+       if (err != 0)
+               panic("%s: vmptrld(%p) error %d", __func__, vmcs, err);
+}
+
+#define        INVVPID_TYPE_ADDRESS            0UL
+#define        INVVPID_TYPE_SINGLE_CONTEXT     1UL
+#define        INVVPID_TYPE_ALL_CONTEXTS       2UL
+
+struct invvpid_desc {
+       uint16_t vpid;
+       uint16_t _res1;
+       uint32_t _res2;
+       uint64_t linear_addr;
+};
+
+static void __inline invvpid(uint64_t type, struct invvpid_desc desc)
+{
+       int error;
+       static_assert(sizeof(struct invvpid_desc) == 16);
+
+       __asm __volatile("invvpid %[desc], %[type];"
+                                        VMX_SET_ERROR_CODE:[error] "=r"(error)
+                                        :[desc] "m"(desc),[type] "r"(type)
+                                        :"memory");
+
+       if (error)
+               panic("invvpid error %d", error);
+}
+
+#define        INVEPT_TYPE_SINGLE_CONTEXT      1UL
+#define        INVEPT_TYPE_ALL_CONTEXTS        2UL
+struct invept_desc {
+       uint64_t eptp;
+       uint64_t _res;
+};
+static void __inline invept(uint64_t type, struct invept_desc desc)
+{
+       int error;
+       static_assert(sizeof(struct invept_desc) == 16);
+
+       __asm __volatile("invept %[desc], %[type];"
+                                        VMX_SET_ERROR_CODE:[error] "=r"(error)
+                                        :[desc] "m"(desc),[type] "r"(type)
+                                        :"memory");
+
+       if (error)
+               panic("invept error %d", error);
+}
+#endif
diff --git a/kern/arch/x86/vmm/vmm.c b/kern/arch/x86/vmm/vmm.c
new file mode 100644 (file)
index 0000000..41f5eb0
--- /dev/null
@@ -0,0 +1,25 @@
+/* Copyright 2015 Google Inc.
+ * 
+ * See LICENSE for details.
+ */
+
+/* We're not going to fall into the trap of only compiling support
+ * for AMD OR Intel for an image. It all gets compiled in, and which
+ * one you use depends on cpuinfo, not a compile-time
+ * switch. That's proven to be the best strategy.  Conditionally
+ * compiling in support is the path to hell.
+ */
+#include <assert.h>
+#include <pmap.h>
+
+// No .h files here include other .h files.
+// That forces us to make the includes visible.
+#include "intel/vmx_cpufunc.h"
+#include "intel/vmcs.h"
+#include "intel/vmx.h"
+#include "x86.h"
+#include "vmm.h"
+#include "func.h"
+
+/* this will be the init function for vmm. For now, it just ensures we
+   don't break things. */
diff --git a/kern/arch/x86/vmm/vmm.h b/kern/arch/x86/vmm/vmm.h
new file mode 100644 (file)
index 0000000..baa0a0f
--- /dev/null
@@ -0,0 +1,377 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VMM_H_
+#define        _VMM_H_
+
+enum vm_suspend_how {
+       VM_SUSPEND_NONE,
+       VM_SUSPEND_RESET,
+       VM_SUSPEND_POWEROFF,
+       VM_SUSPEND_HALT,
+       VM_SUSPEND_TRIPLEFAULT,
+       VM_SUSPEND_LAST
+};
+
+/*
+ * Identifiers for architecturally defined registers.
+ */
+enum vm_reg_name {
+       VM_REG_GUEST_RAX,
+       VM_REG_GUEST_RBX,
+       VM_REG_GUEST_RCX,
+       VM_REG_GUEST_RDX,
+       VM_REG_GUEST_RSI,
+       VM_REG_GUEST_RDI,
+       VM_REG_GUEST_RBP,
+       VM_REG_GUEST_R8,
+       VM_REG_GUEST_R9,
+       VM_REG_GUEST_R10,
+       VM_REG_GUEST_R11,
+       VM_REG_GUEST_R12,
+       VM_REG_GUEST_R13,
+       VM_REG_GUEST_R14,
+       VM_REG_GUEST_R15,
+       VM_REG_GUEST_CR0,
+       VM_REG_GUEST_CR3,
+       VM_REG_GUEST_CR4,
+       VM_REG_GUEST_DR7,
+       VM_REG_GUEST_RSP,
+       VM_REG_GUEST_RIP,
+       VM_REG_GUEST_RFLAGS,
+       VM_REG_GUEST_ES,
+       VM_REG_GUEST_CS,
+       VM_REG_GUEST_SS,
+       VM_REG_GUEST_DS,
+       VM_REG_GUEST_FS,
+       VM_REG_GUEST_GS,
+       VM_REG_GUEST_LDTR,
+       VM_REG_GUEST_TR,
+       VM_REG_GUEST_IDTR,
+       VM_REG_GUEST_GDTR,
+       VM_REG_GUEST_EFER,
+       VM_REG_GUEST_CR2,
+       VM_REG_GUEST_PDPTE0,
+       VM_REG_GUEST_PDPTE1,
+       VM_REG_GUEST_PDPTE2,
+       VM_REG_GUEST_PDPTE3,
+       VM_REG_GUEST_INTR_SHADOW,
+       VM_REG_LAST
+};
+
+enum x2apic_state {
+       X2APIC_DISABLED,
+       X2APIC_ENABLED,
+       X2APIC_STATE_LAST
+};
+
+#define        VM_INTINFO_VECTOR(info) ((info) & 0xff)
+#define        VM_INTINFO_DEL_ERRCODE  0x800
+#define        VM_INTINFO_RSVD         0x7ffff000
+#define        VM_INTINFO_VALID        0x80000000
+#define        VM_INTINFO_TYPE         0x700
+#define        VM_INTINFO_HWINTR       (0 << 8)
+#define        VM_INTINFO_NMI          (2 << 8)
+#define        VM_INTINFO_HWEXCEPTION  (3 << 8)
+#define        VM_INTINFO_SWINTR       (4 << 8)
+
+enum vcpu_state {
+       VCPU_IDLE,
+       VCPU_FROZEN,
+       VCPU_RUNNING,
+       VCPU_SLEEPING,
+};
+
+/*
+ * Identifiers for optional vmm capabilities
+ */
+enum vm_cap_type {
+       VM_CAP_HALT_EXIT,
+       VM_CAP_MTRAP_EXIT,
+       VM_CAP_PAUSE_EXIT,
+       VM_CAP_UNRESTRICTED_GUEST,
+       VM_CAP_ENABLE_INVPCID,
+       VM_CAP_MAX
+};
+
+enum vm_intr_trigger {
+       EDGE_TRIGGER,
+       LEVEL_TRIGGER
+};
+       
+/*
+ * The 'access' field has the format specified in Table 21-2 of the Intel
+ * Architecture Manual vol 3b.
+ *
+ * XXX The contents of the 'access' field are architecturally defined except
+ * bit 16 - Segment Unusable.
+ */
+struct seg_desc {
+       uint64_t        base;
+       uint32_t        limit;
+       uint32_t        access;
+};
+#define        SEG_DESC_TYPE(access)           ((access) & 0x001f)
+#define        SEG_DESC_DPL(access)            (((access) >> 5) & 0x3)
+#define        SEG_DESC_PRESENT(access)        (((access) & 0x0080) ? 1 : 0)
+#define        SEG_DESC_DEF32(access)          (((access) & 0x4000) ? 1 : 0)
+#define        SEG_DESC_GRANULARITY(access)    (((access) & 0x8000) ? 1 : 0)
+#define        SEG_DESC_UNUSABLE(access)       (((access) & 0x10000) ? 1 : 0)
+
+enum vm_cpu_mode {
+       CPU_MODE_REAL,
+       CPU_MODE_PROTECTED,
+       CPU_MODE_COMPATIBILITY,         /* IA-32E mode (CS.L = 0) */
+       CPU_MODE_64BIT,                 /* IA-32E mode (CS.L = 1) */
+};
+
+enum vm_paging_mode {
+       PAGING_MODE_FLAT,
+       PAGING_MODE_32,
+       PAGING_MODE_PAE,
+       PAGING_MODE_64,
+};
+
+struct vm_guest_paging {
+       uint64_t        cr3;
+       int             cpl;
+       enum vm_cpu_mode cpu_mode;
+       enum vm_paging_mode paging_mode;
+};
+
+/*
+ * The data structures 'vie' and 'vie_op' are meant to be opaque to the
+ * consumers of instruction decoding. The only reason why their contents
+ * need to be exposed is because they are part of the 'vm_exit' structure.
+ */
+struct vie_op {
+       uint8_t         op_byte;        /* actual opcode byte */
+       uint8_t         op_type;        /* type of operation (e.g. MOV) */
+       uint16_t        op_flags;
+};
+
+#define        VIE_INST_SIZE   15
+struct vie {
+       uint8_t         inst[VIE_INST_SIZE];    /* instruction bytes */
+       uint8_t         num_valid;              /* size of the instruction */
+       uint8_t         num_processed;
+
+       uint8_t         addrsize:4, opsize:4;   /* address and operand sizes */
+       uint8_t         rex_w:1,                /* REX prefix */
+                       rex_r:1,
+                       rex_x:1,
+                       rex_b:1,
+                       rex_present:1,
+                       opsize_override:1,      /* Operand size override */
+                       addrsize_override:1;    /* Address size override */
+
+       uint8_t         mod:2,                  /* ModRM byte */
+                       reg:4,
+                       rm:4;
+
+       uint8_t         ss:2,                   /* SIB byte */
+                       index:4,
+                       base:4;
+
+       uint8_t         disp_bytes;
+       uint8_t         imm_bytes;
+
+       uint8_t         scale;
+       int             base_register;          /* VM_REG_GUEST_xyz */
+       int             index_register;         /* VM_REG_GUEST_xyz */
+
+       int64_t         displacement;           /* optional addr displacement */
+       int64_t         immediate;              /* optional immediate operand */
+
+       uint8_t         decoded;        /* set to 1 if successfully decoded */
+
+       struct vie_op   op;                     /* opcode description */
+};
+
+enum vm_exitcode {
+       VM_EXITCODE_INOUT,
+       VM_EXITCODE_VMX,
+       VM_EXITCODE_BOGUS,
+       VM_EXITCODE_RDMSR,
+       VM_EXITCODE_WRMSR,
+       VM_EXITCODE_HLT,
+       VM_EXITCODE_MTRAP,
+       VM_EXITCODE_PAUSE,
+       VM_EXITCODE_PAGING,
+       VM_EXITCODE_INST_EMUL,
+       VM_EXITCODE_SPINUP_AP,
+       VM_EXITCODE_DEPRECATED1,        /* used to be SPINDOWN_CPU */
+       VM_EXITCODE_RENDEZVOUS,
+       VM_EXITCODE_IOAPIC_EOI,
+       VM_EXITCODE_SUSPENDED,
+       VM_EXITCODE_INOUT_STR,
+       VM_EXITCODE_TASK_SWITCH,
+       VM_EXITCODE_MONITOR,
+       VM_EXITCODE_MWAIT,
+       VM_EXITCODE_SVM,
+       VM_EXITCODE_MAX
+};
+
+struct vm_inout {
+       uint16_t        bytes:3;        /* 1 or 2 or 4 */
+       uint16_t        in:1;
+       uint16_t        string:1;
+       uint16_t        rep:1;
+       uint16_t        port;
+       uint32_t        eax;            /* valid for out */
+};
+
+struct vm_inout_str {
+       struct vm_inout inout;          /* must be the first element */
+       struct vm_guest_paging paging;
+       uint64_t        rflags;
+       uint64_t        cr0;
+       uint64_t        index;
+       uint64_t        count;          /* rep=1 (%rcx), rep=0 (1) */
+       int             addrsize;
+       enum vm_reg_name seg_name;
+       struct seg_desc seg_desc;
+};
+
+enum task_switch_reason {
+       TSR_CALL,
+       TSR_IRET,
+       TSR_JMP,
+       TSR_IDT_GATE,   /* task gate in IDT */
+};
+
+struct vm_task_switch {
+       uint16_t        tsssel;         /* new TSS selector */
+       int             ext;            /* task switch due to external event */
+       uint32_t        errcode;
+       int             errcode_valid;  /* push 'errcode' on the new stack */
+       enum task_switch_reason reason;
+       struct vm_guest_paging paging;
+};
+
+struct vm_exit {
+       enum vm_exitcode        exitcode;
+       int                     inst_length;    /* 0 means unknown */
+       uint64_t                rip;
+       union {
+               struct vm_inout inout;
+               struct vm_inout_str inout_str;
+               struct {
+                       uint64_t        gpa;
+                       int             fault_type;
+               } paging;
+               struct {
+                       uint64_t        gpa;
+                       uint64_t        gla;
+                       int             cs_d;           /* CS.D */
+                       struct vm_guest_paging paging;
+                       struct vie      vie;
+               } inst_emul;
+               /*
+                * VMX specific payload. Used when there is no "better"
+                * exitcode to represent the VM-exit.
+                */
+               struct {
+                       int             status;         /* vmx inst status */
+                       /*
+                        * 'exit_reason' and 'exit_qualification' are valid
+                        * only if 'status' is zero.
+                        */
+                       uint32_t        exit_reason;
+                       uint64_t        exit_qualification;
+                       /*
+                        * 'inst_error' and 'inst_type' are valid
+                        * only if 'status' is non-zero.
+                        */
+                       int             inst_type;
+                       int             inst_error;
+               } vmx;
+               /*
+                * SVM specific payload.
+                */
+               struct {
+                       uint64_t        exitcode;
+                       uint64_t        exitinfo1;
+                       uint64_t        exitinfo2;
+               } svm;
+               struct {
+                       uint32_t        code;           /* ecx value */
+                       uint64_t        wval;
+               } msr;
+               struct {
+                       int             vcpu;
+                       uint64_t        rip;
+               } spinup_ap;
+               struct {
+                       uint64_t        rflags;
+               } hlt;
+               struct {
+                       int             vector;
+               } ioapic_eoi;
+               struct {
+                       enum vm_suspend_how how;
+               } suspended;
+               struct vm_task_switch task_switch;
+       } u;
+};
+
+struct vmm {
+       // true if this is a VMMCP.
+       bool vmmcp;
+
+       // Number of cores in this VMMCP.
+       int ncores;
+       // The EPT entries are incompatible in just a few bit
+       // positions. Very few. You could *almost* use the same page
+       // tables for EPT and process page tables, but ... not quite.
+       // Really: you put X in bit two of the EPT and in bit 63 of
+       // the standard one.  Setting WB (6 in bits 5:3) in the EPT
+       // versions disables caching (CD is bit 4) in the native
+       // versions.  WTF?
+       //
+       // As a result we have to keep these two in sync, IFF
+       // we have a VMMCP. N.B. only for the size of the EPT
+       // address space, which is limited to much less than
+       // the virtual address space.
+       struct vmr_tailq vm_regions;
+       physaddr_t eptpt;
+
+       // The VMCS is intel-specific. But, maybe, someday, AMD will
+       // be back.  Just make this an anon union and we'll work it
+       // all out later. Again, remember, we're compiling in support
+       // for both architectures to ensure that we can correctly
+       // figure out at boot time what we're on and what we should
+       // do. This avoids the problem seen years ago with RH6 where
+       // you could install a kernel from the ISO, but the kernel it
+       // installed would GPF on a K7.
+       union {
+               struct vmcs *vmcs;
+       };
+};
+
+#endif /* _VMM_H_ */
diff --git a/kern/arch/x86/vmm/x86.h b/kern/arch/x86/vmm/x86.h
new file mode 100644 (file)
index 0000000..6a8bf56
--- /dev/null
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2011 NetApp, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _X86_H_
+#define        _X86_H_
+
+#define CPUID_0000_0000 (0x0)
+#define CPUID_0000_0001        (0x1)
+#define CPUID_0000_0002 (0x2)
+#define CPUID_0000_0003 (0x3)
+#define CPUID_0000_0004 (0x4)
+#define CPUID_0000_0006 (0x6)
+#define CPUID_0000_0007 (0x7)
+#define        CPUID_0000_000A (0xA)
+#define        CPUID_0000_000B (0xB)
+#define        CPUID_0000_000D (0xD)
+#define CPUID_8000_0000        (0x80000000)
+#define CPUID_8000_0001        (0x80000001)
+#define CPUID_8000_0002        (0x80000002)
+#define CPUID_8000_0003        (0x80000003)
+#define CPUID_8000_0004        (0x80000004)
+#define CPUID_8000_0006        (0x80000006)
+#define CPUID_8000_0007        (0x80000007)
+#define CPUID_8000_0008        (0x80000008)
+
+/*
+ * CPUID instruction Fn0000_0001:
+ */
+#define CPUID_0000_0001_APICID_MASK                    (0xff<<24)
+#define CPUID_0000_0001_APICID_SHIFT                   24
+
+/*
+ * CPUID instruction Fn0000_0001 ECX
+ */
+#define CPUID_0000_0001_FEAT0_VMX      (1<<5)
+
+int x86_emulate_cpuid(struct vm *vm, int vcpu_id, uint32_t * eax,
+                                         uint32_t * ebx, uint32_t * ecx, uint32_t * edx);
+
+#endif
index 7b1302e..5a2e84f 100644 (file)
@@ -22,6 +22,7 @@
 #include <schedule.h>
 #include <devalarm.h>
 #include <ns.h>
+#include <arch/vmm/vmm.h>
 
 TAILQ_HEAD(vcore_tailq, vcore);
 /* 'struct proc_list' declared in sched.h (not ideal...) */
@@ -103,6 +104,9 @@ struct proc {
        struct cv_lookup_tailq          abortable_sleepers;
        spinlock_t                                      abort_list_lock;
        void *virtinfo;
+
+       /* VMMCP */
+       struct vmm vmm;
 };
 
 /* Til we remove all Env references */