Start shuffling functionality from kernel to user.
author: Ronald G. Minnich <rminnich@gmail.com>
Fri, 31 Jul 2015 18:18:28 +0000 (11:18 -0700)
committer: Barret Rhoden <brho@cs.berkeley.edu>
Mon, 2 Nov 2015 23:24:25 +0000 (18:24 -0500)
First step is IO, then vmcall, then whatever else we can do.

Required to make virtio channels work, and, really, this stuff should be
in user mode anyway.

Signed-off-by: Ronald G. Minnich <rminnich@gmail.com>
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/arch/x86/vmm/intel/vmx.c
kern/include/ros/vmm.h
scripts/VMPXE
tests/vmm/vmrunkernel.c
tests/vmrunkernel.c [new file with mode: 0644]
user/vmm/io.c [new file with mode: 0644]
user/vmm/vmx.c [new file with mode: 0644]
user/vmm/vmx.h [new file with mode: 0644]

index ae634bd..cd46ff7 100644 (file)
@@ -1284,178 +1284,6 @@ msrio(struct vmx_vcpu *vcpu, uint32_t opcode, uint32_t qual) {
        return SHUTDOWN_UNHANDLED_EXIT_REASON;
 }
 
-/* crude PCI bus. Just enough to get virtio working. I would rather not add to this. */
-struct pciconfig {
-       uint32_t registers[256];
-};
-
-/* just index by devfn, i.e. 8 bits */
-struct pciconfig pcibus[] = {
-       /* linux requires that devfn 0 be a bridge. 
-        * 00:00.0 Host bridge: Intel Corporation 440BX/ZX/DX - 82443BX/ZX/DX Host bridge (rev 01)
-        */
-       {
-               {0x71908086, 0x02000006, 0x06000001},
-       },
-};
-/* cf8 is a single-threaded resource. */
-static uint32_t cf8;
-static uint32_t allones = (uint32_t)-1;
-
-/* Return a pointer to the 32-bit "register" in the "pcibus" give an address. Use cf8.
- * only for readonly access.
- * this will fail if we ever want to do writes, but we don't.
- */
-void regp(uint32_t **reg)
-{
-       *reg = &allones;
-       int devfn = (cf8>>8) & 0xff;
-       //printk("devfn %d\n", devfn);
-       if (devfn < ARRAY_SIZE(pcibus))
-               *reg = &pcibus[devfn].registers[(cf8>>2)&0x3f];
-       //printk("-->regp *reg 0x%lx\n", **reg);
-}
-
-static uint32_t configaddr(uint32_t val)
-{
-       printk("%s 0x%lx\n", __func__, val);
-       cf8 = val;
-       return 0;
-}
-
-static uint32_t configread32(uint32_t edx, uint64_t *reg)
-{
-       uint32_t *r = &cf8;
-       regp(&r);
-       *reg = set_low32(*reg, *r);
-       printk("%s: 0x%lx 0x%lx, 0x%lx 0x%lx\n", __func__, cf8, edx, r, *reg);
-       return 0;
-}
-
-static uint32_t configread16(uint32_t edx, uint64_t *reg)
-{
-       uint64_t val;
-       int which = ((edx&2)>>1) * 16;
-       configread32(edx, &val);
-       val >>= which;
-       *reg = set_low16(*reg, val);
-       printk("%s: 0x%lx, 0x%lx 0x%lx\n", __func__, edx, val, *reg);
-       return 0;
-}
-
-static uint32_t configread8(uint32_t edx, uint64_t *reg)
-{
-       uint64_t val;
-       int which = (edx&3) * 8;
-       configread32(edx, &val);
-       val >>= which;
-       *reg = set_low16(*reg, val);
-       printk("%s: 0x%lx, 0x%lx 0x%lx\n", __func__, edx, val, *reg);
-       return 0;
-}
-
-static int configwrite32(uint32_t addr, uint32_t val)
-{
-       uint32_t *r = &cf8;
-       regp(&r);
-       *r = val;
-       printk("%s 0x%lx 0x%lx\n", __func__, addr, val);
-       return 0;
-}
-
-static int configwrite16(uint32_t addr, uint16_t val)
-{
-       printk("%s 0x%lx 0x%lx\n", __func__, addr, val);
-       return 0;
-}
-
-static int configwrite8(uint32_t addr, uint8_t val)
-{
-       printk("%s 0x%lx 0x%lx\n", __func__, addr, val);
-       return 0;
-}
-
-/* this is very minimal. It needs to move to vmm/io.c but we don't
- * know if this minimal approach will even be workable. It only (for
- * now) handles pci config space. We'd like to hope that's all we will
- * need.
- * It would have been nice had intel encoded the IO exit info as nicely as they
- * encoded, some of the other exits.
- */
-static int io(struct vmx_vcpu *vcpu, int *advance)
-{
-
-       /* Get a pointer to the memory at %rip. This is quite messy and part of the
-        * reason we don't want to do this at all. It sucks. Would have been nice
-        * had linux had an option to ONLY do mmio config space access, but no such
-        * luck.
-        */
-       uint8_t *ip8 = NULL;
-       uint16_t *ip16;
-       uintptr_t ip;
-       uint32_t edx;
-       /* for now, we're going to be a bit crude. In kernel, p is about v, so we just blow away
-        * the upper 34 bits and take the rest as our address
-        */
-       ip = vcpu->regs.tf_rip & 0x3fffffff;
-       edx = vcpu->regs.tf_rdx;
-       ip8 = (void *)ip;
-       ip16 = (void *)ip;
-       //printk("io: ip16 %p\n", *ip16, edx);
-
-       if (*ip8 == 0xef) {
-               *advance = 1;
-               /* out at %edx */
-               if (edx == 0xcf8) {
-                       //printk("Set cf8 ");
-                       return configaddr(vcpu->regs.tf_rax);
-               }
-               if (edx == 0xcfc) {
-                       //printk("Set cfc ");
-                       return configwrite32(edx, vcpu->regs.tf_rax);
-               }
-               printk("unhandled IO address dx @%p is 0x%x\n", ip8, edx);
-               return SHUTDOWN_UNHANDLED_EXIT_REASON;
-       }
-       // out %al, %dx
-       if (*ip8 == 0xee) {
-               *advance = 1;
-               /* out al %edx */
-               if (edx == 0xcfb) { // special!
-                       printk("Just ignore the damned cfb write\n");
-                       return 0;
-               }
-               if ((edx&~3) == 0xcfc) {
-                       //printk("ignoring write to cfc ");
-                       return 0;
-               }
-               printk("unhandled IO address dx @%p is 0x%x\n", ip8, edx);
-               return SHUTDOWN_UNHANDLED_EXIT_REASON;
-       }
-       if (*ip8 == 0xec) {
-               *advance = 1;
-               //printk("configread8 ");
-               return configread8(edx, &vcpu->regs.tf_rax);
-       }
-       if (*ip8 == 0xed) {
-               *advance = 1;
-               if (edx == 0xcf8) {
-                       //printk("read cf8 0x%lx\n", vcpu->regs.tf_rax);
-                       vcpu->regs.tf_rax = cf8;
-                       return 0;
-               }
-               //printk("configread32 ");
-               return configread32(edx, &vcpu->regs.tf_rax);
-       }
-       if (*ip16 == 0xed66) {
-               *advance = 2;
-               //printk("configread16 ");
-               return configread16(edx, &vcpu->regs.tf_rax);
-       }
-       printk("unknown IO %p %x %x\n", ip8, *ip8, *ip16);
-       return SHUTDOWN_UNHANDLED_EXIT_REASON;
-}
-
 /* Notes on autoloading.  We can't autoload FS_BASE or GS_BASE, according to the
  * manual, but that's because they are automatically saved and restored when all
  * of the other architectural registers are saved and restored, such as cs, ds,
@@ -1743,7 +1571,7 @@ static void vmx_step_instruction(void) {
                    vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
 }
 
-static int vmx_handle_ept_violation(struct vmx_vcpu *vcpu) {
+static int vmx_handle_ept_violation(struct vmx_vcpu *vcpu, struct vmctl *v) {
        unsigned long gva, gpa;
        int exit_qual, ret = -1;
        page_t *page;
@@ -1752,7 +1580,9 @@ static int vmx_handle_ept_violation(struct vmx_vcpu *vcpu) {
        exit_qual = vmcs_read32(EXIT_QUALIFICATION);
        gva = vmcs_readl(GUEST_LINEAR_ADDRESS);
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-
+       v->gpa = gpa;
+       v->gva = gva;
+       v->exit_qual = exit_qual;
        vmx_put_cpu(vcpu);
 
        int prot = 0;
@@ -1887,10 +1717,10 @@ int vmx_launch(struct vmctl *v) {
                        vmcs_writel(GUEST_RIP, vcpu->regs.tf_rip + 2);
                        vmx_put_cpu(vcpu);
                } else if (ret == EXIT_REASON_EPT_VIOLATION) {
-                       if (vmx_handle_ept_violation(vcpu))
+                       if (vmx_handle_ept_violation(vcpu, v))
                                vcpu->shutdown = SHUTDOWN_EPT_VIOLATION;
                } else if (ret == EXIT_REASON_EXCEPTION_NMI) {
-                       if (vmx_handle_nmi_exception(vcpu))
+                       if (vmx_handle_nmi_exception(vcpu)) 
                                vcpu->shutdown = SHUTDOWN_NMI_EXCEPTION;
                } else if (ret == EXIT_REASON_EXTERNAL_INTERRUPT) {
                        printd("External interrupt\n");
@@ -1908,9 +1738,8 @@ int vmx_launch(struct vmctl *v) {
                                msrio(vcpu, ret, vmcs_read32(EXIT_QUALIFICATION));
                        advance = 2;
                } else if (ret == EXIT_REASON_IO_INSTRUCTION) {
-                       /* we never wanted to do this. But virtio
-                        * requires pci config space emulation. */
-                       vcpu->shutdown = io(vcpu, &advance);
+                       /* the VMM does this now. */
+                       vcpu->shutdown = ret; 
                } else {
                        printk("unhandled exit: reason 0x%x, exit qualification 0x%x\n",
                               ret, vmcs_read32(EXIT_QUALIFICATION));
@@ -1934,6 +1763,8 @@ int vmx_launch(struct vmctl *v) {
        printd("RETURN. ip %016lx sp %016lx\n",
               vcpu->regs.tf_rip, vcpu->regs.tf_rsp);
        v->regs = vcpu->regs;
+       v->shutdown = vcpu->shutdown;
+       v->ret_code = vcpu->ret_code;
 //  hexdump((void *)vcpu->regs.tf_rsp, 128 * 8);
        /*
         * Return both the reason for the shutdown and a status value.
index 1bf2bd0..3db8cf3 100644 (file)
@@ -25,6 +25,12 @@ enum {
 struct vmctl {
        uint64_t command;
        uint64_t cr3;
+       uint64_t gva;
+       uint64_t gpa;
+       uint64_t exit_qual;
+       int shutdown;
+       int ret_code;
+       int core;
        struct hw_trapframe regs;
 };
 
index f927a02..4b351b1 100644 (file)
@@ -1,6 +1,9 @@
 #!/bin/bash
 make tests && make fill-kfs &&make  && sudo cp obj/kern/akaros-kernel /var/lib/tftpboot/akaros && \
-       sudo service isc-dhcp-server restart && echo "OK" && exit
+       sudo dhcpd -d eth0 && echo "OK" && exit
+
+
+#      sudo service isc-dhcp-server restart && echo "OK" && exit
 
 
 echo " IT WENT WRONG"
index c903d2a..4b27657 100644 (file)
@@ -27,7 +27,6 @@ int mcp = 1;
 
 #define MiB 0x100000u
 #define GiB (1u<<30)
-#define VIRTIOBASE (15*MiB)
 #define GKERNBASE (16*MiB)
 #define KERNSIZE (128*MiB+GKERNBASE)
 uint8_t _kernel[KERNSIZE];
@@ -87,18 +86,6 @@ void dumpvirtio_mmio(FILE *f, void *v)
        fprintf(f, "VIRTIO_MMIO_QUEUE_USED_HIGH: 0x%x\n", read32(v+VIRTIO_MMIO_QUEUE_USED_HIGH));
        fprintf(f, "VIRTIO_MMIO_CONFIG_GENERATION: 0x%x\n", read32(v+VIRTIO_MMIO_CONFIG_GENERATION));
 }
-static void setupconsole(void *v)
-{
-       // try to make linux happy.
-       // this is not really endian safe but ... well ... WE'RE ON THE SAME MACHINE
-       write32(v+VIRTIO_MMIO_MAGIC_VALUE, ('v' | 'i' << 8 | 'r' << 16 | 't' << 24));
-       // no constant for this is defined anywhere. It's just 1.
-       write32(v+VIRTIO_MMIO_VERSION, 1);
-       write32(v+VIRTIO_MMIO_DEVICE_ID, VIRTIO_ID_CONSOLE);
-       write32(v+VIRTIO_MMIO_QUEUE_NUM_MAX, 1);
-       write32(v+VIRTIO_MMIO_QUEUE_PFN, 0);
-}
-
 int debug = 1;
 
 struct ttargs {
@@ -114,7 +101,7 @@ void *talk_thread(void *arg)
        uint32_t vv;
        int i;
        int num;
-       return;
+       return NULL;
        printf("Sleep 15 seconds\n");
        uthread_sleep(15);
        printf("----------------------- TT a %p\n", a);
@@ -177,6 +164,7 @@ struct ttargs t;
 
 int main(int argc, char **argv)
 {
+       uint64_t virtiobase = 0x100000000ULL;
        struct vmctl vmctl;
        int amt;
        int vmmflags = VMM_VMCALL_PRINTF;
@@ -187,10 +175,10 @@ int main(int argc, char **argv)
        int kfd = -1;
        static char cmd[512];
        void *coreboot_tables = (void *) 0x1165000;
-       /* kernel has to be in the range VIRTIOBASE to KERNSIZE+GKERNBASE for now. */
+
        // mmap is not working for us at present.
-       if ((uint64_t)_kernel > VIRTIOBASE) {
-               printf("kernel array @%p is above , VIRTIOBASE@%p sucks\n", _kernel, VIRTIOBASE);
+       if ((uint64_t)_kernel > GKERNBASE) {
+               printf("kernel array @%p is above , GKERNBASE@%p sucks\n", _kernel, GKERNBASE);
                exit(1);
        }
        memset(_kernel, 0, sizeof(_kernel));
@@ -279,7 +267,7 @@ int main(int argc, char **argv)
                }
        }
 
-       t.virtio = (void *)VIRTIOBASE;
+       //t.virtio = (void *)VIRTIOBASE;
 
        ret = syscall(33, 1);
        if (ret < 0) {
@@ -309,10 +297,8 @@ int main(int argc, char **argv)
        kernbase >>= (0+12);
        kernbase <<= (0 + 12);
        uint8_t *kernel = (void *)GKERNBASE;
-       write_coreboot_table(coreboot_tables, ((void *)VIRTIOBASE) /*kernel*/, KERNSIZE + 1048576);
+       //write_coreboot_table(coreboot_tables, ((void *)VIRTIOBASE) /*kernel*/, KERNSIZE + 1048576);
        hexdump(stdout, coreboot_tables, 512);
-       setupconsole((void *)VIRTIOBASE);
-       hexdump(stdout, (void *)VIRTIOBASE, 128);
        printf("kernbase for pml4 is 0x%llx and entry is %llx\n", kernbase, entry);
        printf("p512 %p p512[0] is 0x%lx p1 %p p1[0] is 0x%x\n", p512, p512[0], p1, p1[0]);
        vmctl.command = REG_RSP_RIP_CR3;
@@ -332,6 +318,7 @@ int main(int argc, char **argv)
        }
        vmctl.command = RESUME;
        while (1) {
+               void showstatus(FILE *f, struct vmctl *v);
                int c;
                printf("RESUME?\n");
                c = getchar();
@@ -342,8 +329,16 @@ int main(int argc, char **argv)
                        perror(cmd);
                }
                printf("RIP %p\n", vmctl.regs.tf_rip);
+               showstatus(stdout, &vmctl);
+               // this will be in a function, someday.
+               // A rough check: is the GPA 
+               if (vmctl.gpa == virtiobase) {
+                       int virtio(struct vmctl *v, uint64_t);
+                       if (virtio(&vmctl, virtiobase))
+                               break;
+               }
        }
-       dumpvirtio_mmio(stdout, (void *)VIRTIOBASE);
+
        printf("shared is %d, blob is %d\n", shared, *mmap_blob);
 
        quit = 1;
diff --git a/tests/vmrunkernel.c b/tests/vmrunkernel.c
new file mode 100644 (file)
index 0000000..cccb493
--- /dev/null
@@ -0,0 +1,353 @@
+#include <stdio.h> 
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <parlib/arch/arch.h>
+#include <parlib/ros_debug.h>
+#include <unistd.h>
+#include <errno.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ros/syscall.h>
+#include <sys/mman.h>
+#include <vmm/coreboot_tables.h>
+#include <ros/vmm.h>
+#include <vmm/virtio.h>
+#include <vmm/virtio_mmio.h>
+#include <vmm/virtio_ids.h>
+
+/* this test will run the "kernel" in the negative address space. We hope. */
+int *mmap_blob;
+unsigned long long stack[1024];
+volatile int shared = 0;
+volatile int quit = 0;
+int mcp = 1;
+
+#define MiB 0x100000u
+#define GiB (1u<<30)
+#define VIRTIOBASE (15*MiB)
+#define GKERNBASE (16*MiB)
+#define KERNSIZE (128*MiB+GKERNBASE)
+uint8_t _kernel[KERNSIZE];
+
+unsigned long long *p512, *p1, *p2m;
+
+pthread_t *my_threads;
+void **my_retvals;
+int nr_threads = 2;
+char *line, *consline, *outline;
+struct scatterlist iov[32];
+unsigned int inlen, outlen, conslen;
+/* unlike Linux, this shared struct is for both host and guest. */
+//     struct virtqueue *constoguest = 
+//             vring_new_virtqueue(0, 512, 8192, 0, inpages, NULL, NULL, "test");
+volatile int gaveit = 0, gotitback = 0;
+struct virtqueue *guesttocons;
+struct scatterlist out[] = { {NULL, sizeof(outline)}, };
+struct scatterlist in[] = { {NULL, sizeof(line)}, };
+
+/* Load one 32-bit word from addr through a volatile-qualified pointer. */
+static inline uint32_t read32(const volatile void *addr)
+{
+       const volatile uint32_t *word = addr;
+
+       return *word;
+}
+
+/* Store value as one 32-bit word at addr through a volatile-qualified pointer. */
+static inline void write32(volatile void *addr, uint32_t value)
+{
+       volatile uint32_t *word = addr;
+
+       *word = value;
+}
+
+/* Print the virtio-mmio registers of the window at v to f, one
+ * "NAME: 0x<value>" line per register, in the order listed below.
+ */
+void dumpvirtio_mmio(FILE *f, void *v)
+{
+/* #name stringifies the register macro's spelling (unexpanded); the second
+ * use expands to the register offset that is added to v.
+ */
+#define DUMP_REG(name) fprintf(f, #name ": 0x%x\n", read32(v + name))
+       DUMP_REG(VIRTIO_MMIO_MAGIC_VALUE);
+       DUMP_REG(VIRTIO_MMIO_VERSION);
+       DUMP_REG(VIRTIO_MMIO_DEVICE_ID);
+       DUMP_REG(VIRTIO_MMIO_VENDOR_ID);
+       DUMP_REG(VIRTIO_MMIO_DEVICE_FEATURES);
+       DUMP_REG(VIRTIO_MMIO_DEVICE_FEATURES_SEL);
+       DUMP_REG(VIRTIO_MMIO_DRIVER_FEATURES);
+       DUMP_REG(VIRTIO_MMIO_DRIVER_FEATURES_SEL);
+       DUMP_REG(VIRTIO_MMIO_GUEST_PAGE_SIZE);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_SEL);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_NUM_MAX);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_NUM);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_ALIGN);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_PFN);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_READY);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_NOTIFY);
+       DUMP_REG(VIRTIO_MMIO_INTERRUPT_STATUS);
+       DUMP_REG(VIRTIO_MMIO_INTERRUPT_ACK);
+       DUMP_REG(VIRTIO_MMIO_STATUS);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_DESC_LOW);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_DESC_HIGH);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_AVAIL_LOW);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_AVAIL_HIGH);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_USED_LOW);
+       DUMP_REG(VIRTIO_MMIO_QUEUE_USED_HIGH);
+       DUMP_REG(VIRTIO_MMIO_CONFIG_GENERATION);
+#undef DUMP_REG
+}
+/* Pre-load the identification registers of the virtio-mmio window at v:
+ * magic, version, device id (console), max queue size and queue PFN.
+ */
+static void setupconsole(void *v)
+{
+       // try to make linux happy.
+       // "virt", lowest byte first. Not really endian safe, but host and
+       // guest are the same machine.
+       const uint32_t magic = 'v' | 'i' << 8 | 'r' << 16 | 't' << 24;
+       // no constant for the version is defined anywhere. It's just 1.
+       const uint32_t version = 1;
+
+       write32(v + VIRTIO_MMIO_MAGIC_VALUE, magic);
+       write32(v + VIRTIO_MMIO_VERSION, version);
+       write32(v + VIRTIO_MMIO_DEVICE_ID, VIRTIO_ID_CONSOLE);
+       write32(v + VIRTIO_MMIO_QUEUE_NUM_MAX, 1);
+       write32(v + VIRTIO_MMIO_QUEUE_PFN, 0);
+}
+
+
+int debug = 1;
+
+struct ttargs {
+       void *virtio;
+};
+
+/* Console thread body.  arg points at a struct ttargs whose virtio member is
+ * the base of the virtio-mmio register window.
+ *
+ * Sequence: sleep 15 seconds, poll VIRTIO_MMIO_DRIVER_FEATURES until it reads
+ * non-zero, spin until the global quit flag is set, then loop forever moving
+ * buffers between the guesttocons queue and stdin/stdout.  The loop only
+ * leaves through exit(); the trailing return is never reached.
+ *
+ * NOTE(review): guesttocons is never assigned anywhere in this file, so
+ * wait_for_vq_desc() is handed NULL -- confirm who is meant to set it up.
+ */
+void *talk_thread(void *arg)
+{
+       enum { CONSLINE_SIZE = 4096 };
+       struct ttargs *a = arg;
+       void *v = a->virtio;
+       fprintf(stderr, "talk thread ..\n");
+       uint16_t head;
+       uint32_t vv;
+       int i;
+       int num;
+       printf("Sleep 15 seconds\n");
+       uthread_sleep(15);
+       printf("----------------------- TT a %p\n", (void *)a);
+       /* These are pointers: %p, not %x. */
+       printf("talk thread ttargs %p v %p\n", (void *)a, v);
+
+       if (debug) printf("Spin on console being read, print num queues, halt\n");
+       while ((vv = read32(v+VIRTIO_MMIO_DRIVER_FEATURES)) == 0) {
+               printf("no ready ... \n");
+               if (debug) {
+                       dumpvirtio_mmio(stdout, v);
+               }
+               printf("sleep 1 second\n");
+               uthread_sleep(1);
+       }
+       if (debug)printf("vv %x, set selector %x\n", vv, read32(v + VIRTIO_MMIO_DRIVER_FEATURES_SEL));
+       if (debug) printf("loop forever");
+       while (! quit)
+               ;
+       /* Nothing else in this file allocates the console line buffer, and the
+        * memset/fgets below write through it; make sure it exists and is big
+        * enough for the largest fgets request (4096-256 bytes).
+        */
+       if (consline == NULL) {
+               consline = malloc(CONSLINE_SIZE);
+               if (consline == NULL) {
+                       perror("consline alloc");
+                       exit(1);
+               }
+       }
+       for(num = 0;;num++) {
+               /* host: use any buffers we should have been sent. */
+               head = wait_for_vq_desc(guesttocons, iov, &outlen, &inlen);
+               if (debug)
+                       printf("vq desc head %d, gaveit %d gotitback %d\n", head, gaveit, gotitback);
+               for(i = 0; debug && i < outlen + inlen; i++)
+                       printf("v[%d/%u] v %p len %d\n", i, outlen + inlen, iov[i].v, iov[i].length);
+               /* host: if we got an output buffer, just output it. */
+               for(i = 0; i < outlen; i++) {
+                       num++;
+                       printf("Host:%s:\n", (char *)iov[i].v);
+               }
+
+               if (debug)
+                       printf("outlen is %u; inlen is %u\n", outlen, inlen);
+               /* host: fill in the writeable buffers. */
+               for (i = outlen; i < outlen + inlen; i++) {
+                       /* host: read a line. */
+                       memset(consline, 0, 128);
+                       if (1) {
+                               if (fgets(consline, 4096-256, stdin) == NULL) {
+                                       exit(0);
+                               }
+                               if (debug) printf("GOT A LINE:%s:\n", consline);
+                       } else {
+                               sprintf(consline, "hi there. %d\n", i);
+                       }
+                       memmove(iov[i].v, consline, strlen(consline)+ 1);
+                       iov[i].length = strlen(consline) + 1;
+               }
+               if (debug) printf("call add_used\n");
+               /* host: now ack that we used them all. */
+               add_used(guesttocons, head, outlen+inlen);
+               if (debug) printf("DONE call add_used\n");
+       }
+       fprintf(stderr, "All done\n");
+       return NULL;
+}
+
+struct ttargs t;
+       
+
+/* Load a guest kernel image and run it through #c/vmctl.
+ *
+ * Usage: vmrunkernel [-n] vmimage [coreboot_tables [loadaddress [entrypoint]]]
+ *   -n  clear VMM_VMCALL_PRINTF from the flags passed to SYS_setup_vmm.
+ *
+ * The image is read to loadaddress, guest page tables are built that map the
+ * first 1 GiB with 2 MiB pages (at 0 and at 0xffffffff80000000), the
+ * coreboot table and the virtio console registers are written, and the
+ * vmctl command is written to #c/vmctl in a loop until the user types 'q'.
+ * Exits with status 1 on any setup failure, returns 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+       struct vmctl vmctl;
+       int amt;
+       int vmmflags = VMM_VMCALL_PRINTF;
+       uint64_t entry = 0x1000000, kerneladdress = 0x1000000;
+       int nr_gpcs = 1;
+       int fd = open("#c/vmctl", O_RDWR), ret;
+       void * x;
+       int kfd = -1;
+       static char cmd[512];
+       void *coreboot_tables = (void *) 0x1165000;
+       /* Remember the program name before argv is shifted below. */
+       const char *progname = argc > 0 ? argv[0] : "vmrunkernel";
+
+       /* Only some fields are filled in before this is written to the
+        * kernel; start from all zeroes instead of stack garbage.
+        */
+       memset(&vmctl, 0, sizeof(vmctl));
+       /* kernel has to be in the range VIRTIOBASE to KERNSIZE+GKERNBASE for now. */
+       // mmap is not working for us at present.
+       if ((uint64_t)_kernel > VIRTIOBASE) {
+               printf("kernel array @%p is above , VIRTIOBASE@%p sucks\n",
+                      (void *)_kernel, (void *)(uintptr_t)VIRTIOBASE);
+               exit(1);
+       }
+       memset(_kernel, 0, sizeof(_kernel));
+
+       if (fd < 0) {
+               perror("#c/vmctl");
+               exit(1);
+       }
+       if (argc > 0)
+               argc--,argv++;
+       // switches ...
+       // Sorry, I don't much like the gnu opt parsing code.
+       // Stop at the first non-switch, or when the arguments run out.
+       while (argc > 0 && argv[0][0] == '-') {
+               switch(argv[0][1]) {
+               case 'n':
+                       vmmflags &= ~VMM_VMCALL_PRINTF;
+                       break;
+               default:
+                       printf("BMAFR\n");
+                       break;
+               }
+               argc--,argv++;
+       }
+       if (argc < 1) {
+               fprintf(stderr, "Usage: %s vmimage [-n (no vmcall printf)] [coreboot_tables [loadaddress [entrypoint]]]\n", progname);
+               exit(1);
+       }
+       if (argc > 1)
+               coreboot_tables = (void *) strtoull(argv[1], 0, 0);
+       if (argc > 2)
+               kerneladdress = strtoull(argv[2], 0, 0);
+       if (argc > 3)
+               entry = strtoull(argv[3], 0, 0);
+       kfd = open(argv[0], O_RDONLY);
+       if (kfd < 0) {
+               perror(argv[0]);
+               exit(1);
+       }
+       // read in the kernel.
+       // NOTE(review): nothing bounds this against the size of the memory
+       // at kerneladdress -- confirm the image always fits.
+       x = (void *)kerneladdress;
+       for(;;) {
+               amt = read(kfd, x, 1048576);
+               if (amt < 0) {
+                       perror("read");
+                       exit(1);
+               }
+               if (amt == 0) {
+                       break;
+               }
+               x += amt;
+       }
+       /* Byte count = end address - load address, computed on integers. */
+       fprintf(stderr, "Read in %llu bytes\n",
+               (unsigned long long)((uintptr_t)x - kerneladdress));
+
+       fprintf(stderr, "Run with %d cores and vmmflags 0x%x\n", nr_gpcs, vmmflags);
+       if (ros_syscall(SYS_setup_vmm, nr_gpcs, vmmflags, 0, 0, 0, 0) != nr_gpcs) {
+               perror("Guest pcore setup failed");
+               exit(1);
+       }
+       /* blob that is faulted in from the EPT first.  we need this to be in low
+        * memory (not above the normal mmap_break), so the EPT can look it up.
+        * Note that we won't get 4096.  The min is 1MB now, and ld is there. */
+       mmap_blob = mmap((int*)4096, PGSIZE, PROT_READ | PROT_WRITE,
+                        MAP_ANONYMOUS, -1, 0);
+       if (mmap_blob == MAP_FAILED) {
+               perror("Unable to mmap");
+               exit(1);
+       }
+
+       mcp = 1;
+       if (mcp) {
+               my_threads = malloc(sizeof(pthread_t) * nr_threads);
+               my_retvals = malloc(sizeof(void*) * nr_threads);
+               if (!(my_retvals && my_threads)) {
+                       /* Both arrays are indexed below; do not carry on. */
+                       perror("Init threads/malloc");
+                       exit(1);
+               }
+
+               pthread_can_vcore_request(FALSE);       /* 2LS won't manage vcores */
+               pthread_need_tls(FALSE);
+               pthread_mcp_init();                                     /* gives us one vcore */
+               vcore_request(nr_threads - 1);          /* ghetto incremental interface */
+               for (int i = 0; i < nr_threads; i++) {
+                       x = __procinfo.vcoremap;
+                       printf("%p\n", __procinfo.vcoremap);
+                       printf("Vcore %d mapped to pcore %d\n", i,
+                               __procinfo.vcoremap[i].pcoreid);
+               }
+       }
+
+       t.virtio = (void *)VIRTIOBASE;
+
+       ret = syscall(33, 1);
+       if (ret < 0) {
+               perror("vm setup");
+               exit(1);
+       }
+       ret = posix_memalign((void **)&p512, 4096, 3*4096);
+       printf("memalign is %p\n", p512);
+       if (ret) {
+               perror("ptp alloc");
+               exit(1);
+       }
+       /* Three consecutive 4 KiB pages: the PML4, one PML3, one PML2. */
+       p1 = &p512[512];
+       p2m = &p512[1024];
+       uint64_t kernbase = 0; //0xffffffff80000000;
+       uint64_t highkernbase = 0xffffffff80000000;
+       p512[PML4(kernbase)] = (unsigned long long)p1 | 7;
+       p1[PML3(kernbase)] = /*0x87; */(unsigned long long)p2m | 7;
+       p512[PML4(highkernbase)] = (unsigned long long)p1 | 7;
+       p1[PML3(highkernbase)] = /*0x87; */(unsigned long long)p2m | 7;
+#define _2MiB (0x200000)
+       int i;
+       for (i = 0; i < 512; i++) {
+               p2m[PML2(kernbase + i * _2MiB)] = 0x87 | i * _2MiB;
+       }
+
+       kernbase >>= (0+12);
+       kernbase <<= (0 + 12);
+       write_coreboot_table(coreboot_tables, ((void *)VIRTIOBASE) /*kernel*/, KERNSIZE + 1048576);
+       hexdump(stdout, coreboot_tables, 512);
+       setupconsole((void *)VIRTIOBASE);
+       hexdump(stdout, (void *)VIRTIOBASE, 128);
+       printf("kernbase for pml4 is 0x%llx and entry is %llx\n",
+              (unsigned long long)kernbase, (unsigned long long)entry);
+       /* The table entries are unsigned long long: print them as such. */
+       printf("p512 %p p512[0] is 0x%llx p1 %p p1[0] is 0x%llx\n",
+              (void *)p512, p512[0], (void *)p1, p1[0]);
+       vmctl.command = REG_RSP_RIP_CR3;
+       vmctl.cr3 = (uint64_t) p512;
+       vmctl.regs.tf_rip = entry;
+       vmctl.regs.tf_rsp = (uint64_t) &stack[1024];
+       if (mcp) {
+               if (pthread_create(&my_threads[0], NULL, &talk_thread, &t))
+                       perror("pth_create failed");
+       }
+       printf("threads started\n");
+       printf("Writing command :%s:\n", cmd);
+       // sys_getpcoreid
+       while (1) {
+               int c;
+               ret = write(fd, &vmctl, sizeof(vmctl));
+               if (ret != sizeof(vmctl)) {
+                       perror(cmd);
+               }
+               printf("RESUME?\n");
+               c = getchar();
+               if (c == 'q')
+                       break;
+               /* tf_rip is an integer register value, not a host pointer. */
+               printf("RIP 0x%llx\n", (unsigned long long)vmctl.regs.tf_rip);
+               vmctl.command = RESUME;
+       }
+       dumpvirtio_mmio(stdout, (void *)VIRTIOBASE);
+       printf("shared is %d, blob is %d\n", shared, *mmap_blob);
+
+       quit = 1;
+       for (int i = 0; i < nr_threads-1; i++) {
+               /* Report the join result itself rather than an unset local. */
+               int joinret = pthread_join(my_threads[i], &my_retvals[i]);
+
+               if (joinret)
+                       perror("pth_join failed");
+               printf("%d %d\n", i, joinret);
+       }
+
+       return 0;
+}
diff --git a/user/vmm/io.c b/user/vmm/io.c
new file mode 100644 (file)
index 0000000..cc3c257
--- /dev/null
@@ -0,0 +1,196 @@
+#include <stdio.h> 
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <parlib/arch/arch.h>
+#include <parlib/ros_debug.h>
+#include <unistd.h>
+#include <errno.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ros/syscall.h>
+#include <sys/mman.h>
+#include <vmm/coreboot_tables.h>
+#include <ros/common.h>
+#include <ros/vmm.h>
+#include <vmm/virtio.h>
+#include <vmm/virtio_mmio.h>
+#include <vmm/virtio_ids.h>
+
+/* nowhere on my linux system. */
+#define ARRAY_SIZE(x) (sizeof((x))/sizeof((x)[0]))
+
+/* Crude PCI bus: just enough config space to get virtio working. I would
+ * rather not add to this.  regp() indexes registers[] with a 6-bit dword
+ * number taken from cf8, so only the first 64 entries are ever reached.
+ */
+struct pciconfig {
+       uint32_t registers[256];
+};
+
+/* One entry per devfn (bits 8-15 of cf8).  regp() hands back the all-ones
+ * word for any devfn past the end of this table.
+ */
+struct pciconfig pcibus[] = {
+       /* linux requires that devfn 0 be a bridge.
+        * 00:00.0 Host bridge: Intel Corporation 440BX/ZX/DX - 82443BX/ZX/DX Host bridge (rev 01)
+        */
+       {
+               {0x71908086, 0x02000006, 0x06000001},
+       },
+};
+/* cf8 holds the value most recently passed to configaddr(), i.e. the last
+ * guest write to port 0xcf8.  It is a single-threaded resource.
+ */
+static uint32_t cf8;
+/* Stand-in register that regp() selects for a devfn with no pcibus entry. */
+static uint32_t allones = (uint32_t)-1;
+
+/* Point *reg at the 32-bit config "register" that the current cf8 value
+ * selects: bits 8-15 pick the devfn, bits 2-7 pick the dword.  A devfn with
+ * no entry in pcibus yields a pointer to the shared all-ones word instead.
+ * Intended for read-only access; the all-ones word is shared, so writing
+ * through the returned pointer is not safe in general.
+ * NOTE(review): the bits of cf8 above bit 15 are not examined here --
+ * confirm that is intended.
+ */
+void regp(uint32_t **reg)
+{
+       int devfn = (cf8 >> 8) & 0xff;
+       int dword = (cf8 >> 2) & 0x3f;
+
+       if (devfn < ARRAY_SIZE(pcibus)) {
+               *reg = &pcibus[devfn].registers[dword];
+               return;
+       }
+       *reg = &allones;
+}
+
+/* Latch a guest write to the config-address port into cf8 and log it.
+ * Always returns 0.
+ */
+static uint32_t configaddr(uint32_t val)
+{
+       /* val is 32 bits wide: %x, not %lx. */
+       printf("%s 0x%x\n", __func__, val);
+       cf8 = val;
+       return 0;
+}
+
+/* 32-bit config read: store the register selected by cf8, zero-extended,
+ * in *reg.  edx (the port the guest used) is only logged.  Always returns 0.
+ */
+static uint32_t configread32(uint32_t edx, uint64_t *reg)
+{
+       uint32_t *r;
+
+       regp(&r);
+       *reg = *r;
+       /* Conversions match the argument types: 32-bit values, a pointer,
+        * and a 64-bit value.
+        */
+       printf("%s: 0x%x 0x%x, %p 0x%llx\n", __func__, cf8, edx, (void *)r,
+              (unsigned long long)*reg);
+       return 0;
+}
+
+/* 16-bit config read.  Bit 1 of the port (edx) selects which half of the
+ * 32-bit register is returned.  Only the low 16 bits of *reg are replaced;
+ * the rest of the destination register is preserved, as a 16-bit IN does.
+ * Always returns 0.
+ */
+static uint32_t configread16(uint32_t edx, uint64_t *reg)
+{
+       uint64_t val = 0;
+       int which = ((edx&2)>>1) * 16;
+
+       configread32(edx, &val);
+       val = (val >> which) & 0xffff;
+       *reg = (*reg & ~0xffffULL) | val;
+       printf("%s: 0x%x, 0x%llx 0x%llx\n", __func__, edx,
+              (unsigned long long)val, (unsigned long long)*reg);
+       return 0;
+}
+
+/* 8-bit config read.  The low two bits of the port (edx) select which byte
+ * of the 32-bit register is returned.  Only the low 8 bits of *reg are
+ * replaced; the rest of the destination register is preserved, as an 8-bit
+ * IN does.  Always returns 0.
+ */
+static uint32_t configread8(uint32_t edx, uint64_t *reg)
+{
+       uint64_t val = 0;
+       int which = (edx&3) * 8;
+
+       configread32(edx, &val);
+       val = (val >> which) & 0xff;
+       *reg = (*reg & ~0xffULL) | val;
+       printf("%s: 0x%x, 0x%llx 0x%llx\n", __func__, edx,
+              (unsigned long long)val, (unsigned long long)*reg);
+       return 0;
+}
+
+/* 32-bit config write: store val in the register selected by cf8.  addr
+ * (the port the guest used) is only logged.  Always returns 0.
+ */
+static int configwrite32(uint32_t addr, uint32_t val)
+{
+       uint32_t *r;
+
+       regp(&r);
+       /* For a devfn with no device regp() returns the shared all-ones
+        * word; storing into it would change what every later read of an
+        * absent device returns, so such writes are dropped.
+        */
+       if (r != &allones)
+               *r = val;
+       printf("%s 0x%x 0x%x\n", __func__, addr, val);
+       return 0;
+}
+
+/* 16-bit config write: logged and otherwise ignored.  Always returns 0. */
+static int configwrite16(uint32_t addr, uint16_t val)
+{
+       /* val is promoted to int when passed to printf: %x, not %lx. */
+       printf("%s 0x%x 0x%x\n", __func__, addr, (unsigned int)val);
+       return 0;
+}
+
+/* 8-bit config write: logged and otherwise ignored.  Always returns 0. */
+static int configwrite8(uint32_t addr, uint8_t val)
+{
+       /* val is promoted to int when passed to printf: %x, not %lx. */
+       printf("%s 0x%x 0x%x\n", __func__, addr, (unsigned int)val);
+       return 0;
+}
+
+/* Emulate the IN/OUT instruction a guest stopped on.  This is very minimal
+ * and we don't know if this minimal approach will even be workable.  It only
+ * (for now) handles pci config space.  We'd like to hope that's all we will
+ * need.
+ * It would have been nice had intel encoded the IO exit info as nicely as
+ * they encoded some of the other exits.
+ *
+ * The opcode is decoded from the bytes at the guest %rip, the access is
+ * applied to cf8/pcibus, and v->regs.tf_rip is stepped past the instruction
+ * (also when the port turns out to be unhandled).  Reads deliver their
+ * result in v->regs.tf_rax.
+ * Returns 0 when the access was emulated, -1 for an unhandled port or an
+ * unrecognized opcode.
+ *
+ * NOTE(review): this function is static and nothing in this file calls it
+ * yet -- confirm how the VMM is meant to reach it.
+ */
+static int io(struct vmctl *v)
+{
+       uint8_t *ip8;
+       uint16_t *ip16;
+       uintptr_t ip;
+       uint32_t edx;
+
+       /* Get a pointer to the memory at %rip. This is quite messy and part of the
+        * reason we don't want to do this at all. It sucks. Would have been nice
+        * had linux had an option to ONLY do mmio config space access, but no such
+        * luck.
+        * For now, we're going to be a bit crude: we just blow away the upper 34
+        * bits and take the rest as our address.
+        */
+       ip = v->regs.tf_rip & 0x3fffffff;
+       /* IN/OUT take the port from %dx alone, so whatever sits in the upper
+        * half of %edx must not take part in the port comparisons below.
+        */
+       edx = v->regs.tf_rdx & 0xffff;
+       ip8 = (void *)ip;
+       ip16 = (void *)ip;
+
+       /* out %eax, %dx */
+       if (*ip8 == 0xef) {
+               v->regs.tf_rip += 1;
+               if (edx == 0xcf8)
+                       return configaddr(v->regs.tf_rax);
+               if (edx == 0xcfc)
+                       return configwrite32(edx, v->regs.tf_rax);
+               printf("unhandled IO address dx @%p is 0x%x\n", ip8, edx);
+               return -1;
+       }
+       /* out %al, %dx */
+       if (*ip8 == 0xee) {
+               v->regs.tf_rip += 1;
+               if (edx == 0xcfb) { // special!
+                       printf("Just ignore the damned cfb write\n");
+                       return 0;
+               }
+               /* byte writes to the data port 0xcfc-0xcff are ignored */
+               if ((edx&~3) == 0xcfc)
+                       return 0;
+               printf("unhandled IO address dx @%p is 0x%x\n", ip8, edx);
+               return -1;
+       }
+       /* in %dx, %al */
+       if (*ip8 == 0xec) {
+               v->regs.tf_rip += 1;
+               return configread8(edx, &v->regs.tf_rax);
+       }
+       /* in %dx, %eax */
+       if (*ip8 == 0xed) {
+               v->regs.tf_rip += 1;
+               if (edx == 0xcf8) {
+                       /* reading the address port returns the latched address */
+                       v->regs.tf_rax = cf8;
+                       return 0;
+               }
+               return configread32(edx, &v->regs.tf_rax);
+       }
+       /* in %dx, %ax: operand-size prefix 0x66 followed by 0xed */
+       if (*ip16 == 0xed66) {
+               v->regs.tf_rip += 2;
+               return configread16(edx, &v->regs.tf_rax);
+       }
+       printf("unknown IO %p %x %x\n", ip8, *ip8, *ip16);
+       return -1;
+}
+
diff --git a/user/vmm/vmx.c b/user/vmm/vmx.c
new file mode 100644 (file)
index 0000000..05ea966
--- /dev/null
@@ -0,0 +1,134 @@
+#include <stdio.h> 
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <parlib/arch/arch.h>
+#include <parlib/ros_debug.h>
+#include <unistd.h>
+#include <errno.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ros/syscall.h>
+#include <sys/mman.h>
+#include <vmm/coreboot_tables.h>
+#include <ros/common.h>
+#include <ros/vmm.h>
+#include <vmm/virtio.h>
+#include <vmm/virtio_mmio.h>
+#include <vmm/virtio_ids.h>
+
+// TODO: put this some common place for user and kernel mode. Once
+// we know we need to. Not sure we want to expose this outside
+// vmrunkernel anyway. Users may claim they want to write a vmm, but ...
+
+// Flag bit (bit 31) in the reported exit reason. showstatus() tests it to
+// report "entry" instead of "exit", i.e. the failure happened on VM entry.
+#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+
+// Exit reason codes. These are used as indices into the vmxexit[] name
+// table below. The numbering has gaps: codes that are not listed here
+// have no symbolic name in this file.
+// NOTE(review): values look like the Intel SDM "basic exit reason"
+// numbers -- confirm against the SDM before adding new entries.
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+#define EXIT_REASON_TRIPLE_FAULT        2
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+#define EXIT_REASON_NMI_WINDOW          8
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVD                13
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+#define EXIT_REASON_VMCLEAR             19
+#define EXIT_REASON_VMLAUNCH            20
+#define EXIT_REASON_VMPTRLD             21
+#define EXIT_REASON_VMPTRST             22
+#define EXIT_REASON_VMREAD              23
+#define EXIT_REASON_VMRESUME            24
+#define EXIT_REASON_VMWRITE             25
+#define EXIT_REASON_VMOFF               26
+#define EXIT_REASON_VMON                27
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION   40
+#define EXIT_REASON_MCE_DURING_VMENTRY  41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_WBINVD              54
+#define EXIT_REASON_XSETBV              55
+#define EXIT_REASON_INVPCID             58
+
+/*
+ * Number of elements in an array. Only valid on a real array object; on a
+ * pointer (including an array-typed function parameter) the result is
+ * meaningless. Defined locally because the author could not find it in the
+ * Linux system headers.
+ */
+#define ARRAY_SIZE(x) (sizeof((x))/sizeof((x)[0]))
+
+/*
+ * Printable names for exit reasons, indexed by EXIT_REASON_* code.
+ *
+ * Codes that have no entry are left as NULL by the designated initializers,
+ * so a caller must bounds-check the index with ARRAY_SIZE(vmxexit) and also
+ * check the slot for NULL before using it.
+ *
+ * Standard C99 "[index] = value" designators are used; the "[index] value"
+ * form without '=' is an obsolete GNU extension.
+ */
+char *vmxexit[] = {
+       [EXIT_REASON_EXCEPTION_NMI]        = "EXCEPTION_NMI",
+       [EXIT_REASON_EXTERNAL_INTERRUPT]   = "EXTERNAL_INTERRUPT",
+       [EXIT_REASON_TRIPLE_FAULT]         = "TRIPLE_FAULT",
+       [EXIT_REASON_PENDING_INTERRUPT]    = "PENDING_INTERRUPT",
+       [EXIT_REASON_NMI_WINDOW]           = "NMI_WINDOW",
+       [EXIT_REASON_TASK_SWITCH]          = "TASK_SWITCH",
+       [EXIT_REASON_CPUID]                = "CPUID",
+       [EXIT_REASON_HLT]                  = "HLT",
+       [EXIT_REASON_INVD]                 = "INVD",
+       [EXIT_REASON_INVLPG]               = "INVLPG",
+       [EXIT_REASON_RDPMC]                = "RDPMC",
+       [EXIT_REASON_RDTSC]                = "RDTSC",
+       [EXIT_REASON_VMCALL]               = "VMCALL",
+       [EXIT_REASON_VMCLEAR]              = "VMCLEAR",
+       [EXIT_REASON_VMLAUNCH]             = "VMLAUNCH",
+       [EXIT_REASON_VMPTRLD]              = "VMPTRLD",
+       [EXIT_REASON_VMPTRST]              = "VMPTRST",
+       [EXIT_REASON_VMREAD]               = "VMREAD",
+       [EXIT_REASON_VMRESUME]             = "VMRESUME",
+       [EXIT_REASON_VMWRITE]              = "VMWRITE",
+       [EXIT_REASON_VMOFF]                = "VMOFF",
+       [EXIT_REASON_VMON]                 = "VMON",
+       [EXIT_REASON_CR_ACCESS]            = "CR_ACCESS",
+       [EXIT_REASON_DR_ACCESS]            = "DR_ACCESS",
+       [EXIT_REASON_IO_INSTRUCTION]       = "IO_INSTRUCTION",
+       [EXIT_REASON_MSR_READ]             = "MSR_READ",
+       [EXIT_REASON_MSR_WRITE]            = "MSR_WRITE",
+       [EXIT_REASON_INVALID_STATE]        = "INVALID_STATE",
+       [EXIT_REASON_MWAIT_INSTRUCTION]    = "MWAIT_INSTRUCTION",
+       [EXIT_REASON_MONITOR_INSTRUCTION]  = "MONITOR_INSTRUCTION",
+       [EXIT_REASON_PAUSE_INSTRUCTION]    = "PAUSE_INSTRUCTION",
+       [EXIT_REASON_MCE_DURING_VMENTRY]   = "MCE_DURING_VMENTRY",
+       [EXIT_REASON_TPR_BELOW_THRESHOLD]  = "TPR_BELOW_THRESHOLD",
+       [EXIT_REASON_APIC_ACCESS]          = "APIC_ACCESS",
+       [EXIT_REASON_EPT_VIOLATION]        = "EPT_VIOLATION",
+       [EXIT_REASON_EPT_MISCONFIG]        = "EPT_MISCONFIG",
+       [EXIT_REASON_WBINVD]               = "WBINVD",
+       [EXIT_REASON_XSETBV]               = "XSETBV",
+       [EXIT_REASON_INVPCID]              = "INVPCID",
+};
+
+/*
+ * Print why a guest core stopped, followed by its general purpose registers.
+ *
+ * f: stream the report is written to.
+ * v: control block for the core; v->shutdown, v->core, v->ret_code and
+ *    v->regs are read, nothing is modified.
+ *
+ * The reason name comes from vmxexit[]; codes without a name print as
+ * "UNKNOWN".
+ */
+void showstatus(FILE *f, struct vmctl *v)
+{
+       /* Work on a copy of the reported code so v is left untouched. */
+       unsigned int shutdown = v->shutdown;
+       char *when = shutdown & VMX_EXIT_REASONS_FAILED_VMENTRY ? "entry" : "exit";
+       char *reason = "UNKNOWN";
+
+       /*
+        * Drop the flag bits before indexing the name table; the basic exit
+        * reason lives in bits 15:0 of the exit reason field.
+        */
+       shutdown &= 0xffff;
+       if (shutdown < ARRAY_SIZE(vmxexit) && vmxexit[shutdown])
+               reason = vmxexit[shutdown];
+       /* Terminate the summary line so the register dump starts on its own line. */
+       fprintf(f, "Shutdown: core %d, %s due to %s(0x%x); ret code 0x%x\n",
+               v->core, when, reason, v->shutdown, v->ret_code);
+
+       fprintf(f, "  rax  0x%016lx\n",           v->regs.tf_rax);
+       fprintf(f, "  rbx  0x%016lx\n",           v->regs.tf_rbx);
+       fprintf(f, "  rcx  0x%016lx\n",           v->regs.tf_rcx);
+       fprintf(f, "  rdx  0x%016lx\n",           v->regs.tf_rdx);
+       fprintf(f, "  rbp  0x%016lx\n",           v->regs.tf_rbp);
+       fprintf(f, "  rsi  0x%016lx\n",           v->regs.tf_rsi);
+       fprintf(f, "  rdi  0x%016lx\n",           v->regs.tf_rdi);
+       fprintf(f, "  r8   0x%016lx\n",           v->regs.tf_r8);
+       fprintf(f, "  r9   0x%016lx\n",           v->regs.tf_r9);
+       fprintf(f, "  r10  0x%016lx\n",           v->regs.tf_r10);
+       fprintf(f, "  r11  0x%016lx\n",           v->regs.tf_r11);
+       fprintf(f, "  r12  0x%016lx\n",           v->regs.tf_r12);
+       fprintf(f, "  r13  0x%016lx\n",           v->regs.tf_r13);
+       fprintf(f, "  r14  0x%016lx\n",           v->regs.tf_r14);
+       fprintf(f, "  r15  0x%016lx\n",           v->regs.tf_r15);
+}
diff --git a/user/vmm/vmx.h b/user/vmm/vmx.h
new file mode 100644 (file)
index 0000000..e398a3c
--- /dev/null
@@ -0,0 +1,45 @@
+// TODO: put this some common place for user and kernel mode. Once
+// we know we need to. Not sure we want to expose this outside
+// vmrunkernel anyway. Users may claim they want to write a vmm, but ...
+
+// Flag bit (bit 31) of the reported exit reason.
+// NOTE(review): by its name this marks a failure during VM entry rather
+// than a normal exit -- confirm against the kernel side.
+#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+
+// Exit reason codes. The numbering has gaps: codes that are not listed
+// here have no symbolic name in this header.
+// NOTE(review): values look like the Intel SDM "basic exit reason"
+// numbers -- confirm against the SDM before adding new entries.
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+#define EXIT_REASON_TRIPLE_FAULT        2
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+#define EXIT_REASON_NMI_WINDOW          8
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVD                13
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+#define EXIT_REASON_VMCLEAR             19
+#define EXIT_REASON_VMLAUNCH            20
+#define EXIT_REASON_VMPTRLD             21
+#define EXIT_REASON_VMPTRST             22
+#define EXIT_REASON_VMREAD              23
+#define EXIT_REASON_VMRESUME            24
+#define EXIT_REASON_VMWRITE             25
+#define EXIT_REASON_VMOFF               26
+#define EXIT_REASON_VMON                27
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION   40
+#define EXIT_REASON_MCE_DURING_VMENTRY  41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_WBINVD              54
+#define EXIT_REASON_XSETBV              55
+#define EXIT_REASON_INVPCID             58