Replacing timer support for VM Guests (XCC)
author Gan Shun <ganshun@gmail.com>
Tue, 11 Jul 2017 21:52:10 +0000 (14:52 -0700)
committer Barret Rhoden <brho@cs.berkeley.edu>
Wed, 12 Jul 2017 15:52:53 +0000 (11:52 -0400)
The old vm timer was a single thread that poked at all cores regardless
of the actual time the guest set the alarm for.

The new timers use the parlib alarm infrastructure and set up an
independent timer for each core, so that we can properly emulate per-core
lapic timers.

Reinstall your kernel headers

Change-Id: I05bddbfb93b8ad62078e688175b54917b151cfaf
Signed-off-by: Gan Shun <ganshun@gmail.com>
[used gth_to_vmtf(), XCC warning]
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/arch/x86/ros/vmm.h
kern/kfs/tinycore_cmdline
kern/kfs/vmimage_cmdline
tests/vmm/vmrunkernel.c
user/parlib/uthread.c
user/vmm/vmxmsr.c

index 7ef8948..3fa7cf2 100644 (file)
@@ -14,6 +14,7 @@ struct vmm_gpcore_init {
        void                                    *posted_irq_desc;
        void                                    *vapic_addr;
        void                                    *apic_addr;
+       void                                    *user_data;
 };
 
 /* Intel VM Trap Injection Fields */
index 9177a4a..e592402 100644 (file)
@@ -6,7 +6,7 @@ earlyprintk=vmcall,keep
  noexec=off
  nohlt
  lapic=notscdeadline
- lapictimerfreq=1000000
+ lapictimerfreq=1000
  clocksource=tsc
  pit=none
  nortc
index 1703786..6422012 100644 (file)
@@ -7,7 +7,7 @@ earlyprintk=vmcall,keep
  nohlt
  noht
  lapic=notscdeadline
- lapictimerfreq=1000000
+ lapictimerfreq=1000
  clocksource=tsc
  pit=none
  root=/dev/vda1
index e9c55f5..1013c69 100644 (file)
@@ -22,6 +22,7 @@
 #include <parlib/uthread.h>
 #include <vmm/linux_bootparam.h>
 #include <getopt.h>
+#include <parlib/alarm.h>
 
 #include <vmm/virtio.h>
 #include <vmm/virtio_blk.h>
@@ -49,24 +50,6 @@ void vapic_status_dump(FILE *f, void *vapic);
 #define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
 #endif
 
-void *timer_thread(void *arg)
-{
-       uint8_t vector;
-       uint32_t initial_count;
-       while (1) {
-               for (int i = 0; i < vm->nr_gpcs; i++) {
-                       vector = ((uint32_t *)gpcis[i].vapic_addr)[0x32] & 0xff;
-                       initial_count = ((uint32_t *)gpcis[i].vapic_addr)[0x38];
-                       if (vector && initial_count)
-                               vmm_interrupt_guest(vm, i, vector);
-               }
-               uthread_usleep(1000);
-       }
-       fprintf(stderr, "SENDING TIMER\n");
-       return 0;
-}
-
-
 static void virtio_poke_guest(uint8_t vec, uint32_t dest)
 {
        if (dest < vm->nr_gpcs) {
@@ -303,6 +286,87 @@ void alloc_intr_pages(void)
        }
 }
 
+/* Debugging aid / temporary workaround: walks every guest pcore and sends a
+ * timer interrupt to each one whose vapic page holds both a nonzero timer
+ * vector (low byte of word 0x32) and a nonzero initial count (word 0x38).
+ *
+ * args is unused.  Always returns 0. */
+void *inject_timer_spurious(void *args)
+{
+       for (int gpc = 0; gpc < vm->nr_gpcs; gpc++) {
+               uint32_t *vapic = (uint32_t *)gpcis[gpc].vapic_addr;
+               uint8_t vec = vapic[0x32] & 0xff;
+               uint32_t count = vapic[0x38];
+
+               /* Skip cores that have not programmed their timer. */
+               if (!count || !vec)
+                       continue;
+               vmm_interrupt_guest(vm, gpc, vec);
+       }
+       return 0;
+}
+
+/* Task body that delivers the timer interrupt to a single guest pcore.
+ *
+ * args carries the guest pcore id packed into the pointer value.  The vector
+ * is the low byte of word 0x32 of that core's vapic page.
+ * Always returns 0. */
+void *inject_timer(void *args)
+{
+       uint64_t target = (uint64_t)args;
+       uint32_t *vapic = (uint32_t *)gpcis[target].vapic_addr;
+       uint8_t vec = vapic[0x32] & 0xff;
+
+       vmm_interrupt_guest(vm, target, vec);
+       return 0;
+}
+
+/* Alarm callback for one guest pcore's emulated lapic timer.
+ *
+ * waiter->data points at the uint64_t guest pcore id this alarm belongs to.
+ * If the timer is programmed (nonzero vector and initial count) and is in
+ * periodic mode, the alarm is re-armed for another period; in every case a
+ * task is then spawned to inject the timer interrupt.
+ *
+ * This handler must never call __set_alarm after interrupting the guest,
+ * otherwise the guest could try to write to the timer msrs and cause a
+ * race condition. */
+void timer_alarm_handler(struct alarm_waiter *waiter)
+{
+       uint64_t gpcoreid = *((uint64_t *)waiter->data);
+       uint32_t *vapic = (uint32_t *)gpcis[gpcoreid].vapic_addr;
+       /* Read the LVT timer word once so that the vector and the mode are
+        * taken from the same snapshot. */
+       uint32_t lvt_timer = vapic[0x32];
+       uint8_t vector = lvt_timer & 0xff;
+       uint32_t timer_mode = (lvt_timer >> 17) & 0x03;
+       uint32_t initial_count = vapic[0x38];
+       uint32_t divide_config_reg = vapic[0x3E];
+       uint32_t multiplier;
+
+       /* Don't blame me for this. Look at the intel manual
+        * Vol 3 10.5.4 APIC Timer.  Bits 3, 1 and 0 of the divide config
+        * register are packed into a 3-bit value; adding one and masking
+        * turns it into the shift amount (0..7) applied to the count. */
+       multiplier = (((divide_config_reg & 0x08) >> 1) |
+                     (divide_config_reg & 0x03)) + 1;
+       multiplier &= 0x07;
+
+       if (vector && initial_count && timer_mode == 0x01) {
+               /* This is periodic, we reset the alarm.  Widen before
+                * shifting: a 32-bit count shifted by up to 7 bits does not
+                * fit in 32 bits and would silently wrap. */
+               set_awaiter_inc(waiter, (uint64_t)initial_count << multiplier);
+               __set_alarm(waiter);
+       }
+
+       /* We spin up a task to inject the timer because vmm_interrupt_guest
+        * may block and we can't do that from vcore context. */
+       vmm_run_task(vm, inject_timer, (void *)gpcoreid);
+}
+
+/* This sets up the structs for each of the guest pcore's timers, but
+ * doesn't actually start the alarms until the core writes all the reasonable
+ * values to the x2apic msrs.
+ *
+ * For every guest pcore this allocates an alarm_waiter, stores it in that
+ * core's gpcis[].user_data, and points the waiter's data at a uint64_t
+ * holding the core id.  The allocations are never freed here.  Exits the
+ * program if an allocation fails. */
+void init_timer_alarms(void)
+{
+       uint64_t *gpcoreids = malloc(sizeof(*gpcoreids) * vm->nr_gpcs);
+
+       /* malloc(0) may legitimately return NULL, so only treat NULL as a
+        * failure when there is at least one core to set up. */
+       if (!gpcoreids && vm->nr_gpcs) {
+               fprintf(stderr, "Failed to allocate the guest timer core ids.\n");
+               exit(1);
+       }
+
+       for (uint64_t i = 0; i < vm->nr_gpcs; i++) {
+               struct alarm_waiter *timer_alarm = malloc(sizeof(*timer_alarm));
+
+               if (!timer_alarm) {
+                       fprintf(stderr, "Failed to allocate a guest timer alarm.\n");
+                       exit(1);
+               }
+
+               gpcoreids[i] = i;
+               gpcis[i].user_data = (void *)timer_alarm;
+               timer_alarm->data = gpcoreids + i;
+               init_awaiter(timer_alarm, timer_alarm_handler);
+       }
+}
+
 int main(int argc, char **argv)
 {
        void *cr3;
@@ -558,7 +622,8 @@ int main(int argc, char **argv)
 
        /* Set maxcpus to the number of cores we're giving the guest. */
        len = snprintf(cmdlinep, cmdlinesz,
-                      "\n maxcpus=%lld", vm->nr_gpcs);
+                      "\n maxcpus=%lld\n possible_cpus=%lld", vm->nr_gpcs,
+                      vm->nr_gpcs);
        if (len >= cmdlinesz) {
                fprintf(stderr, "Too many arguments to the linux command line.");
                exit(1);
@@ -570,7 +635,7 @@ int main(int argc, char **argv)
        assert(!ret);
 
        cr3 = setup_paging(memstart, memsize, debug);
-       vmm_run_task(vm, timer_thread, 0);
+       init_timer_alarms();
 
        vm_tf = gth_to_vmtf(vm->gths[0]);
        vm_tf->tf_cr3 = (uint64_t) cr3;
index d68ef82..5f5ec9e 100644 (file)
@@ -301,7 +301,10 @@ void uthread_init(struct uthread *new_thread, struct uth_thread_attr *attr)
        assert(new_thread);
        new_thread->state = UT_NOT_RUNNING;
        /* Set the signal state. */
-       new_thread->sigstate.mask = current_uthread->sigstate.mask;
+       if (current_uthread)
+               new_thread->sigstate.mask = current_uthread->sigstate.mask;
+       else
+               new_thread->sigstate.mask = 0;
        __sigemptyset(&new_thread->sigstate.pending);
        new_thread->sigstate.data = NULL;
        /* They should have zero'd the uthread.  Let's check critical things: */
index a8c448b..48aa1fb 100644 (file)
@@ -29,6 +29,7 @@
 #include <vmm/sched.h>
 #include <vmm/vmm.h>
 #include <ros/arch/trapframe.h>
+#include <parlib/alarm.h>
 
 struct emmsr {
        uint32_t reg;
@@ -213,12 +214,102 @@ static int emsr_fakewrite(struct guest_thread *vm_thread, struct emmsr *msr,
        return 0;
 }
 
+/* Handles a guest write to the ICR msr.
+ *
+ * The destination comes from the low 32 bits of rdx, the vector from the low
+ * byte of rax and the IPI type from bits 8-10 of rax.  A type 0 write sends
+ * the vector to the destination core, or to every core when the destination
+ * is 0xffffffff; any other type is only logged.  Afterwards rax and rdx are
+ * stored in two consecutive words of the vapic page.
+ *
+ * We currently only handle physical destinations.
+ * TODO(ganshun): Support logical destinations if needed.
+ *
+ * Returns 0 on success, or SHUTDOWN_UNHANDLED_EXIT_REASON if the destination
+ * is neither a valid guest pcore nor the broadcast value. */
+static int apic_icr_write(struct guest_thread *vm_thread,
+                          struct vmm_gpcore_init *gpci)
+{
+       struct virtual_machine *vm = gth_to_vm(vm_thread);
+       struct vm_trapframe *vm_tf = gth_to_vmtf(vm_thread);
+       uint32_t dest = vm_tf->tf_rdx & 0xffffffff;
+       uint8_t vector = vm_tf->tf_rax & 0xff;
+       uint8_t ipi_type = (vm_tf->tf_rax >> 8) & 0x7;
+       int apic_offset = vm_tf->tf_rcx & 0xff;
+       int broadcast = (dest == 0xffffffff);
+       uint32_t *vapic;
+
+       if (!broadcast && dest >= vm->nr_gpcs) {
+               fprintf(stderr, "UNSUPPORTED DESTINATION 0x%02x!\n",
+                               dest);
+               return SHUTDOWN_UNHANDLED_EXIT_REASON;
+       }
+
+       if (ipi_type != 0) {
+               /* This is not a terrible error, we don't currently support
+                * SIPIs and INIT IPIs. The guest is allowed to try to make
+                * them for now even though we don't do anything. */
+               fprintf(stderr, "Unsupported IPI type %d!\n", ipi_type);
+       } else if (broadcast) {
+               /* Send the IPI to every guest pcore. */
+               for (int i = 0; i < vm->nr_gpcs; i++)
+                       vmm_interrupt_guest(vm, i, vector);
+       } else {
+               /* Send individual IPI */
+               vmm_interrupt_guest(vm, dest, vector);
+       }
+
+       /* Record what the guest wrote: low half, then high half. */
+       vapic = (uint32_t *)(gpci->vapic_addr);
+       vapic[apic_offset] = (uint32_t)(vm_tf->tf_rax);
+       vapic[apic_offset + 1] = (uint32_t)(vm_tf->tf_rdx);
+       return 0;
+}
+
+/* Handles a guest write to one of the lapic timer msrs.
+ *
+ * The written value (rax) is stored in the vapic page at the word selected
+ * by the low byte of rcx.  The core's timer alarm, kept in gpci->user_data,
+ * is then cancelled and, if both the timer vector and the initial count are
+ * nonzero, re-armed relative to now.
+ *
+ * Panics if the core has no alarm waiter.  Always returns 0. */
+static int apic_timer_write(struct guest_thread *vm_thread,
+                            struct vmm_gpcore_init *gpci)
+{
+       struct vm_trapframe *vm_tf = gth_to_vmtf(vm_thread);
+       struct alarm_waiter *timer_waiter;
+       uint32_t *vapic = (uint32_t *)(gpci->vapic_addr);
+       int apic_offset = vm_tf->tf_rcx & 0xff;
+       uint32_t divide_config_reg;
+       uint32_t initial_count;
+       uint32_t multiplier;
+       uint8_t vector;
+
+       /* Check the waiter before anything touches it. */
+       timer_waiter = (struct alarm_waiter *)gpci->user_data;
+       if (!timer_waiter)
+               panic("NO WAITER");
+
+       vapic[apic_offset] = (uint32_t)(vm_tf->tf_rax);
+
+       /* See if we can set the timer. */
+       vector = vapic[0x32] & 0xff;
+       initial_count = vapic[0x38];
+       divide_config_reg = vapic[0x3E];
+
+       /* This is a precaution on my part, in case the guest tries to look at
+        * the current count on the lapic. I wanted it to be something other than
+        * 0 just in case. The current count will never be right short of us
+        * properly emulating it. */
+       vapic[0x39] = initial_count;
+
+       /* Look at the intel manual Vol 3 10.5.4 APIC Timer.  The result is
+        * the shift amount (0..7) applied to the initial count. */
+       multiplier = (((divide_config_reg & 0x08) >> 1) |
+                     (divide_config_reg & 0x03)) + 1;
+       multiplier &= 0x07;
+
+       unset_alarm(timer_waiter);
+
+       if (vector && initial_count) {
+               /* Widen before shifting so that large counts do not wrap in
+                * 32 bits. */
+               set_awaiter_rel(timer_waiter,
+                               (uint64_t)initial_count << multiplier);
+               set_alarm(timer_waiter);
+       }
+       return 0;
+}
+
 static int emsr_apic(struct guest_thread *vm_thread,
                      struct vmm_gpcore_init *gpci, uint32_t opcode)
 {
        struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
        int apic_offset = vm_tf->tf_rcx & 0xff;
        uint64_t value;
+       int error;
 
        if (opcode == EXIT_REASON_MSR_READ) {
                if (vm_tf->tf_rcx != MSR_LAPIC_ICR) {
@@ -229,46 +320,20 @@ static int emsr_apic(struct guest_thread *vm_thread,
                        vm_tf->tf_rdx = ((uint32_t *)(gpci->vapic_addr))[apic_offset + 1];
                }
        } else {
-               if (vm_tf->tf_rcx != MSR_LAPIC_ICR)
-                       ((uint32_t *)(gpci->vapic_addr))[apic_offset] =
-                                                              (uint32_t)(vm_tf->tf_rax);
-               else {
-                       /* We currently only handle physical destinations.
-                        * TODO(ganshun): Support logical destinations if needed. */
-                       struct virtual_machine *vm = gth_to_vm(vm_thread);
-                       uint32_t destination = vm_tf->tf_rdx & 0xffffffff;
-                       uint8_t vector = vm_tf->tf_rax & 0xff;
-                       uint8_t type = (vm_tf->tf_rax >> 8) & 0x7;
-
-                       if (destination >= vm->nr_gpcs && destination != 0xffffffff) {
-                               fprintf(stderr, "UNSUPPORTED DESTINATION 0x%02x!\n",
-                                               destination);
-                               return SHUTDOWN_UNHANDLED_EXIT_REASON;
-                       }
-                       switch (type) {
-                               case 0:
-                                       /* Send IPI */
-                                       if (destination == 0xffffffff) {
-                                               /* Broadcast */
-                                               for (int i = 0; i < vm->nr_gpcs; i++)
-                                                       vmm_interrupt_guest(vm, i, vector);
-                                       } else {
-                                               /* Send individual IPI */
-                                               vmm_interrupt_guest(vm, destination, vector);
-                                       }
-                                       break;
-                               default:
-                                       /* This is not a terrible error, we don't currently support
-                                        * SIPIs and INIT IPIs. The guest is allowed to try to make
-                                        * them for now even though we don't do anything. */
-                                       fprintf(stderr, "Unsupported IPI type %d!\n", type);
-                                       break;
-                       }
-
+               switch (vm_tf->tf_rcx) {
+               case MSR_LAPIC_ICR:
+                       error = apic_icr_write(vm_thread, gpci);
+                       if (error != 0)
+                               return error;
+                       break;
+               case MSR_LAPIC_DIVIDE_CONFIG_REG:
+               case MSR_LAPIC_LVT_TIMER:
+               case MSR_LAPIC_INITIAL_COUNT:
+                       apic_timer_write(vm_thread, gpci);
+                       break;
+               default:
                        ((uint32_t *)(gpci->vapic_addr))[apic_offset] =
-                                                              (uint32_t)(vm_tf->tf_rax);
-                       ((uint32_t *)(gpci->vapic_addr))[apic_offset + 1] =
-                                                              (uint32_t)(vm_tf->tf_rdx);
+                                                      (uint32_t)(vm_tf->tf_rax);
                }
        }
        return 0;