[akaros.git] / user/vmm/vmexit.c
/* Copyright (c) 2015-2016 Google Inc.
 * See LICENSE for details. */

#include <parlib/common.h>
#include <vmm/virtio.h>
#include <vmm/virtio_mmio.h>
#include <vmm/virtio_ids.h>
#include <vmm/virtio_config.h>
#include <vmm/vmm.h>
#include <parlib/arch/trap.h>
#include <parlib/bitmask.h>
#include <stdio.h>

static bool pir_notif_is_set(struct vmm_gpcore_init *gpci)
{
        return GET_BITMASK_BIT(gpci->posted_irq_desc, VMX_POSTED_OUTSTANDING_NOTIF);
}

/* RVI is the low byte of the guest interrupt status (SVI is the high byte).
 * Non-zero means a virtual IRQ was recognized but not yet delivered. */
static bool rvi_is_set(struct guest_thread *gth)
{
        uint8_t rvi = gth_to_vmtf(gth)->tf_guest_intr_status & 0xff;

        return rvi != 0;
}

/* Blocks a guest pcore / thread until it has an IRQ pending.  Syncs with
 * vmm_interrupt_guest(). */
static void sleep_til_irq(struct guest_thread *gth)
{
        struct vmm_gpcore_init *gpci = gth_to_gpci(gth);

        /* The invariant is that if an IRQ is posted, but not delivered, we will not
         * sleep.  Anyone who posts an IRQ must signal after setting it.
         * vmm_interrupt_guest() does this.  If we use alternate sources of IRQ
         * posting, we'll need to revisit this.
         *
         * Although vmm_interrupt_guest() only writes OUTSTANDING_NOTIF, it's
         * possible that the hardware attempted to post the interrupt.  In SDM
         * parlance, the processor could have "recognized" the virtual IRQ, but not
         * delivered it yet.  This could happen if the guest had executed "sti", but
         * not "hlt" yet.  The IRQ was posted and recognized, but not delivered
         * ("sti blocking").  Then the guest executes "hlt", and vmexits.
         * OUTSTANDING_NOTIF will be clear in this case.  RVI should be set - at
         * least to the vector we just sent, but possibly to a greater vector if
         * multiple were sent.  RVI should only be cleared after virtual IRQs were
         * actually delivered.  So checking OUTSTANDING_NOTIF and RVI should
         * suffice.
         *
         * Generally, we should also check GUEST_INTERRUPTIBILITY_INFO to see if
         * there's some reason to not deliver the interrupt, and check things like
         * the VPPR (priority register).  But since we're emulating a halt, mwait,
         * or something else that needs to be woken by an IRQ, we can ignore that
         * and just wake them up.  Note that we won't actually deliver the IRQ,
         * we'll just restart the guest and the hardware will deliver the virtual
         * IRQ at the appropriate time.  So in the event that something weird
         * happens, the halt/mwait just returns spuriously.
         *
         * The more traditional race here is if the halt starts concurrently with
         * the post; that's why we sync with the mutex to make sure there is an
         * ordering between the actual halt (this function) and the posting. */
        uth_mutex_lock(gth->halt_mtx);
        while (!(pir_notif_is_set(gpci) || rvi_is_set(gth)))
                uth_cond_var_wait(gth->halt_cv, gth->halt_mtx);
        uth_mutex_unlock(gth->halt_mtx);
}
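
/* For illustration only: a hedged sketch of the posting side that the CV
 * protocol above syncs with.  The real work is done by vmm_interrupt_guest();
 * this helper name is hypothetical, and it assumes the parlib bitmask and
 * uthread CV APIs.  The ordering matters: set the notif bit first, then
 * broadcast under the mutex, so a halter either sees the bit in its predicate
 * or is woken by the broadcast. */
static void __attribute__((unused)) example_post_notif(struct guest_thread *gth)
{
        struct vmm_gpcore_init *gpci = gth_to_gpci(gth);

        /* Post: mark the outstanding notification in the posted IRQ descriptor */
        SET_BITMASK_BIT_ATOMIC(gpci->posted_irq_desc, VMX_POSTED_OUTSTANDING_NOTIF);
        /* Signal: wake any halter syncing on halt_mtx / halt_cv */
        uth_mutex_lock(gth->halt_mtx);
        uth_cond_var_broadcast(gth->halt_cv);
        uth_mutex_unlock(gth->halt_mtx);
}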

static bool handle_ept_fault(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
        struct virtual_machine *vm = gth_to_vm(gth);
        uint64_t gpa, *regp;
        uint8_t regx;
        int store, size;
        int advance;

        int ret = decode(gth, &gpa, &regx, &regp, &store, &size, &advance);

        if (ret < 0)
                return FALSE;
        if (ret == VM_PAGE_FAULT) {
                /* We were unable to translate RIP due to an EPT fault, so inject a
                 * page fault and let the guest fault the page in. */
                vm_tf->tf_trap_inject = VM_TRAP_VALID
                                      | VM_TRAP_ERROR_CODE
                                      | VM_TRAP_HARDWARE
                                      | HW_TRAP_PAGE_FAULT;
                return TRUE;
        }

        assert(size >= 0);
        /* TODO: use helpers for some of these addr checks; the 0xfee/0xfec ones
         * might be wrong too. */
        for (int i = 0; i < VIRTIO_MMIO_MAX_NUM_DEV; i++) {
                if (vm->virtio_mmio_devices[i] == NULL)
                        continue;
                if (PG_ADDR(gpa) != vm->virtio_mmio_devices[i]->addr)
                        continue;
                /* TODO: can the guest cause us to spawn off infinite threads? */
                if (store)
                        virtio_mmio_wr(vm, vm->virtio_mmio_devices[i], gpa, size,
                                       (uint32_t *)regp);
                else
                        *regp = virtio_mmio_rd(vm, vm->virtio_mmio_devices[i], gpa, size);
                vm_tf->tf_rip += advance;
                return TRUE;
        }
        if (PG_ADDR(gpa) == 0xfec00000) {
                do_ioapic(gth, gpa, regx, regp, store);
        } else if (PG_ADDR(gpa) == 0) {
                /* Low 4K page: copy in the right direction for a store vs. a load. */
                if (store)
                        memmove(&vm->low4k[gpa], regp, size);
                else
                        memmove(regp, &vm->low4k[gpa], size);
        } else {
                fprintf(stderr, "EPT violation: can't handle %p\n", (void *)gpa);
                fprintf(stderr, "RIP %p, exit reason 0x%x\n", (void *)vm_tf->tf_rip,
                        vm_tf->tf_exit_reason);
                fprintf(stderr, "Returning 0xffffffff\n");
                showstatus(stderr, gth);
                /* Just fill the whole register for now. */
                *regp = (uint64_t) -1;
                return FALSE;
        }
        vm_tf->tf_rip += advance;
        return TRUE;
}
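
/* Guest-side view, for illustration: a virtio-MMIO register access from the
 * guest is just a load/store to the device's page, which EPT-faults into the
 * dispatch loop above.  A hedged sketch of reading the MagicValue register at
 * offset 0x0; the device base address here is a hypothetical example. */
static inline uint32_t __attribute__((unused)) example_guest_mmio_rd32(void)
{
        volatile uint32_t *magic = (uint32_t *)0x100000000ULL;  /* hypothetical base */

        return *magic;  /* 0x74726976 ("virt") per the virtio-mmio spec */
}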

/* The vmcall-based console: the guest passes a character in RDI, and a '%' is
 * echoed after each newline. */
static bool handle_vmcall(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
        uint8_t byte;

        byte = vm_tf->tf_rdi;
        printf("%c", byte);
        if (byte == '\n')
                printf("%c", '%');
        /* vmcall is a 3-byte instruction; step the guest past it. */
        vm_tf->tf_rip += 3;
        return TRUE;
}
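
/* Guest-side counterpart, for illustration: a hedged sketch of how a guest
 * would use the vmcall console above.  The helper name is hypothetical; it
 * loads the character into %rdi (the "D" constraint) and executes vmcall. */
static inline void __attribute__((unused)) example_guest_putc(uint8_t c)
{
        asm volatile("vmcall" : : "D"((uint64_t)c) : "memory");
}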

static bool handle_io(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
        int ret = io(gth);

        if (ret < 0)
                return FALSE;
        if (ret == VM_PAGE_FAULT) {
                /* We were unable to translate RIP due to an EPT fault */
                vm_tf->tf_trap_inject = VM_TRAP_VALID
                                      | VM_TRAP_ERROR_CODE
                                      | VM_TRAP_HARDWARE
                                      | HW_TRAP_PAGE_FAULT;
        }
        return TRUE;
}

static bool handle_msr(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        /* TODO: consider pushing the gth into msrio */
        if (msrio(gth, gth_to_gpci(gth), vm_tf->tf_exit_reason)) {
                /* Use event injection through vmctl to send a general protection
                 * fault.  vmctl.interrupt gets written to the VM-Entry
                 * Interruption-Information Field by vmx. */
                vm_tf->tf_trap_inject = VM_TRAP_VALID
                                      | VM_TRAP_ERROR_CODE
                                      | VM_TRAP_HARDWARE
                                      | HW_TRAP_GP_FAULT;
        } else {
                /* rdmsr and wrmsr are both 2-byte instructions. */
                vm_tf->tf_rip += 2;
        }
        return TRUE;
}
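
/* Guest-side view, for illustration: a hedged sketch of the wrmsr the handler
 * above emulates.  The helper name is hypothetical; wrmsr takes the MSR index
 * in %ecx and the value split across %edx:%eax, and both rdmsr and wrmsr are
 * 2-byte opcodes, which is why the success path advances RIP by 2. */
static inline void __attribute__((unused)) example_guest_wrmsr(uint32_t msr,
                                                               uint64_t val)
{
        asm volatile("wrmsr" : : "c"(msr), "a"((uint32_t)val),
                     "d"((uint32_t)(val >> 32)));
}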

static bool handle_apic_access(struct guest_thread *gth)
{
        uint64_t gpa, *regp;
        uint8_t regx;
        int store, size;
        int advance;
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        if (decode(gth, &gpa, &regx, &regp, &store, &size, &advance))
                return FALSE;
        if (__apic_access(gth, gpa, regx, regp, store))
                return FALSE;
        vm_tf->tf_rip += advance;
        return TRUE;
}

static bool handle_halt(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        if (gth->halt_exit)
                return FALSE;
        /* It's possible the guest disabled IRQs and halted, perhaps waiting on an
         * NMI or something.  If we need to support that, we can change this. */
        sleep_til_irq(gth);
        /* hlt is a 1-byte instruction. */
        vm_tf->tf_rip += 1;
        return TRUE;
}
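
/* Guest-side counterpart, for illustration: the canonical idle sequence is
 * "sti; hlt", which is exactly the window for the "sti blocking" case
 * described above sleep_til_irq().  A hedged sketch; the helper name is
 * hypothetical. */
static inline void __attribute__((unused)) example_guest_idle(void)
{
        asm volatile("sti; hlt");
}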

static bool handle_mwait(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        /* TODO: we need to handle the actual monitor part of mwait.  This just
         * implements the power management / halting.  Likewise, it's possible IRQs
         * are disabled (as with halt). */
        sleep_til_irq(gth);
        /* mwait is a 3-byte instruction. */
        vm_tf->tf_rip += 3;
        return TRUE;
}

/* Is this a VMM-specific thing, or generic?
 *
 * What do we do when we want to kill the VM?  What are our other options? */
bool handle_vmexit(struct guest_thread *gth)
{
        struct vm_trapframe *vm_tf = gth_to_vmtf(gth);

        switch (vm_tf->tf_exit_reason) {
        case EXIT_REASON_EPT_VIOLATION:
                return handle_ept_fault(gth);
        case EXIT_REASON_VMCALL:
                return handle_vmcall(gth);
        case EXIT_REASON_IO_INSTRUCTION:
                return handle_io(gth);
        case EXIT_REASON_MSR_WRITE:
        case EXIT_REASON_MSR_READ:
                return handle_msr(gth);
        case EXIT_REASON_APIC_ACCESS:
                return handle_apic_access(gth);
        case EXIT_REASON_HLT:
                return handle_halt(gth);
        case EXIT_REASON_MWAIT_INSTRUCTION:
                return handle_mwait(gth);
        case EXIT_REASON_EXTERNAL_INTERRUPT:
        case EXIT_REASON_APIC_WRITE:
                /* TODO: just ignore these? */
                return TRUE;
        default:
                fprintf(stderr, "Don't know how to handle exit %d\n",
                        vm_tf->tf_exit_reason);
                fprintf(stderr, "RIP %p, exit reason 0x%x\n", (void *)vm_tf->tf_rip,
                        vm_tf->tf_exit_reason);
                return FALSE;
        }
}
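
/* Usage sketch, for illustration: handle_vmexit() returns TRUE to mean
 * "handled, resume the guest" and FALSE to mean "unhandled, let the caller
 * shut down or debug".  A hedged sketch of the sort of loop a controller
 * thread runs around it; the run_guest_once callback is a hypothetical
 * stand-in for however the caller actually enters the guest. */
static void __attribute__((unused))
example_vmexit_loop(struct guest_thread *gth,
                    void (*run_guest_once)(struct guest_thread *))
{
        while (1) {
                run_guest_once(gth);    /* hypothetical guest entry point */
                if (!handle_vmexit(gth)) {
                        fprintf(stderr, "Unhandled vmexit; stopping guest\n");
                        break;
                }
        }
}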