vmm: Squelch noisy prints
[akaros.git] / user / vmm / vmexit.c
1 /* Copyright (c) 2015-2016 Google Inc.
2  * See LICENSE for details. */
3
4 #include <parlib/common.h>
5 #include <vmm/virtio.h>
6 #include <vmm/virtio_mmio.h>
7 #include <vmm/virtio_ids.h>
8 #include <vmm/virtio_config.h>
9 #include <vmm/vmm.h>
10 #include <parlib/arch/trap.h>
11 #include <parlib/bitmask.h>
12 #include <stdio.h>
13
14 static bool pir_notif_is_set(struct vmm_gpcore_init *gpci)
15 {
16         return GET_BITMASK_BIT(gpci->posted_irq_desc, VMX_POSTED_OUTSTANDING_NOTIF);
17 }
18
19 static bool rvi_is_set(struct guest_thread *gth)
20 {
21         uint8_t rvi = gth_to_vmtf(gth)->tf_guest_intr_status & 0xff;
22
23         return rvi != 0;
24 }
25
26 /* Blocks a guest pcore / thread until it has an IRQ pending.  Syncs with
27  * vmm_interrupt_guest(). */
28 static void sleep_til_irq(struct guest_thread *gth)
29 {
30         struct vmm_gpcore_init *gpci = gth_to_gpci(gth);
31
32         /* The invariant is that if an IRQ is posted, but not delivered, we will not
33          * sleep.  Anyone who posts an IRQ must signal after setting it.
34          * vmm_interrupt_guest() does this.  If we use alternate sources of IRQ
35          * posting, we'll need to revist this.
36          *
37          * Although vmm_interrupt_guest() only writes OUTSTANDING_NOTIF, it's
38          * possible that the hardware attempted to post the interrupt.  In SDM
39          * parlance, the processor could have "recognized" the virtual IRQ, but not
40          * delivered it yet.  This could happen if the guest had executed "sti", but
41          * not "hlt" yet.  The IRQ was posted and recognized, but not delivered
42          * ("sti blocking").  Then the guest executes "hlt", and vmexits.
43          * OUTSTANDING_NOTIF will be clear in this case.  RVI should be set - at
44          * least to the vector we just sent, but possibly to a greater vector if
45          * multiple were sent.  RVI should only be cleared after virtual IRQs were
46          * actually delivered.  So checking OUTSTANDING_NOTIF and RVI should
47          * suffice.
48          *
49          * Generally, we should also check GUEST_INTERRUPTIBILITY_INFO to see if
50          * there's some reason to not deliver the interrupt and check things like
51          * the VPPR (priority register).  But since we're emulating a halt, mwait,
52          * or something else that needs to be woken by an IRQ, we can ignore that
53          * and just wake them up.  Note that we won't actually deliver the IRQ,
54          * we'll just restart the guest and the hardware will deliver the virtual
55          * IRQ at the appropriate time.  So in the event that something weird
56          * happens, the halt/mwait just returns spuriously.
57          *
58          * The more traditional race here is if the halt starts concurrently with
59          * the post; that's why we sync with the mutex to make sure there is an
60          * ordering between the actual halt (this function) and the posting. */
61         uth_mutex_lock(gth->halt_mtx);
62         while (!(pir_notif_is_set(gpci) || rvi_is_set(gth)))
63                 uth_cond_var_wait(gth->halt_cv, gth->halt_mtx);
64         uth_mutex_unlock(gth->halt_mtx);
65 }
66
67 static bool handle_ept_fault(struct guest_thread *gth)
68 {
69         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
70         struct virtual_machine *vm = gth_to_vm(gth);
71         uint64_t gpa, *regp;
72         uint8_t regx;
73         int store, size;
74         int advance;
75
76         int ret = decode(gth, &gpa, &regx, &regp, &store, &size, &advance);
77
78         if (ret < 0)
79                 return FALSE;
80         if (ret == VM_PAGE_FAULT) {
81                 /* We were unable to translate RIP due to an ept fault */
82                 vm_tf->tf_trap_inject = VM_TRAP_VALID
83                                       | VM_TRAP_ERROR_CODE
84                                       | VM_TRAP_HARDWARE
85                                       | HW_TRAP_PAGE_FAULT;
86                 return TRUE;
87         }
88
89         assert(size >= 0);
90         /* TODO use helpers for some of these addr checks.  the fee/fec ones might
91          * be wrong too. */
92         for (int i = 0; i < VIRTIO_MMIO_MAX_NUM_DEV; i++) {
93                 if (vm->virtio_mmio_devices[i] == NULL)
94                         continue;
95                 if (PG_ADDR(gpa) != vm->virtio_mmio_devices[i]->addr)
96                         continue;
97                 /* TODO: can the guest cause us to spawn off infinite threads? */
98                 if (store)
99                         virtio_mmio_wr(vm, vm->virtio_mmio_devices[i], gpa, size,
100                                        (uint32_t *)regp);
101                 else
102                         *regp = virtio_mmio_rd(vm, vm->virtio_mmio_devices[i], gpa, size);
103                 vm_tf->tf_rip += advance;
104                 return TRUE;
105         }
106         if (PG_ADDR(gpa) == 0xfec00000) {
107                 do_ioapic(gth, gpa, regx, regp, store);
108         } else if (PG_ADDR(gpa) == 0) {
109                 memmove(regp, &vm->low4k[gpa], size);
110         } else {
111                 fprintf(stderr, "EPT violation: can't handle %p\n", gpa);
112                 fprintf(stderr, "RIP %p, exit reason 0x%x\n", vm_tf->tf_rip,
113                                 vm_tf->tf_exit_reason);
114                 fprintf(stderr, "Returning 0xffffffff\n");
115                 showstatus(stderr, gth);
116                 /* Just fill the whole register for now. */
117                 *regp = (uint64_t) -1;
118                 return FALSE;
119         }
120         vm_tf->tf_rip += advance;
121         return TRUE;
122 }
123
124 static bool handle_vmcall_printc(struct guest_thread *gth)
125 {
126         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
127         uint8_t byte;
128
129         byte = vm_tf->tf_rdi;
130         printf("%c", byte);
131         if (byte == '\n')
132                 printf("%c", '%');
133         fflush(stdout);
134         return TRUE;
135 }
136
137 static bool handle_vmcall_smpboot(struct guest_thread *gth)
138 {
139         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
140         struct vm_trapframe *vm_tf_ap;
141         struct virtual_machine *vm = gth_to_vm(gth);
142         int cur_pcores = vm->up_gpcs;
143
144         /* Check if we're guest pcore 0. Only the BSP is allowed to start APs. */
145         if (vm_tf->tf_guest_pcoreid != 0) {
146                 fprintf(stderr,
147                         "Only guest pcore 0 is allowed to start APs. core was %ld\n",
148                         vm_tf->tf_guest_pcoreid);
149                 return FALSE;
150         }
151
152         /* Check if we've reached the maximum, if yes, blow out. */
153         if (vm->nr_gpcs == cur_pcores) {
154                 fprintf(stderr,
155                         "guest tried to start up too many cores. max was %ld, current up %ld\n",
156                         vm->nr_gpcs, cur_pcores);
157                 return FALSE;
158         }
159
160         /* Start up secondary core. */
161         vm_tf_ap = gth_to_vmtf(vm->gths[cur_pcores]);
162         /* We use the BSP's CR3 for now. This should be fine because they
163          * change it later anyway. */
164         vm_tf_ap->tf_cr3 = vm_tf->tf_cr3;
165
166         /* Starting RIP is passed in via rdi. */
167         vm_tf_ap->tf_rip = vm_tf->tf_rdi;
168
169         /* Starting RSP is passed in via rsi. */
170         vm_tf_ap->tf_rsp = vm_tf->tf_rsi;
171
172         vm->up_gpcs++;
173
174         start_guest_thread(vm->gths[cur_pcores]);
175
176         return TRUE;
177 }
178
179 static bool handle_vmcall(struct guest_thread *gth)
180 {
181         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
182         bool retval = FALSE;
183
184         if (gth->vmcall)
185                 return gth->vmcall(gth, vm_tf);
186
187         switch (vm_tf->tf_rax) {
188                 case VMCALL_PRINTC:
189                         retval = handle_vmcall_printc(gth);
190                         break;
191                 case VMCALL_SMPBOOT:
192                         retval = handle_vmcall_smpboot(gth);
193                         break;
194         }
195
196         if (retval)
197                 vm_tf->tf_rip += 3;
198
199         return retval;
200 }
201
202 static bool handle_io(struct guest_thread *gth)
203 {
204         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
205         int ret = io(gth);
206
207         if (ret < 0)
208                 return FALSE;
209         if (ret == VM_PAGE_FAULT) {
210                 /* We were unable to translate RIP due to an ept fault */
211                 vm_tf->tf_trap_inject = VM_TRAP_VALID
212                                       | VM_TRAP_ERROR_CODE
213                                       | VM_TRAP_HARDWARE
214                                       | HW_TRAP_PAGE_FAULT;
215         }
216         return TRUE;
217 }
218
219 static bool handle_msr(struct guest_thread *gth)
220 {
221         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
222
223         if (msrio(gth, gth_to_gpci(gth), vm_tf->tf_exit_reason)) {
224                 /* Use event injection through vmctl to send a general protection fault
225                  * vmctl.interrupt gets written to the VM-Entry Interruption-Information
226                  * Field by vmx */
227                 vm_tf->tf_trap_inject = VM_TRAP_VALID
228                                       | VM_TRAP_ERROR_CODE
229                                       | VM_TRAP_HARDWARE
230                                       | HW_TRAP_GP_FAULT;
231         } else {
232                 vm_tf->tf_rip += 2;
233         }
234         return TRUE;
235 }
236
237 static bool handle_apic_access(struct guest_thread *gth)
238 {
239         uint64_t gpa, *regp;
240         uint8_t regx;
241         int store, size;
242         int advance;
243         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
244
245         if (decode(gth, &gpa, &regx, &regp, &store, &size, &advance))
246                 return FALSE;
247         if (__apic_access(gth, gpa, regx, regp, store))
248                 return FALSE;
249         vm_tf->tf_rip += advance;
250         return TRUE;
251 }
252
253 static bool handle_halt(struct guest_thread *gth)
254 {
255         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
256
257         if (gth->halt_exit)
258                 return FALSE;
259         /* It's possible the guest disabled IRQs and halted, perhaps waiting on an
260          * NMI or something.  If we need to support that, we can change this.  */
261         sleep_til_irq(gth);
262         vm_tf->tf_rip += 1;
263         return TRUE;
264 }
265
266 static bool handle_mwait(struct guest_thread *gth)
267 {
268         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
269
270         /* TODO: we need to handle the actual monitor part of mwait.  This just
271          * implements the power management / halting.  Likewise, it's possible IRQs
272          * are disabled (as with halt). */
273         sleep_til_irq(gth);
274         vm_tf->tf_rip += 3;
275         return TRUE;
276 }
277
278 /* Is this a vmm specific thing?  or generic?
279  *
280  * what do we do when we want to kill the vm?  what are our other options? */
281 bool handle_vmexit(struct guest_thread *gth)
282 {
283         struct vm_trapframe *vm_tf = gth_to_vmtf(gth);
284
285         switch (vm_tf->tf_exit_reason) {
286         case EXIT_REASON_EPT_VIOLATION:
287                 return handle_ept_fault(gth);
288         case EXIT_REASON_VMCALL:
289                 return handle_vmcall(gth);
290         case EXIT_REASON_IO_INSTRUCTION:
291                 return handle_io(gth);
292         case EXIT_REASON_MSR_WRITE:
293         case EXIT_REASON_MSR_READ:
294                 return handle_msr(gth);
295         case EXIT_REASON_APIC_ACCESS:
296                 return handle_apic_access(gth);
297         case EXIT_REASON_HLT:
298                 return handle_halt(gth);
299         case EXIT_REASON_MWAIT_INSTRUCTION:
300                 return handle_mwait(gth);
301         case EXIT_REASON_EXTERNAL_INTERRUPT:
302         case EXIT_REASON_APIC_WRITE:
303                 /* TODO: just ignore these? */
304                 return TRUE;
305         default:
306                 fprintf(stderr, "Don't know how to handle exit %d\n",
307                         vm_tf->tf_exit_reason);
308                 fprintf(stderr, "RIP %p, shutdown 0x%x\n", vm_tf->tf_rip,
309                         vm_tf->tf_exit_reason);
310                 return FALSE;
311         }
312 }