Update to linuxemu syscall structure
[akaros.git] / user / vmm / vmxmsr.c
1 /*
2  * MSR emulation
3  *
4  * Copyright 2015 Google Inc.
5  *
6  * See LICENSE for details.
7  */
8
9 #include <stdio.h>
10 #include <sys/types.h>
11 #include <sys/stat.h>
12 #include <fcntl.h>
13 #include <parlib/arch/arch.h>
14 #include <parlib/ros_debug.h>
15 #include <unistd.h>
16 #include <errno.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/uio.h>
20 #include <stdint.h>
21 #include <err.h>
22 #include <sys/mman.h>
23 #include <ros/vmm.h>
24 #include <ros/arch/msr-index.h>
25 #include <vmm/virtio.h>
26 #include <vmm/virtio_mmio.h>
27 #include <vmm/virtio_ids.h>
28 #include <vmm/virtio_config.h>
29 #include <vmm/sched.h>
30 #include <vmm/vmm.h>
31 #include <ros/arch/trapframe.h>
32 #include <parlib/alarm.h>
33
/*
 * Description of one emulated MSR: which register it is, what to call it in
 * diagnostics, and which handler services guest accesses to it.
 */
struct emmsr {
        /* MSR index; msrio() compares it against the guest's rcx. */
        uint32_t reg;
        /* Name printed by the handlers when they reject an access. */
        char *name;
        /* Handler; the last argument is the exit reason passed to msrio(). */
        int (*f)(struct guest_thread *vm_thread, struct emmsr *, uint32_t);
        /* Set by emsr_fakewrite() once the guest has stored a value. */
        bool written;
        /* Shadow halves of the value recorded by emsr_fakewrite(). */
        uint32_t edx;
        uint32_t eax;
};
41 // Might need to mfence rdmsr.  supposedly wrmsr serializes, but not for x2APIC
42 static inline uint64_t read_msr(uint32_t reg)
43 {
44         uint32_t edx, eax;
45         asm volatile("rdmsr; mfence" : "=d"(edx), "=a"(eax) : "c"(reg));
46         return (uint64_t)edx << 32 | eax;
47 }
48
49 static inline void write_msr(uint32_t reg, uint64_t val)
50 {
51         asm volatile("wrmsr" : : "d"((uint32_t)(val >> 32)),
52                                  "a"((uint32_t)(val & 0xFFFFFFFF)),
53                                  "c"(reg));
54 }
55
/*
 * Forward declarations of the MSR handlers so the emmsrs table can refer to
 * them before their definitions.  All share the signature of emmsr::f.
 */
static int emsr_miscenable(struct guest_thread *vm_thread, struct emmsr *msr,
                           uint32_t opcode);
static int emsr_mustmatch(struct guest_thread *vm_thread, struct emmsr *msr,
                          uint32_t opcode);
static int emsr_readonly(struct guest_thread *vm_thread, struct emmsr *msr,
                         uint32_t opcode);
static int emsr_readzero(struct guest_thread *vm_thread, struct emmsr *msr,
                         uint32_t opcode);
static int emsr_fakewrite(struct guest_thread *vm_thread, struct emmsr *msr,
                          uint32_t opcode);
static int emsr_ok(struct guest_thread *vm_thread, struct emmsr *msr,
                   uint32_t opcode);
67
68 struct emmsr emmsrs[] = {
69         {MSR_RAPL_POWER_UNIT, "MSR_RAPL_POWER_UNIT", emsr_readzero},
70 };
71
/* Return hi with its low 32 bits replaced by lo; the upper 32 bits are kept. */
static uint64_t set_low32(uint64_t hi, uint32_t lo)
{
        const uint64_t low_mask = 0xffffffffULL;

        return (hi & ~low_mask) | lo;
}
76
/* Return hi with its low 16 bits replaced by lo; the upper 48 bits are kept. */
static uint64_t set_low16(uint64_t hi, uint16_t lo)
{
        const uint64_t low_mask = 0xffffULL;

        return (hi & ~low_mask) | lo;
}
81
/* Return hi with its low 8 bits replaced by lo; the upper 56 bits are kept. */
static uint64_t set_low8(uint64_t hi, uint8_t lo)
{
        const uint64_t low_mask = 0xffULL;

        return (hi & ~low_mask) | lo;
}
86
87 /* this may be the only register that needs special handling.
88  * If there others then we might want to extend teh emmsr struct.
89  */
90 static int emsr_miscenable(struct guest_thread *vm_thread, struct emmsr *msr,
91                            uint32_t opcode) {
92         uint32_t eax, edx;
93         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
94
95         rdmsr(msr->reg, eax, edx);
96         /* we just let them read the misc msr for now. */
97         if (opcode == EXIT_REASON_MSR_READ) {
98                 vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax);
99                 vm_tf->tf_rax |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
100                 vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx);
101                 return 0;
102         } else {
103                 /* if they are writing what is already written, that's ok. */
104                 if (((uint32_t) vm_tf->tf_rax == eax)
105                     && ((uint32_t) vm_tf->tf_rdx == edx))
106                         return 0;
107         }
108         fprintf(stderr,
109                 "%s: Wanted to write 0x%x:0x%x, but could not; value was 0x%x:0x%x\n",
110                  msr->name, (uint32_t) vm_tf->tf_rdx,
111                  (uint32_t) vm_tf->tf_rax, edx, eax);
112         return SHUTDOWN_UNHANDLED_EXIT_REASON;
113 }
114
115 static int emsr_mustmatch(struct guest_thread *vm_thread, struct emmsr *msr,
116                           uint32_t opcode) {
117         uint32_t eax, edx;
118         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
119
120         rdmsr(msr->reg, eax, edx);
121         /* we just let them read the misc msr for now. */
122         if (opcode == EXIT_REASON_MSR_READ) {
123                 vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax);
124                 vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx);
125                 return 0;
126         } else {
127                 /* if they are writing what is already written, that's ok. */
128                 if (((uint32_t) vm_tf->tf_rax == eax)
129                     && ((uint32_t) vm_tf->tf_rdx == edx))
130                         return 0;
131         }
132         fprintf(stderr,
133                 "%s: Wanted to write 0x%x:0x%x, but could not; value was 0x%x:0x%x\n",
134                  msr->name, (uint32_t) vm_tf->tf_rdx,
135                  (uint32_t) vm_tf->tf_rax, edx, eax);
136         return SHUTDOWN_UNHANDLED_EXIT_REASON;
137 }
138
139 static int emsr_ok(struct guest_thread *vm_thread, struct emmsr *msr,
140                    uint32_t opcode)
141 {
142         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
143
144         if (opcode == EXIT_REASON_MSR_READ) {
145                 rdmsr(msr->reg, vm_tf->tf_rdx, vm_tf->tf_rax);
146         } else {
147                 uint64_t val =
148                         (uint64_t) vm_tf->tf_rdx << 32 | vm_tf->tf_rax;
149                 write_msr(msr->reg, val);
150         }
151         return 0;
152 }
153
154 static int emsr_readonly(struct guest_thread *vm_thread, struct emmsr *msr,
155                          uint32_t opcode)
156 {
157         uint32_t eax, edx;
158         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
159
160         rdmsr((uint32_t) vm_tf->tf_rcx, eax, edx);
161         /* we just let them read the misc msr for now. */
162         if (opcode == EXIT_REASON_MSR_READ) {
163                 vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax);
164                 vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx);
165                 return 0;
166         }
167
168         fprintf(stderr,"%s: Tried to write a readonly register\n", msr->name);
169         return SHUTDOWN_UNHANDLED_EXIT_REASON;
170 }
171
172 static int emsr_readzero(struct guest_thread *vm_thread, struct emmsr *msr,
173                          uint32_t opcode)
174 {
175         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
176
177         if (opcode == EXIT_REASON_MSR_READ) {
178                 vm_tf->tf_rax = 0;
179                 vm_tf->tf_rdx = 0;
180                 return 0;
181         }
182
183         fprintf(stderr,"%s: Tried to write a readonly register\n", msr->name);
184         return SHUTDOWN_UNHANDLED_EXIT_REASON;
185 }
186
187 /* pretend to write it, but don't write it. */
188 static int emsr_fakewrite(struct guest_thread *vm_thread, struct emmsr *msr,
189                           uint32_t opcode)
190 {
191         uint32_t eax, edx;
192         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
193
194         if (!msr->written) {
195                 rdmsr(msr->reg, eax, edx);
196         } else {
197                 edx = msr->edx;
198                 eax = msr->eax;
199         }
200         /* we just let them read the misc msr for now. */
201         if (opcode == EXIT_REASON_MSR_READ) {
202                 vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax);
203                 vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx);
204                 return 0;
205         } else {
206                 /* if they are writing what is already written, that's ok. */
207                 if (((uint32_t) vm_tf->tf_rax == eax)
208                     && ((uint32_t) vm_tf->tf_rdx == edx))
209                         return 0;
210                 msr->edx = vm_tf->tf_rdx;
211                 msr->eax = vm_tf->tf_rax;
212                 msr->written = true;
213         }
214         return 0;
215 }
216
217 static int apic_icr_write(struct guest_thread *vm_thread,
218                           struct vmm_gpcore_init *gpci)
219 {
220         /* We currently only handle physical destinations.
221          * TODO(ganshun): Support logical destinations if needed. */
222         struct virtual_machine *vm = gth_to_vm(vm_thread);
223         struct vm_trapframe *vm_tf = gth_to_vmtf(vm_thread);
224         uint32_t destination = vm_tf->tf_rdx & 0xffffffff;
225         uint8_t vector = vm_tf->tf_rax & 0xff;
226         uint8_t type = (vm_tf->tf_rax >> 8) & 0x7;
227         int apic_offset = vm_tf->tf_rcx & 0xff;
228
229         if (destination >= vm->nr_gpcs && destination != 0xffffffff) {
230                 fprintf(stderr, "UNSUPPORTED DESTINATION 0x%02x!\n",
231                                 destination);
232                 return SHUTDOWN_UNHANDLED_EXIT_REASON;
233         }
234         switch (type) {
235         case 0:
236                 /* Send IPI */
237                 if (destination == 0xffffffff) {
238                         /* Broadcast */
239                         for (int i = 0; i < vm->nr_gpcs; i++)
240                                 vmm_interrupt_guest(vm, i, vector);
241                 } else {
242                         /* Send individual IPI */
243                         vmm_interrupt_guest(vm, destination, vector);
244                 }
245                 break;
246         default:
247                 /* This is not a terrible error, we don't currently support
248                  * SIPIs and INIT IPIs. The guest is allowed to try to make
249                  * them for now even though we don't do anything. */
250                 fprintf(stderr, "Unsupported IPI type %d!\n", type);
251                 break;
252         }
253
254         ((uint32_t *)(gpci->vapic_addr))[apic_offset] =
255                                                (uint32_t)(vm_tf->tf_rax);
256         ((uint32_t *)(gpci->vapic_addr))[apic_offset + 1] =
257                                                (uint32_t)(vm_tf->tf_rdx);
258         return 0;
259 }
260
261 static int apic_timer_write(struct guest_thread *vm_thread,
262                             struct vmm_gpcore_init *gpci)
263 {
264         uint32_t multiplier;
265         uint8_t vector;
266         uint32_t initial_count;
267         uint32_t divide_config_reg;
268         struct alarm_waiter *timer_waiter;
269         struct vm_trapframe *vm_tf = gth_to_vmtf(vm_thread);
270         int apic_offset = vm_tf->tf_rcx & 0xff;
271
272         ((uint32_t *)(gpci->vapic_addr))[apic_offset] =
273                                        (uint32_t)(vm_tf->tf_rax);
274
275         /* See if we can set the timer. */
276         vector = ((uint32_t *)gpci->vapic_addr)[0x32] & 0xff;
277         initial_count = ((uint32_t *)gpci->vapic_addr)[0x38];
278         divide_config_reg = ((uint32_t *)gpci->vapic_addr)[0x3E];
279         timer_waiter = (struct alarm_waiter *)gpci->user_data;
280
281         uint64_t gpcoreid = *((uint64_t *)timer_waiter->data);
282
283         /* This is a precaution on my part, in case the guest tries to look at
284          * the current count on the lapic. I wanted it to be something other than
285          * 0 just in case. The current count will never be right short of us
286          * properly emulating it. */
287         ((uint32_t *)(gpci->vapic_addr))[0x39] = initial_count;
288
289         if (!timer_waiter)
290                 panic("NO WAITER");
291
292         /* Look at the intel manual Vol 3 10.5.4 APIC Timer */
293         multiplier = (((divide_config_reg & 0x08) >> 1) |
294                       (divide_config_reg & 0x03)) + 1;
295         multiplier &= 0x07;
296
297         unset_alarm(timer_waiter);
298
299         if (vector && initial_count) {
300                 set_awaiter_rel(timer_waiter, initial_count << multiplier);
301                 set_alarm(timer_waiter);
302         }
303         return 0;
304 }
305
306 static int emsr_apic(struct guest_thread *vm_thread,
307                      struct vmm_gpcore_init *gpci, uint32_t opcode)
308 {
309         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
310         int apic_offset = vm_tf->tf_rcx & 0xff;
311         uint64_t value;
312         int error;
313
314         if (opcode == EXIT_REASON_MSR_READ) {
315                 if (vm_tf->tf_rcx != MSR_LAPIC_ICR) {
316                         vm_tf->tf_rax = ((uint32_t *)(gpci->vapic_addr))[apic_offset];
317                         vm_tf->tf_rdx = 0;
318                 } else {
319                         vm_tf->tf_rax = ((uint32_t *)(gpci->vapic_addr))[apic_offset];
320                         vm_tf->tf_rdx = ((uint32_t *)(gpci->vapic_addr))[apic_offset + 1];
321                 }
322         } else {
323                 switch (vm_tf->tf_rcx) {
324                 case MSR_LAPIC_ICR:
325                         error = apic_icr_write(vm_thread, gpci);
326                         if (error != 0)
327                                 return error;
328                         break;
329                 case MSR_LAPIC_DIVIDE_CONFIG_REG:
330                 case MSR_LAPIC_LVT_TIMER:
331                 case MSR_LAPIC_INITIAL_COUNT:
332                         apic_timer_write(vm_thread, gpci);
333                         break;
334                 default:
335                         ((uint32_t *)(gpci->vapic_addr))[apic_offset] =
336                                                        (uint32_t)(vm_tf->tf_rax);
337                 }
338         }
339         return 0;
340 }
341
342 int msrio(struct guest_thread *vm_thread, struct vmm_gpcore_init *gpci,
343           uint32_t opcode)
344 {
345         int i;
346         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
347
348         if (vm_tf->tf_rcx >= MSR_LAPIC_ID && vm_tf->tf_rcx < MSR_LAPIC_END)
349                 return emsr_apic(vm_thread, gpci, opcode);
350
351         for (i = 0; i < sizeof(emmsrs)/sizeof(emmsrs[0]); i++) {
352                 if (emmsrs[i].reg != vm_tf->tf_rcx)
353                         continue;
354                 return emmsrs[i].f(vm_thread, &emmsrs[i], opcode);
355         }
356         printd("msrio for 0x%lx failed\n", vm_tf->tf_rcx);
357         return SHUTDOWN_UNHANDLED_EXIT_REASON;
358 }
359