Allow uth_disable_notifs without current_uthread
[akaros.git] / user / vmm / vmxmsr.c
1 /*
2  * MSR emulation
3  *
4  * Copyright 2015 Google Inc.
5  *
6  * See LICENSE for details.
7  */
8
9 #include <stdio.h>
10 #include <sys/types.h>
11 #include <pthread.h>
12 #include <sys/stat.h>
13 #include <fcntl.h>
14 #include <parlib/arch/arch.h>
15 #include <parlib/ros_debug.h>
16 #include <unistd.h>
17 #include <errno.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <sys/uio.h>
21 #include <stdint.h>
22 #include <err.h>
23 #include <sys/mman.h>
24 #include <ros/vmm.h>
25 #include <ros/arch/msr-index.h>
26 #include <vmm/virtio.h>
27 #include <vmm/virtio_mmio.h>
28 #include <vmm/virtio_ids.h>
29 #include <vmm/virtio_config.h>
30 #include <vmm/sched.h>
31 #include <ros/arch/trapframe.h>
32
/*
 * Descriptor for one emulated MSR: which register it is, how to print it,
 * which handler services guest accesses, and a shadow copy of the last value
 * recorded by a handler that only pretends to write.
 */
struct emmsr {
	/* MSR number; msrio() matches it against the guest's rcx. */
	uint32_t reg;
	/* Human-readable name used in diagnostics. */
	char *name;
	/* Access handler; the last argument is the exit reason (read/write). */
	int (*f)(struct guest_thread *vm_thread, struct emmsr *, uint32_t);
	/* Set once a shadow value has been stored in edx/eax below. */
	bool written;
	/* Shadow value, high 32 bits. */
	uint32_t edx;
	/* Shadow value, low 32 bits. */
	uint32_t eax;
};
40 // Might need to mfence rdmsr.  supposedly wrmsr serializes, but not for x2APIC
41 static inline uint64_t read_msr(uint32_t reg)
42 {
43         uint32_t edx, eax;
44         asm volatile("rdmsr; mfence" : "=d"(edx), "=a"(eax) : "c"(reg));
45         return (uint64_t)edx << 32 | eax;
46 }
47
48 static inline void write_msr(uint32_t reg, uint64_t val)
49 {
50         asm volatile("wrmsr" : : "d"((uint32_t)(val >> 32)),
51                                  "a"((uint32_t)(val & 0xFFFFFFFF)), 
52                                  "c"(reg));
53 }
54
/*
 * Forward declarations of the MSR handlers so the emmsrs table can refer to
 * them before their definitions.  Every handler takes the guest thread, the
 * table entry being accessed, and the exit reason (read or write).
 */
static int emsr_miscenable(struct guest_thread *vm_thread, struct emmsr *msr,
                           uint32_t opcode);
static int emsr_mustmatch(struct guest_thread *vm_thread, struct emmsr *msr,
                          uint32_t opcode);
static int emsr_readonly(struct guest_thread *vm_thread, struct emmsr *msr,
                         uint32_t opcode);
static int emsr_readzero(struct guest_thread *vm_thread, struct emmsr *msr,
                         uint32_t opcode);
static int emsr_fakewrite(struct guest_thread *vm_thread, struct emmsr *msr,
                          uint32_t opcode);
static int emsr_ok(struct guest_thread *vm_thread, struct emmsr *msr,
                   uint32_t opcode);
66
67 struct emmsr emmsrs[] = {
68         {MSR_IA32_MISC_ENABLE, "MSR_IA32_MISC_ENABLE", emsr_miscenable},
69         {MSR_IA32_SYSENTER_CS, "MSR_IA32_SYSENTER_CS", emsr_ok},
70         {MSR_IA32_SYSENTER_EIP, "MSR_IA32_SYSENTER_EIP", emsr_ok},
71         {MSR_IA32_SYSENTER_ESP, "MSR_IA32_SYSENTER_ESP", emsr_ok},
72         {MSR_IA32_UCODE_REV, "MSR_IA32_UCODE_REV", emsr_fakewrite},
73         {MSR_CSTAR, "MSR_CSTAR", emsr_fakewrite},
74         {MSR_IA32_VMX_BASIC_MSR, "MSR_IA32_VMX_BASIC_MSR", emsr_fakewrite},
75         {MSR_IA32_VMX_PINBASED_CTLS_MSR, "MSR_IA32_VMX_PINBASED_CTLS_MSR",
76          emsr_fakewrite},
77         {MSR_IA32_VMX_PROCBASED_CTLS_MSR, "MSR_IA32_VMX_PROCBASED_CTLS_MSR",
78          emsr_fakewrite},
79         {MSR_IA32_VMX_PROCBASED_CTLS2, "MSR_IA32_VMX_PROCBASED_CTLS2",
80          emsr_fakewrite},
81         {MSR_IA32_VMX_EXIT_CTLS_MSR, "MSR_IA32_VMX_EXIT_CTLS_MSR",
82          emsr_fakewrite},
83         {MSR_IA32_VMX_ENTRY_CTLS_MSR, "MSR_IA32_VMX_ENTRY_CTLS_MSR",
84          emsr_fakewrite},
85         {MSR_IA32_ENERGY_PERF_BIAS, "MSR_IA32_ENERGY_PERF_BIAS",
86          emsr_fakewrite},
87         {MSR_LBR_SELECT, "MSR_LBR_SELECT", emsr_ok},
88         {MSR_LBR_TOS, "MSR_LBR_TOS", emsr_ok},
89         {MSR_LBR_NHM_FROM, "MSR_LBR_NHM_FROM", emsr_ok},
90         {MSR_LBR_NHM_TO, "MSR_LBR_NHM_TO", emsr_ok},
91         {MSR_LBR_CORE_FROM, "MSR_LBR_CORE_FROM", emsr_ok},
92         {MSR_LBR_CORE_TO, "MSR_LBR_CORE_TO", emsr_ok},
93
94         // grumble. 
95         {MSR_OFFCORE_RSP_0, "MSR_OFFCORE_RSP_0", emsr_ok},
96         {MSR_OFFCORE_RSP_1, "MSR_OFFCORE_RSP_1", emsr_ok},
97         // louder.
98         {MSR_PEBS_LD_LAT_THRESHOLD, "MSR_PEBS_LD_LAT_THRESHOLD", emsr_ok},
99         // aaaaaahhhhhhhhhhhhhhhhhhhhh
100         {MSR_ARCH_PERFMON_EVENTSEL0, "MSR_ARCH_PERFMON_EVENTSEL0", emsr_ok},
101         {MSR_ARCH_PERFMON_EVENTSEL1, "MSR_ARCH_PERFMON_EVENTSEL0", emsr_ok},
102         {MSR_IA32_PERF_CAPABILITIES, "MSR_IA32_PERF_CAPABILITIES", emsr_ok},
103         // unsafe.
104         {MSR_IA32_APICBASE, "MSR_IA32_APICBASE", emsr_fakewrite},
105
106         // mostly harmless.
107         {MSR_TSC_AUX, "MSR_TSC_AUX", emsr_fakewrite},
108         {MSR_RAPL_POWER_UNIT, "MSR_RAPL_POWER_UNIT", emsr_readzero},
109 };
110
/* Return 'hi' with its low 32 bits replaced by 'lo'; the upper 32 bits are kept. */
static uint64_t set_low32(uint64_t hi, uint32_t lo)
{
	const uint64_t upper = (hi >> 32) << 32;

	return upper | lo;
}
115
/* Return 'hi' with its low 16 bits replaced by 'lo'; the upper 48 bits are kept. */
static uint64_t set_low16(uint64_t hi, uint16_t lo)
{
	const uint64_t upper = (hi >> 16) << 16;

	return upper | lo;
}
120
/* Return 'hi' with its low 8 bits replaced by 'lo'; the upper 56 bits are kept. */
static uint64_t set_low8(uint64_t hi, uint8_t lo)
{
	const uint64_t upper = (hi >> 8) << 8;

	return upper | lo;
}
125
126 /* this may be the only register that needs special handling.
127  * If there others then we might want to extend teh emmsr struct.
128  */
129 static int emsr_miscenable(struct guest_thread *vm_thread, struct emmsr *msr,
130                            uint32_t opcode) {
131         uint32_t eax, edx;
132         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
133
134         rdmsr(msr->reg, eax, edx);
135         /* we just let them read the misc msr for now. */
136         if (opcode == EXIT_REASON_MSR_READ) {
137                 vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax);
138                 vm_tf->tf_rax |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
139                 vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx);
140                 return 0;
141         } else {
142                 /* if they are writing what is already written, that's ok. */
143                 if (((uint32_t) vm_tf->tf_rax == eax)
144                     && ((uint32_t) vm_tf->tf_rdx == edx))
145                         return 0;
146         }
147         fprintf(stderr,
148                 "%s: Wanted to write 0x%x:0x%x, but could not; value was 0x%x:0x%x\n",
149                  msr->name, (uint32_t) vm_tf->tf_rdx,
150                  (uint32_t) vm_tf->tf_rax, edx, eax);
151         return SHUTDOWN_UNHANDLED_EXIT_REASON;
152 }
153
154 static int emsr_mustmatch(struct guest_thread *vm_thread, struct emmsr *msr,
155                           uint32_t opcode) {
156         uint32_t eax, edx;
157         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
158
159         rdmsr(msr->reg, eax, edx);
160         /* we just let them read the misc msr for now. */
161         if (opcode == EXIT_REASON_MSR_READ) {
162                 vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax);
163                 vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx);
164                 return 0;
165         } else {
166                 /* if they are writing what is already written, that's ok. */
167                 if (((uint32_t) vm_tf->tf_rax == eax)
168                     && ((uint32_t) vm_tf->tf_rdx == edx))
169                         return 0;
170         }
171         fprintf(stderr,
172                 "%s: Wanted to write 0x%x:0x%x, but could not; value was 0x%x:0x%x\n",
173                  msr->name, (uint32_t) vm_tf->tf_rdx,
174                  (uint32_t) vm_tf->tf_rax, edx, eax);
175         return SHUTDOWN_UNHANDLED_EXIT_REASON;
176 }
177
178 static int emsr_ok(struct guest_thread *vm_thread, struct emmsr *msr,
179                    uint32_t opcode)
180 {
181         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
182
183         if (opcode == EXIT_REASON_MSR_READ) {
184                 rdmsr(msr->reg, vm_tf->tf_rdx, vm_tf->tf_rax);
185         } else {
186                 uint64_t val =
187                         (uint64_t) vm_tf->tf_rdx << 32 | vm_tf->tf_rax;
188                 write_msr(msr->reg, val);
189         }
190         return 0;
191 }
192
193 static int emsr_readonly(struct guest_thread *vm_thread, struct emmsr *msr,
194                          uint32_t opcode)
195 {
196         uint32_t eax, edx;
197         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
198
199         rdmsr((uint32_t) vm_tf->tf_rcx, eax, edx);
200         /* we just let them read the misc msr for now. */
201         if (opcode == EXIT_REASON_MSR_READ) {
202                 vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax);
203                 vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx);
204                 return 0;
205         }
206
207         fprintf(stderr,"%s: Tried to write a readonly register\n", msr->name);
208         return SHUTDOWN_UNHANDLED_EXIT_REASON;
209 }
210
211 static int emsr_readzero(struct guest_thread *vm_thread, struct emmsr *msr,
212                          uint32_t opcode)
213 {
214         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
215
216         if (opcode == EXIT_REASON_MSR_READ) {
217                 vm_tf->tf_rax = 0;
218                 vm_tf->tf_rdx = 0;
219                 return 0;
220         }
221
222         fprintf(stderr,"%s: Tried to write a readonly register\n", msr->name);
223         return SHUTDOWN_UNHANDLED_EXIT_REASON;
224 }
225
226 /* pretend to write it, but don't write it. */
227 static int emsr_fakewrite(struct guest_thread *vm_thread, struct emmsr *msr,
228                           uint32_t opcode)
229 {
230         uint32_t eax, edx;
231         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
232
233         if (!msr->written) {
234                 rdmsr(msr->reg, eax, edx);
235         } else {
236                 edx = msr->edx;
237                 eax = msr->eax;
238         }
239         /* we just let them read the misc msr for now. */
240         if (opcode == EXIT_REASON_MSR_READ) {
241                 vm_tf->tf_rax = set_low32(vm_tf->tf_rax, eax);
242                 vm_tf->tf_rdx = set_low32(vm_tf->tf_rdx, edx);
243                 return 0;
244         } else {
245                 /* if they are writing what is already written, that's ok. */
246                 if (((uint32_t) vm_tf->tf_rax == eax)
247                     && ((uint32_t) vm_tf->tf_rdx == edx))
248                         return 0;
249                 msr->edx = vm_tf->tf_rdx;
250                 msr->eax = vm_tf->tf_rax;
251                 msr->written = true;
252         }
253         return 0;
254 }
255
256 static int emsr_apic(struct guest_thread *vm_thread,
257                      struct vmm_gpcore_init *gpci, uint32_t opcode)
258 {
259         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
260         int apic_offset = vm_tf->tf_rcx & 0xff;
261         uint64_t value;
262
263         if (opcode == EXIT_REASON_MSR_READ) {
264                 if (vm_tf->tf_rcx != MSR_LAPIC_ICR) {
265                         vm_tf->tf_rax = ((uint32_t *)(gpci->vapic_addr))[apic_offset];
266                         vm_tf->tf_rdx = 0;
267                 } else {
268                         vm_tf->tf_rax = ((uint32_t *)(gpci->vapic_addr))[apic_offset];
269                         vm_tf->tf_rdx = ((uint32_t *)(gpci->vapic_addr))[apic_offset + 1];
270                 }
271         } else {
272                 if (vm_tf->tf_rcx != MSR_LAPIC_ICR)
273                         ((uint32_t *)(gpci->vapic_addr))[apic_offset] =
274                                                                (uint32_t)(vm_tf->tf_rax);
275                 else {
276                         ((uint32_t *)(gpci->vapic_addr))[apic_offset] =
277                                                                (uint32_t)(vm_tf->tf_rax);
278                         ((uint32_t *)(gpci->vapic_addr))[apic_offset + 1] =
279                                                                (uint32_t)(vm_tf->tf_rdx);
280                 }
281         }
282         return 0;
283 }
284
285 int msrio(struct guest_thread *vm_thread, struct vmm_gpcore_init *gpci,
286           uint32_t opcode)
287 {
288         int i;
289         struct vm_trapframe *vm_tf = &(vm_thread->uthread.u_ctx.tf.vm_tf);
290
291         if (vm_tf->tf_rcx >= MSR_LAPIC_ID && vm_tf->tf_rcx < MSR_LAPIC_END)
292                 return emsr_apic(vm_thread, gpci, opcode);
293
294         for (i = 0; i < sizeof(emmsrs)/sizeof(emmsrs[0]); i++) {
295                 if (emmsrs[i].reg != vm_tf->tf_rcx)
296                         continue;
297                 return emmsrs[i].f(vm_thread, &emmsrs[i], opcode);
298         }
299         fprintf(stderr, "msrio for 0x%lx failed\n", vm_tf->tf_rcx);
300         return SHUTDOWN_UNHANDLED_EXIT_REASON;
301 }
302