user level msr support for register not requiring read/write msr
[akaros.git] / user / vmm / vmxmsr.c
1 /*
2  * MSR emulation
3  *
4  * Copyright 2015 Google Inc.
5  *
6  * See LICENSE for details.
7  */
8
9 #include <stdio.h>
10 #include <sys/types.h>
11 #include <pthread.h>
12 #include <sys/stat.h>
13 #include <fcntl.h>
14 #include <parlib/arch/arch.h>
15 #include <parlib/ros_debug.h>
16 #include <unistd.h>
17 #include <errno.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <sys/uio.h>
21 #include <stdint.h>
22 #include <err.h>
23 #include <sys/mman.h>
24 #include <ros/vmm.h>
25 #include <ros/vmx.h>
26 #include <ros/arch/msr-index.h>
27 #include <vmm/virtio.h>
28 #include <vmm/virtio_mmio.h>
29 #include <vmm/virtio_ids.h>
30 #include <vmm/virtio_config.h>
31
/*
 * One entry of the emulated-MSR table: which MSR it covers, a printable name
 * for diagnostics, the handler that services guest accesses, and a shadow
 * value used by handlers that only pretend to write.
 *
 * Field order matters: the emmsrs[] table uses positional initializers for
 * the first three members.
 */
struct emmsr {
        /* MSR index; msrio() matches it against the guest's rcx. */
        uint32_t reg;
        /* Human-readable name, printed in error messages. */
        char *name;
        /* Handler; the uint32_t argument is the exit reason (read vs write). */
        int (*f) (struct vmctl * vcpu, struct emmsr *, uint32_t);
        /* True once emsr_fakewrite() has recorded a guest-written value. */
        bool written;
        /* Shadow copy of the last fake-written value: high half ... */
        uint32_t edx;
        /* ... and low half. */
        uint32_t eax;
};
39 // Might need to mfence rdmsr.  supposedly wrmsr serializes, but not for x2APIC
40 static inline uint64_t read_msr(uint32_t reg)
41 {
42         uint32_t edx, eax;
43         asm volatile("rdmsr; mfence" : "=d"(edx), "=a"(eax) : "c"(reg));
44         return (uint64_t)edx << 32 | eax;
45 }
46
47 static inline void write_msr(uint32_t reg, uint64_t val)
48 {
49         asm volatile("wrmsr" : : "d"((uint32_t)(val >> 32)),
50                                  "a"((uint32_t)(val & 0xFFFFFFFF)), 
51                                  "c"(reg));
52 }
53
/*
 * MSR handlers referenced by the emmsrs[] table.  Each one receives the vcpu
 * state, the matching table entry, and the exit reason (MSR read or write).
 */
int emsr_miscenable(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode);
int emsr_mustmatch(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode);
int emsr_readonly(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode);
int emsr_readzero(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode);
int emsr_fakewrite(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode);
int emsr_ok(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode);
60
61 struct emmsr emmsrs[] = {
62         {MSR_IA32_MISC_ENABLE, "MSR_IA32_MISC_ENABLE", emsr_miscenable},
63         {MSR_IA32_SYSENTER_CS, "MSR_IA32_SYSENTER_CS", emsr_ok},
64         {MSR_IA32_SYSENTER_EIP, "MSR_IA32_SYSENTER_EIP", emsr_ok},
65         {MSR_IA32_SYSENTER_ESP, "MSR_IA32_SYSENTER_ESP", emsr_ok},
66         {MSR_IA32_UCODE_REV, "MSR_IA32_UCODE_REV", emsr_fakewrite},
67         {MSR_CSTAR, "MSR_CSTAR", emsr_fakewrite},
68         {MSR_IA32_VMX_BASIC_MSR, "MSR_IA32_VMX_BASIC_MSR", emsr_fakewrite},
69         {MSR_IA32_VMX_PINBASED_CTLS_MSR, "MSR_IA32_VMX_PINBASED_CTLS_MSR",
70          emsr_fakewrite},
71         {MSR_IA32_VMX_PROCBASED_CTLS_MSR, "MSR_IA32_VMX_PROCBASED_CTLS_MSR",
72          emsr_fakewrite},
73         {MSR_IA32_VMX_PROCBASED_CTLS2, "MSR_IA32_VMX_PROCBASED_CTLS2",
74          emsr_fakewrite},
75         {MSR_IA32_VMX_EXIT_CTLS_MSR, "MSR_IA32_VMX_EXIT_CTLS_MSR",
76          emsr_fakewrite},
77         {MSR_IA32_VMX_ENTRY_CTLS_MSR, "MSR_IA32_VMX_ENTRY_CTLS_MSR",
78          emsr_fakewrite},
79         {MSR_IA32_ENERGY_PERF_BIAS, "MSR_IA32_ENERGY_PERF_BIAS",
80          emsr_fakewrite},
81         {MSR_LBR_SELECT, "MSR_LBR_SELECT", emsr_ok},
82         {MSR_LBR_TOS, "MSR_LBR_TOS", emsr_ok},
83         {MSR_LBR_NHM_FROM, "MSR_LBR_NHM_FROM", emsr_ok},
84         {MSR_LBR_NHM_TO, "MSR_LBR_NHM_TO", emsr_ok},
85         {MSR_LBR_CORE_FROM, "MSR_LBR_CORE_FROM", emsr_ok},
86         {MSR_LBR_CORE_TO, "MSR_LBR_CORE_TO", emsr_ok},
87
88         // grumble. 
89         {MSR_OFFCORE_RSP_0, "MSR_OFFCORE_RSP_0", emsr_ok},
90         {MSR_OFFCORE_RSP_1, "MSR_OFFCORE_RSP_1", emsr_ok},
91         // louder.
92         {MSR_PEBS_LD_LAT_THRESHOLD, "MSR_PEBS_LD_LAT_THRESHOLD", emsr_ok},
93         // aaaaaahhhhhhhhhhhhhhhhhhhhh
94         {MSR_ARCH_PERFMON_EVENTSEL0, "MSR_ARCH_PERFMON_EVENTSEL0", emsr_ok},
95         {MSR_ARCH_PERFMON_EVENTSEL1, "MSR_ARCH_PERFMON_EVENTSEL0", emsr_ok},
96         {MSR_IA32_PERF_CAPABILITIES, "MSR_IA32_PERF_CAPABILITIES", emsr_ok},
97         // unsafe.
98         {MSR_IA32_APICBASE, "MSR_IA32_APICBASE", emsr_fakewrite},
99
100         // mostly harmless.
101         {MSR_TSC_AUX, "MSR_TSC_AUX", emsr_fakewrite},
102         {MSR_RAPL_POWER_UNIT, "MSR_RAPL_POWER_UNIT", emsr_readzero},
103 };
104
/* Return hi with its low 32 bits replaced by lo; bits 63:32 are kept. */
static uint64_t set_low32(uint64_t hi, uint32_t lo)
{
        const uint64_t keep = hi & ~(uint64_t)UINT32_MAX;

        return keep | (uint64_t)lo;
}
109
/* Return hi with its low 16 bits replaced by lo; bits 63:16 are kept. */
static uint64_t set_low16(uint64_t hi, uint16_t lo)
{
        const uint64_t keep = hi & ~(uint64_t)UINT16_MAX;

        return keep | (uint64_t)lo;
}
114
/* Return hi with its low 8 bits replaced by lo; bits 63:8 are kept. */
static uint64_t set_low8(uint64_t hi, uint8_t lo)
{
        const uint64_t keep = hi & ~(uint64_t)UINT8_MAX;

        return keep | (uint64_t)lo;
}
119
120 /* this may be the only register that needs special handling.
121  * If there others then we might want to extend teh emmsr struct.
122  */
123 int emsr_miscenable(struct vmctl *vcpu, struct emmsr *msr,
124                     uint32_t opcode) {
125         uint32_t eax, edx;
126         rdmsr(msr->reg, eax, edx);
127         /* we just let them read the misc msr for now. */
128         if (opcode == EXIT_REASON_MSR_READ) {
129                 vcpu->regs.tf_rax = set_low32(vcpu->regs.tf_rax, eax);
130                 vcpu->regs.tf_rax |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
131                 vcpu->regs.tf_rdx = set_low32(vcpu->regs.tf_rdx, edx);
132                 return 0;
133         } else {
134                 /* if they are writing what is already written, that's ok. */
135                 if (((uint32_t) vcpu->regs.tf_rax == eax)
136                     && ((uint32_t) vcpu->regs.tf_rdx == edx))
137                         return 0;
138         }
139         fprintf(stderr, 
140                 "%s: Wanted to write 0x%x:0x%x, but could not; value was 0x%x:0x%x\n",
141                  msr->name, (uint32_t) vcpu->regs.tf_rdx,
142                  (uint32_t) vcpu->regs.tf_rax, edx, eax);
143         return SHUTDOWN_UNHANDLED_EXIT_REASON;
144 }
145
146 int emsr_mustmatch(struct vmctl *vcpu, struct emmsr *msr,
147                    uint32_t opcode) {
148         uint32_t eax, edx;
149         rdmsr(msr->reg, eax, edx);
150         /* we just let them read the misc msr for now. */
151         if (opcode == EXIT_REASON_MSR_READ) {
152                 vcpu->regs.tf_rax = set_low32(vcpu->regs.tf_rax, eax);
153                 vcpu->regs.tf_rdx = set_low32(vcpu->regs.tf_rdx, edx);
154                 return 0;
155         } else {
156                 /* if they are writing what is already written, that's ok. */
157                 if (((uint32_t) vcpu->regs.tf_rax == eax)
158                     && ((uint32_t) vcpu->regs.tf_rdx == edx))
159                         return 0;
160         }
161         fprintf(stderr,
162                 "%s: Wanted to write 0x%x:0x%x, but could not; value was 0x%x:0x%x\n",
163                  msr->name, (uint32_t) vcpu->regs.tf_rdx,
164                  (uint32_t) vcpu->regs.tf_rax, edx, eax);
165         return SHUTDOWN_UNHANDLED_EXIT_REASON;
166 }
167
168 int emsr_ok(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode)
169 {
170         if (opcode == EXIT_REASON_MSR_READ) {
171                 rdmsr(msr->reg, vcpu->regs.tf_rdx, vcpu->regs.tf_rax);
172         } else {
173                 uint64_t val =
174                         (uint64_t) vcpu->regs.tf_rdx << 32 | vcpu->regs.tf_rax;
175                 write_msr(msr->reg, val);
176         }
177         return 0;
178 }
179
180 int emsr_readonly(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode)
181 {
182         uint32_t eax, edx;
183         rdmsr((uint32_t) vcpu->regs.tf_rcx, eax, edx);
184         /* we just let them read the misc msr for now. */
185         if (opcode == EXIT_REASON_MSR_READ) {
186                 vcpu->regs.tf_rax = set_low32(vcpu->regs.tf_rax, eax);
187                 vcpu->regs.tf_rdx = set_low32(vcpu->regs.tf_rdx, edx);
188                 return 0;
189         }
190
191         fprintf(stderr,"%s: Tried to write a readonly register\n", msr->name);
192         return SHUTDOWN_UNHANDLED_EXIT_REASON;
193 }
194
195 int emsr_readzero(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode)
196 {
197         if (opcode == EXIT_REASON_MSR_READ) {
198                 vcpu->regs.tf_rax = 0;
199                 vcpu->regs.tf_rdx = 0;
200                 return 0;
201         }
202
203         fprintf(stderr,"%s: Tried to write a readonly register\n", msr->name);
204         return SHUTDOWN_UNHANDLED_EXIT_REASON;
205 }
206
207 /* pretend to write it, but don't write it. */
208 int emsr_fakewrite(struct vmctl *vcpu, struct emmsr *msr, uint32_t opcode)
209 {
210         uint32_t eax, edx;
211         if (!msr->written) {
212                 rdmsr(msr->reg, eax, edx);
213         } else {
214                 edx = msr->edx;
215                 eax = msr->eax;
216         }
217         /* we just let them read the misc msr for now. */
218         if (opcode == EXIT_REASON_MSR_READ) {
219                 vcpu->regs.tf_rax = set_low32(vcpu->regs.tf_rax, eax);
220                 vcpu->regs.tf_rdx = set_low32(vcpu->regs.tf_rdx, edx);
221                 return 0;
222         } else {
223                 /* if they are writing what is already written, that's ok. */
224                 if (((uint32_t) vcpu->regs.tf_rax == eax)
225                     && ((uint32_t) vcpu->regs.tf_rdx == edx))
226                         return 0;
227                 msr->edx = vcpu->regs.tf_rdx;
228                 msr->eax = vcpu->regs.tf_rax;
229                 msr->written = true;
230         }
231         return 0;
232 }
233
234 int
235 msrio(struct vmctl *vcpu, uint32_t opcode) {
236         int i;
237         for (i = 0; i < sizeof(emmsrs)/sizeof(emmsrs[0]); i++) {
238                 if (emmsrs[i].reg != vcpu->regs.tf_rcx)
239                         continue;
240                 return emmsrs[i].f(vcpu, &emmsrs[i], opcode);
241         }
242         fprintf(stderr,"msrio for 0x%lx failed\n", vcpu->regs.tf_rcx);
243         return SHUTDOWN_UNHANDLED_EXIT_REASON;
244 }
245