akaros.git: kern/arch/x86/perfmon.c
/* Copyright (c) 2015 Google Inc
 * Davide Libenzi <dlibenzi@google.com>
 * See LICENSE for details.
 */

#include <sys/types.h>
#include <arch/ros/msr-index.h>
#include <arch/ros/membar.h>
#include <arch/x86.h>
#include <arch/msr.h>
#include <arch/uaccess.h>
#include <ros/errno.h>
#include <assert.h>
#include <trap.h>
#include <smp.h>
#include <atomic.h>
#include <core_set.h>
#include <kref.h>
#include <percpu.h>
#include <kmalloc.h>
#include <err.h>
#include <string.h>
#include <profiler.h>
#include <arch/perfmon.h>

#define FIXCNTR_NBITS 4
#define FIXCNTR_MASK (((uint64_t) 1 << FIXCNTR_NBITS) - 1)

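/* Per-core counter bookkeeping.  Each core keeps a shadow copy of the events
 * currently programmed on its variable and fixed counters; the IRQ-saving
 * lock is required because this state is also touched from the PMI handler.
 */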
struct perfmon_cpu_context {
        spinlock_t lock;
        struct perfmon_event counters[MAX_VAR_COUNTERS];
        struct perfmon_event fixed_counters[MAX_FIX_COUNTERS];
};

struct perfmon_status_env {
        struct perfmon_alloc *pa;
        struct perfmon_status *pef;
};

static struct perfmon_cpu_caps cpu_caps;
static DEFINE_PERCPU(struct perfmon_cpu_context, counters_env);
DEFINE_PERCPU_INIT(perfmon_counters_env_init);

static void perfmon_counters_env_init(void)
{
        for (int i = 0; i < num_cores; i++) {
                struct perfmon_cpu_context *cctx = _PERCPU_VARPTR(counters_env, i);

                spinlock_init_irqsave(&cctx->lock);
        }
}

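/* Decode CPUID leaf 0x0a (architectural performance monitoring): PMU version,
 * number and bit width of the general-purpose counters, and number and bit
 * width of the fixed-function counters.
 */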
static void perfmon_read_cpu_caps(struct perfmon_cpu_caps *pcc)
{
        uint32_t a, b, c, d;

        cpuid(0x0a, 0, &a, &b, &c, &d);

        pcc->proc_arch_events = a >> 24;
        pcc->bits_x_counter = (a >> 16) & 0xff;
        pcc->counters_x_proc = (a >> 8) & 0xff;
        pcc->bits_x_fix_counter = (d >> 5) & 0xff;
        pcc->fix_counters_x_proc = d & 0x1f;
        pcc->perfmon_version = a & 0xff;
}

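/* Set or clear a counter's enable bit in MSR_CORE_PERF_GLOBAL_CTRL: bit N for
 * general-purpose counter N, bit 32 + N for fixed counter N.
 */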
static void perfmon_enable_event(int event, bool enable)
{
        uint64_t gctrl = read_msr(MSR_CORE_PERF_GLOBAL_CTRL);

        if (enable)
                write_msr(MSR_CORE_PERF_GLOBAL_CTRL,
                          gctrl | ((uint64_t) 1 << event));
        else
                write_msr(MSR_CORE_PERF_GLOBAL_CTRL,
                          gctrl & ~((uint64_t) 1 << event));
}

static void perfmon_enable_fix_event(int event, bool enable)
{
        uint64_t gctrl = read_msr(MSR_CORE_PERF_GLOBAL_CTRL);

        if (enable)
                write_msr(MSR_CORE_PERF_GLOBAL_CTRL,
                          gctrl | ((uint64_t) 1 << (32 + event)));
        else
                write_msr(MSR_CORE_PERF_GLOBAL_CTRL,
                          gctrl & ~((uint64_t) 1 << (32 + event)));
}

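/* A general-purpose counter is treated as free when its event-select MSR
 * (IA32_PERFEVTSELx) is zero.
 */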
static bool perfmon_event_available(uint32_t event)
{
        return read_msr(MSR_ARCH_PERFMON_EVENTSEL0 + event) == 0;
}

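/* Build the 4-bit MSR_CORE_PERF_FIXED_CTR_CTRL field for fixed counter
 * 'eventno' and merge it into 'base' (the current MSR value): the event's EN
 * flag maps to the PMI bit (bit 3), OS to ring-0 counting (bit 0), and USR to
 * ring-3 counting (bit 1).
 */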
static uint64_t perfmon_get_fixevent_mask(const struct perfmon_event *pev,
                                          int eventno, uint64_t base)
{
        uint64_t m = 0;

        if (PMEV_GET_EN(pev->event))
                m |= 1 << 3;
        if (PMEV_GET_OS(pev->event))
                m |= (1 << 0);
        if (PMEV_GET_USR(pev->event))
                m |= (1 << 1);

        m <<= eventno * FIXCNTR_NBITS;
        m |= base & ~(FIXCNTR_MASK << (eventno * FIXCNTR_NBITS));

        return m;
}

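/* Runs on each selected core (via smp_do_in_cores()).  Programs either the
 * fixed counter named by the event or the first free general-purpose counter,
 * preloading it with -trigger_count so it overflows after trigger_count
 * occurrences.  The chosen counter index, or a negative errno, is recorded in
 * pa->cores_counters[core_id()].
 */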
static void perfmon_do_cores_alloc(void *opaque)
{
        struct perfmon_alloc *pa = (struct perfmon_alloc *) opaque;
        struct perfmon_cpu_context *cctx = PERCPU_VARPTR(counters_env);
        int i;

        spin_lock_irqsave(&cctx->lock);
        if (perfmon_is_fixed_event(&pa->ev)) {
                uint64_t fxctrl_value = read_msr(MSR_CORE_PERF_FIXED_CTR_CTRL), tmp;

                /* Each fixed counter owns a FIXCNTR_NBITS-wide control field
                 * in MSR_CORE_PERF_FIXED_CTR_CTRL; a non-zero field means the
                 * counter is already in use.
                 */
                i = PMEV_GET_EVENT(pa->ev.event);
                if (i >= (int) cpu_caps.fix_counters_x_proc) {
                        i = -EINVAL;
                } else if (fxctrl_value & (FIXCNTR_MASK << (i * FIXCNTR_NBITS))) {
                        i = -EBUSY;
                } else {
                        cctx->fixed_counters[i] = pa->ev;
                        PMEV_SET_EN(cctx->fixed_counters[i].event, 1);

                        tmp = perfmon_get_fixevent_mask(&pa->ev, i, fxctrl_value);

                        perfmon_enable_fix_event(i, TRUE);

                        write_msr(MSR_CORE_PERF_FIXED_CTR0 + i,
                                  -(int64_t) pa->ev.trigger_count);
                        write_msr(MSR_CORE_PERF_FIXED_CTR_CTRL, tmp);
                }
        } else {
                for (i = 0; i < (int) cpu_caps.counters_x_proc; i++) {
                        if (cctx->counters[i].event == 0) {
                                if (!perfmon_event_available(i))
                                        warn_once("Counter %d is free but not available", i);
                                else
                                        break;
                        }
                }
                if (i < (int) cpu_caps.counters_x_proc) {
                        cctx->counters[i] = pa->ev;
                        PMEV_SET_EN(cctx->counters[i].event, 1);

                        perfmon_enable_event(i, TRUE);

                        write_msr(MSR_IA32_PERFCTR0 + i, -(int64_t) pa->ev.trigger_count);
                        write_msr(MSR_ARCH_PERFMON_EVENTSEL0 + i,
                                  cctx->counters[i].event);
                } else {
                        i = -ENOSPC;
                }
        }
        spin_unlock_irqsave(&cctx->lock);

        pa->cores_counters[core_id()] = (counter_t) i;
}

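/* Per-core teardown counterpart of perfmon_do_cores_alloc(): releases the
 * counter this core had assigned to the alloc (or records -ENOENT), clearing
 * its control bits and zeroing the counter itself.
 */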
static void perfmon_do_cores_free(void *opaque)
{
        struct perfmon_alloc *pa = (struct perfmon_alloc *) opaque;
        struct perfmon_cpu_context *cctx = PERCPU_VARPTR(counters_env);
        int err = 0, coreno = core_id();
        counter_t ccno = pa->cores_counters[coreno];

        spin_lock_irqsave(&cctx->lock);
        if (perfmon_is_fixed_event(&pa->ev)) {
                unsigned int ccbitsh = ccno * FIXCNTR_NBITS;
                uint64_t fxctrl_value = read_msr(MSR_CORE_PERF_FIXED_CTR_CTRL);

                if ((ccno >= cpu_caps.fix_counters_x_proc) ||
                    !(fxctrl_value & (FIXCNTR_MASK << ccbitsh))) {
                        err = -ENOENT;
                } else {
                        perfmon_init_event(&cctx->fixed_counters[ccno]);

                        perfmon_enable_fix_event((int) ccno, FALSE);

                        write_msr(MSR_CORE_PERF_FIXED_CTR_CTRL,
                                  fxctrl_value & ~(FIXCNTR_MASK << ccbitsh));
                        write_msr(MSR_CORE_PERF_FIXED_CTR0 + ccno, 0);
                }
        } else {
                if (ccno < (int) cpu_caps.counters_x_proc) {
                        perfmon_init_event(&cctx->counters[ccno]);

                        perfmon_enable_event((int) ccno, FALSE);

                        write_msr(MSR_ARCH_PERFMON_EVENTSEL0 + ccno, 0);
                        write_msr(MSR_IA32_PERFCTR0 + ccno, 0);
                } else {
                        err = -ENOENT;
                }
        }
        spin_unlock_irqsave(&cctx->lock);

        pa->cores_counters[coreno] = (counter_t) err;
}

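/* Runs on each core that holds a counter for the alloc: reads the current raw
 * value of that counter into env->pef->cores_values[core_id()].
 */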
static void perfmon_do_cores_status(void *opaque)
{
        struct perfmon_status_env *env = (struct perfmon_status_env *) opaque;
        struct perfmon_cpu_context *cctx = PERCPU_VARPTR(counters_env);
        int coreno = core_id();
        counter_t ccno = env->pa->cores_counters[coreno];

        spin_lock_irqsave(&cctx->lock);
        if (perfmon_is_fixed_event(&env->pa->ev))
                env->pef->cores_values[coreno] =
                        read_msr(MSR_CORE_PERF_FIXED_CTR0 + ccno);
        else
                env->pef->cores_values[coreno] =
                        read_msr(MSR_IA32_PERFCTR0 + ccno);
        spin_unlock_irqsave(&cctx->lock);
}

static void perfmon_setup_alloc_core_set(const struct perfmon_alloc *pa,
                                         struct core_set *cset)
{
        int i;

        core_set_init(cset);
        for (i = 0; i < num_cores; i++) {
                if (pa->cores_counters[i] >= 0)
                        core_set_setcpu(cset, i);
        }
}

static void perfmon_cleanup_cores_alloc(struct perfmon_alloc *pa)
{
        struct core_set cset;

        perfmon_setup_alloc_core_set(pa, &cset);
        smp_do_in_cores(&cset, perfmon_do_cores_free, pa);
}

static void perfmon_free_alloc(struct perfmon_alloc *pa)
{
        kfree(pa);
}

static void perfmon_destroy_alloc(struct perfmon_alloc *pa)
{
        if (pa) {
                perfmon_cleanup_cores_alloc(pa);
                perfmon_free_alloc(pa);
        }
}

static void perfmon_release_alloc(struct kref *kref)
{
        struct perfmon_alloc *pa = container_of(kref, struct perfmon_alloc, ref);

        perfmon_destroy_alloc(pa);
}

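/* Allocate and initialize a perfmon_alloc with one counter slot per core, all
 * marked INVALID_COUNTER.  The alloc is refcounted: dropping the last
 * reference (perfmon_release_alloc()) frees the counters on every core that
 * got one and then releases the memory.
 */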
static struct perfmon_alloc *perfmon_create_alloc(const struct perfmon_event *pev)
{
        int i;
        struct perfmon_alloc *pa = kzmalloc(sizeof(struct perfmon_alloc) +
                                            num_cores * sizeof(counter_t),
                                            KMALLOC_WAIT);

        kref_init(&pa->ref, perfmon_release_alloc, 1);
        pa->ev = *pev;
        for (i = 0; i < num_cores; i++)
                pa->cores_counters[i] = INVALID_COUNTER;

        return pa;
}

static struct perfmon_status *perfmon_alloc_status(void)
{
        struct perfmon_status *pef = kzmalloc(sizeof(struct perfmon_status) +
                                              num_cores * sizeof(uint64_t),
                                              KMALLOC_WAIT);

        return pef;
}

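/* Point the local APIC performance-counter LVT entry at the PMI vector.  The
 * entry gets masked when the interrupt fires (see the note in
 * perfmon_interrupt()), so the handler calls this again to re-arm it.
 */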
static void perfmon_arm_irq(void)
{
        apicrput(MSR_LAPIC_LVT_PERFMON, IdtLAPIC_PCINT);
}

bool perfmon_supported(void)
{
        return cpu_caps.perfmon_version >= 2;
}

void perfmon_global_init(void)
{
        perfmon_read_cpu_caps(&cpu_caps);
}

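/* Per-core PMU initialization: allow user-level counter reads, zero every
 * counter and control register, and arm the PMI LVT entry.
 */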
void perfmon_pcpu_init(void)
{
        int i;

        if (!perfmon_supported())
                return;
        /* Enable user level access to the performance counters */
        lcr4(rcr4() | CR4_PCE);

        /* Reset all the counters and selectors to zero. */
        write_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
        for (i = 0; i < (int) cpu_caps.counters_x_proc; i++) {
                write_msr(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0);
                write_msr(MSR_IA32_PERFCTR0 + i, 0);
        }
        write_msr(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
        for (i = 0; i < (int) cpu_caps.fix_counters_x_proc; i++)
                write_msr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);

        perfmon_arm_irq();
}

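/* Encode the event code and unit mask of a perfmon event into the profiler's
 * sample info word; bit 16 flags fixed-counter events.
 */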
static uint64_t perfmon_make_sample_event(const struct perfmon_event *pev)
{
        uint64_t ei = ((uint64_t) PMEV_GET_MASK(pev->event) << 8) |
                PMEV_GET_EVENT(pev->event);

        if (perfmon_is_fixed_event(pev))
                ei |= 1 << 16;

        return PROF_MKINFO(PROF_DOM_PMU, ei);
}

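/* PMI handler.  For every counter whose overflow bit is set in
 * MSR_CORE_PERF_GLOBAL_STATUS, record a profiler sample for the event
 * programmed on it and reload the counter with -trigger_count so sampling
 * continues, then acknowledge the overflows and re-arm the interrupt.
 */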
void perfmon_interrupt(struct hw_trapframe *hw_tf, void *data)
{
        int i;
        struct perfmon_cpu_context *cctx = PERCPU_VARPTR(counters_env);
        uint64_t gctrl, status;

        spin_lock_irqsave(&cctx->lock);
        /* Save the global control value and disable all counters, since a
         * counter must be stopped before its value can be reset.  The saved
         * control value is restored on exit.
         */
        status = read_msr(MSR_CORE_PERF_GLOBAL_STATUS);
        gctrl = read_msr(MSR_CORE_PERF_GLOBAL_CTRL);
        write_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
        for (i = 0; i < (int) cpu_caps.counters_x_proc; i++) {
                if (status & ((uint64_t) 1 << i)) {
                        if (cctx->counters[i].event) {
                                profiler_add_hw_sample(
                                        hw_tf, perfmon_make_sample_event(cctx->counters + i));
                                write_msr(MSR_IA32_PERFCTR0 + i,
                                          -(int64_t) cctx->counters[i].trigger_count);
                        }
                }
        }
        for (i = 0; i < (int) cpu_caps.fix_counters_x_proc; i++) {
                if (status & ((uint64_t) 1 << (32 + i))) {
                        if (cctx->fixed_counters[i].event) {
                                profiler_add_hw_sample(
                                        hw_tf, perfmon_make_sample_event(cctx->fixed_counters + i));
                                write_msr(MSR_CORE_PERF_FIXED_CTR0 + i,
                                          -(int64_t) cctx->fixed_counters[i].trigger_count);
                        }
                }
        }
        write_msr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
        write_msr(MSR_CORE_PERF_GLOBAL_CTRL, gctrl);
        spin_unlock_irqsave(&cctx->lock);

        /* We need to re-arm the IRQ, as the perfmon LVT entry gets masked when
         * the PMI fires.  Note that KVM and real hardware behave differently
         * here: KVM does not mask the entry on delivery, while real hardware
         * does.
         */
        perfmon_arm_irq();
}

void perfmon_get_cpu_caps(struct perfmon_cpu_caps *pcc)
{
        memcpy(pcc, &cpu_caps, sizeof(*pcc));
}

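/* Publish an alloc into the first free slot of the session's table.  The slot
 * index becomes the event descriptor handed back to the caller; throws ENFILE
 * if the table is full.
 */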
static int perfmon_install_session_alloc(struct perfmon_session *ps,
                                         struct perfmon_alloc *pa)
{
        int i;

        spin_lock(&ps->lock);
        for (i = 0; (i < ARRAY_SIZE(ps->allocs)) && (ps->allocs[i] != NULL); i++)
                ;
        if (likely(i < ARRAY_SIZE(ps->allocs)))
                ps->allocs[i] = pa;
        else
                i = -ENFILE;
        spin_unlock(&ps->lock);
        if (unlikely(i < 0))
                error(-i, ERROR_FIXME);

        return i;
}

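/* Allocate and program a counter for 'pev' on every core in 'cset', then
 * install the resulting alloc in the session.  If any selected core could not
 * get a counter, the whole alloc is torn down and that core's negative errno
 * is returned; on success the returned value is the event descriptor.
 */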
int perfmon_open_event(const struct core_set *cset, struct perfmon_session *ps,
                       const struct perfmon_event *pev)
{
        ERRSTACK(1);
        int i;
        struct perfmon_alloc *pa = perfmon_create_alloc(pev);

        if (waserror()) {
                perfmon_destroy_alloc(pa);
                nexterror();
        }
        smp_do_in_cores(cset, perfmon_do_cores_alloc, pa);

        for (i = 0; i < num_cores; i++) {
                if (core_set_getcpu(cset, i)) {
                        counter_t ccno = pa->cores_counters[i];

                        if (unlikely(ccno < 0)) {
                                perfmon_destroy_alloc(pa);
                                /* Pop the waserror() frame before the early
                                 * return. */
                                poperror();
                                return (int) ccno;
                        }
                }
        }
        /* The perfmon_alloc is not visible to userspace until
         * perfmon_install_session_alloc() completes, and by that time the
         * smp_do_in_cores(perfmon_do_cores_alloc) has run on all cores.  Once
         * published, the perfmon_alloc is never changed.
         */
        i = perfmon_install_session_alloc(ps, pa);
        poperror();

        return i;
}

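/* Look up the alloc behind descriptor 'ped'.  With reset == TRUE the alloc is
 * removed from the session table and its table reference is handed to the
 * caller; otherwise an extra reference is taken.  Throws EBADFD for an invalid
 * descriptor and ENOENT for an empty slot.
 */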
static void perfmon_alloc_get(struct perfmon_session *ps, int ped, bool reset,
                              struct perfmon_alloc **ppa)
{
        struct perfmon_alloc *pa;

        if (unlikely((ped < 0) || (ped >= ARRAY_SIZE(ps->allocs))))
                error(EBADFD, ERROR_FIXME);
        spin_lock(&ps->lock);
        pa = ps->allocs[ped];
        if (likely(pa)) {
                if (reset)
                        ps->allocs[ped] = NULL;
                else
                        kref_get(&pa->ref, 1);
        }
        spin_unlock(&ps->lock);
        if (unlikely(!pa))
                error(ENOENT, ERROR_FIXME);
        *ppa = pa;
}

void perfmon_close_event(struct perfmon_session *ps, int ped)
{
        struct perfmon_alloc *pa;

        perfmon_alloc_get(ps, ped, TRUE, &pa);
        kref_put(&pa->ref);
}

struct perfmon_status *perfmon_get_event_status(struct perfmon_session *ps,
                                                int ped)
{
        struct core_set cset;
        struct perfmon_status_env env;

        perfmon_alloc_get(ps, ped, FALSE, &env.pa);
        env.pef = perfmon_alloc_status();
        perfmon_setup_alloc_core_set(env.pa, &cset);

        smp_do_in_cores(&cset, perfmon_do_cores_status, &env);

        kref_put(&env.pa->ref);

        return env.pef;
}

void perfmon_free_event_status(struct perfmon_status *pef)
{
        kfree(pef);
}

static void perfmon_release_session(struct kref *kref)
{
        struct perfmon_session *ps = container_of(kref, struct perfmon_session,
                                                  ref);
        int i;

        for (i = 0; i < ARRAY_SIZE(ps->allocs); i++) {
                struct perfmon_alloc *pa = ps->allocs[i];

                if (pa)
                        kref_put(&pa->ref);
        }
        kfree(ps);
}

struct perfmon_session *perfmon_create_session(void)
{
        struct perfmon_session *ps = kzmalloc(sizeof(struct perfmon_session),
                                              KMALLOC_WAIT);

        kref_init(&ps->ref, perfmon_release_session, 1);
        spinlock_init(&ps->lock);

        return ps;
}

void perfmon_get_session(struct perfmon_session *ps)
{
        kref_get(&ps->ref, 1);
}

void perfmon_close_session(struct perfmon_session *ps)
{
        if (likely(ps))
                kref_put(&ps->ref);
}