Enable PFM sampling to pass a 64-bit info value
[akaros.git] / kern / arch / x86 / perfmon.c
/* Copyright (c) 2015 Google Inc
 * Davide Libenzi <dlibenzi@google.com>
 * See LICENSE for details.
 */

#include <sys/types.h>
#include <arch/ros/msr-index.h>
#include <arch/ros/membar.h>
#include <arch/x86.h>
#include <arch/msr.h>
#include <arch/uaccess.h>
#include <ros/errno.h>
#include <assert.h>
#include <trap.h>
#include <smp.h>
#include <atomic.h>
#include <core_set.h>
#include <kref.h>
#include <percpu.h>
#include <kmalloc.h>
#include <err.h>
#include <string.h>
#include <profiler.h>
#include <arch/perfmon.h>

#define FIXCNTR_NBITS 4
#define FIXCNTR_MASK (((uint64_t) 1 << FIXCNTR_NBITS) - 1)

struct perfmon_cpu_context {
	spinlock_t lock;
	struct perfmon_event counters[MAX_VAR_COUNTERS];
	struct perfmon_event fixed_counters[MAX_FIX_COUNTERS];
};

struct perfmon_status_env {
	struct perfmon_alloc *pa;
	struct perfmon_status *pef;
};

static struct perfmon_cpu_caps cpu_caps;
static DEFINE_PERCPU(struct perfmon_cpu_context, counters_env);
DEFINE_PERCPU_INIT(perfmon_counters_env_init);

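/* Per-core setup hook registered through DEFINE_PERCPU_INIT() above: all it
 * has to do is initialize the lock protecting each core's counter context.
 */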
static void perfmon_counters_env_init(void)
{
	for (int i = 0; i < num_cores; i++) {
		struct perfmon_cpu_context *cctx = _PERCPU_VARPTR(counters_env, i);

		spinlock_init_irqsave(&cctx->lock);
	}
}

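/* Decode CPUID leaf 0x0a (architectural performance monitoring): EAX carries
 * the version ID, the number and bit width of the general purpose counters,
 * and the architectural events vector length; EDX carries the number and bit
 * width of the fixed function counters.  The version is written last, behind
 * a write barrier, since a non-zero version is what perfmon_init() uses to
 * tell that the caps have already been filled in.
 */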
static void perfmon_read_cpu_caps(struct perfmon_cpu_caps *pcc)
{
	uint32_t a, b, c, d;

	cpuid(0x0a, 0, &a, &b, &c, &d);

	pcc->proc_arch_events = a >> 24;
	pcc->bits_x_counter = (a >> 16) & 0xff;
	pcc->counters_x_proc = (a >> 8) & 0xff;
	pcc->bits_x_fix_counter = (d >> 5) & 0xff;
	pcc->fix_counters_x_proc = d & 0x1f;
	wmb_f();
	pcc->perfmon_version = a & 0xff;
}

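/* Set or clear the enable bit for general purpose counter 'event' in
 * MSR_CORE_PERF_GLOBAL_CTRL (bits 0..N-1 control the N general purpose
 * counters).
 */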
static void perfmon_enable_event(int event, bool enable)
{
	uint64_t gctrl = read_msr(MSR_CORE_PERF_GLOBAL_CTRL);

	if (enable)
		write_msr(MSR_CORE_PERF_GLOBAL_CTRL, gctrl | (1 << event));
	else
		write_msr(MSR_CORE_PERF_GLOBAL_CTRL, gctrl & ~(1 << event));
}

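/* Same as above for fixed function counter 'event': its enable bit sits at
 * position 32 + event of MSR_CORE_PERF_GLOBAL_CTRL.
 */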
static void perfmon_enable_fix_event(int event, bool enable)
{
	uint64_t gctrl = read_msr(MSR_CORE_PERF_GLOBAL_CTRL);

	if (enable)
		write_msr(MSR_CORE_PERF_GLOBAL_CTRL,
			  gctrl | ((uint64_t) 1 << (32 + event)));
	else
		write_msr(MSR_CORE_PERF_GLOBAL_CTRL,
			  gctrl & ~((uint64_t) 1 << (32 + event)));
}

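/* A general purpose counter is considered free on this core when its event
 * select MSR is zero, which is the state perfmon_init() and the free path
 * leave it in.
 */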
static bool perfmon_event_available(uint32_t event)
{
	return read_msr(MSR_ARCH_PERFMON_EVENTSEL0 + event) == 0;
}

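/* Build the new value of MSR_CORE_PERF_FIXED_CTR_CTRL for fixed counter
 * 'eventno'.  Each fixed counter owns a 4-bit (FIXCNTR_NBITS) field in that
 * MSR: bit 0 enables ring 0 counting, bit 1 enables ring 3 counting, and bit
 * 3 enables the PMI on overflow.  The field is merged into 'base' so the
 * other counters' configuration is preserved.
 */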
static uint64_t perfmon_get_fixevent_mask(const struct perfmon_event *pev,
					  int eventno, uint64_t base)
{
	uint64_t m = 0;

	if (PMEV_GET_EN(pev->event))
		m |= 1 << 3;
	if (PMEV_GET_OS(pev->event))
		m |= (1 << 0);
	if (PMEV_GET_USR(pev->event))
		m |= (1 << 1);

	m <<= eventno * FIXCNTR_NBITS;
	m |= base & ~(FIXCNTR_MASK << (eventno * FIXCNTR_NBITS));

	return m;
}

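/* Runs on every selected core via smp_do_in_cores().  Claims either the fixed
 * counter named by the event or the first free general purpose counter, arms
 * the counter with -trigger_count so it overflows after trigger_count more
 * events, and records the counter number, or a negative errno, in
 * pa->cores_counters[] for this core.
 */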
static void perfmon_do_cores_alloc(void *opaque)
{
	struct perfmon_alloc *pa = (struct perfmon_alloc *) opaque;
	struct perfmon_cpu_context *cctx = PERCPU_VARPTR(counters_env);
	int i;

	spin_lock_irqsave(&cctx->lock);
	if (perfmon_is_fixed_event(&pa->ev)) {
		uint64_t fxctrl_value = read_msr(MSR_CORE_PERF_FIXED_CTR_CTRL), tmp;

		i = PMEV_GET_EVENT(pa->ev.event);
		if (i >= (int) cpu_caps.fix_counters_x_proc) {
			i = -EINVAL;
		} else if (fxctrl_value & (FIXCNTR_MASK << (i * FIXCNTR_NBITS))) {
			i = -EBUSY;
		} else {
			cctx->fixed_counters[i] = pa->ev;
			PMEV_SET_EN(cctx->fixed_counters[i].event, 1);

			tmp = perfmon_get_fixevent_mask(&pa->ev, i, fxctrl_value);

			perfmon_enable_fix_event(i, TRUE);

			write_msr(MSR_CORE_PERF_FIXED_CTR0 + i,
				  -(int64_t) pa->ev.trigger_count);
			write_msr(MSR_CORE_PERF_FIXED_CTR_CTRL, tmp);
		}
	} else {
		for (i = 0; i < (int) cpu_caps.counters_x_proc; i++) {
			if (cctx->counters[i].event == 0) {
				if (!perfmon_event_available(i))
					warn_once("Counter %d is free but not available", i);
				else
					break;
			}
		}
		if (i < (int) cpu_caps.counters_x_proc) {
			cctx->counters[i] = pa->ev;
			PMEV_SET_EN(cctx->counters[i].event, 1);

			perfmon_enable_event(i, TRUE);

			write_msr(MSR_IA32_PERFCTR0 + i, -(int64_t) pa->ev.trigger_count);
			write_msr(MSR_ARCH_PERFMON_EVENTSEL0 + i,
				  cctx->counters[i].event);
		} else {
			i = -ENOSPC;
		}
	}
	spin_unlock_irqsave(&cctx->lock);

	pa->cores_counters[core_id()] = (counter_t) i;
}

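/* Counterpart of perfmon_do_cores_alloc(), also run via smp_do_in_cores():
 * releases the counter recorded for this core in pa->cores_counters[],
 * clearing its control bits and resetting its count to zero, then stores 0 or
 * a negative errno back into the per-core slot.
 */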
static void perfmon_do_cores_free(void *opaque)
{
	struct perfmon_alloc *pa = (struct perfmon_alloc *) opaque;
	struct perfmon_cpu_context *cctx = PERCPU_VARPTR(counters_env);
	int err = 0, coreno = core_id();
	counter_t ccno = pa->cores_counters[coreno];

	spin_lock_irqsave(&cctx->lock);
	if (perfmon_is_fixed_event(&pa->ev)) {
		unsigned int ccbitsh = ccno * FIXCNTR_NBITS;
		uint64_t fxctrl_value = read_msr(MSR_CORE_PERF_FIXED_CTR_CTRL);

		if ((ccno >= cpu_caps.fix_counters_x_proc) ||
		    !(fxctrl_value & (FIXCNTR_MASK << ccbitsh))) {
			err = -ENOENT;
		} else {
			perfmon_init_event(&cctx->fixed_counters[ccno]);

			perfmon_enable_fix_event((int) ccno, FALSE);

			write_msr(MSR_CORE_PERF_FIXED_CTR_CTRL,
				  fxctrl_value & ~(FIXCNTR_MASK << ccbitsh));
			write_msr(MSR_CORE_PERF_FIXED_CTR0 + ccno, 0);
		}
	} else {
		if (ccno < (int) cpu_caps.counters_x_proc) {
			perfmon_init_event(&cctx->counters[ccno]);

			perfmon_enable_event((int) ccno, FALSE);

			write_msr(MSR_ARCH_PERFMON_EVENTSEL0 + ccno, 0);
			write_msr(MSR_IA32_PERFCTR0 + ccno, 0);
		} else {
			err = -ENOENT;
		}
	}
	spin_unlock_irqsave(&cctx->lock);

	pa->cores_counters[coreno] = (counter_t) err;
}

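/* Runs on each core that holds a counter for the allocation and snapshots the
 * raw counter value into env->pef->cores_values[] for that core.
 */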
static void perfmon_do_cores_status(void *opaque)
{
	struct perfmon_status_env *env = (struct perfmon_status_env *) opaque;
	struct perfmon_cpu_context *cctx = PERCPU_VARPTR(counters_env);
	int coreno = core_id();
	counter_t ccno = env->pa->cores_counters[coreno];

	spin_lock_irqsave(&cctx->lock);
	if (perfmon_is_fixed_event(&env->pa->ev))
		env->pef->cores_values[coreno] =
			read_msr(MSR_CORE_PERF_FIXED_CTR0 + ccno);
	else
		env->pef->cores_values[coreno] =
			read_msr(MSR_IA32_PERFCTR0 + ccno);
	spin_unlock_irqsave(&cctx->lock);
}

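/* Build the set of cores on which the allocation actually holds a counter,
 * i.e. the cores whose cores_counters[] entry is a valid (non-negative)
 * counter number.
 */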
static void perfmon_setup_alloc_core_set(const struct perfmon_alloc *pa,
					 struct core_set *cset)
{
	int i;

	core_set_init(cset);
	for (i = 0; i < num_cores; i++) {
		if (pa->cores_counters[i] >= 0)
			core_set_setcpu(cset, i);
	}
}

static void perfmon_cleanup_cores_alloc(struct perfmon_alloc *pa)
{
	struct core_set cset;

	perfmon_setup_alloc_core_set(pa, &cset);
	smp_do_in_cores(&cset, perfmon_do_cores_free, pa);
}

static void perfmon_free_alloc(struct perfmon_alloc *pa)
{
	kfree(pa);
}

static void perfmon_destroy_alloc(struct perfmon_alloc *pa)
{
	if (pa) {
		perfmon_cleanup_cores_alloc(pa);
		perfmon_free_alloc(pa);
	}
}

static void perfmon_release_alloc(struct kref *kref)
{
	struct perfmon_alloc *pa = container_of(kref, struct perfmon_alloc, ref);

	perfmon_destroy_alloc(pa);
}

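/* Allocate a perfmon_alloc with one counter_t slot per core, all marked
 * INVALID_COUNTER until perfmon_do_cores_alloc() fills them in.  The caller
 * holds the initial kref; perfmon_release_alloc() tears the allocation down
 * when the last reference is dropped.
 */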
static struct perfmon_alloc *perfmon_create_alloc(const struct perfmon_event *pev)
{
	int i;
	struct perfmon_alloc *pa = kzmalloc(sizeof(struct perfmon_alloc) +
					    num_cores * sizeof(counter_t),
					    KMALLOC_WAIT);

	kref_init(&pa->ref, perfmon_release_alloc, 1);
	pa->ev = *pev;
	for (i = 0; i < num_cores; i++)
		pa->cores_counters[i] = INVALID_COUNTER;

	return pa;
}

static struct perfmon_status *perfmon_alloc_status(void)
{
	struct perfmon_status *pef = kzmalloc(sizeof(struct perfmon_status) +
					      num_cores * sizeof(uint64_t),
					      KMALLOC_WAIT);

	return pef;
}

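/* Program the local APIC performance counter LVT entry with the PMI vector.
 * Writing the entry with just the vector also clears the mask bit, which is
 * what re-enables delivery after the interrupt handler runs.
 */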
static void perfmon_arm_irq(void)
{
	write_mmreg32(LAPIC_LVT_PERFMON, IdtLAPIC_PCINT);
}

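/* Per-core PMU initialization: allow userspace RDPMC, make sure the CPU caps
 * are known, zero every counter and selector, and unmask the performance
 * counter interrupt in the local APIC.
 */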
void perfmon_init(void)
{
	int i;

	/* Enable user level access to the performance counters */
	lcr4(rcr4() | CR4_PCE);

	/* This will be called on every core, but the caps only need to be read
	 * once.  All the calls to perfmon_init() are made while the cores boot,
	 * so there will be no perfmon users calling it while
	 * perfmon_read_cpu_caps() is executing.
	 * All the cores will be writing the same values, so even from that POV,
	 * no serialization is required.
	 */
	if (cpu_caps.perfmon_version == 0)
		perfmon_read_cpu_caps(&cpu_caps);

	/* Reset all the counters and selectors to zero. */
	write_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	for (i = 0; i < (int) cpu_caps.counters_x_proc; i++) {
		write_msr(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0);
		write_msr(MSR_IA32_PERFCTR0 + i, 0);
	}
	write_msr(MSR_CORE_PERF_FIXED_CTR_CTRL, 0);
	for (i = 0; i < (int) cpu_caps.fix_counters_x_proc; i++)
		write_msr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);

	perfmon_arm_irq();
}

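/* Pack the event identity into the 64-bit info value attached to each
 * profiler sample: unit mask in bits 15..8, event code in bits 7..0, and bit
 * 16 set for fixed function counters, all tagged with PROF_DOM_PMU via
 * PROF_MKINFO().
 */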
static uint64_t perfmon_make_sample_event(const struct perfmon_event *pev)
{
	uint64_t ei = ((uint64_t) PMEV_GET_MASK(pev->event) << 8) |
		PMEV_GET_EVENT(pev->event);

	if (perfmon_is_fixed_event(pev))
		ei |= 1 << 16;

	return PROF_MKINFO(PROF_DOM_PMU, ei);
}

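/* PMI handler.  With the counters paused, every armed counter whose overflow
 * bit is set in MSR_CORE_PERF_GLOBAL_STATUS gets a profiler sample and is
 * reloaded with -trigger_count, then the overflow bits are acked and counting
 * resumes.
 */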
void perfmon_interrupt(struct hw_trapframe *hw_tf, void *data)
{
	int i;
	struct perfmon_cpu_context *cctx = PERCPU_VARPTR(counters_env);
	uint64_t gctrl, status;

	spin_lock_irqsave(&cctx->lock);
	/* We need to save the global control register, because we have to
	 * disable the counters in order to reset their values.
	 * The global control register is restored on exit.
	 */
	status = read_msr(MSR_CORE_PERF_GLOBAL_STATUS);
	gctrl = read_msr(MSR_CORE_PERF_GLOBAL_CTRL);
	write_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
	for (i = 0; i < (int) cpu_caps.counters_x_proc; i++) {
		if (status & ((uint64_t) 1 << i)) {
			if (cctx->counters[i].event) {
				profiler_add_hw_sample(
					hw_tf, perfmon_make_sample_event(cctx->counters + i));
				write_msr(MSR_IA32_PERFCTR0 + i,
					  -(int64_t) cctx->counters[i].trigger_count);
			}
		}
	}
	for (i = 0; i < (int) cpu_caps.fix_counters_x_proc; i++) {
		if (status & ((uint64_t) 1 << (32 + i))) {
			if (cctx->fixed_counters[i].event) {
				profiler_add_hw_sample(
					hw_tf, perfmon_make_sample_event(cctx->fixed_counters + i));
				write_msr(MSR_CORE_PERF_FIXED_CTR0 + i,
					  -(int64_t) cctx->fixed_counters[i].trigger_count);
			}
		}
	}
	write_msr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
	write_msr(MSR_CORE_PERF_GLOBAL_CTRL, gctrl);
	spin_unlock_irqsave(&cctx->lock);

	/* We need to re-arm the IRQ as the PFM IRQ gets masked on trigger.
	 * Note that KVM and real HW seem to behave differently WRT re-arming
	 * the IRQ: KVM does not mask the IRQ on trigger, while real HW does.
	 */
	perfmon_arm_irq();
}

void perfmon_get_cpu_caps(struct perfmon_cpu_caps *pcc)
{
	memcpy(pcc, &cpu_caps, sizeof(*pcc));
}

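/* Publish the allocation in the first free slot of the session's allocs[]
 * table and return its index, which is the event descriptor ('ped') used by
 * the other session calls; throws ENFILE when the table is full.
 */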
static int perfmon_install_session_alloc(struct perfmon_session *ps,
					 struct perfmon_alloc *pa)
{
	int i;

	spin_lock(&ps->lock);
	for (i = 0; (i < ARRAY_SIZE(ps->allocs)) && (ps->allocs[i] != NULL); i++)
		;
	if (likely(i < ARRAY_SIZE(ps->allocs)))
		ps->allocs[i] = pa;
	else
		i = -ENFILE;
	spin_unlock(&ps->lock);
	if (unlikely(i < 0))
		error(-i, NULL);

	return i;
}

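/* Create a counter allocation for 'pev' on the cores in 'cset' and install it
 * in the session.  Returns the event descriptor on success; if any selected
 * core failed to provide a counter, the whole allocation is torn down and the
 * first per-core error is returned.
 */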
int perfmon_open_event(const struct core_set *cset, struct perfmon_session *ps,
		       const struct perfmon_event *pev)
{
	ERRSTACK(1);
	int i;
	struct perfmon_alloc *pa = perfmon_create_alloc(pev);

	if (waserror()) {
		perfmon_destroy_alloc(pa);
		nexterror();
	}
	smp_do_in_cores(cset, perfmon_do_cores_alloc, pa);

	for (i = 0; i < num_cores; i++) {
		if (core_set_getcpu(cset, i)) {
			counter_t ccno = pa->cores_counters[i];

			if (unlikely(ccno < 0)) {
				poperror();
				perfmon_destroy_alloc(pa);

				return (int) ccno;
			}
		}
	}
	/* The perfmon_alloc data structure will not be visible to userspace
	 * until perfmon_install_session_alloc() completes, and by that time
	 * the smp_do_in_cores(perfmon_do_cores_alloc) will have run on all
	 * cores.
	 * The perfmon_alloc data structure will never be changed once published.
	 */
	i = perfmon_install_session_alloc(ps, pa);
	poperror();

	return i;
}

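/* Look up the allocation bound to event descriptor 'ped'.  With 'reset' set
 * the slot is cleared and the session's reference is handed to the caller
 * (used by close); otherwise an extra reference is taken.  Throws EBADFD for
 * an out-of-range descriptor and ENOENT for an empty slot.
 */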
static void perfmon_alloc_get(struct perfmon_session *ps, int ped, bool reset,
			      struct perfmon_alloc **ppa)
{
	struct perfmon_alloc *pa;

	if (unlikely((ped < 0) || (ped >= ARRAY_SIZE(ps->allocs))))
		error(EBADFD, NULL);
	spin_lock(&ps->lock);
	pa = ps->allocs[ped];
	if (likely(pa)) {
		if (reset)
			ps->allocs[ped] = NULL;
		else
			kref_get(&pa->ref, 1);
	}
	spin_unlock(&ps->lock);
	if (unlikely(!pa))
		error(ENOENT, NULL);
	*ppa = pa;
}

void perfmon_close_event(struct perfmon_session *ps, int ped)
{
	struct perfmon_alloc *pa;

	perfmon_alloc_get(ps, ped, TRUE, &pa);
	kref_put(&pa->ref);
}

struct perfmon_status *perfmon_get_event_status(struct perfmon_session *ps,
						int ped)
{
	struct core_set cset;
	struct perfmon_status_env env;

	perfmon_alloc_get(ps, ped, FALSE, &env.pa);
	env.pef = perfmon_alloc_status();
	perfmon_setup_alloc_core_set(env.pa, &cset);

	smp_do_in_cores(&cset, perfmon_do_cores_status, &env);

	kref_put(&env.pa->ref);

	return env.pef;
}

void perfmon_free_event_status(struct perfmon_status *pef)
{
	kfree(pef);
}

static void perfmon_release_session(struct kref *kref)
{
	struct perfmon_session *ps = container_of(kref, struct perfmon_session,
						  ref);
	int i;

	for (i = 0; i < ARRAY_SIZE(ps->allocs); i++) {
		struct perfmon_alloc *pa = ps->allocs[i];

		if (pa)
			kref_put(&pa->ref);
	}
	kfree(ps);
}

struct perfmon_session *perfmon_create_session(void)
{
	struct perfmon_session *ps = kzmalloc(sizeof(struct perfmon_session),
					      KMALLOC_WAIT);

	kref_init(&ps->ref, perfmon_release_session, 1);
	spinlock_init(&ps->lock);

	return ps;
}

void perfmon_get_session(struct perfmon_session *ps)
{
	kref_get(&ps->ref, 1);
}

void perfmon_close_session(struct perfmon_session *ps)
{
	if (likely(ps))
		kref_put(&ps->ref);
}