/* Copyright (c) 2015 Google Inc
 * Davide Libenzi <dlibenzi@google.com>
 * See LICENSE for details.
 */

#include <ros/common.h>
#include <ros/mman.h>
#include <sys/types.h>
#include <smp.h>
#include <trap.h>
#include <kthread.h>
#include <env.h>
#include <process.h>
#include <mm.h>
#include <vfs.h>
#include <kmalloc.h>
#include <pmap.h>
#include <kref.h>
#include <atomic.h>
#include <umem.h>
#include <elf.h>
#include <ns.h>
#include <err.h>
#include <string.h>
#include "profiler.h"

#define PROFILER_MAX_PRG_PATH   256
#define PROFILER_BT_DEPTH 16

#define VBE_MAX_SIZE(t) ((8 * sizeof(t) + 6) / 7)

struct profiler_cpu_context {
        struct block *block;
        int cpu;
        int tracing;
        size_t dropped_data_size;
};

static int profiler_queue_limit = 64 * 1024 * 1024;
static size_t profiler_cpu_buffer_size = 65536;
static qlock_t profiler_mtx = QLOCK_INITIALIZER(profiler_mtx);
static int tracing;
static struct kref profiler_kref;
static struct profiler_cpu_context *profiler_percpu_ctx;
static struct queue *profiler_queue;

static inline struct profiler_cpu_context *profiler_get_cpu_ctx(int cpu)
{
        return profiler_percpu_ctx + cpu;
}

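/* Appends the variable-byte encoding of 'n' at 'data' and returns the pointer
 * just past the last byte written.  For example, 0x80 encodes as the two
 * bytes 0x80 0x01: low 7 bits first, with the continuation bit set on every
 * byte except the last.
 */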
static inline char *vb_encode_uint64(char *data, uint64_t n)
{
        /* Classical variable-byte encoding.  Encodes 7 bits at a time, using
         * bit 7 of each byte as a continuation marker: set while more bytes
         * follow, clear on the last byte of the sequence.
         */
        for (; n >= 0x80; n >>= 7)
                *data++ = (char) (n | 0x80);
        *data++ = (char) n;

        return data;
}

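/* Pushes the CPU's current block (if any) onto the global profiler queue.  If
 * that pushes the queue above profiler_queue_limit, the oldest queued block
 * is dropped and its length accounted in dropped_data_size.  Returns a fresh
 * block for the caller to keep writing into, or NULL if allocation fails.
 */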
static struct block *profiler_buffer_write(struct profiler_cpu_context *cpu_buf,
                                           struct block *b)
{
        if (b) {
                qibwrite(profiler_queue, b);

                if (qlen(profiler_queue) > profiler_queue_limit) {
                        b = qget(profiler_queue);
                        if (likely(b)) {
                                cpu_buf->dropped_data_size += BLEN(b);
                                freeb(b);
                        }
                }
        }

        return iallocb(profiler_cpu_buffer_size);
}

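/* Reserves 'size' bytes of contiguous space in the CPU's current block,
 * flushing the block and allocating a new one when there is not enough room
 * left.  Returns the write pointer (and the block through 'pb'), or NULL if
 * no block could be obtained; the caller commits what it actually wrote via
 * profiler_cpu_buffer_write_commit().
 */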
static char *profiler_cpu_buffer_write_reserve(
        struct profiler_cpu_context *cpu_buf, size_t size, struct block **pb)
{
        struct block *b = cpu_buf->block;

        if (unlikely((!b) || (b->lim - b->wp) < size)) {
                cpu_buf->block = b = profiler_buffer_write(cpu_buf, b);
                if (unlikely(!b))
                        return NULL;
        }
        *pb = b;

        return (char *) b->wp;
}

static inline void profiler_cpu_buffer_write_commit(
        struct profiler_cpu_context *cpu_buf, struct block *b, size_t size)
{
        b->wp += size;
}

static inline size_t profiler_max_envelope_size(void)
{
        return 2 * VBE_MAX_SIZE(uint64_t);
}

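/* Emits a kernel backtrace record into the per-CPU buffer.  Each record is
 * preceded by a small envelope, the record type and the payload size, both
 * variable-byte encoded, followed by the fixed header and the 64 bit trace
 * entries.  The reservation adds profiler_max_envelope_size() to cover the
 * worst-case envelope.
 */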
static void profiler_push_kernel_trace64(struct profiler_cpu_context *cpu_buf,
                                         const uintptr_t *trace, size_t count)
{
        size_t i, size = sizeof(struct proftype_kern_trace64) +
                count * sizeof(uint64_t);
        struct block *b;
        char *resptr = profiler_cpu_buffer_write_reserve(
                cpu_buf, size + profiler_max_envelope_size(), &b);
        char *ptr = resptr;

        if (likely(ptr)) {
                struct proftype_kern_trace64 *record;

                ptr = vb_encode_uint64(ptr, PROFTYPE_KERN_TRACE64);
                ptr = vb_encode_uint64(ptr, size);

                record = (struct proftype_kern_trace64 *) ptr;
                ptr += size;

                record->tstamp = nsec();
                record->cpu = cpu_buf->cpu;
                record->num_traces = count;
                for (i = 0; i < count; i++)
                        record->trace[i] = (uint64_t) trace[i];

                profiler_cpu_buffer_write_commit(cpu_buf, b, ptr - resptr);
        }
}

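/* Same layout as the kernel trace record, but tagged PROFTYPE_USER_TRACE64
 * and carrying the pid of the process whose user stack was walked.
 */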
static void profiler_push_user_trace64(struct profiler_cpu_context *cpu_buf,
                                       struct proc *p, const uintptr_t *trace,
                                       size_t count)
{
        size_t i, size = sizeof(struct proftype_user_trace64) +
                count * sizeof(uint64_t);
        struct block *b;
        char *resptr = profiler_cpu_buffer_write_reserve(
                cpu_buf, size + profiler_max_envelope_size(), &b);
        char *ptr = resptr;

        if (likely(ptr)) {
                struct proftype_user_trace64 *record;

                ptr = vb_encode_uint64(ptr, PROFTYPE_USER_TRACE64);
                ptr = vb_encode_uint64(ptr, size);

                record = (struct proftype_user_trace64 *) ptr;
                ptr += size;

                record->tstamp = nsec();
                record->pid = p->pid;
                record->cpu = cpu_buf->cpu;
                record->num_traces = count;
                for (i = 0; i < count; i++)
                        record->trace[i] = (uint64_t) trace[i];

                profiler_cpu_buffer_write_commit(cpu_buf, b, ptr - resptr);
        }
}

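/* Records an executable mapping of 'path' into process 'p' at 'addr', so a
 * consumer of the trace can map user PCs back to the file and offset they
 * came from.  The record is staged in a temporary kmalloc() buffer and
 * written straight to the global queue, bypassing the per-CPU buffers.
 */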
static void profiler_push_pid_mmap(struct proc *p, uintptr_t addr, size_t msize,
                                   size_t offset, const char *path)
{
        size_t plen = strlen(path) + 1,
                size = sizeof(struct proftype_pid_mmap64) + plen;
        char *resptr = kmalloc(size + profiler_max_envelope_size(), 0);

        if (likely(resptr)) {
                char *ptr = resptr;
                struct proftype_pid_mmap64 *record;

                ptr = vb_encode_uint64(ptr, PROFTYPE_PID_MMAP64);
                ptr = vb_encode_uint64(ptr, size);

                record = (struct proftype_pid_mmap64 *) ptr;
                ptr += size;

                record->tstamp = nsec();
                record->pid = p->pid;
                record->addr = addr;
                record->size = msize;
                record->offset = offset;
                memcpy(record->path, path, plen);

                qiwrite(profiler_queue, resptr, (int) (ptr - resptr));

                kfree(resptr);
        }
}

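/* Records the creation of process 'p' along with its binary path, again
 * staged in a temporary buffer and written directly to the global queue.
 */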
static void profiler_push_new_process(struct proc *p)
{
        size_t plen = strlen(p->binary_path) + 1,
                size = sizeof(struct proftype_new_process) + plen;
        char *resptr = kmalloc(size + profiler_max_envelope_size(), 0);

        if (likely(resptr)) {
                char *ptr = resptr;
                struct proftype_new_process *record;

                ptr = vb_encode_uint64(ptr, PROFTYPE_NEW_PROCESS);
                ptr = vb_encode_uint64(ptr, size);

                record = (struct proftype_new_process *) ptr;
                ptr += size;

                record->tstamp = nsec();
                record->pid = p->pid;
                memcpy(record->path, p->binary_path, plen);

                qiwrite(profiler_queue, resptr, (int) (ptr - resptr));

                kfree(resptr);
        }
}

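/* Walks every process in the system and re-issues profiler_notify_mmap() for
 * each of its VM regions, so executable mappings established before profiling
 * started can still be resolved by the trace consumer.
 */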
static void profiler_emit_current_system_status(void)
{
        void enum_proc(struct vm_region *vmr, void *opaque)
        {
                struct proc *p = (struct proc *) opaque;

                profiler_notify_mmap(p, vmr->vm_base, vmr->vm_end - vmr->vm_base,
                                     vmr->vm_prot, vmr->vm_flags, vmr->vm_file,
                                     vmr->vm_foff);
        }

        ERRSTACK(1);
        struct process_set pset;

        proc_get_set(&pset);
        if (waserror()) {
                proc_free_set(&pset);
                nexterror();
        }

        for (size_t i = 0; i < pset.num_processes; i++)
                enumerate_vmrs(pset.procs[i], enum_proc, pset.procs[i]);

        poperror();
        proc_free_set(&pset);
}

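/* Returns whether this CPU should be collecting samples.  A tracing value of
 * -1 is the "stop requested" state set by profiler_control_trace(); the first
 * sample attempt after that flushes the CPU's pending block to the queue and
 * drops tracing to 0.
 */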
static inline bool profiler_is_tracing(struct profiler_cpu_context *cpu_buf)
{
        if (unlikely(cpu_buf->tracing < 0)) {
                if (cpu_buf->block) {
                        qibwrite(profiler_queue, cpu_buf->block);

                        cpu_buf->block = NULL;
                }

                cpu_buf->tracing = 0;
        }

        return (cpu_buf->tracing != 0) ? TRUE : FALSE;
}

static void free_cpu_buffers(void)
{
        kfree(profiler_percpu_ctx);
        profiler_percpu_ctx = NULL;

        if (profiler_queue) {
                qclose(profiler_queue);
                profiler_queue = NULL;
        }
}

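/* Allocates the global profiler queue and the per-CPU contexts.  The queue is
 * set to drop on overflow and to never block, since samples are pushed from
 * interrupt context where sleeping is not an option.
 */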
static void alloc_cpu_buffers(void)
{
        ERRSTACK(1);
        int i;

        profiler_queue = qopen(profiler_queue_limit, 0, NULL, NULL);
        if (!profiler_queue)
                error(ENOMEM, NULL);
        if (waserror()) {
                free_cpu_buffers();
                nexterror();
        }

        qdropoverflow(profiler_queue, TRUE);
        qnonblock(profiler_queue, TRUE);

        profiler_percpu_ctx =
                kzmalloc(sizeof(*profiler_percpu_ctx) * num_cores, KMALLOC_WAIT);

        for (i = 0; i < num_cores; i++) {
                struct profiler_cpu_context *b = &profiler_percpu_ctx[i];

                b->cpu = i;
        }

        poperror();
}

static long profiler_get_checked_value(const char *value, long k, long minval,
                                       long maxval)
{
        long lvalue = strtol(value, NULL, 0) * k;

        if (lvalue < minval)
                error(EFAIL, "Value should be at least %ld", minval);
        if (lvalue > maxval)
                error(EFAIL, "Value should be at most %ld", maxval);

        return lvalue;
}

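/* Handles profiler configuration commands parsed from a ctl-style write:
 *   prof_qlimit KB    - cap on the global queue size (rejected while the
 *                       profiler is in use)
 *   prof_cpubufsz KB  - size of each per-CPU block
 * Returns 1 if the command was consumed, 0 if it is not a profiler command.
 */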
int profiler_configure(struct cmdbuf *cb)
{
        if (!strcmp(cb->f[0], "prof_qlimit")) {
                if (cb->nf < 2)
                        error(EFAIL, "prof_qlimit KB");
                if (kref_refcnt(&profiler_kref) > 0)
                        error(EFAIL, "Profiler already running");
                profiler_queue_limit = (int) profiler_get_checked_value(
                        cb->f[1], 1024, 1024 * 1024, max_pmem / 32);
        } else if (!strcmp(cb->f[0], "prof_cpubufsz")) {
                if (cb->nf < 2)
                        error(EFAIL, "prof_cpubufsz KB");
                profiler_cpu_buffer_size = (size_t) profiler_get_checked_value(
                        cb->f[1], 1024, 16 * 1024, 1024 * 1024);
        } else {
                return 0;
        }

        return 1;
}

const char* const *profiler_configure_cmds(void)
{
        static const char * const cmds[] = {
                "prof_qlimit", "prof_cpubufsz",
                NULL
        };

        return cmds;
}

static void profiler_release(struct kref *kref)
{
        bool got_reference = FALSE;

        assert(kref == &profiler_kref);
        qlock(&profiler_mtx);
        /* Make sure we did not race with profiler_setup(), which could have
         * grabbed profiler_mtx just before us and re-initialized the profiler
         * for a new user.
         * If we race here with another profiler_release() (the user did a
         * profiler_setup() immediately followed by a profiler_cleanup()), we
         * are fine, because free_cpu_buffers() can be called multiple times.
         */
        if (!kref_get_not_zero(kref, 1))
                free_cpu_buffers();
        else
                got_reference = TRUE;
        qunlock(&profiler_mtx);
        /* We cannot call kref_put() while holding profiler_mtx, since that
         * call might trigger another call to profiler_release().
         */
        if (got_reference)
                kref_put(kref);
}

void profiler_init(void)
{
        assert(kref_refcnt(&profiler_kref) == 0);
        kref_init(&profiler_kref, profiler_release, 0);
}

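/* Prepares the profiler for a new user: allocates the buffers on first use,
 * replays the current processes and mappings into the trace, and takes the
 * kref that profiler_cleanup() later drops.  profiler_release() tears
 * everything down once the last reference goes away.
 */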
void profiler_setup(void)
{
        ERRSTACK(1);

        qlock(&profiler_mtx);
        if (waserror()) {
                qunlock(&profiler_mtx);
                nexterror();
        }
        if (!profiler_queue)
                alloc_cpu_buffers();

        profiler_emit_current_system_status();

        /* Take the reference only once everything else is initialized; it
         * must be the last setup operation.
         */
        __kref_get(&profiler_kref, 1);

        poperror();
        qunlock(&profiler_mtx);
}

void profiler_cleanup(void)
{
        kref_put(&profiler_kref);
}

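/* Globally starts (onoff != 0) or stops sample collection by flipping the
 * per-CPU tracing flags; see profiler_is_tracing() for how a stop request is
 * completed on each core.
 */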
void profiler_control_trace(int onoff)
{
        int core;

        tracing = onoff;
        for (core = 0; core < num_cores; core++) {
                struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core);

                /* We cannot directly access another CPU's buffer from here in
                 * order to flush it.  So, when disabling, we set tracing = -1
                 * and let profiler_is_tracing() perform the flush on that core
                 * at its next timer tick.
                 */
                cpu_buf->tracing = onoff ? 1 : -1;
        }
}

void profiler_add_trace(uintptr_t pc)
{
        if (is_user_raddr((void *) pc, 1))
                profiler_add_user_backtrace(pc, 0);
        else
                profiler_add_kernel_backtrace(pc, 0);
}

void profiler_add_kernel_backtrace(uintptr_t pc, uintptr_t fp)
{
        if (kref_get_not_zero(&profiler_kref, 1)) {
                struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core_id());

                if (profiler_percpu_ctx && profiler_is_tracing(cpu_buf)) {
                        uintptr_t trace[PROFILER_BT_DEPTH];
                        size_t n = 1;

                        trace[0] = pc;
                        if (likely(fp))
                                n = backtrace_list(pc, fp, trace + 1,
                                                   PROFILER_BT_DEPTH - 1) + 1;

                        profiler_push_kernel_trace64(cpu_buf, trace, n);
                }
                kref_put(&profiler_kref);
        }
}

void profiler_add_user_backtrace(uintptr_t pc, uintptr_t fp)
{
        if (kref_get_not_zero(&profiler_kref, 1)) {
                struct proc *p = current;
                struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core_id());

                if (p && profiler_percpu_ctx && profiler_is_tracing(cpu_buf)) {
                        uintptr_t trace[PROFILER_BT_DEPTH];
                        size_t n = 1;

                        trace[0] = pc;
                        if (likely(fp))
                                n = backtrace_user_list(pc, fp, trace + 1,
                                                        PROFILER_BT_DEPTH - 1) + 1;

                        profiler_push_user_trace64(cpu_buf, p, trace, n);
                }
                kref_put(&profiler_kref);
        }
}

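/* Entry point for hardware/timer samples: records a kernel or a user
 * backtrace depending on where the interrupted trapframe was executing.
 */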
void profiler_add_hw_sample(struct hw_trapframe *hw_tf)
{
        if (in_kernel(hw_tf))
                profiler_add_kernel_backtrace(get_hwtf_pc(hw_tf), get_hwtf_fp(hw_tf));
        else
                profiler_add_user_backtrace(get_hwtf_pc(hw_tf), get_hwtf_fp(hw_tf));
}

int profiler_size(void)
{
        return profiler_queue ? qlen(profiler_queue) : 0;
}

int profiler_read(void *va, int n)
{
        return profiler_queue ? qread(profiler_queue, va, n) : 0;
}

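/* Notification hook invoked when a process maps a file.  Only executable,
 * file backed mappings are recorded, and only while tracing is active, since
 * those are what is needed to symbolize user PCs later on.
 */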
void profiler_notify_mmap(struct proc *p, uintptr_t addr, size_t size, int prot,
                          int flags, struct file *f, size_t offset)
{
        if (kref_get_not_zero(&profiler_kref, 1)) {
                if (f && (prot & PROT_EXEC) && profiler_percpu_ctx && tracing) {
                        char path_buf[PROFILER_MAX_PRG_PATH];
                        char *path = file_abs_path(f, path_buf, sizeof(path_buf));

                        if (likely(path))
                                profiler_push_pid_mmap(p, addr, size, offset, path);
                }
                kref_put(&profiler_kref);
        }
}

void profiler_notify_new_process(struct proc *p)
{
        if (kref_get_not_zero(&profiler_kref, 1)) {
                if (profiler_percpu_ctx && tracing && p->binary_path)
                        profiler_push_new_process(p);
                kref_put(&profiler_kref);
        }
}