kprof: use parsecmd() instead of strncmp on user pointers
kern/drivers/dev/kprof.c
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include <ros/profiler_records.h>
#include <arch/time.h>
#include <slab.h>
#include <kmalloc.h>
#include <kref.h>
#include <atomic.h>
#include <kthread.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <error.h>
#include <pmap.h>
#include <smp.h>
#include <time.h>
#include <circular_buffer.h>
#include <umem.h>
#include <profiler.h>
#include <kprof.h>
#include <ros/procinfo.h>
#include <init.h>

#define KTRACE_BUFFER_SIZE (128 * 1024)
#define TRACE_PRINTK_BUFFER_SIZE (8 * 1024)

enum {
	Kprofdirqid = 0,
	Kprofdataqid,
	Kprofctlqid,
	Kptracectlqid,
	Kptraceqid,
	Kprintxqid,
	Kmpstatqid,
	Kmpstatrawqid,
};

struct trace_printk_buffer {
	atomic_t in_use;
	char buffer[TRACE_PRINTK_BUFFER_SIZE];
};

struct kprof {
	qlock_t lock;
	bool mpstat_ipi;
	bool profiling;
	bool opened;
};

struct dev kprofdevtab;
struct dirtab kproftab[] = {
	{".",		{Kprofdirqid, 0, QTDIR}, 0,	DMDIR|0550},
	{"kpdata",	{Kprofdataqid},		0,	0600},
	{"kpctl",	{Kprofctlqid},		0,	0600},
	{"kptrace_ctl",	{Kptracectlqid},	0,	0660},
	{"kptrace",	{Kptraceqid},		0,	0600},
	{"kprintx",	{Kprintxqid},		0,	0600},
	{"mpstat",	{Kmpstatqid},		0,	0600},
	{"mpstat-raw",	{Kmpstatrawqid},	0,	0600},
};
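
/*
 * A quick map of the files this dirtab exposes under #K, per the table
 * above and the handlers below:
 *	kpctl       control file: start|stop|flush, plus profiler options
 *	kpdata      stream of profiler samples
 *	kptrace     trace_printk() ring; writes append a trace record
 *	kptrace_ctl control file: "clear" resets the trace ring
 *	kprintx     printx control: on|off|toggle
 *	mpstat      per-core CPU state times, human readable
 *	mpstat-raw  same data with a version header and hex tick counts
 */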

static struct kprof kprof;
static bool ktrace_init_done = FALSE;
static spinlock_t ktrace_lock = SPINLOCK_INITIALIZER_IRQSAVE;
static struct circular_buffer ktrace_data;
static char ktrace_buffer[KTRACE_BUFFER_SIZE];
static char kprof_control_usage[128];

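/*
 * A sizing note, derived from mpstat_read()'s format strings below: each
 * row is a 7-char "%5d: " prefix plus a 26-char column per CPU state
 * (17 chars of seconds.usecs, a 7-char percentage field, and a 2-char
 * separator).  We budget one such row per core plus one for the (shorter)
 * header, and a final +1 for the terminating NUL.
 */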
static size_t mpstat_len(void)
{
	size_t each_row = 7 + NR_CPU_STATES * 26;

	return each_row * (num_cores + 1) + 1;
}

static size_t mpstatraw_len(void)
{
	size_t header_row = 27 + NR_CPU_STATES * 7 + 1;
	size_t cpu_row = 7 + NR_CPU_STATES * 17;

	return header_row + cpu_row * num_cores + 1;
}

static char *devname(void)
{
	return kprofdevtab.name;
}

static struct chan *kprof_attach(char *spec)
{
	return devattach(devname(), spec);
}

/* Start collecting samples from perf events into the profiler.
 *
 * This command only runs if the user successfully opened kpctl, which gives
 * them a profiler (the global profiler, for now). */
static void kprof_start_profiler(void)
{
	ERRSTACK(1);

	qlock(&kprof.lock);
	if (waserror()) {
		qunlock(&kprof.lock);
		nexterror();
	}
	if (!kprof.profiling) {
		profiler_start();
		kprof.profiling = TRUE;
	}
	poperror();
	qunlock(&kprof.lock);
}

/* Stops collecting samples from perf events.
 *
 * This command only runs if the user successfully opened kpctl, which gives
 * them a profiler (the global profiler, for now). */
static void kprof_stop_profiler(void)
{
	ERRSTACK(1);

	qlock(&kprof.lock);
	if (waserror()) {
		qunlock(&kprof.lock);
		nexterror();
	}
	if (kprof.profiling) {
		profiler_stop();
		kprof.profiling = FALSE;
	}
	poperror();
	qunlock(&kprof.lock);
}

/* Makes each core flush its results into the profiler queue.  You can do this
 * while the profiler is still running.  However, this does not hang up the
 * queue, so reads on kpdata will block. */
static void kprof_flush_profiler(void)
{
	ERRSTACK(1);

	qlock(&kprof.lock);
	if (waserror()) {
		qunlock(&kprof.lock);
		nexterror();
	}
	if (kprof.profiling)
		profiler_trace_data_flush();
	poperror();
	qunlock(&kprof.lock);
}

static void kprof_init(void)
{
	profiler_init();

	qlock_init(&kprof.lock);
	kprof.profiling = FALSE;
	kprof.opened = FALSE;

	for (int i = 0; i < ARRAY_SIZE(kproftab); i++)
		kproftab[i].length = 0;

	kprof.mpstat_ipi = TRUE;
	kproftab[Kmpstatqid].length = mpstat_len();
	kproftab[Kmpstatrawqid].length = mpstatraw_len();

	strlcpy(kprof_control_usage, "start|stop|flush",
		sizeof(kprof_control_usage));
	profiler_append_configure_usage(kprof_control_usage,
					sizeof(kprof_control_usage));
}

static void kprof_shutdown(void)
{
}

static struct walkqid *kprof_walk(struct chan *c, struct chan *nc, char **name,
				  unsigned int nname)
{
	return devwalk(c, nc, name, nname, kproftab, ARRAY_SIZE(kproftab),
		       devgen);
}

static size_t kprof_profdata_size(void)
{
	return profiler_size();
}

static long kprof_profdata_read(void *dest, long size, int64_t off)
{
	return profiler_read(dest, size);
}

static size_t kprof_stat(struct chan *c, uint8_t *db, size_t n)
{
	kproftab[Kprofdataqid].length = kprof_profdata_size();
	kproftab[Kptraceqid].length = kprof_tracedata_size();

	return devstat(c, db, n, kproftab, ARRAY_SIZE(kproftab), devgen);
}

static struct chan *kprof_open(struct chan *c, int omode)
{
	if (c->qid.type & QTDIR) {
		if (openmode(omode) != O_READ)
			error(EPERM, ERROR_FIXME);
	}
	switch ((int) c->qid.path) {
	case Kprofctlqid:
		/* We have one global profiler.  Only one FD may be opened at a
		 * time for it.  If we ever have separate profilers, we can
		 * create the profiler here, and every open would get a separate
		 * instance. */
		qlock(&kprof.lock);
		if (kprof.opened) {
			qunlock(&kprof.lock);
			error(EBUSY, "Global profiler is already open");
		}
		kprof.opened = TRUE;
		/* TODO: have a creation function for a non-global profiler */
		profiler_setup();
		qunlock(&kprof.lock);
		break;
	}
	c->mode = openmode(omode);
	c->flag |= COPEN;
	c->offset = 0;
	return c;
}

static void kprof_close(struct chan *c)
{
	if (c->flag & COPEN) {
		switch ((int) c->qid.path) {
		case Kprofctlqid:
			kprof_stop_profiler();
			qlock(&kprof.lock);
			profiler_cleanup();
			kprof.opened = FALSE;
			qunlock(&kprof.lock);
			break;
		}
	}
}

static long mpstat_read(void *va, long n, int64_t off)
{
	size_t bufsz = mpstat_len();
	char *buf = kmalloc(bufsz, MEM_WAIT);
	int len = 0;
	struct per_cpu_info *pcpui;
	uint64_t cpu_total;
	struct timespec ts;

	/* The IPI interferes with other cores; we might want to disable it. */
	if (kprof.mpstat_ipi)
		send_broadcast_ipi(I_POKE_CORE);

	len += snprintf(buf + len, bufsz - len, "  CPU: ");
	for (int j = 0; j < NR_CPU_STATES; j++)
		len += snprintf(buf + len, bufsz - len, "%23s%s",
				cpu_state_names[j],
				j != NR_CPU_STATES - 1 ? " " : "  \n");

	for (int i = 0; i < num_cores; i++) {
		pcpui = &per_cpu_info[i];
		cpu_total = 0;
		len += snprintf(buf + len, bufsz - len, "%5d: ", i);
		for (int j = 0; j < NR_CPU_STATES; j++)
			cpu_total += pcpui->state_ticks[j];
		cpu_total = MAX(cpu_total, 1);	/* for the divide later */
		for (int j = 0; j < NR_CPU_STATES; j++) {
			ts = tsc2timespec(pcpui->state_ticks[j]);
			len += snprintf(buf + len, bufsz - len,
					"%10d.%06d (%3d%%)%s",
					ts.tv_sec, ts.tv_nsec / 1000,
					MIN((pcpui->state_ticks[j] * 100) /
					    cpu_total, 100),
					j != NR_CPU_STATES - 1 ? ", " : " \n");
		}
	}
	n = readstr(off, va, n, buf);
	kfree(buf);
	return n;
}
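
/*
 * An illustrative mpstat row (state names and values invented for the
 * example; widths follow the format strings above):
 *
 *	    2:         12.000321 ( 40%),          3.000210 ( 10%), ...
 *
 * i.e., seconds.microseconds spent in each CPU state, plus that state's
 * share of the core's total ticks.
 */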

static long mpstatraw_read(void *va, long n, int64_t off)
{
	size_t bufsz = mpstatraw_len();
	char *buf = kmalloc(bufsz, MEM_WAIT);
	int len = 0;
	struct per_cpu_info *pcpui;

	/* We could spit it all out in binary, though then it'd be harder to
	 * process the data across a mnt (if we export #K).  Probably not a
	 * big deal. */

	/* header line: version, num_cores, tsc freq, state names */
	len += snprintf(buf + len, bufsz - len, "v%03d %5d %16llu", 1,
			num_cores, __proc_global_info.tsc_freq);
	for (int j = 0; j < NR_CPU_STATES; j++)
		len += snprintf(buf + len, bufsz - len, " %6s",
				cpu_state_names[j]);
	len += snprintf(buf + len, bufsz - len, "\n");

	for (int i = 0; i < num_cores; i++) {
		pcpui = &per_cpu_info[i];
		len += snprintf(buf + len, bufsz - len, "%5d: ", i);
		for (int j = 0; j < NR_CPU_STATES; j++) {
			len += snprintf(buf + len, bufsz - len, "%16llx%s",
					pcpui->state_ticks[j],
					j != NR_CPU_STATES - 1 ? " " : "\n");
		}
	}
	n = readstr(off, va, n, buf);
	kfree(buf);
	return n;
}
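
/*
 * A consumer-side note: the raw header is "v001 <num_cores> <tsc_freq>"
 * followed by the state names, and each row is the core number plus one
 * %16llx tick count per state.  Ticks convert to time by dividing by the
 * advertised TSC frequency, as tsc2timespec() does for the pretty file.
 */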

static size_t kprof_read(struct chan *c, void *va, size_t n, off64_t off)
{
	uintptr_t offset = off;

	switch ((int) c->qid.path) {
	case Kprofdirqid:
		return devdirread(c, va, n, kproftab, ARRAY_SIZE(kproftab),
				  devgen);
	case Kprofdataqid:
		n = kprof_profdata_read(va, n, off);
		break;
	case Kptraceqid:
		n = kprof_tracedata_read(va, n, off);
		break;
	case Kprintxqid:
		n = readstr(offset, va, n, printx_on ? "on" : "off");
		break;
	case Kmpstatqid:
		n = mpstat_read(va, n, offset);
		break;
	case Kmpstatrawqid:
		n = mpstatraw_read(va, n, offset);
		break;
	default:
		n = 0;
		break;
	}
	return n;
}

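/*
 * Control writes are tokenized with parsecmd(), which builds a kernel-side
 * cmdbuf from the write buffer; the handlers below strcmp() those kernel
 * copies of each field rather than running strncmp() against user pointers.
 */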
static size_t kprof_write(struct chan *c, void *a, size_t n, off64_t unused)
{
	ERRSTACK(1);
	struct cmdbuf *cb = parsecmd(a, n);

	if (waserror()) {
		kfree(cb);
		nexterror();
	}
	switch ((int) c->qid.path) {
	case Kprofctlqid:
		if (cb->nf < 1)
			error(EFAIL, kprof_control_usage);
		if (profiler_configure(cb))
			break;
		if (!strcmp(cb->f[0], "start")) {
			kprof_start_profiler();
		} else if (!strcmp(cb->f[0], "flush")) {
			kprof_flush_profiler();
		} else if (!strcmp(cb->f[0], "stop")) {
			kprof_stop_profiler();
		} else {
			error(EFAIL, kprof_control_usage);
		}
		break;
	case Kptracectlqid:
		if (cb->nf < 1)
			error(EFAIL, "Bad kptrace_ctl option (clear)");
		if (!strcmp(cb->f[0], "clear")) {
			spin_lock_irqsave(&ktrace_lock);
			circular_buffer_clear(&ktrace_data);
			spin_unlock_irqsave(&ktrace_lock);
		}
		break;
	case Kptraceqid:
		if (a && (n > 0)) {
			char *uptr = user_strdup_errno(current, a, n);

			if (uptr) {
				trace_printk("%s", uptr);
				user_memdup_free(current, uptr);
			} else {
				n = -1;
			}
		}
		break;
	case Kprintxqid:
		if (cb->nf < 1)
			error(EFAIL, "no printx option: (on|off|toggle)");
		if (!strcmp(cb->f[0], "on"))
			set_printx(1);
		else if (!strcmp(cb->f[0], "off"))
			set_printx(0);
		else if (!strcmp(cb->f[0], "toggle"))
			set_printx(2);
		else
			error(EFAIL, "bad printx option: (on|off|toggle)");
		break;
	case Kmpstatqid:
	case Kmpstatrawqid:
		if (cb->nf < 1)
			error(EFAIL, "Bad mpstat option (reset|ipi|on|off)");
		if (!strcmp(cb->f[0], "reset")) {
			for (int i = 0; i < num_cores; i++)
				reset_cpu_state_ticks(i);
		} else if (!strcmp(cb->f[0], "on")) {
			/* TODO: enable the ticks */ ;
		} else if (!strcmp(cb->f[0], "off")) {
			/* TODO: disable the ticks */ ;
		} else if (!strcmp(cb->f[0], "ipi")) {
			if (cb->nf < 2)
				error(EFAIL, "Need another arg: ipi [on|off]");
			if (!strcmp(cb->f[1], "on"))
				kprof.mpstat_ipi = TRUE;
			else if (!strcmp(cb->f[1], "off"))
				kprof.mpstat_ipi = FALSE;
			else
				error(EFAIL, "ipi [on|off]");
		} else {
			error(EFAIL, "Bad mpstat option (reset|ipi|on|off)");
		}
		break;
	default:
		error(EBADFD, ERROR_FIXME);
	}
	kfree(cb);
	poperror();
	return n;
}
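
/*
 * Example control writes from the shell.  This is a sketch: it assumes #K
 * is bound somewhere like /prof, which is a convention, not something this
 * file enforces.
 *
 *	echo start > /prof/kpctl	# begin collecting samples
 *	echo flush > /prof/kpctl	# push per-core data toward kpdata
 *	echo stop > /prof/kpctl		# stop collecting
 *	cat /prof/kpdata > perf.raw	# consume the samples
 *	echo clear > /prof/kptrace_ctl	# reset the kptrace ring
 *	echo 'ipi off' > /prof/mpstat	# stop poking cores on mpstat reads
 */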

size_t kprof_tracedata_size(void)
{
	return circular_buffer_size(&ktrace_data);
}

size_t kprof_tracedata_read(void *data, size_t size, size_t offset)
{
	spin_lock_irqsave(&ktrace_lock);
	if (likely(ktrace_init_done))
		size = circular_buffer_read(&ktrace_data, data, size, offset);
	else
		size = 0;
	spin_unlock_irqsave(&ktrace_lock);

	return size;
}

void kprof_dump_data(void)
{
	char *buf;
	size_t len = kprof_tracedata_size();

	buf = kmalloc(len + 1, MEM_WAIT);
	len = kprof_tracedata_read(buf, len, 0);
	/* The trace data is raw bytes; terminate it before printing. */
	buf[len] = '\0';
	printk("%s", buf);
	kfree(buf);
}

void kprof_tracedata_write(const char *pretty_buf, size_t len)
{
	spin_lock_irqsave(&ktrace_lock);
	if (unlikely(!ktrace_init_done)) {
		circular_buffer_init(&ktrace_data, sizeof(ktrace_buffer),
				     ktrace_buffer);
		ktrace_init_done = TRUE;
	}
	circular_buffer_write(&ktrace_data, pretty_buf, len);
	spin_unlock_irqsave(&ktrace_lock);
}

static struct trace_printk_buffer *kprof_get_printk_buffer(void)
{
	static struct trace_printk_buffer boot_tpb;
	static struct trace_printk_buffer *cpu_tpbs;
	static atomic_t alloc_done;

	if (unlikely(booting))
		return &boot_tpb;
	if (unlikely(!cpu_tpbs)) {
		/* A poor man's per-CPU data structure.  I really do not like
		 * littering global data structures with module-specific data.
		 * We cannot take the ktrace_lock to protect the kzmalloc()
		 * call, as that might trigger printk()s, and we would reenter
		 * here.  Let only one core into the kzmalloc() path, and let
		 * the others get the boot_tpb until finished. */
		if (!atomic_cas(&alloc_done, 0, 1))
			return &boot_tpb;
		cpu_tpbs = kzmalloc(num_cores *
				    sizeof(struct trace_printk_buffer), 0);
	}

	return cpu_tpbs + core_id_early();
}

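/*
 * trace_vprintk() splits its per-core scratch buffer in two: the first 3/8
 * (usr_bufsz) holds the formatted message, and the remainder holds the same
 * text re-emitted line by line with a "[sec.nsec]:cpuN: " header prepended.
 * Only the second, header-annotated copy lands in the ktrace ring.
 */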
void trace_vprintk(const char *fmt, va_list args)
{
	struct print_buf {
		char *ptr;
		char *top;
	};

	void emit_print_buf_str(struct print_buf *pb, const char *str,
				ssize_t size)
	{
		if (size < 0) {
			for (; *str && (pb->ptr < pb->top); str++)
				*(pb->ptr++) = *str;
		} else {
			for (; (size > 0) && (pb->ptr < pb->top); str++, size--)
				*(pb->ptr++) = *str;
		}
	}

	static const size_t bufsz = TRACE_PRINTK_BUFFER_SIZE;
	static const size_t usr_bufsz = (3 * bufsz) / 8;
	static const size_t kp_bufsz = bufsz - usr_bufsz;
	struct trace_printk_buffer *tpb = kprof_get_printk_buffer();
	struct timespec ts_now = { 0, 0 };
	struct print_buf pb;
	char *usrbuf = tpb->buffer, *kpbuf = tpb->buffer + usr_bufsz;
	const char *utop, *uptr;
	char hdr[64];

	if (!atomic_cas(&tpb->in_use, 0, 1))
		return;
	if (likely(__proc_global_info.tsc_freq))
		ts_now = tsc2timespec(read_tsc());
	snprintf(hdr, sizeof(hdr), "[%lu.%09lu]:cpu%d: ", ts_now.tv_sec,
		 ts_now.tv_nsec, core_id_early());

	pb.ptr = usrbuf + vsnprintf(usrbuf, usr_bufsz, fmt, args);
	pb.top = usrbuf + usr_bufsz;

	if (pb.ptr[-1] != '\n')
		emit_print_buf_str(&pb, "\n", 1);
	/* snprintf null terminates the buffer, and does not count that as part
	 * of the len.  If we maxed out the buffer, let's make sure it has a \n.
	 */
	if (pb.ptr == pb.top)
		pb.ptr[-1] = '\n';
	utop = pb.ptr;

	pb.ptr = kpbuf;
	pb.top = kpbuf + kp_bufsz;
	for (uptr = usrbuf; uptr < utop;) {
		const char *nlptr = memchr(uptr, '\n', utop - uptr);

		if (nlptr == NULL)
			nlptr = utop;
		emit_print_buf_str(&pb, hdr, -1);
		emit_print_buf_str(&pb, uptr, (nlptr - uptr) + 1);
		uptr = nlptr + 1;
	}
	kprof_tracedata_write(kpbuf, pb.ptr - kpbuf);
	atomic_set(&tpb->in_use, 0);
}

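/*
 * Example: trace_printk("retry %d on core %d", i, core_id()) would append
 * something like "[1234.000456789]:cpu2: retry 3 on core 2\n" to the trace
 * ring (timestamp and values illustrative).
 */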
void trace_printk(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	trace_vprintk(fmt, args);
	va_end(args);
}

struct dev kprofdevtab __devtab = {
	.name = "kprof",

	.reset = devreset,
	.init = kprof_init,
	.shutdown = kprof_shutdown,
	.attach = kprof_attach,
	.walk = kprof_walk,
	.stat = kprof_stat,
	.open = kprof_open,
	.create = devcreate,
	.close = kprof_close,
	.read = kprof_read,
	.bread = devbread,
	.write = kprof_write,
	.bwrite = devbwrite,
	.remove = devremove,
	.wstat = devwstat,
};