65463874a3fa4fd7fbc5cf4f2bf0f9f1048bdeef
[akaros.git] / tools / profile / perf / perf.c
1 /* Copyright (c) 2015-2016 Google Inc
2  * Barret Rhoden <brho@cs.berkeley.edu>
3  * Davide Libenzi <dlibenzi@google.com>
4  * See LICENSE for details.
5  */
6
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <sys/wait.h>
10 #include <stdint.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <stdlib.h>
14 #include <unistd.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <errno.h>
18 #include <argp.h>
19 #include <parlib/parlib.h>
20 #include <parlib/timing.h>
21 #include "xlib.h"
22 #include "akaros.h"
23 #include "perfconv.h"
24 #include "perf_core.h"
25
26 /* Helpers */
27 static void run_process_and_wait(int argc, char *argv[],
28                                                                  const struct core_set *cores);
29
30 /* For communicating with perf_create_context() */
31 static struct perf_context_config perf_cfg = {
32         .perf_file = "#arch/perf",
33         .kpctl_file = "#kprof/kpctl",
34         .kpdata_file = "#kprof/kpdata",
35 };
36
37 static struct perfconv_context *cctx;
38 static struct perf_context *pctx;
39
40 struct perf_opts {
41         const char                                      *output_file;
42         const char                                      *events;
43         char                                            **cmd_argv;
44         int                                                     cmd_argc;
45         struct core_set                         cores;
46         bool                                            got_cores;
47         bool                                            verbose;
48         bool                                            sampling;
49         bool                                            record_quiet;
50         unsigned long                           record_period;
51 };
52 static struct perf_opts opts;
53
/* Describes one perf subcommand.
 *
 * name: the word typed after 'perf' to select the command.
 * desc: one-line description, shown in usage and help output.
 * opts: argument synopsis printed by 'perf help'.  NULL means the command
 *       parses its arguments with argp; perf_help() then invokes func with
 *       "--help" instead of printing a synopsis.
 * func: entry point; receives its own table entry plus argc/argv with the
 *       command name at argv[0].
 *
 * The strings are never modified and are initialized from string literals,
 * hence const. */
struct perf_cmd {
	const char *name;
	const char *desc;
	const char *opts;
	int (*func)(struct perf_cmd *, int, char **);
};
60
61 static int perf_help(struct perf_cmd *cmd, int argc, char *argv[]);
62 static int perf_list(struct perf_cmd *cmd, int argc, char *argv[]);
63 static int perf_record(struct perf_cmd *cmd, int argc, char *argv[]);
64 static int perf_pmu_caps(struct perf_cmd *cmd, int argc, char *argv[]);
65
66 static struct perf_cmd perf_cmds[] = {
67         { .name = "help",
68           .desc = "Detailed help for commands",
69           .opts = "COMMAND",
70           .func = perf_help,
71         },
72         { .name = "list",
73           .desc = "Lists all available events",
74           .opts = "[REGEX]",
75           .func = perf_list,
76         },
77         { .name = "record",
78           .desc = "Samples events during command execution",
79           .opts = 0,
80           .func = perf_record,
81         },
82         { .name = "pmu_caps",
83           .desc = "Shows PMU capabilities",
84           .opts = "",
85           .func = perf_pmu_caps,
86         },
87 };
88
89 /**************************** perf help ****************************/
90
91 static int perf_help(struct perf_cmd *cmd, int argc, char *argv[])
92 {
93         char *sub_argv[2];
94
95         if (argc < 2) {
96                 fprintf(stderr, "perf %s %s\n", cmd->name, cmd->opts);
97                 return -1;
98         }
99         for (int i = 0; i < COUNT_OF(perf_cmds); i++) {
100                 if (!strcmp(perf_cmds[i].name, argv[1])) {
101                         if (perf_cmds[i].opts) {
102                                 fprintf(stdout, "perf %s %s\n", perf_cmds[i].name,
103                                         perf_cmds[i].opts);
104                                 fprintf(stdout, "\t%s\n", perf_cmds[i].desc);
105                         } else {
106                                 /* For argp subcommands, call their help directly. */
107                                 sub_argv[0] = xstrdup(perf_cmds[i].name);
108                                 sub_argv[1] = xstrdup("--help");
109                                 perf_cmds[i].func(&perf_cmds[i], 2, sub_argv);
110                                 free(sub_argv[0]);
111                                 free(sub_argv[1]);
112                         }
113                         return 0;
114                 }
115         }
116         fprintf(stderr, "Unknown perf command %s\n", argv[1]);
117         return -1;
118 }
119
120 /**************************** perf list ****************************/
121
/* 'perf list [REGEX]': shows the available events on stdout.  The optional
 * first argument is passed to perf_show_events() as the regex; NULL is passed
 * when it is absent.  Always returns 0. */
static int perf_list(struct perf_cmd *cmd, int argc, char *argv[])
{
	perf_show_events(argc > 1 ? argv[1] : NULL, stdout);
	return 0;
}
131
132 /**************************** perf pmu_caps ************************/
133
134 static int perf_pmu_caps(struct perf_cmd *cmd, int argc, char *argv[])
135 {
136         const struct perf_arch_info *pai = perf_context_get_arch_info(pctx);
137
138         fprintf(stdout,
139                         "PERF.version             = %u\n"
140                         "PERF.proc_arch_events    = %u\n"
141                         "PERF.bits_x_counter      = %u\n"
142                         "PERF.counters_x_proc     = %u\n"
143                         "PERF.bits_x_fix_counter  = %u\n"
144                         "PERF.fix_counters_x_proc = %u\n",
145                         pai->perfmon_version, pai->proc_arch_events, pai->bits_x_counter,
146                         pai->counters_x_proc, pai->bits_x_fix_counter,
147                         pai->fix_counters_x_proc);
148         return 0;
149 }
150
151 /**************************** Common argp ************************/
152
/* Options shared by every subcommand that collects perf events (e.g. perf
 * record, perf stat).  They are handled by parse_collect_opt(). */
static struct argp_option collect_opts[] = {
	{
		.name = "event",
		.key = 'e',
		.arg = "EVENT",
		.doc = "Event string, e.g. cycles:u:k",
	},
	{
		.name = "cores",
		.key = 'C',
		.arg = "CORE_LIST",
		.doc = "List of cores, e.g. 0.2.4:8-19",
	},
	{
		.name = "cpu",
		.key = 'C',
		.flags = OPTION_ALIAS,
	},
	{
		.name = "all-cpus",
		.key = 'a',
		.doc = "Collect events on all cores (on by default)",
	},
	{
		.name = "verbose",
		.key = 'v',
	},
	{ 0 }
};

/* Non-option arguments: the command to run and its own arguments. */
static const char *collect_args_doc = "COMMAND [ARGS]";
166
167 static error_t parse_collect_opt(int key, char *arg, struct argp_state *state)
168 {
169         struct perf_opts *p_opts = state->input;
170
171         /* argp doesn't pass input to the child parser(s) by default... */
172         state->child_inputs[0] = state->input;
173
174         switch (key) {
175         case 'a':
176                 /* Our default operation is to track all cores; we don't follow
177                  * processes yet. */
178                 break;
179         case 'C':
180                 ros_parse_cores(arg, &p_opts->cores);
181                 p_opts->got_cores = TRUE;
182                 break;
183         case 'e':
184                 p_opts->events = arg;
185                 break;
186         case 'v':
187                 p_opts->verbose = TRUE;
188                 break;
189         case ARGP_KEY_ARG:
190                 p_opts->cmd_argc = state->argc - state->next + 1;
191                 p_opts->cmd_argv = xmalloc(sizeof(char*) * (p_opts->cmd_argc + 1));
192                 p_opts->cmd_argv[0] = arg;
193                 memcpy(&p_opts->cmd_argv[1], &state->argv[state->next],
194                        sizeof(char*) * (p_opts->cmd_argc - 1));
195                 p_opts->cmd_argv[p_opts->cmd_argc] = NULL;
196                 state->next = state->argc;
197                 break;
198         case ARGP_KEY_END:
199                 if (!p_opts->cmd_argc)
200                         argp_usage(state);
201                 /* By default, we set all cores (different than linux) */
202                 if (!p_opts->got_cores)
203                         ros_get_all_cores_set(&p_opts->cores);
204                 break;
205         default:
206                 return ARGP_ERR_UNKNOWN;
207         }
208         return 0;
209 }
210
211 /* Helper, parses args using the collect_opts and the child parser for a given
212  * cmd. */
213 static void collect_argp(struct perf_cmd *cmd, int argc, char *argv[],
214                          struct argp_child *children, struct perf_opts *opts)
215 {
216         struct argp collect_opt = {collect_opts, parse_collect_opt,
217                                    collect_args_doc, cmd->desc, children};
218         char *cmd_name;
219         const char *fmt = "perf %s";
220         size_t cmd_sz = strlen(cmd->name) + strlen(fmt) + 1;
221
222         /* Rewrite the command name from foo to perf foo for the --help output */
223         cmd_name = xmalloc(cmd_sz);
224         snprintf(cmd_name, cmd_sz, fmt, cmd->name);
225         cmd_name[cmd_sz - 1] = '\0';
226         argv[0] = cmd_name;
227         argp_parse(&collect_opt, argc, argv, ARGP_IN_ORDER, 0, opts);
228         /* It's possible that someone could still be using cmd_name */
229 }
230
231 /* Helper, submits the events in opts to the kernel for monitoring. */
232 static void submit_events(struct perf_opts *opts)
233 {
234         struct perf_eventsel *sel;
235         char *dup_evts, *tok, *tok_save = 0;
236
237         dup_evts = xstrdup(opts->events);
238         for (tok = strtok_r(dup_evts, ",", &tok_save);
239              tok;
240                  tok = strtok_r(NULL, ",", &tok_save)) {
241
242                 sel = perf_parse_event(tok);
243                 PMEV_SET_INTEN(sel->ev.event, opts->sampling);
244                 sel->ev.trigger_count = opts->record_period;
245                 perf_context_event_submit(pctx, &opts->cores, sel);
246         }
247         free(dup_evts);
248 }
249
250 /**************************** perf record ************************/
251
/* Options specific to 'perf record'; handled by parse_record_opt(). */
static struct argp_option record_opts[] = {
	{
		.name = "count",
		.key = 'c',
		.arg = "PERIOD",
		.doc = "Sampling period",
	},
	{
		.name = "output",
		.key = 'o',
		.arg = "FILE",
		.doc = "Output file name (default perf.data)",
	},
	{
		.name = "freq",
		.key = 'F',
		.arg = "FREQUENCY",
		.doc = "Sampling frequency (assumes cycles)",
	},
	{
		.name = "call-graph",
		.key = 'g',
		.doc = "Backtrace recording (always on!)",
	},
	{
		.name = "quiet",
		.key = 'q',
		.doc = "No printing to stdio",
	},
	{ 0 }
};
260
/* Converts a sampling frequency (samples/sec) into a sampling period.
 *
 * In lieu of adaptively changing the period to maintain a set freq, we just
 * assume the event is cycles and that the TSC is close to that:
 *
 *	(cycles/sec) / (samples/sec) = cycles / sample = period.
 *
 * Returns 0 when freq is 0, since there is no meaningful period and dividing
 * would be undefined.  parse_record_opt() treats a zero period as "unset" and
 * applies its default.
 *
 * TODO: this also assumes we're running the core at full speed. */
static unsigned long freq_to_period(unsigned long freq)
{
	if (!freq)
		return 0;
	return get_tsc_freq() / freq;
}
271
272 static error_t parse_record_opt(int key, char *arg, struct argp_state *state)
273 {
274         struct perf_opts *p_opts = state->input;
275
276         switch (key) {
277         case 'c':
278                 if (p_opts->record_period)
279                         argp_error(state, "Period set.  Only use at most one of -c -F");
280                 p_opts->record_period = atol(arg);
281                 break;
282         case 'F':
283                 if (p_opts->record_period)
284                         argp_error(state, "Period set.  Only use at most one of -c -F");
285                 /* TODO: when we properly support freq, multiple events will have the
286                  * same freq but different, dynamic, periods. */
287                 p_opts->record_period = freq_to_period(atol(arg));
288                 break;
289         case 'g':
290                 /* Our default operation is to record backtraces. */
291                 break;
292         case 'o':
293                 p_opts->output_file = arg;
294                 break;
295         case 'q':
296                 p_opts->record_quiet = TRUE;
297                 break;
298         case ARGP_KEY_END:
299                 if (!p_opts->events)
300                         p_opts->events = "cycles";
301                 if (!p_opts->output_file)
302                         p_opts->output_file = "perf.data";
303                 if (!p_opts->record_period)
304                         p_opts->record_period = freq_to_period(1000);
305                 break;
306         default:
307                 return ARGP_ERR_UNKNOWN;
308         }
309         return 0;
310 }
311
312 static int perf_record(struct perf_cmd *cmd, int argc, char *argv[])
313 {
314         struct argp argp_record = {record_opts, parse_record_opt};
315         struct argp_child children[] = { {&argp_record, 0, 0, 0}, {0} };
316
317         collect_argp(cmd, argc, argv, children, &opts);
318         opts.sampling = TRUE;
319
320         submit_events(&opts);
321         if (!strcmp(opts.cmd_argv[0], "sleep") && (opts.cmd_argc > 1))
322                 sleep(atoi(opts.cmd_argv[1]));
323         else
324                 run_process_and_wait(opts.cmd_argc, opts.cmd_argv,
325                                      &opts.cores);
326         if (opts.verbose)
327                 perf_context_show_values(pctx, stdout);
328         /* Flush the profiler per-CPU trace data into the main queue, so that
329          * it will be available for read. */
330         perf_flush_context_traces(pctx);
331         /* Generate the Linux perf file format with the traces which have been
332          * created during this operation. */
333         perf_convert_trace_data(cctx, perf_cfg.kpdata_file, opts.output_file);
334         return 0;
335 }
336
337 static void run_process_and_wait(int argc, char *argv[],
338                                                                  const struct core_set *cores)
339 {
340         int pid, status;
341         size_t max_cores = ros_total_cores();
342         struct core_set pvcores;
343
344         pid = sys_proc_create(argv[0], strlen(argv[0]), argv, NULL, 0);
345         if (pid < 0) {
346                 perror(argv[0]);
347                 exit(1);
348         }
349
350         ros_get_low_latency_core_set(&pvcores);
351         ros_not_core_set(&pvcores);
352         ros_and_core_sets(&pvcores, cores);
353         for (size_t i = 0; i < max_cores; i++) {
354                 if (ros_get_bit(&pvcores, i)) {
355                         if (sys_provision(pid, RES_CORES, i)) {
356                                 fprintf(stderr,
357                                                 "Unable to provision CPU %lu to PID %d: cmd='%s'\n",
358                                                 i, pid, argv[0]);
359                                 exit(1);
360                         }
361                 }
362         }
363
364         sys_proc_run(pid);
365         waitpid(pid, &status, 0);
366 }
367
368 static void save_cmdline(int argc, char *argv[])
369 {
370         size_t len = 0;
371         char *p;
372
373         for (int i = 0; i < argc; i++)
374                 len += strlen(argv[i]) + 1;
375         cmd_line_save = xmalloc(len);
376         p = cmd_line_save;
377         for (int i = 0; i < argc; i++) {
378                 strcpy(p, argv[i]);
379                 p += strlen(argv[i]);
380                 if (!(i == argc - 1)) {
381                         *p = ' ';       /* overwrite \0 with ' ' */
382                         p++;
383                 }
384         }
385 }
386
387 static void global_usage(void)
388 {
389         fprintf(stderr, "  Usage: perf COMMAND [ARGS]\n");
390         fprintf(stderr, "\n  Available commands:\n\n");
391         for (int i = 0; i < COUNT_OF(perf_cmds); i++)
392                 fprintf(stderr, "  \t%s: %s\n", perf_cmds[i].name, perf_cmds[i].desc);
393         exit(-1);
394 }
395
396 int main(int argc, char *argv[])
397 {
398         int i, ret = -1;
399
400         save_cmdline(argc, argv);
401
402         /* Common inits.  Some functions don't need these, but it doesn't hurt. */
403         perf_initialize();
404         pctx = perf_create_context(&perf_cfg);
405         cctx = perfconv_create_context(pctx);
406
407         if (argc < 2)
408                 global_usage();
409         for (i = 0; i < COUNT_OF(perf_cmds); i++) {
410                 if (!strcmp(perf_cmds[i].name, argv[1])) {
411                         ret = perf_cmds[i].func(&perf_cmds[i], argc - 1, argv + 1);
412                         break;
413                 }
414         }
415         if (i == COUNT_OF(perf_cmds))
416                 global_usage();
417         perf_free_context(pctx);
418         perfconv_free_context(cctx);
419         perf_finalize();
420         return ret;
421 }