perf: Report errors when counter setup fails
[akaros.git] / tools / profile / perf / perf.c
1 /* Copyright (c) 2015-2016 Google Inc
2  * Barret Rhoden <brho@cs.berkeley.edu>
3  * Davide Libenzi <dlibenzi@google.com>
4  * See LICENSE for details.
5  */
6
7 #include <sys/types.h>
8 #include <sys/stat.h>
9 #include <sys/wait.h>
10 #include <stdint.h>
11 #include <stdio.h>
12 #include <string.h>
13 #include <stdlib.h>
14 #include <unistd.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <errno.h>
18 #include <argp.h>
19 #include <parlib/parlib.h>
20 #include <parlib/timing.h>
21 #include "xlib.h"
22 #include "akaros.h"
23 #include "perfconv.h"
24 #include "perf_core.h"
25
/* Helpers.
 *
 * run_process_and_wait() is defined near the end of this file: it spawns the
 * command in argv, provisions cores from the given set to it, runs it, and
 * waits for it. */
static void run_process_and_wait(int argc, char **argv,
                                 const struct core_set *cores);
29
30 /* For communicating with perf_create_context() */
31 static struct perf_context_config perf_cfg = {
32         .perf_file = "#arch/perf",
33         .kpctl_file = "#kprof/kpctl",
34         .kpdata_file = "#kprof/kpdata",
35 };
36
37 static struct perfconv_context *cctx;
38 static struct perf_context *pctx;
39 extern char **environ;  /* POSIX envp */
40
41 struct perf_opts {
42         FILE                                            *outfile;
43         const char                                      *events;
44         char                                            **cmd_argv;
45         int                                                     cmd_argc;
46         struct core_set                         cores;
47         bool                                            got_cores;
48         bool                                            verbose;
49         bool                                            sampling;
50         bool                                            record_quiet;
51         unsigned long                           record_period;
52 };
53 static struct perf_opts opts;
54
/* One perf subcommand, as listed in perf_cmds[]. */
struct perf_cmd {
	char *name;	/* name matched against argv[1] */
	char *desc;	/* one-line description for usage/help output */
	char *opts;	/* usage string; NULL makes perf_help() run the command
			 * with --help instead of printing this */
	/* Handler; receives its own table entry plus argc/argv starting at the
	 * subcommand name. */
	int (*func)(struct perf_cmd *, int, char **);
};
61
/* Subcommand handlers; they are referenced by perf_cmds[] before their
 * definitions. */
static int perf_help(struct perf_cmd *cmd, int argc, char **argv);
static int perf_list(struct perf_cmd *cmd, int argc, char **argv);
static int perf_record(struct perf_cmd *cmd, int argc, char **argv);
static int perf_pmu_caps(struct perf_cmd *cmd, int argc, char **argv);
66
67 static struct perf_cmd perf_cmds[] = {
68         { .name = "help",
69           .desc = "Detailed help for commands",
70           .opts = "COMMAND",
71           .func = perf_help,
72         },
73         { .name = "list",
74           .desc = "Lists all available events",
75           .opts = "[REGEX]",
76           .func = perf_list,
77         },
78         { .name = "record",
79           .desc = "Samples events during command execution",
80           .opts = 0,
81           .func = perf_record,
82         },
83         { .name = "pmu_caps",
84           .desc = "Shows PMU capabilities",
85           .opts = "",
86           .func = perf_pmu_caps,
87         },
88 };
89
90 /**************************** perf help ****************************/
91
92 static int perf_help(struct perf_cmd *cmd, int argc, char *argv[])
93 {
94         char *sub_argv[2];
95
96         if (argc < 2) {
97                 fprintf(stderr, "perf %s %s\n", cmd->name, cmd->opts);
98                 return -1;
99         }
100         for (int i = 0; i < COUNT_OF(perf_cmds); i++) {
101                 if (!strcmp(perf_cmds[i].name, argv[1])) {
102                         if (perf_cmds[i].opts) {
103                                 fprintf(stdout, "perf %s %s\n", perf_cmds[i].name,
104                                         perf_cmds[i].opts);
105                                 fprintf(stdout, "\t%s\n", perf_cmds[i].desc);
106                         } else {
107                                 /* For argp subcommands, call their help directly. */
108                                 sub_argv[0] = xstrdup(perf_cmds[i].name);
109                                 sub_argv[1] = xstrdup("--help");
110                                 perf_cmds[i].func(&perf_cmds[i], 2, sub_argv);
111                                 free(sub_argv[0]);
112                                 free(sub_argv[1]);
113                         }
114                         return 0;
115                 }
116         }
117         fprintf(stderr, "Unknown perf command %s\n", argv[1]);
118         return -1;
119 }
120
121 /**************************** perf list ****************************/
122
/* 'perf list [REGEX]': shows the events on stdout, passing argv[1] (or NULL
 * when absent) to perf_show_events() as the regex.  Always returns 0. */
static int perf_list(struct perf_cmd *cmd, int argc, char *argv[])
{
	char *pattern = (argc > 1) ? argv[1] : NULL;

	perf_show_events(pattern, stdout);
	return 0;
}
132
133 /**************************** perf pmu_caps ************************/
134
135 static int perf_pmu_caps(struct perf_cmd *cmd, int argc, char *argv[])
136 {
137         const struct perf_arch_info *pai = perf_context_get_arch_info(pctx);
138
139         fprintf(stdout,
140                         "PERF.version             = %u\n"
141                         "PERF.proc_arch_events    = %u\n"
142                         "PERF.bits_x_counter      = %u\n"
143                         "PERF.counters_x_proc     = %u\n"
144                         "PERF.bits_x_fix_counter  = %u\n"
145                         "PERF.fix_counters_x_proc = %u\n",
146                         pai->perfmon_version, pai->proc_arch_events, pai->bits_x_counter,
147                         pai->counters_x_proc, pai->bits_x_fix_counter,
148                         pai->fix_counters_x_proc);
149         return 0;
150 }
151
152 /**************************** Common argp ************************/
153
154 /* Collection argument parsing.  These options are common to any function that
155  * will collect perf events, e.g. perf record and perf stat. */
156
157 static struct argp_option collect_opts[] = {
158         {"event", 'e', "EVENT", 0, "Event string, e.g. cycles:u:k"},
159         {"cores", 'C', "CORE_LIST", 0, "List of cores, e.g. 0.2.4:8-19"},
160         {"cpu", 'C', 0, OPTION_ALIAS},
161         {"all-cpus", 'a', 0, 0, "Collect events on all cores (on by default)"},
162         {"verbose", 'v', 0, 0, 0},
163         { 0 }
164 };
165
166 static const char *collect_args_doc = "COMMAND [ARGS]";
167
168 static error_t parse_collect_opt(int key, char *arg, struct argp_state *state)
169 {
170         struct perf_opts *p_opts = state->input;
171
172         /* argp doesn't pass input to the child parser(s) by default... */
173         state->child_inputs[0] = state->input;
174
175         switch (key) {
176         case 'a':
177                 /* Our default operation is to track all cores; we don't follow
178                  * processes yet. */
179                 break;
180         case 'C':
181                 ros_parse_cores(arg, &p_opts->cores);
182                 p_opts->got_cores = TRUE;
183                 break;
184         case 'e':
185                 p_opts->events = arg;
186                 break;
187         case 'v':
188                 p_opts->verbose = TRUE;
189                 break;
190         case ARGP_KEY_ARG:
191                 p_opts->cmd_argc = state->argc - state->next + 1;
192                 p_opts->cmd_argv = xmalloc(sizeof(char*) * (p_opts->cmd_argc + 1));
193                 p_opts->cmd_argv[0] = arg;
194                 memcpy(&p_opts->cmd_argv[1], &state->argv[state->next],
195                        sizeof(char*) * (p_opts->cmd_argc - 1));
196                 p_opts->cmd_argv[p_opts->cmd_argc] = NULL;
197                 state->next = state->argc;
198                 break;
199         case ARGP_KEY_END:
200                 if (!p_opts->cmd_argc)
201                         argp_usage(state);
202                 /* By default, we set all cores (different than linux) */
203                 if (!p_opts->got_cores)
204                         ros_get_all_cores_set(&p_opts->cores);
205                 break;
206         default:
207                 return ARGP_ERR_UNKNOWN;
208         }
209         return 0;
210 }
211
212 /* Helper, parses args using the collect_opts and the child parser for a given
213  * cmd. */
214 static void collect_argp(struct perf_cmd *cmd, int argc, char *argv[],
215                          struct argp_child *children, struct perf_opts *opts)
216 {
217         struct argp collect_opt = {collect_opts, parse_collect_opt,
218                                    collect_args_doc, cmd->desc, children};
219         char *cmd_name;
220         const char *fmt = "perf %s";
221         size_t cmd_sz = strlen(cmd->name) + strlen(fmt) + 1;
222
223         /* Rewrite the command name from foo to perf foo for the --help output */
224         cmd_name = xmalloc(cmd_sz);
225         snprintf(cmd_name, cmd_sz, fmt, cmd->name);
226         cmd_name[cmd_sz - 1] = '\0';
227         argv[0] = cmd_name;
228         argp_parse(&collect_opt, argc, argv, ARGP_IN_ORDER, 0, opts);
229         /* It's possible that someone could still be using cmd_name */
230 }
231
232 /* Helper, submits the events in opts to the kernel for monitoring. */
233 static void submit_events(struct perf_opts *opts)
234 {
235         struct perf_eventsel *sel;
236         char *dup_evts, *tok, *tok_save = 0;
237
238         dup_evts = xstrdup(opts->events);
239         for (tok = strtok_r(dup_evts, ",", &tok_save);
240              tok;
241                  tok = strtok_r(NULL, ",", &tok_save)) {
242
243                 sel = perf_parse_event(tok);
244                 PMEV_SET_INTEN(sel->ev.event, opts->sampling);
245                 sel->ev.trigger_count = opts->record_period;
246                 perf_context_event_submit(pctx, &opts->cores, sel);
247         }
248         free(dup_evts);
249 }
250
251 /**************************** perf record ************************/
252
253 static struct argp_option record_opts[] = {
254         {"count", 'c', "PERIOD", 0, "Sampling period"},
255         {"output", 'o', "FILE", 0, "Output file name (default perf.data)"},
256         {"freq", 'F', "FREQUENCY", 0, "Sampling frequency (assumes cycles)"},
257         {"call-graph", 'g', 0, 0, "Backtrace recording (always on!)"},
258         {"quiet", 'q', 0, 0, "No printing to stdio"},
259         { 0 }
260 };
261
/* Converts a sampling frequency into a sampling period.
 *
 * Rather than adapting the period at runtime to hold a frequency, we assume
 * the event is cycles and that the TSC rate is close to the cycle rate:
 *
 *	(cycles/sec) / (samples/sec) = cycles / sample = period.
 *
 * The caller must pass a non-zero freq, since it is used as a divisor.
 *
 * TODO: this also assumes we're running the core at full speed. */
static unsigned long freq_to_period(unsigned long freq)
{
	const unsigned long period = get_tsc_freq() / freq;

	return period;
}
272
273 static error_t parse_record_opt(int key, char *arg, struct argp_state *state)
274 {
275         struct perf_opts *p_opts = state->input;
276
277         switch (key) {
278         case 'c':
279                 if (p_opts->record_period)
280                         argp_error(state, "Period set.  Only use at most one of -c -F");
281                 p_opts->record_period = atol(arg);
282                 break;
283         case 'F':
284                 if (p_opts->record_period)
285                         argp_error(state, "Period set.  Only use at most one of -c -F");
286                 /* TODO: when we properly support freq, multiple events will have the
287                  * same freq but different, dynamic, periods. */
288                 p_opts->record_period = freq_to_period(atol(arg));
289                 break;
290         case 'g':
291                 /* Our default operation is to record backtraces. */
292                 break;
293         case 'o':
294                 p_opts->outfile = xfopen(arg, "wb");
295                 break;
296         case 'q':
297                 p_opts->record_quiet = TRUE;
298                 break;
299         case ARGP_KEY_END:
300                 if (!p_opts->events)
301                         p_opts->events = "cycles";
302                 if (!p_opts->outfile)
303                         p_opts->outfile = xfopen("perf.data", "wb");
304                 if (!p_opts->record_period)
305                         p_opts->record_period = freq_to_period(1000);
306                 break;
307         default:
308                 return ARGP_ERR_UNKNOWN;
309         }
310         return 0;
311 }
312
313 static int perf_record(struct perf_cmd *cmd, int argc, char *argv[])
314 {
315         struct argp argp_record = {record_opts, parse_record_opt};
316         struct argp_child children[] = { {&argp_record, 0, 0, 0}, {0} };
317
318         collect_argp(cmd, argc, argv, children, &opts);
319         opts.sampling = TRUE;
320
321         submit_events(&opts);
322         run_process_and_wait(opts.cmd_argc, opts.cmd_argv, &opts.cores);
323         if (opts.verbose)
324                 perf_context_show_values(pctx, stdout);
325         /* Flush the profiler per-CPU trace data into the main queue, so that
326          * it will be available for read. */
327         perf_flush_context_traces(pctx);
328         /* Generate the Linux perf file format with the traces which have been
329          * created during this operation. */
330         perf_convert_trace_data(cctx, perf_cfg.kpdata_file, opts.outfile);
331         fclose(opts.outfile);
332         return 0;
333 }
334
335 static void run_process_and_wait(int argc, char *argv[],
336                                                                  const struct core_set *cores)
337 {
338         int pid, status;
339         size_t max_cores = ros_total_cores();
340         struct core_set pvcores;
341
342         pid = create_child_with_stdfds(argv[0], argc, argv, environ);
343         if (pid < 0) {
344                 perror("Unable to spawn child");
345                 fflush(stderr);
346                 exit(1);
347         }
348
349         ros_get_low_latency_core_set(&pvcores);
350         ros_not_core_set(&pvcores);
351         ros_and_core_sets(&pvcores, cores);
352         for (size_t i = 0; i < max_cores; i++) {
353                 if (ros_get_bit(&pvcores, i)) {
354                         if (sys_provision(pid, RES_CORES, i)) {
355                                 fprintf(stderr,
356                                                 "Unable to provision CPU %lu to PID %d: cmd='%s'\n",
357                                                 i, pid, argv[0]);
358                                 sys_proc_destroy(pid, -1);
359                                 exit(1);
360                         }
361                 }
362         }
363
364         sys_proc_run(pid);
365         waitpid(pid, &status, 0);
366 }
367
368 static void save_cmdline(int argc, char *argv[])
369 {
370         size_t len = 0;
371         char *p;
372
373         for (int i = 0; i < argc; i++)
374                 len += strlen(argv[i]) + 1;
375         cmd_line_save = xmalloc(len);
376         p = cmd_line_save;
377         for (int i = 0; i < argc; i++) {
378                 strcpy(p, argv[i]);
379                 p += strlen(argv[i]);
380                 if (!(i == argc - 1)) {
381                         *p = ' ';       /* overwrite \0 with ' ' */
382                         p++;
383                 }
384         }
385 }
386
387 static void global_usage(void)
388 {
389         fprintf(stderr, "  Usage: perf COMMAND [ARGS]\n");
390         fprintf(stderr, "\n  Available commands:\n\n");
391         for (int i = 0; i < COUNT_OF(perf_cmds); i++)
392                 fprintf(stderr, "  \t%s: %s\n", perf_cmds[i].name, perf_cmds[i].desc);
393         exit(-1);
394 }
395
396 int main(int argc, char *argv[])
397 {
398         int i, ret = -1;
399
400         save_cmdline(argc, argv);
401
402         /* Common inits.  Some functions don't need these, but it doesn't hurt. */
403         perf_initialize();
404         pctx = perf_create_context(&perf_cfg);
405         cctx = perfconv_create_context(pctx);
406
407         if (argc < 2)
408                 global_usage();
409         for (i = 0; i < COUNT_OF(perf_cmds); i++) {
410                 if (!strcmp(perf_cmds[i].name, argv[1])) {
411                         ret = perf_cmds[i].func(&perf_cmds[i], argc - 1, argv + 1);
412                         break;
413                 }
414         }
415         if (i == COUNT_OF(perf_cmds))
416                 global_usage();
417         perf_free_context(pctx);
418         perfconv_free_context(cctx);
419         perf_finalize();
420         return ret;
421 }