perf: Use PERF_SAMPLE_IDENTIFIER
[akaros.git] / tools / profile / perf / perf_core.c
index 83881b2..181d2f1 100644 (file)
@@ -1,7 +1,9 @@
-/* Copyright (c) 2015 Google Inc
+/* Copyright (c) 2015-2016 Google Inc
  * Davide Libenzi <dlibenzi@google.com>
- * See LICENSE for details.
- */
+ * Barret Rhoden <brho@cs.berkeley.edu>
+ * Stephane Eranian <eranian@gmail.com> (perf_show_event_info() from libpfm4)
+ *
+ * See LICENSE for details. */
 
 #include <ros/arch/msr-index.h>
 #include <ros/arch/perfmon.h>
 #include "perfconv.h"
 #include "akaros.h"
 #include "perf_core.h"
+#include "elf.h"
 
-struct event_coords {
-       char *buffer;
-       const char *event;
-       const char *umask;
+struct perf_generic_event {
+       char                                            *name;
+       uint32_t                                        type;
+       uint32_t                                        config;
 };
 
-static const uint32_t invalid_mask = (uint32_t) -1;
-
-static void perf_parse_event_coords(const char *name, struct event_coords *ec)
-{
-       const char *cptr = strchr(name, ':');
-
-       if (cptr == NULL) {
-               ec->buffer = NULL;
-               ec->event = name;
-               ec->umask = NULL;
-       } else {
-               size_t cpos = cptr - name;
-
-               ec->buffer = xstrdup(name);
-               ec->event = ec->buffer;
-               ec->umask = ec->buffer + cpos + 1;
-               ec->buffer[cpos] = 0;
-       }
-}
-
-static void perf_free_event_coords(struct event_coords *ec)
-{
-       free(ec->buffer);
-}
+struct perf_generic_event generic_events[] = {
+       { .name = "cycles",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_CPU_CYCLES,
+       },
+       { .name = "cpu-cycles",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_CPU_CYCLES,
+       },
+       { .name = "instructions",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_INSTRUCTIONS,
+       },
+       { .name = "cache-references",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_CACHE_REFERENCES,
+       },
+       { .name = "cache-misses",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_CACHE_MISSES,
+       },
+       { .name = "branches",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+       },
+       { .name = "branch-instructions",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+       },
+       { .name = "branch-misses",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_BRANCH_MISSES,
+       },
+       { .name = "bus-cycles",
+         .type = PERF_TYPE_HARDWARE,
+         .config = PERF_COUNT_HW_BUS_CYCLES,
+       },
+};
 
 static const char *perf_get_event_mask_name(const pfm_event_info_t *einfo,
                                                                                        uint32_t mask)
@@ -82,194 +99,314 @@ static const char *perf_get_event_mask_name(const pfm_event_info_t *einfo,
        return NULL;
 }
 
-static int perf_resolve_event_name(const char *name, uint32_t *event,
-                                                                  uint32_t *mask, uint32_t mask_hint)
+void perf_initialize(void)
 {
-       int idx;
-       struct event_coords ec;
-
-       perf_parse_event_coords(name, &ec);
+       pfm_err_t err = pfm_initialize();
 
-       idx = pfm_find_event(ec.event);
-       if (idx >= 0) {
-               int i;
-               pfm_err_t err;
-               pfm_event_info_t einfo;
-               pfm_event_attr_info_t ainfo;
+       if (err != PFM_SUCCESS) {
+               fprintf(stderr, "Unable to initialize perfmon library: %s\n",
+                               pfm_strerror(err));
+               exit(1);
+       }
+       symbol__elf_init();
+}
 
-               ZERO_DATA(einfo);
-               einfo.size = sizeof(einfo);
-               err = pfm_get_event_info(idx, PFM_OS_NONE, &einfo);
-               if (err != PFM_SUCCESS) {
-                       fprintf(stderr, "Unable to retrieve event (%s) info: %s\n",
-                                       name, pfm_strerror(err));
-                       exit(1);
-               }
+void perf_finalize(void)
+{
+       pfm_terminate();
+}
 
-               *event = (uint32_t) einfo.code;
-               *mask = invalid_mask;
+/* This is arch-specific and maybe model specific in the future.  For some
+ * events, pfm4 gives us a pseudo encoding.  Those codes don't map to real
+ * hardware events and are meant to be interpreted by Linux for *other* HW
+ * events, e.g. in arch/x86/events/intel/core.c.
+ *
+ * While we're here, we can also take *real* encodings and treat them like
+ * pseudo encodings.  For instance, the arch event 0x3c (unhalted_core_cycles)
+ * can also be done with fixed counter 1.  This all assumes we have version 2 or
+ * later of Intel's perfmon. */
+static void x86_handle_pseudo_encoding(struct perf_eventsel *sel)
+{
+       uint8_t lower_byte;
+
+       switch (sel->ev.event & 0xffff) {
+       case 0xc0:      /* arch inst_retired */
+               sel->ev.flags |= PERFMON_FIXED_EVENT;
+               PMEV_SET_MASK(sel->ev.event, 0);
+               PMEV_SET_EVENT(sel->ev.event, 0);
+               return;
+       case 0x3c:      /* arch unhalted_core_cycles */
+               sel->ev.flags |= PERFMON_FIXED_EVENT;
+               PMEV_SET_MASK(sel->ev.event, 0);
+               PMEV_SET_EVENT(sel->ev.event, 1);
+               return;
+       case 0x13c:     /* arch unhalted_reference_cycles */
+       case 0x300:     /* pseudo encode: unhalted_reference_cycles */
+               sel->ev.flags |= PERFMON_FIXED_EVENT;
+               PMEV_SET_MASK(sel->ev.event, 0);
+               PMEV_SET_EVENT(sel->ev.event, 2);
+               return;
+       };
+       lower_byte = sel->ev.event & 0xff;
+       if ((lower_byte == 0x00) || (lower_byte == 0xff))
+               fprintf(stderr, "Unhandled pseudo encoding %d\n", lower_byte);
+}
 
-               ZERO_DATA(ainfo);
-               ainfo.size = sizeof(ainfo);
-               pfm_for_each_event_attr(i, &einfo) {
-                       err = pfm_get_event_attr_info(idx, i, PFM_OS_NONE, &ainfo);
-                       if (err != PFM_SUCCESS) {
-                               fprintf(stderr, "Failed to get attribute info: %s\n",
-                                               pfm_strerror(err));
-                               exit(1);
-                       }
-                       if (ainfo.type == PFM_ATTR_UMASK) {
-                               if (mask_hint != invalid_mask) {
-                                       if (mask_hint == (uint32_t) ainfo.code) {
-                                               *mask = (uint32_t) ainfo.code;
-                                               break;
-                                       }
-                               } else if (!ec.umask) {
-                                       *mask = (uint32_t) ainfo.code;
-                                       if (ainfo.is_dfl)
-                                               break;
-                               } else if (!strcmp(ec.umask, ainfo.name)) {
-                                       *mask = (uint32_t) ainfo.code;
-                                       break;
-                               }
-                       }
-               }
+/* Parse the string using pfm's lookup functions.  Returns TRUE on success and
+ * fills in parts of sel. */
+static bool parse_pfm_encoding(const char *str, struct perf_eventsel *sel)
+{
+       pfm_pmu_encode_arg_t encode;
+       char *ptr;
+
+       memset(&encode, 0, sizeof(encode));
+       encode.size = sizeof(encode);
+       encode.fstr = &ptr;
+       if (pfm_get_os_event_encoding(str, PFM_PLM3 | PFM_PLM0, PFM_OS_NONE,
+                                     &encode))
+               return FALSE;
+       strlcpy(sel->fq_str, ptr, MAX_FQSTR_SZ);
+       free(ptr);
+       if (encode.count == 0) {
+               fprintf(stderr, "Found event %s, but it had no codes!\n", sel->fq_str);
+               return FALSE;
        }
-       perf_free_event_coords(&ec);
-
-       return idx;
+       sel->ev.event = encode.codes[0];
+       x86_handle_pseudo_encoding(sel);
+       sel->type = PERF_TYPE_RAW;
+       /* perf's raw config is (umask << 8) | event, not the two OR'd together */
+       sel->config = (PMEV_GET_MASK(sel->ev.event) << 8) |
+                     PMEV_GET_EVENT(sel->ev.event);
+       return TRUE;
 }
 
-static int perf_find_event_by_id(uint32_t event, uint32_t mask)
+static bool is_end_of_raw(char c)
 {
-       int pmu;
-       pfm_pmu_info_t pinfo;
-       pfm_event_info_t info;
-
-    ZERO_DATA(pinfo);
-    pinfo.size = sizeof(pinfo);
-    ZERO_DATA(info);
-    info.size = sizeof(info);
+       return (c == ':') || (c == '\0');
+}
 
-       pfm_for_all_pmus(pmu) {
-               pfm_err_t err = pfm_get_pmu_info(pmu, &pinfo);
+/* Helper: given a string, if the event is a raw hex code, return its numeric
+ * value.  Returns -1 if it does not match a raw code.
+ *
+ * rNN[N][N] followed by ':' or '\0'.  Begins with r, then has two to four
+ * hexdigits.  Chars are cast to unsigned char before any <ctype.h> call. */
+static int extract_raw_code(const char *event)
+{
+       int i;
+       char copy[5] = {0};
 
-               if (err != PFM_SUCCESS || !pinfo.is_present)
+       if (event[0] != 'r')
+               return -1;
+       event++;
+       for (i = 0; i < 4; i++) {
+               if (isxdigit((unsigned char)event[i]))
                        continue;
+               if (is_end_of_raw(event[i]))
+                       break;
+               return -1;
+       }
+       if (!is_end_of_raw(event[i]))
+               return -1;
+       /* 'i' tracks how many we found (i.e. every 'continue') */
+       if (i < 2)
+               return -1;
+       /* need a null-terminated raw code for strtol. */
+       for (int j = 0; j < i; j++)
+               copy[j] = event[j];
+       return strtol(copy, NULL, 16);
+}
 
-               for (int i = pinfo.first_event; i != -1; i = pfm_get_event_next(i)) {
-                       uint32_t cevent, cmask;
-
-                       err = pfm_get_event_info(i, PFM_OS_NONE, &info);
-                       if (err != PFM_SUCCESS) {
-                               fprintf(stderr, "Failed to get event info: %s\n",
-                                               pfm_strerror(err));
-                               exit(1);
+/* Takes any modifiers, e.g. u:k:etc, and sets the respective values in sel. */
+static void parse_modifiers(const char *str, struct perf_eventsel *sel)
+{
+       char *dup_str, *tok, *tok_save = 0;
+
+       dup_str = xstrdup(str);
+       for (tok = strtok_r(dup_str, ":", &tok_save);
+            tok;
+            tok = strtok_r(NULL, ":", &tok_save)) {
+
+               switch (tok[0]) {
+               case 'u':
+                       PMEV_SET_USR(sel->ev.event, 1);
+                       break;
+               case 'k':
+                       PMEV_SET_OS(sel->ev.event, 1);
+                       break;
+               case 'e':
+                       PMEV_SET_EDGE(sel->ev.event, 1);
+                       break;
+               case 'p':
+                       PMEV_SET_PC(sel->ev.event, 1);
+                       break;
+               case 't':
+                       PMEV_SET_ANYTH(sel->ev.event, 1);
+                       break;
+               case 'i':
+                       PMEV_SET_INVCMSK(sel->ev.event, 1);
+                       break;
+               case 'c':
+                       if (tok[1] != '=') {
+                               fprintf(stderr, "Bad cmask tok %s, ignoring\n", tok);
+                               break;
                        }
-                       if (perf_resolve_event_name(info.name, &cevent, &cmask, mask) != i)
-                               continue;
-                       if ((cevent == event) && (cmask == mask))
-                               return i;
+                       errno = 0;
+                       PMEV_SET_CMASK(sel->ev.event, strtoul(&tok[2], NULL, 0));
+                       if (errno)
+                               fprintf(stderr, "Bad cmask tok %s, trying anyway\n", tok);
+                       break;
                }
        }
-
-       return -1;
+       free(dup_str);
 }
 
-void perf_initialize(int argc, char *argv[])
+/* Parse the string for a raw encoding.  Returns TRUE on success and fills in
+ * parts of sel.  It has basic modifiers, like pfm4, for setting bits in the
+ * event code.  This is arch specific, and is all x86 (intel) for now. */
+static bool parse_raw_encoding(const char *str, struct perf_eventsel *sel)
 {
-       pfm_err_t err = pfm_initialize();
+       int code = extract_raw_code(str);
+       char *colon = strchr(str, ':');
+
+       if (code == -1)
+               return FALSE;
+       sel->ev.event = code;
+       strlcpy(sel->fq_str, str, MAX_FQSTR_SZ);
+       if (colon)
+               parse_modifiers(colon + 1, sel);
+       /* Note that we do not call x86_handle_pseudo_encoding here.  We'll submit
+        * exactly what the user asked us for - which also means no fixed counters
+        * for them (unless we want a :f: token or something). */
+       sel->type = PERF_TYPE_RAW;
+       sel->config = (PMEV_GET_MASK(sel->ev.event) << 8) |
+                     PMEV_GET_EVENT(sel->ev.event);
+       return TRUE;
+}
 
-       if (err != PFM_SUCCESS) {
-               fprintf(stderr, "Unable to initialize perfmon library: %s\n",
-                               pfm_strerror(err));
-               exit(1);
+/* Helper, returns true if str is a generic event string, and fills in sel with
+ * the type and config.  The event name is the part of str before any ':'. */
+static bool generic_str_get_code(const char *str, struct perf_eventsel *sel)
+{
+       char *colon = strchr(str, ':');
+       size_t len = colon ? colon - str : strlen(str);
+
+       for (int i = 0; i < COUNT_OF(generic_events); i++) {
+               /* Exact match only: "c:u" or ":u" must not be taken for "cycles" */
+               if (strlen(generic_events[i].name) == len &&
+                   !strncmp(generic_events[i].name, str, len)) {
+                       sel->type = generic_events[i].type;
+                       sel->config = generic_events[i].config;
+                       return TRUE;
+               }
        }
+       return FALSE;
 }
 
-void perf_finalize(void)
+/* TODO: this is arch-specific and possibly machine-specific. (intel for now).
+ * Basically a lot of our perf is arch-dependent. (e.g. PMEV_*). */
+static bool arch_translate_generic(struct perf_eventsel *sel)
 {
-       pfm_terminate();
+       switch (sel->type) {
+       case PERF_TYPE_HARDWARE:
+               /* These are the intel/x86 architectural perf events */
+               switch (sel->config) {
+               case PERF_COUNT_HW_CPU_CYCLES:
+                       PMEV_SET_MASK(sel->ev.event, 0x00);
+                       PMEV_SET_EVENT(sel->ev.event, 0x3c);
+                       break;
+               case PERF_COUNT_HW_INSTRUCTIONS:
+                       PMEV_SET_MASK(sel->ev.event, 0x00);
+                       PMEV_SET_EVENT(sel->ev.event, 0xc0);
+                       break;
+               case PERF_COUNT_HW_CACHE_REFERENCES:
+                       PMEV_SET_MASK(sel->ev.event, 0x4f);
+                       PMEV_SET_EVENT(sel->ev.event, 0x2e);
+                       break;
+               case PERF_COUNT_HW_CACHE_MISSES:
+                       PMEV_SET_MASK(sel->ev.event, 0x41);
+                       PMEV_SET_EVENT(sel->ev.event, 0x2e);
+                       break;
+               case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
+                       PMEV_SET_MASK(sel->ev.event, 0x00);
+                       PMEV_SET_EVENT(sel->ev.event, 0xc4);
+                       break;
+               case PERF_COUNT_HW_BRANCH_MISSES:
+                       PMEV_SET_MASK(sel->ev.event, 0x00);
+                       PMEV_SET_EVENT(sel->ev.event, 0xc5);
+                       break;
+               case PERF_COUNT_HW_BUS_CYCLES:
+                       /* Unhalted reference cycles */
+                       PMEV_SET_MASK(sel->ev.event, 0x01);
+                       PMEV_SET_EVENT(sel->ev.event, 0x3c);
+                       break;
+               default:
+                       return FALSE;
+               };
+               break;
+       default:
+               return FALSE;
+       };
+       /* This will make sure we use fixed counters if available */
+       x86_handle_pseudo_encoding(sel);
+       return TRUE;
 }
 
-void perf_parse_event(const char *str, struct perf_eventsel *sel)
+/* Parse the string for a built-in encoding.  These are the perf defaults such
+ * as 'cycles' or 'cache-references.' Returns TRUE on success and fills in parts
+ * of sel. */
+static bool parse_generic_encoding(const char *str, struct perf_eventsel *sel)
 {
-       static const char *const event_spec =
-               "{EVENT_ID:MASK,EVENT_NAME:MASK_NAME}[,os[={0,1}]][,usr[={0,1}]]"
-               "[,int[={0,1}]][,invcmsk[={0,1}]][,cmask=MASK][,icount=COUNT]";
-       char *dstr = xstrdup(str), *sptr, *tok, *ev;
-
-       tok = strtok_r(dstr, ",", &sptr);
-       if (tok == NULL) {
-               fprintf(stderr, "Invalid event spec string: '%s'\n\t%s\n", str,
-                               event_spec);
-               exit(1);
-       }
-       ZERO_DATA(*sel);
-       sel->eidx = -1;
-       sel->ev.flags = 0;
-       sel->ev.event = 0;
-       PMEV_SET_OS(sel->ev.event, 1);
-       PMEV_SET_USR(sel->ev.event, 1);
-       PMEV_SET_EN(sel->ev.event, 1);
-       if (isdigit(*tok)) {
-               ev = strchr(tok, ':');
-               if (ev == NULL) {
-                       fprintf(stderr, "Invalid event spec string: '%s'\n"
-                                       "\tShould be: %s\n", str, event_spec);
-                       exit(1);
-               }
-               *ev++ = 0;
-               PMEV_SET_EVENT(sel->ev.event, (uint8_t) strtoul(tok, NULL, 0));
-               PMEV_SET_MASK(sel->ev.event, (uint8_t) strtoul(ev, NULL, 0));
-       } else {
-               uint32_t event, mask;
-
-               sel->eidx = perf_resolve_event_name(tok, &event, &mask, invalid_mask);
-               if (sel->eidx < 0) {
-                       fprintf(stderr, "Unable to find event: %s\n", tok);
-                       exit(1);
-               }
-               PMEV_SET_EVENT(sel->ev.event, (uint8_t) event);
-               PMEV_SET_MASK(sel->ev.event, (uint8_t) mask);
-       }
-       while ((tok = strtok_r(NULL, ",", &sptr)) != NULL) {
-               ev = strchr(tok, '=');
-               if (ev)
-                       *ev++ = 0;
-               if (!strcmp(tok, "os")) {
-                       PMEV_SET_OS(sel->ev.event, (ev == NULL || atoi(ev) != 0) ? 1 : 0);
-               } else if (!strcmp(tok, "usr")) {
-                       PMEV_SET_USR(sel->ev.event, (ev == NULL || atoi(ev) != 0) ? 1 : 0);
-               } else if (!strcmp(tok, "int")) {
-                       PMEV_SET_INTEN(sel->ev.event,
-                                                  (ev == NULL || atoi(ev) != 0) ? 1 : 0);
-               } else if (!strcmp(tok, "invcmsk")) {
-                       PMEV_SET_INVCMSK(sel->ev.event,
-                                                        (ev == NULL || atoi(ev) != 0) ? 1 : 0);
-               } else if (!strcmp(tok, "cmask")) {
-                       if (ev == NULL) {
-                               fprintf(stderr, "Invalid event spec string: '%s'\n"
-                                               "\tShould be: %s\n", str, event_spec);
-                               exit(1);
-                       }
-                       PMEV_SET_CMASK(sel->ev.event, (uint32_t) strtoul(ev, NULL, 0));
-               } else if (!strcmp(tok, "icount")) {
-                       if (ev == NULL) {
-                               fprintf(stderr, "Invalid event spec string: '%s'\n"
-                                               "\tShould be: %s\n", str, event_spec);
-                               exit(1);
-                       }
-                       sel->ev.trigger_count = (uint64_t) strtoul(ev, NULL, 0);
-               }
+       bool ret = FALSE;
+       char *colon;
+
+       if (!generic_str_get_code(str, sel))
+               return FALSE;
+       switch (sel->type) {
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               ret = arch_translate_generic(sel);
+               break;
+       };
+       if (!ret) {
+               fprintf(stderr, "Unsupported built-in event %s\n", str);
+               return FALSE;
        }
-       if (PMEV_GET_INTEN(sel->ev.event) && !sel->ev.trigger_count) {
-               fprintf(stderr,
-                               "Counter trigger count for interrupt is too small: %lu\n",
-                               sel->ev.trigger_count);
-               exit(1);
+       strlcpy(sel->fq_str, str, MAX_FQSTR_SZ);
+       colon = strchr(str, ':');
+       if (colon)
+               parse_modifiers(colon + 1, sel);
+       return TRUE;
+}
+
+/* Given an event description string, fills out sel with the info from the
+ * string such that it can be submitted to the OS.
+ *
+ * The caller can set more bits if they like, such as whether or not to
+ * interrupt on overflow, the sample_period, etc.  None of those settings are
+ * part of the event string.
+ *
+ * Kills the program on failure. */
+struct perf_eventsel *perf_parse_event(const char *str)
+{
+       struct perf_eventsel *sel = xzmalloc(sizeof(struct perf_eventsel));
+
+       sel->ev.user_data = (uint64_t)sel;
+       if (parse_generic_encoding(str, sel))
+               goto success;
+       if (parse_pfm_encoding(str, sel))
+               goto success;
+       if (parse_raw_encoding(str, sel))
+               goto success;
+       free(sel);
+       fprintf(stderr, "Failed to parse event string %s\n", str);
+       exit(-1);
+success:
+       if (!PMEV_GET_OS(sel->ev.event) && !PMEV_GET_USR(sel->ev.event)) {
+               PMEV_SET_OS(sel->ev.event, 1);
+               PMEV_SET_USR(sel->ev.event, 1);
        }
-       free(dstr);
+       PMEV_SET_EN(sel->ev.event, 1);
+       return sel;
 }
 
 static void perf_get_arch_info(int perf_fd, struct perf_arch_info *pai)
@@ -304,6 +441,7 @@ static int perf_open_event(int perf_fd, const struct core_set *cores,
        wptr = put_le_u64(wptr, sel->ev.event);
        wptr = put_le_u64(wptr, sel->ev.flags);
        wptr = put_le_u64(wptr, sel->ev.trigger_count);
+       wptr = put_le_u64(wptr, sel->ev.user_data);
 
        for (i = CORE_SET_SIZE - 1; (i >= 0) && !cores->core_set[i]; i--)
                ;
@@ -323,15 +461,13 @@ static int perf_open_event(int perf_fd, const struct core_set *cores,
        return (int) ped;
 }
 
-static uint64_t *perf_get_event_values(int perf_fd, int ped,
-                                                                          struct perf_eventsel *sel,
-                                                                          size_t *pnvalues)
+static uint64_t *perf_get_event_values(int perf_fd, int ped, size_t *pnvalues)
 {
        ssize_t rsize;
        uint32_t i, n;
        uint64_t *values;
-       size_t bufsize = 3 * sizeof(uint64_t) + sizeof(uint32_t) +
-               MAX_NUM_CORES * sizeof(uint64_t);
+       uint64_t temp;
+       size_t bufsize = sizeof(uint32_t) + MAX_NUM_CORES * sizeof(uint64_t);
        uint8_t *cmdbuf = xmalloc(bufsize);
        uint8_t *wptr = cmdbuf;
        const uint8_t *rptr = cmdbuf;
@@ -342,15 +478,11 @@ static uint64_t *perf_get_event_values(int perf_fd, int ped,
        xpwrite(perf_fd, cmdbuf, wptr - cmdbuf, 0);
        rsize = pread(perf_fd, cmdbuf, bufsize, 0);
 
-       if (rsize < (3 * sizeof(uint64_t) + sizeof(uint32_t))) {
+       if (rsize < (ssize_t)sizeof(uint32_t)) { /* signed, so -1 is caught */
                fprintf(stderr, "Invalid read size while fetching event status: %ld\n",
                                rsize);
                exit(1);
        }
-
-       rptr = get_le_u64(rptr, &sel->ev.event);
-       rptr = get_le_u64(rptr, &sel->ev.flags);
-       rptr = get_le_u64(rptr, &sel->ev.trigger_count);
        rptr = get_le_u32(rptr, &n);
        if (((rptr - cmdbuf) + n * sizeof(uint64_t)) > rsize) {
                fprintf(stderr, "Invalid read size while fetching event status: %ld\n",
@@ -367,6 +499,21 @@ static uint64_t *perf_get_event_values(int perf_fd, int ped,
        return values;
 }
 
+/* Helper, returns the total count (across all cores) of the event @idx */
+uint64_t perf_get_event_count(struct perf_context *pctx, unsigned int idx)
+{
+       uint64_t total = 0;
+       size_t nvalues;
+       uint64_t *values;
+
+       values = perf_get_event_values(pctx->perf_fd, pctx->events[idx].ped,
+                                      &nvalues);
+       for (size_t i = 0; i < nvalues; i++)
+               total += values[i];
+       free(values);
+       return total;
+}
+
 static void perf_close_event(int perf_fd, int ped)
 {
        uint8_t cmdbuf[1 + sizeof(uint32_t)];
@@ -378,54 +525,29 @@ static void perf_close_event(int perf_fd, int ped)
        xpwrite(perf_fd, cmdbuf, wptr - cmdbuf, 0);
 }
 
-static void perf_enable_sampling(int kpctl_fd)
-{
-       static const char * const enable_str = "start";
-
-       xwrite(kpctl_fd, enable_str, strlen(enable_str));
-}
-
-static void perf_disable_sampling(int kpctl_fd)
-{
-       static const char * const disable_str = "stop";
-
-       xwrite(kpctl_fd, disable_str, strlen(disable_str));
-}
-
-static void perf_flush_sampling(int kpctl_fd)
-{
-       static const char * const flush_str = "flush";
-
-       xwrite(kpctl_fd, flush_str, strlen(flush_str));
-}
-
-struct perf_context *perf_create_context(const struct perf_context_config *cfg)
+struct perf_context *perf_create_context(struct perf_context_config *cfg)
 {
        struct perf_context *pctx = xzmalloc(sizeof(struct perf_context));
 
+       pctx->cfg = cfg;
        pctx->perf_fd = xopen(cfg->perf_file, O_RDWR, 0);
-       pctx->kpctl_fd = xopen(cfg->kpctl_file, O_RDWR, 0);
+       /* perf record needs kpctl_fd, but other perf subcommands might not.  We'll
+        * delay the opening of kpctl until we need it, since kprof is picky about
+        * multiple users of kpctl. */
+       pctx->kpctl_fd = -1;
        perf_get_arch_info(pctx->perf_fd, &pctx->pai);
-       perf_enable_sampling(pctx->kpctl_fd);
 
        return pctx;
 }
 
 void perf_free_context(struct perf_context *pctx)
 {
-       for (int i = 0; i < pctx->event_count; i++)
-               perf_close_event(pctx->perf_fd, pctx->events[i].ped);
-       perf_disable_sampling(pctx->kpctl_fd);
-       close(pctx->kpctl_fd);
-       close(pctx->perf_fd);
+       if (pctx->kpctl_fd != -1)
+               close(pctx->kpctl_fd);  /* disabled sampling */
+       close(pctx->perf_fd);   /* closes all events */
        free(pctx);
 }
 
-void perf_flush_context_traces(struct perf_context *pctx)
-{
-       perf_flush_sampling(pctx->kpctl_fd);
-}
-
 void perf_context_event_submit(struct perf_context *pctx,
                                                           const struct core_set *cores,
                                                           const struct perf_eventsel *sel)
@@ -440,25 +562,51 @@ void perf_context_event_submit(struct perf_context *pctx,
        pevt->cores = *cores;
        pevt->sel = *sel;
        pevt->ped = perf_open_event(pctx->perf_fd, cores, sel);
+       if (pevt->ped < 0) {
+               fprintf(stderr, "Unable to submit event \"%s\": %s\n", sel->fq_str,
+                       errstr());
+               exit(1);
+       }
+}
+
+void perf_stop_events(struct perf_context *pctx)
+{
+       for (int i = 0; i < pctx->event_count; i++)
+               perf_close_event(pctx->perf_fd, pctx->events[i].ped);
 }
 
-void perf_context_show_values(struct perf_context *pctx, FILE *file)
+static void ensure_kpctl_is_open(struct perf_context *pctx)
 {
+       if (pctx->kpctl_fd == -1)
+               pctx->kpctl_fd = xopen(pctx->cfg->kpctl_file, O_RDWR, 0);
+}
+
+void perf_start_sampling(struct perf_context *pctx)
+{
+       static const char * const enable_str = "start";
+
+       ensure_kpctl_is_open(pctx);
+       xwrite(pctx->kpctl_fd, enable_str, strlen(enable_str));
+}
+
+void perf_stop_sampling(struct perf_context *pctx)
+{
+       static const char * const disable_str = "stop";
+
+       ensure_kpctl_is_open(pctx);
+       xwrite(pctx->kpctl_fd, disable_str, strlen(disable_str));
+}
+
+/* Shows each submitted event: its string, final code, and trigger count. */
+void perf_context_show_events(struct perf_context *pctx, FILE *file)
+{
        for (int i = 0; i < pctx->event_count; i++) {
-               size_t nvalues;
-               struct perf_eventsel sel;
-               uint64_t *values = perf_get_event_values(pctx->perf_fd,
-                                                                                                pctx->events[i].ped, &sel,
-                                                                                                &nvalues);
-               char ename[256];
-
-               perf_get_event_string(&pctx->events[i].sel, ename, sizeof(ename));
-               fprintf(file, "Event: %s\n\t", ename);
-               for (size_t j = 0; j < nvalues; j++)
-                       fprintf(file, "%lu ", values[j]);
-               fprintf(file, "\n");
-
-               free(values);
+               struct perf_eventsel *sel = &pctx->events[i].sel;
+
+               fprintf(file, "Event: %s, final code 0x%llx%s, trigger count %llu\n",
+                       sel->fq_str, (unsigned long long)sel->ev.event,
+                       perfmon_is_fixed_event(&sel->ev) ? " (fixed)" : "",
+                       (unsigned long long)sel->ev.trigger_count);
        }
 }
 
@@ -490,6 +638,7 @@ static void perf_print_attr_flags(const pfm_event_attr_info_t *info, FILE *file)
                fputs("None ", file);
 }
 
+/* Ported from libpfm4 */
 static void perf_show_event_info(const pfm_event_info_t *info,
                                                                 const pfm_pmu_info_t *pinfo, FILE *file)
 {
@@ -610,87 +759,17 @@ void perf_show_events(const char *rx, FILE *file)
                regfree(&crx);
 }
 
-void perf_get_event_string(const struct perf_eventsel *sel, char *sbuf,
-                                                  size_t size)
-{
-       pfm_event_info_t einfo;
-
-    ZERO_DATA(einfo);
-    einfo.size = sizeof(einfo);
-       if ((sel->eidx >= 0) &&
-               (pfm_get_event_info(sel->eidx, PFM_OS_NONE, &einfo) == PFM_SUCCESS)) {
-               const char *mask_name =
-                       perf_get_event_mask_name(&einfo, PMEV_GET_MASK(sel->ev.event));
-
-               if (mask_name)
-                       snprintf(sbuf, size, "%s:%s", einfo.name, mask_name);
-               else
-                       snprintf(sbuf, size, "%s", einfo.name);
-       } else {
-               snprintf(sbuf, size, "0x%02x:0x%02x",
-                                (int) PMEV_GET_EVENT(sel->ev.event),
-                                (int) PMEV_GET_MASK(sel->ev.event));
-       }
-}
-
-void perf_make_eventsel_from_event_mask(struct perf_eventsel *sel,
-                                                                               uint32_t event, uint32_t mask)
-{
-       ZERO_DATA(*sel);
-       PMEV_SET_EVENT(sel->ev.event, (uint8_t) event);
-       PMEV_SET_MASK(sel->ev.event, (uint8_t) mask);
-       sel->eidx = perf_find_event_by_id(event, mask);
-}
-
-static bool perf_get_kernel_elf_path(char *path, size_t psize, size_t *ksize)
-{
-       int fd;
-       ssize_t rsize = -1;
-
-       fd = open("#version/kernel_path", O_RDONLY);
-       if (fd >= 0) {
-               rsize = read(fd, path, psize);
-               while ((rsize > 0) && (path[rsize - 1] == '\n'))
-                       rsize--;
-               close(fd);
-
-               /* We do not export the real kernel size from the #versions device,
-                * because of cyclic dependency issues. The only reason the size is
-                * needed, is because we generate an MMAP record, which Linux perf
-                * uses to find which ELF should be used to resolve addresses to
-                * symbols. Above the Akaros kernel, hardly other ELF will be loaded,
-                * so the worst it can happen if something above the kernel ELF
-                * proper address gets a hit, is that Linux perf will ask the kernel
-                * ELF to resolve an address, and that will fail.
-                * So here we use a large enough size to cover kernel size expansions
-                * for the next 10 years.
-                */
-               *ksize = 128 * 1024 * 1024;
-       }
-
-       return rsize > 0;
-}
-
 void perf_convert_trace_data(struct perfconv_context *cctx, const char *input,
-                                                        const char *output)
+                                                        FILE *outfile)
 {
-       FILE *infile, *outfile;
+       FILE *infile;
        size_t ksize;
-       char kpath[1024];
 
        infile = xfopen(input, "rb");
        if (xfsize(infile) > 0) {
-               outfile = xfopen(output, "wb");
-
-               if (perf_get_kernel_elf_path(kpath, sizeof(kpath), &ksize))
-                       perfconv_add_kernel_mmap(kpath, ksize, cctx);
-               else
-                       fprintf(stderr, "Unable to fetch kernel build information!\n"
-                                       "Kernel traces will be missing symbol information.\n");
-
+               perfconv_add_kernel_mmap(cctx);
+               perfconv_add_kernel_buildid(cctx);
                perfconv_process_input(cctx, infile, outfile);
-
-               fclose(outfile);
        }
        fclose(infile);
 }