Added new perf utility to access CPU counters from userspace
author Davide Libenzi <dlibenzi@google.com>
Mon, 16 Nov 2015 15:05:47 +0000 (07:05 -0800)
committer Barret Rhoden <brho@cs.berkeley.edu>
Wed, 16 Dec 2015 21:27:06 +0000 (16:27 -0500)
Added new perf utility to access CPU counters from userspace.

Signed-off-by: Davide Libenzi <dlibenzi@google.com>
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
Makefile
tools/profile/perf/.gitignore [new file with mode: 0644]
tools/profile/perf/Makefile [new file with mode: 0644]
tools/profile/perf/akaros.c [new file with mode: 0644]
tools/profile/perf/akaros.h [new file with mode: 0644]
tools/profile/perf/perf.c [new file with mode: 0644]
tools/profile/perf/perf_core.c [new file with mode: 0644]
tools/profile/perf/perf_core.h [new file with mode: 0644]
tools/profile/perf/xlib.c [new file with mode: 0644]
tools/profile/perf/xlib.h [new file with mode: 0644]

index 7832ecb..3dcc2c0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -661,12 +661,14 @@ apps-install: install-libs
        @$(call make_as_parent, -C tools/apps/busybox)
        @$(call make_as_parent, -C tools/profile/kprof2perf install)
        @$(call make_as_parent, -C tools/apps/snc install)
+       @$(call make_as_parent, -C tools/profile/perf install)
 
 PHONY += apps-clean
 apps-clean:
        @$(call make_as_parent, -C tools/apps/busybox clean)
        @$(call make_as_parent, -C tools/profile/kprof2perf clean)
        @$(call make_as_parent, -C tools/apps/snc clean)
+       @$(call make_as_parent, -C tools/profile/perf clean)
 
 # Cross Compiler
 # =========================================================================
diff --git a/tools/profile/perf/.gitignore b/tools/profile/perf/.gitignore
new file mode 100644 (file)
index 0000000..bd14107
--- /dev/null
@@ -0,0 +1 @@
+perf
diff --git a/tools/profile/perf/Makefile b/tools/profile/perf/Makefile
new file mode 100644 (file)
index 0000000..bb737df
--- /dev/null
@@ -0,0 +1,42 @@
+# Builds the userspace "perf" tool from the C sources in this directory and
+# installs it into the KFS image tree.
+#
+# Do not:
+# o  use make's built-in rules and variables
+#    (this increases performance and avoids hard-to-debug behaviour);
+# o  print "Entering directory ...";
+MAKEFLAGS += -rR --no-print-directory
+
+# Overrides
+# All of these use ?= so that a value coming from the environment or from the
+# command line wins over the default below.
+BUILDDIR ?= $(shell pwd)
+AKAROS_ROOT ?= $(BUILDDIR)/../../..
+# NOTE(review): MAKE_JOBS is not referenced anywhere else in this Makefile.
+MAKE_JOBS ?= 4
+KFS_ROOT ?= $(AKAROS_ROOT)/kern/kfs
+
+SOURCES = perf.c xlib.c perf_core.c akaros.c
+
+# Cross compiler; CROSS_COMPILE is expected to be supplied by the caller.
+XCC = $(CROSS_COMPILE)gcc
+
+LIBS=-lperfmon
+
+PHONY := all
+all: perf
+
+
+# "perf" is listed as phony, so the binary is relinked on every invocation
+# even when none of the sources changed.
+PHONY += perf
+perf: $(SOURCES)
+       @$(XCC) $(ROS_CFLAGS) $(ROS_LDFLAGS) -o perf $(SOURCES) $(LIBS)
+
+
+# Copies the freshly built binary into the KFS bin directory.
+PHONY += install
+install: all
+       @cp perf $(KFS_ROOT)/bin/perf
+
+
+PHONY += clean
+clean:
+       @rm -f perf
+
+
+# mrproper has nothing to remove beyond what clean already removes.
+PHONY += mrproper
+mrproper: clean
+
+
+.PHONY: $(PHONY)
diff --git a/tools/profile/perf/akaros.c b/tools/profile/perf/akaros.c
new file mode 100644 (file)
index 0000000..7885ffe
--- /dev/null
@@ -0,0 +1,133 @@
+/* Copyright (c) 2015 Google Inc
+ * Davide Libenzi <dlibenzi@google.com>
+ * See LICENSE for details.
+ */
+
+#include <ros/arch/arch.h>
+#include <ros/common.h>
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <parlib/parlib.h>
+#include "xlib.h"
+#include "akaros.h"
+
+static const unsigned int llcores[] = {
+       0
+};
+
+/* Adds every core listed in the static llcores table to *cores.
+ * Bits already present in *cores are left untouched.
+ */
+void ros_get_low_latency_core_set(struct core_set *cores)
+{
+       const unsigned int *core = llcores;
+       const unsigned int *const end = llcores + COUNT_OF(llcores);
+
+       while (core < end)
+               ros_set_bit(cores, *core++);
+}
+
+/* Returns the number of entries in the static llcores table. */
+size_t ros_get_low_latency_core_count(void)
+{
+       return COUNT_OF(llcores);
+}
+
+/* Returns max_vcores() plus the number of low latency cores. */
+size_t ros_total_cores(void)
+{
+       size_t total = ros_get_low_latency_core_count();
+
+       total += max_vcores();
+
+       return total;
+}
+
+/* Parses the CPU set specification str into *cores.
+ *
+ * str is a ':' separated list of terms which are applied left to right on an
+ * initially empty set. A term prefixed with '!' removes cores instead of
+ * adding them. Supported terms:
+ *   all    the bytes covering ros_total_cores() cores
+ *   llall  the low latency cores
+ *   N-M    the inclusive range N..M
+ *   I.J.K  the listed cores
+ *
+ * On a malformed or out of bound term a message is printed on stderr and the
+ * process exits with status 1.
+ */
+void ros_parse_cores(const char *str, struct core_set *cores)
+{
+       unsigned int fcpu, ncpu;
+       char *dstr = xstrdup(str), *tok, *sptr, *sptr2;
+
+       ZERO_DATA(*cores);
+       for (tok = strtok_r(dstr, ":", &sptr); tok;
+                tok = strtok_r(NULL, ":", &sptr)) {
+               bool neg_core = FALSE;
+
+               if (*tok == '!') {
+                       neg_core = TRUE;
+                       tok++;
+               }
+               if (!strcmp(tok, "all")) {
+                       size_t max_cores = ros_total_cores();
+
+                       if (max_cores > MAX_NUM_CORES) {
+                               /* Both values are printed as size_t; the previous "%u"
+                                * did not match the size_t argument.
+                                */
+                               fprintf(stderr, "The number of system CPU exceeds the "
+                                               "structure limits: num_cores=%zu limits=%zu\n",
+                                               max_cores,
+                                               (size_t) (CHAR_BIT * CORE_SET_SIZE));
+                               exit(1);
+                       }
+                       memset(cores->core_set, neg_core ? 0 : 0xff,
+                                  DIV_ROUND_UP(max_cores, CHAR_BIT));
+               } else if (!strcmp(tok, "llall")) {
+                       if (neg_core) {
+                               /* "!llall" used to add the cores, ignoring the '!'. */
+                               for (size_t i = 0; i < COUNT_OF(llcores); i++)
+                                       ros_clear_bit(cores->core_set, llcores[i]);
+                       } else {
+                               ros_get_low_latency_core_set(cores);
+                       }
+               } else if (strchr(tok, '-')) {
+                       if (sscanf(tok, "%u-%u", &fcpu, &ncpu) != 2) {
+                               fprintf(stderr, "Invalid CPU range: %s\n", tok);
+                               exit(1);
+                       }
+                       if ((fcpu >= MAX_NUM_CORES) ||
+                               (ncpu >= MAX_NUM_CORES) || (fcpu > ncpu)) {
+                               fprintf(stderr, "CPU number out of bound: %u\n",
+                                               fcpu);
+                               exit(1);
+                       }
+                       for (; fcpu <= ncpu; fcpu++) {
+                               if (neg_core)
+                                       ros_clear_bit(cores->core_set, fcpu);
+                               else
+                                       ros_set_bit(cores->core_set, fcpu);
+                       }
+               } else {
+                       for (tok = strtok_r(tok, ".", &sptr2); tok;
+                                tok = strtok_r(NULL, ".", &sptr2)) {
+                               char *end;
+                               unsigned long cpu = strtoul(tok, &end, 10);
+
+                               /* atoi() turned any non numeric token into CPU 0;
+                                * reject such tokens instead.
+                                */
+                               if ((end == tok) || (*end != 0)) {
+                                       fprintf(stderr, "Invalid CPU number: %s\n", tok);
+                                       exit(1);
+                               }
+                               if (cpu >= MAX_NUM_CORES) {
+                                       fprintf(stderr, "CPU number out of bound: %lu\n",
+                                                       cpu);
+                                       exit(1);
+                               }
+                               fcpu = (unsigned int) cpu;
+                               if (neg_core)
+                                       ros_clear_bit(cores->core_set, fcpu);
+                               else
+                                       ros_set_bit(cores->core_set, fcpu);
+                       }
+               }
+       }
+       free(dstr);
+}
+
+/* Fills *cores with the "all cores" set.
+ *
+ * The whole structure is cleared first, so bytes past the filled prefix hold
+ * zero instead of whatever the caller's storage contained. Then the bytes
+ * covering ros_total_cores() cores are set to 0xff. The fill length is capped
+ * to the size of the bitmap so an unexpectedly large core count cannot write
+ * past it.
+ */
+void ros_get_all_cores_set(struct core_set *cores)
+{
+       size_t max_cores = ros_total_cores();
+       size_t nbytes = DIV_ROUND_UP(max_cores, CHAR_BIT);
+
+       if (nbytes > sizeof(cores->core_set))
+               nbytes = sizeof(cores->core_set);
+
+       ZERO_DATA(*cores);
+       memset(cores->core_set, 0xff, nbytes);
+}
+
+/* Complements, in place, the bits of *dcs that fall within the first
+ * ros_total_cores() positions. In the last, partially used byte the bits
+ * above the core count are forced to zero; bytes past it are not modified.
+ */
+void ros_not_core_set(struct core_set *dcs)
+{
+       size_t remaining = ros_total_cores();
+       size_t byte = 0;
+
+       while ((remaining > 0) && (byte < sizeof(dcs->core_set))) {
+               size_t nb = remaining;
+
+               if (nb > CHAR_BIT)
+                       nb = CHAR_BIT;
+
+               dcs->core_set[byte] = (~dcs->core_set[byte]) & ((1 << nb) - 1);
+               remaining -= nb;
+               byte++;
+       }
+}
+
+/* Bitwise ANDs *scs into *dcs, byte by byte over the whole bitmap. */
+void ros_and_core_sets(struct core_set *dcs, const struct core_set *scs)
+{
+       size_t pos = 0;
+
+       while (pos < sizeof(dcs->core_set)) {
+               dcs->core_set[pos] = dcs->core_set[pos] & scs->core_set[pos];
+               pos++;
+       }
+}
+
+/* Bitwise ORs *scs into *dcs, byte by byte over the whole bitmap. */
+void ros_or_core_sets(struct core_set *dcs, const struct core_set *scs)
+{
+       size_t pos = 0;
+
+       while (pos < sizeof(dcs->core_set)) {
+               dcs->core_set[pos] = dcs->core_set[pos] | scs->core_set[pos];
+               pos++;
+       }
+}
diff --git a/tools/profile/perf/akaros.h b/tools/profile/perf/akaros.h
new file mode 100644 (file)
index 0000000..98c9779
--- /dev/null
@@ -0,0 +1,48 @@
+/* Copyright (c) 2015 Google Inc
+ * Davide Libenzi <dlibenzi@google.com>
+ * See LICENSE for details.
+ */
+
+#pragma once
+
+#include <ros/arch/arch.h>
+#include <ros/common.h>
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#include <parlib/parlib.h>
+
+#define CORE_SET_SIZE DIV_ROUND_UP(MAX_NUM_CORES, CHAR_BIT)
+
+/* Bitmap of cores, one bit per core, stored as CORE_SET_SIZE bytes.
+ *
+ * Not using sched.h CPU set because that file has definitions for a large
+ * number of APIs Akaros does not support.
+ * Making Akaros core_set.h visible in userspace might be a cleaner approach.
+ */
+struct core_set {
+       uint8_t core_set[CORE_SET_SIZE];
+};
+
+void ros_get_low_latency_core_set(struct core_set *cores);
+size_t ros_get_low_latency_core_count(void);
+size_t ros_total_cores(void);
+void ros_parse_cores(const char *str, struct core_set *cores);
+void ros_get_all_cores_set(struct core_set *cores);
+void ros_not_core_set(struct core_set *dcs);
+void ros_and_core_sets(struct core_set *dcs, const struct core_set *scs);
+void ros_or_core_sets(struct core_set *dcs, const struct core_set *scs);
+
+/* Sets bit number nbit in the byte array at addr.
+ *
+ * The byte is selected with nbit / CHAR_BIT and the bit within it with
+ * nbit % CHAR_BIT. The previous code used the modulo for both, which confined
+ * every access to the first CHAR_BIT bytes and aliased unrelated bits.
+ * The caller must make sure the array covers nbit.
+ */
+static inline void ros_set_bit(void *addr, size_t nbit)
+{
+       uint8_t *bytes = (uint8_t *) addr;
+
+       bytes[nbit / CHAR_BIT] |= (uint8_t) (1 << (nbit % CHAR_BIT));
+}
+
+/* Clears bit number nbit in the byte array at addr.
+ *
+ * The byte is selected with nbit / CHAR_BIT and the bit within it with
+ * nbit % CHAR_BIT. The previous code used the modulo for both, which confined
+ * every access to the first CHAR_BIT bytes and aliased unrelated bits.
+ * The caller must make sure the array covers nbit.
+ */
+static inline void ros_clear_bit(void *addr, size_t nbit)
+{
+       uint8_t *bytes = (uint8_t *) addr;
+
+       bytes[nbit / CHAR_BIT] &= (uint8_t) ~(1 << (nbit % CHAR_BIT));
+}
+
+/* Returns whether bit number nbit is set in the byte array at addr.
+ *
+ * The byte is selected with nbit / CHAR_BIT and the bit within it with
+ * nbit % CHAR_BIT. The previous code used the modulo for both, which confined
+ * every access to the first CHAR_BIT bytes and aliased unrelated bits.
+ * The caller must make sure the array covers nbit.
+ */
+static inline bool ros_get_bit(const void *addr, size_t nbit)
+{
+       const uint8_t *bytes = (const uint8_t *) addr;
+
+       return (bytes[nbit / CHAR_BIT] & (1 << (nbit % CHAR_BIT))) != 0;
+}
diff --git a/tools/profile/perf/perf.c b/tools/profile/perf/perf.c
new file mode 100644 (file)
index 0000000..77e6f53
--- /dev/null
@@ -0,0 +1,175 @@
+/* Copyright (c) 2015 Google Inc
+ * Davide Libenzi <dlibenzi@google.com>
+ * See LICENSE for details.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <errno.h>
+#include <parlib/parlib.h>
+#include "xlib.h"
+#include "akaros.h"
+#include "perf_core.h"
+
+static struct perf_context_config perf_cfg = {
+       .perf_file = "#arch/perf",
+       .kpctl_file = "#kprof/kpctl",
+};
+
+/* Prints the command line help on stderr and terminates the process with
+ * exit status 1; it never returns to the caller.
+ * prg is the program name shown in the first line. The default paths shown
+ * for -m and -k are read from the current perf_cfg values.
+ */
+static void usage(const char *prg)
+{
+       fprintf(stderr,
+                       "Use: %s {list,cpucaps,record} [-mkecxh] -- CMD [ARGS ...]\n"
+                       "\tlist            Lists all the available events and their meaning.\n"
+                       "\tcpucaps         Shows the system CPU capabilities in term of "
+                       "performance counters.\n"
+                       "\trecord           Setups the configured counters, runs CMD, and "
+                       "shows the values of the counters.\n"
+                       "Options:\n"
+                       "\t-m PATH          Sets the path of the PERF file ('%s').\n"
+                       "\t-k PATH          Sets the path of the KPROF control file "
+                       "('%s').\n"
+                       "\t-e EVENT_SPEC    Adds an event to be tracked.\n"
+                       "\t-c CPUS_STR      Selects the CPU set on which the counters "
+                       "should be active.\n"
+                       "\t                 The following format is supported for the CPU "
+                       "set:\n"
+                       "\t                   !      = Negates the following set\n"
+                       "\t                   all    = Enable all CPUs\n"
+                       "\t                   llall  = Enable all low latency CPUs\n"
+                       "\t                   I.J.K  = Enable CPUs I, J, and K\n"
+                       "\t                   N-M    = Enable CPUs from N to M, included\n"
+                       "\t                 Examples: all:!3.4.7  0-15:!3.5.7\n"
+                       "\t-x EVENT_RX      Sets the event name regular expression for "
+                       "list.\n"
+                       "\t-h               Displays this help screen.\n", prg,
+                       perf_cfg.perf_file, perf_cfg.kpctl_file);
+       exit(1);
+}
+
+/* Writes the six fields of *pai to file, one "PERF.<name> = <value>" line
+ * per field.
+ */
+static void show_perf_arch_info(const struct perf_arch_info *pai, FILE *file)
+{
+       fprintf(file, "PERF.version             = %u\n", pai->perfmon_version);
+       fprintf(file, "PERF.proc_arch_events    = %u\n", pai->proc_arch_events);
+       fprintf(file, "PERF.bits_x_counter      = %u\n", pai->bits_x_counter);
+       fprintf(file, "PERF.counters_x_proc     = %u\n", pai->counters_x_proc);
+       fprintf(file, "PERF.bits_x_fix_counter  = %u\n",
+                       pai->bits_x_fix_counter);
+       fprintf(file, "PERF.fix_counters_x_proc = %u\n",
+                       pai->fix_counters_x_proc);
+}
+
+/* Creates a process for argv[0] with argument vector argv, provisions cores to
+ * it, runs it and waits for it to terminate.
+ *
+ * The provisioned cores are the ones in *cores which are not low latency
+ * cores. On process creation or provisioning failure a message is printed on
+ * stderr and the tool exits with status 1.
+ */
+static void run_process_and_wait(int argc, const char * const *argv,
+                                                                const struct core_set *cores)
+{
+       int pid, status;
+       size_t max_cores = ros_total_cores();
+       struct core_set pvcores;
+
+       pid = sys_proc_create(argv[0], strlen(argv[0]), argv, NULL, 0);
+       if (pid < 0) {
+               perror(argv[0]);
+               exit(1);
+       }
+
+       /* ros_get_low_latency_core_set() only ORs bits in, so the set must start
+        * out empty; it was previously used uninitialized.
+        */
+       memset(&pvcores, 0, sizeof(pvcores));
+       ros_get_low_latency_core_set(&pvcores);
+       ros_not_core_set(&pvcores);
+       ros_and_core_sets(&pvcores, cores);
+       for (size_t i = 0; i < max_cores; i++) {
+               if (ros_get_bit(&pvcores, i)) {
+                       if (sys_provision(pid, RES_CORES, i)) {
+                               /* %zu matches the size_t loop index. */
+                               fprintf(stderr,
+                                               "Unable to provision CPU %zu to PID %d: cmd='%s'\n",
+                                               i, pid, argv[0]);
+                               exit(1);
+                       }
+               }
+       }
+
+       sys_proc_run(pid);
+       waitpid(pid, &status, 0);
+}
+
+/* Tool entry point: perf {list,cpucaps,record} [options] [-- CMD [ARGS ...]].
+ *
+ * argv[1] selects the command and the options start at argv[2]. Option parsing
+ * stops at "--"; whatever follows is the command line used by "record".
+ * An unknown option, an option without its argument, an unknown command, or
+ * "record" without a command after "--" all end in usage(), which exits with
+ * status 1.
+ */
+int main(int argc, const char * const *argv)
+{
+       int i, icmd = -1, num_events = 0;
+       const char *cmd = argv[1], *show_rx = NULL;
+       struct perf_context *pctx;
+       struct core_set cores;
+       const char *events[MAX_CPU_EVENTS];
+
+       ros_get_all_cores_set(&cores);
+
+       for (i = 2; i < argc; i++) {
+               /* Options which take an argument used to be silently dropped when
+                * the argument was missing; that is now reported through usage().
+                */
+               if (!strcmp(argv[i], "-m")) {
+                       if (++i >= argc)
+                               usage(argv[0]);
+                       perf_cfg.perf_file = argv[i];
+               } else if (!strcmp(argv[i], "-k")) {
+                       if (++i >= argc)
+                               usage(argv[0]);
+                       perf_cfg.kpctl_file = argv[i];
+               } else if (!strcmp(argv[i], "-e")) {
+                       if (++i >= argc)
+                               usage(argv[0]);
+                       if (num_events >= MAX_CPU_EVENTS) {
+                               fprintf(stderr, "Too many events: %d\n", num_events);
+                               return 1;
+                       }
+                       events[num_events++] = argv[i];
+               } else if (!strcmp(argv[i], "-x")) {
+                       if (++i >= argc)
+                               usage(argv[0]);
+                       show_rx = argv[i];
+               } else if (!strcmp(argv[i], "-c")) {
+                       if (++i >= argc)
+                               usage(argv[0]);
+                       ros_parse_cores(argv[i], &cores);
+               } else if (!strcmp(argv[i], "--")) {
+                       icmd = i + 1;
+                       break;
+               } else {
+                       usage(argv[0]);
+               }
+       }
+       if (!cmd)
+               usage(argv[0]);
+
+       perf_initialize(argc, argv);
+       pctx = perf_create_context(&perf_cfg);
+
+       if (!strcmp(cmd, "list")) {
+               perf_show_events(show_rx, stdout);
+       } else if (!strcmp(cmd, "cpucaps")) {
+               show_perf_arch_info(perf_context_get_arch_info(pctx), stdout);
+       } else if (!strcmp(cmd, "record")) {
+               /* icmd == argc means "--" was the last argument: argv[icmd] would
+                * be NULL and must not reach strcmp().
+                */
+               if ((icmd < 0) || (icmd >= argc))
+                       usage(argv[0]);
+
+               for (i = 0; i < num_events; i++) {
+                       struct perf_eventsel sel;
+
+                       perf_parse_event(events[i], &sel);
+                       perf_context_event_submit(pctx, &cores, &sel);
+               }
+
+               /* "sleep N" is handled in-process instead of spawning a command. */
+               if (!strcmp(argv[icmd], "sleep") && (icmd + 1) < argc)
+                       sleep(atoi(argv[icmd + 1]));
+               else
+                       run_process_and_wait(argc - icmd, argv + icmd, &cores);
+
+               perf_context_show_values(pctx, stdout);
+       } else {
+               usage(argv[0]);
+       }
+       perf_free_context(pctx);
+       perf_finalize();
+
+       return 0;
+}
diff --git a/tools/profile/perf/perf_core.c b/tools/profile/perf/perf_core.c
new file mode 100644 (file)
index 0000000..06f2b25
--- /dev/null
@@ -0,0 +1,618 @@
+/* Copyright (c) 2015 Google Inc
+ * Davide Libenzi <dlibenzi@google.com>
+ * See LICENSE for details.
+ */
+
+#include <ros/arch/msr-index.h>
+#include <ros/arch/perfmon.h>
+#include <ros/common.h>
+#include <ros/memops.h>
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <limits.h>
+#include <errno.h>
+#include <regex.h>
+#include <parlib/parlib.h>
+#include <perfmon/err.h>
+#include <perfmon/pfmlib.h>
+#include "xlib.h"
+#include "akaros.h"
+#include "perf_core.h"
+
+/* Result of splitting an "EVENT[:UMASK]" name, see perf_parse_event_coords().
+ * Release it with perf_free_event_coords().
+ */
+struct event_coords {
+       /* Heap copy of the name when it held a ':', NULL otherwise. */
+       char *buffer;
+       /* Event part: points into buffer, or at the caller's string when there
+        * was no ':'.
+        */
+       const char *event;
+       /* Text after the ':' (inside buffer), or NULL when there was no ':'. */
+       const char *umask;
+};
+
+static const uint32_t invalid_mask = (uint32_t) -1;
+
+/* Splits name at its first ':' into an event part and a umask part.
+ *
+ * Without a ':' nothing is allocated: ec->event aliases name and ec->umask is
+ * NULL. Otherwise name is duplicated into ec->buffer, the ':' is overwritten
+ * with a terminator, and ec->event / ec->umask point at the two halves.
+ */
+static void perf_parse_event_coords(const char *name, struct event_coords *ec)
+{
+       const char *colon = strchr(name, ':');
+       size_t split;
+
+       if (!colon) {
+               ec->buffer = NULL;
+               ec->event = name;
+               ec->umask = NULL;
+               return;
+       }
+
+       split = colon - name;
+       ec->buffer = xstrdup(name);
+       ec->buffer[split] = 0;
+       ec->event = ec->buffer;
+       ec->umask = ec->buffer + split + 1;
+}
+
+/* Releases the copy made by perf_parse_event_coords(); ec->buffer is NULL
+ * when no copy was made. The pointers in *ec are not reset.
+ */
+static void perf_free_event_coords(struct event_coords *ec)
+{
+       free(ec->buffer);
+}
+
+/* Returns the name of the umask attribute of event *einfo whose code equals
+ * mask, or NULL when no umask attribute has that code.
+ * Exits with status 1 if the attribute information cannot be retrieved.
+ */
+static const char *perf_get_event_mask_name(const pfm_event_info_t *einfo,
+                                                                                       uint32_t mask)
+{
+       pfm_event_attr_info_t ainfo;
+       int attr;
+
+       ZERO_DATA(ainfo);
+       ainfo.size = sizeof(ainfo);
+       pfm_for_each_event_attr(attr, einfo) {
+               pfm_err_t rc = pfm_get_event_attr_info(einfo->idx, attr, PFM_OS_NONE,
+                                                                                          &ainfo);
+
+               if (rc != PFM_SUCCESS) {
+                       fprintf(stderr, "Failed to get attribute info: %s\n",
+                                       pfm_strerror(rc));
+                       exit(1);
+               }
+               /* Only unit mask attributes are of interest here. */
+               if ((ainfo.type == PFM_ATTR_UMASK) &&
+                       ((uint32_t) ainfo.code == mask))
+                       return ainfo.name;
+       }
+
+       return NULL;
+}
+
+/* Resolves an "EVENT[:UMASK]" name to its event code and unit mask code.
+ *
+ * Returns the value of pfm_find_event() for the event part of name. When that
+ * value is negative, *event and *mask are not written. Otherwise *event
+ * receives the event code and *mask is selected among the event umask
+ * attributes as follows:
+ *  - mask_hint != invalid_mask: the umask whose code equals mask_hint;
+ *  - no ":UMASK" in name: the first umask flagged as default, or the last
+ *    umask enumerated when none is flagged;
+ *  - otherwise: the umask whose name equals the UMASK part.
+ * *mask is left at invalid_mask when nothing is selected.
+ * Exits with status 1 when the event or attribute info cannot be retrieved.
+ */
+static int perf_resolve_event_name(const char *name, uint32_t *event,
+                                                                  uint32_t *mask, uint32_t mask_hint)
+{
+       int idx;
+       struct event_coords ec;
+
+       perf_parse_event_coords(name, &ec);
+
+       idx = pfm_find_event(ec.event);
+       if (idx >= 0) {
+               int i;
+               pfm_err_t err;
+               pfm_event_info_t einfo;
+               pfm_event_attr_info_t ainfo;
+
+               ZERO_DATA(einfo);
+               einfo.size = sizeof(einfo);
+               err = pfm_get_event_info(idx, PFM_OS_NONE, &einfo);
+               if (err != PFM_SUCCESS) {
+                       fprintf(stderr, "Unable to retrieve event (%s) info: %s\n",
+                                       name, pfm_strerror(err));
+                       exit(1);
+               }
+
+               *event = (uint32_t) einfo.code;
+               /* Stays at invalid_mask unless one of the branches below picks a
+                * umask.
+                */
+               *mask = invalid_mask;
+
+               ZERO_DATA(ainfo);
+               ainfo.size = sizeof(ainfo);
+               pfm_for_each_event_attr(i, &einfo) {
+                       err = pfm_get_event_attr_info(idx, i, PFM_OS_NONE, &ainfo);
+                       if (err != PFM_SUCCESS) {
+                               fprintf(stderr, "Failed to get attribute info: %s\n",
+                                               pfm_strerror(err));
+                               exit(1);
+                       }
+                       if (ainfo.type == PFM_ATTR_UMASK) {
+                               if (mask_hint != invalid_mask) {
+                                       /* A hint takes precedence over any ":UMASK" text. */
+                                       if (mask_hint == (uint32_t) ainfo.code) {
+                                               *mask = (uint32_t) ainfo.code;
+                                               break;
+                                       }
+                               } else if (!ec.umask) {
+                                       /* Keep overwriting until a default umask is seen. */
+                                       *mask = (uint32_t) ainfo.code;
+                                       if (ainfo.is_dfl)
+                                               break;
+                               } else if (!strcmp(ec.umask, ainfo.name)) {
+                                       *mask = (uint32_t) ainfo.code;
+                                       break;
+                               }
+                       }
+               }
+       }
+       perf_free_event_coords(&ec);
+
+       return idx;
+}
+
+/* Searches the events of every present PMU for one whose resolved code and
+ * unit mask equal event and mask.
+ *
+ * Returns the index of the first match, or -1 when there is none. Exits with
+ * status 1 if the information of an enumerated event cannot be retrieved.
+ */
+static int perf_find_event_by_id(uint32_t event, uint32_t mask)
+{
+       int pmu;
+       pfm_pmu_info_t pinfo;
+       pfm_event_info_t info;
+
+       ZERO_DATA(pinfo);
+       pinfo.size = sizeof(pinfo);
+       ZERO_DATA(info);
+       info.size = sizeof(info);
+
+       pfm_for_all_pmus(pmu) {
+               int eidx;
+               pfm_err_t err = pfm_get_pmu_info(pmu, &pinfo);
+
+               /* PMUs which cannot be queried or are not present are skipped. */
+               if ((err == PFM_SUCCESS) && pinfo.is_present) {
+                       eidx = pinfo.first_event;
+                       while (eidx != -1) {
+                               uint32_t cevent, cmask;
+
+                               err = pfm_get_event_info(eidx, PFM_OS_NONE, &info);
+                               if (err != PFM_SUCCESS) {
+                                       fprintf(stderr, "Failed to get event info: %s\n",
+                                                       pfm_strerror(err));
+                                       exit(1);
+                               }
+                               /* The name must resolve back to this very index
+                                * before its code and mask are compared.
+                                */
+                               if ((perf_resolve_event_name(info.name, &cevent, &cmask,
+                                                                                        mask) == eidx) &&
+                                       (cevent == event) && (cmask == mask))
+                                       return eidx;
+
+                               eidx = pfm_get_event_next(eidx);
+                       }
+               }
+       }
+
+       return -1;
+}
+
+/* Initializes the perfmon library; on failure prints the library error on
+ * stderr and exits with status 1. argc and argv are not used.
+ */
+void perf_initialize(int argc, const char * const *argv)
+{
+       pfm_err_t rc = pfm_initialize();
+
+       if (rc == PFM_SUCCESS)
+               return;
+
+       fprintf(stderr, "Unable to initialize perfmon library: %s\n",
+                       pfm_strerror(rc));
+       exit(1);
+}
+
+/* Counterpart of perf_initialize(): shuts the perfmon library down by calling
+ * pfm_terminate().
+ */
+void perf_finalize(void)
+{
+       pfm_terminate();
+}
+
+/* Parses the event specification str into *sel.
+ *
+ * The first ',' separated field selects the event, either numerically
+ * ("EVENT_ID:MASK") or by name ("EVENT_NAME[:MASK_NAME]", resolved through
+ * perf_resolve_event_name()). The remaining fields are modifiers: os, usr,
+ * int, invcmsk (optional "=0" / "=1", a bare name means 1), cmask=MASK and
+ * icount=COUNT. Fields with any other name are ignored.
+ *
+ * *sel starts zeroed with eidx = -1 and the os, usr and en bits set. eidx is
+ * only replaced when the event is given by name.
+ * On any error a message is printed on stderr and the process exits with
+ * status 1.
+ */
+void perf_parse_event(const char *str, struct perf_eventsel *sel)
+{
+       static const char *const event_spec =
+               "{EVENT_ID:MASK,EVENT_NAME:MASK_NAME}[,os[={0,1}]][,usr[={0,1}]]"
+               "[,int[={0,1}]][,invcmsk[={0,1}]][,cmask=MASK][,icount=COUNT]";
+       char *dstr = xstrdup(str), *sptr, *tok, *ev;
+
+       tok = strtok_r(dstr, ",", &sptr);
+       if (tok == NULL) {
+               fprintf(stderr, "Invalid event spec string: '%s'\n\t%s\n", str,
+                               event_spec);
+               exit(1);
+       }
+       ZERO_DATA(*sel);
+       sel->eidx = -1;
+       sel->ev.flags = 0;
+       sel->ev.u.v = 0;
+       sel->ev.u.b.os = 1;
+       sel->ev.u.b.usr = 1;
+       sel->ev.u.b.en = 1;
+       /* isdigit() must be given an unsigned char value: a plain char may be
+        * negative, which is undefined behaviour.
+        */
+       if (isdigit((unsigned char) *tok)) {
+               ev = strchr(tok, ':');
+               if (ev == NULL) {
+                       fprintf(stderr, "Invalid event spec string: '%s'\n"
+                                       "\tShould be: %s\n", str, event_spec);
+                       exit(1);
+               }
+               *ev++ = 0;
+               sel->ev.u.b.event = (uint8_t) strtoul(tok, NULL, 0);
+               sel->ev.u.b.mask = (uint8_t) strtoul(ev, NULL, 0);
+       } else {
+               uint32_t event, mask;
+
+               sel->eidx = perf_resolve_event_name(tok, &event, &mask, invalid_mask);
+               if (sel->eidx < 0) {
+                       fprintf(stderr, "Unable to find event: %s\n", tok);
+                       exit(1);
+               }
+               sel->ev.u.b.event = (uint8_t) event;
+               sel->ev.u.b.mask = (uint8_t) mask;
+       }
+       while ((tok = strtok_r(NULL, ",", &sptr)) != NULL) {
+               ev = strchr(tok, '=');
+               if (ev)
+                       *ev++ = 0;
+               if (!strcmp(tok, "os")) {
+                       sel->ev.u.b.os = (ev == NULL || atoi(ev) != 0) ? 1 : 0;
+               } else if (!strcmp(tok, "usr")) {
+                       sel->ev.u.b.usr = (ev == NULL || atoi(ev) != 0) ? 1 : 0;
+               } else if (!strcmp(tok, "int")) {
+                       sel->ev.u.b.inten = (ev == NULL || atoi(ev) != 0) ? 1 : 0;
+               } else if (!strcmp(tok, "invcmsk")) {
+                       sel->ev.u.b.invcmsk = (ev == NULL || atoi(ev) != 0) ? 1 : 0;
+               } else if (!strcmp(tok, "cmask")) {
+                       if (ev == NULL) {
+                               fprintf(stderr, "Invalid event spec string: '%s'\n"
+                                               "\tShould be: %s\n", str, event_spec);
+                               exit(1);
+                       }
+                       sel->ev.u.b.cmask = (uint32_t) strtoul(ev, NULL, 0);
+               } else if (!strcmp(tok, "icount")) {
+                       if (ev == NULL) {
+                               fprintf(stderr, "Invalid event spec string: '%s'\n"
+                                               "\tShould be: %s\n", str, event_spec);
+                               exit(1);
+                       }
+                       /* strtoull() covers the full 64 bit range of the count even
+                        * where unsigned long is 32 bit wide.
+                        */
+                       sel->ev.trigger_count = (uint64_t) strtoull(ev, NULL, 0);
+               }
+       }
+       if (sel->ev.u.b.inten && !sel->ev.trigger_count) {
+               fprintf(stderr,
+                               "Counter trigger count for interrupt is too small: %llu\n",
+                               (unsigned long long) sel->ev.trigger_count);
+               exit(1);
+       }
+       free(dstr);
+}
+
+/* Queries the perf device for the CPU capabilities and stores them in *pai.
+ *
+ * A single PERFMON_CMD_CPU_CAPS command byte is written at offset 0; the reply
+ * is read back as six little endian 32 bit values, decoded in the order of
+ * the fields table below.
+ */
+static void perf_get_arch_info(int perf_fd, struct perf_arch_info *pai)
+{
+       uint8_t cmdbuf[6 * sizeof(uint32_t)];
+       uint32_t *const fields[] = {
+               &pai->perfmon_version,
+               &pai->proc_arch_events,
+               &pai->bits_x_counter,
+               &pai->counters_x_proc,
+               &pai->bits_x_fix_counter,
+               &pai->fix_counters_x_proc,
+       };
+       const uint8_t *rptr = cmdbuf;
+
+       cmdbuf[0] = PERFMON_CMD_CPU_CAPS;
+       xpwrite(perf_fd, cmdbuf, 1, 0);
+       xpread(perf_fd, cmdbuf, sizeof(cmdbuf), 0);
+
+       for (size_t i = 0; i < COUNT_OF(fields); i++)
+               rptr = get_le_u32(rptr, fields[i]);
+}
+
+/* Opens a counter for *sel on the cores of *cores and returns the descriptor
+ * reported by the perf device.
+ *
+ * The command is PERFMON_CMD_COUNTER_OPEN, followed by the event value, flags
+ * and trigger count as little endian 64 bit values, the number of core set
+ * bytes as a little endian 32 bit value, and the core set bytes up to and
+ * including the highest non zero one. An empty core set is a fatal error.
+ */
+static int perf_open_event(int perf_fd, const struct core_set *cores,
+                                                  const struct perf_eventsel *sel)
+{
+       uint8_t cmdbuf[1 + 3 * sizeof(uint64_t) + sizeof(uint32_t) +
+                                  CORE_SET_SIZE];
+       uint8_t *wptr = cmdbuf;
+       uint32_t ped;
+       int last = CORE_SET_SIZE - 1;
+
+       /* Find the highest byte of the set with at least one bit set. */
+       while ((last >= 0) && !cores->core_set[last])
+               last--;
+       if (last < 0) {
+               fprintf(stderr, "Performance event CPU set must not be empty\n");
+               exit(1);
+       }
+
+       *wptr++ = PERFMON_CMD_COUNTER_OPEN;
+       wptr = put_le_u64(wptr, sel->ev.u.v);
+       wptr = put_le_u64(wptr, sel->ev.flags);
+       wptr = put_le_u64(wptr, sel->ev.trigger_count);
+       wptr = put_le_u32(wptr, last + 1);
+       memcpy(wptr, cores->core_set, last + 1);
+       wptr += last + 1;
+
+       xpwrite(perf_fd, cmdbuf, wptr - cmdbuf, 0);
+       xpread(perf_fd, cmdbuf, sizeof(uint32_t), 0);
+
+       get_le_u32(cmdbuf, &ped);
+
+       return (int) ped;
+}
+
+/* Fetches the current values of the counter with descriptor ped.
+ *
+ * A PERFMON_CMD_COUNTER_STATUS command is written to the perf device and the
+ * reply is decoded as three little endian 64 bit values (stored into
+ * sel->ev.u.v, sel->ev.flags and sel->ev.trigger_count), a little endian
+ * 32 bit value count, and that many little endian 64 bit values.
+ *
+ * Returns a newly allocated array which the caller must free(); *pnvalues
+ * receives the number of values. A failed or short read is fatal.
+ */
+static uint64_t *perf_get_event_values(int perf_fd, int ped,
+                                                                          struct perf_eventsel *sel,
+                                                                          size_t *pnvalues)
+{
+       const size_t hdrsize = 3 * sizeof(uint64_t) + sizeof(uint32_t);
+       ssize_t rsize;
+       uint32_t i, n;
+       uint64_t *values;
+       size_t bufsize = hdrsize + MAX_NUM_CORES * sizeof(uint64_t);
+       uint8_t *cmdbuf = xmalloc(bufsize);
+       uint8_t *wptr = cmdbuf;
+       const uint8_t *rptr = cmdbuf;
+
+       *wptr++ = PERFMON_CMD_COUNTER_STATUS;
+       wptr = put_le_u32(wptr, ped);
+
+       xpwrite(perf_fd, cmdbuf, wptr - cmdbuf, 0);
+       rsize = pread(perf_fd, cmdbuf, bufsize, 0);
+
+       /* A -1 from pread() must be caught before rsize is compared against an
+        * unsigned size: the comparison would convert it to a huge value and let
+        * the failure through.
+        */
+       if (rsize < 0) {
+               fprintf(stderr, "Failed to read event status: %s\n",
+                               strerror(errno));
+               exit(1);
+       }
+       if ((size_t) rsize < hdrsize) {
+               fprintf(stderr, "Invalid read size while fetching event status: %zd\n",
+                               rsize);
+               exit(1);
+       }
+
+       rptr = get_le_u64(rptr, &sel->ev.u.v);
+       rptr = get_le_u64(rptr, &sel->ev.flags);
+       rptr = get_le_u64(rptr, &sel->ev.trigger_count);
+       rptr = get_le_u32(rptr, &n);
+       /* The announced values must all fit in what was actually read. */
+       if (n > ((size_t) rsize - hdrsize) / sizeof(uint64_t)) {
+               fprintf(stderr, "Invalid read size while fetching event status: %zd\n",
+                               rsize);
+               exit(1);
+       }
+       values = xmalloc(n * sizeof(uint64_t));
+       for (i = 0; i < n; i++)
+               rptr = get_le_u64(rptr, values + i);
+       free(cmdbuf);
+
+       *pnvalues = n;
+
+       return values;
+}
+
+/* Closes the counter with descriptor ped by writing a
+ * PERFMON_CMD_COUNTER_CLOSE command, followed by ped as a little endian
+ * 32 bit value, to the perf device.
+ */
+static void perf_close_event(int perf_fd, int ped)
+{
+       uint8_t cmdbuf[1 + sizeof(uint32_t)];
+       uint8_t *end;
+
+       cmdbuf[0] = PERFMON_CMD_COUNTER_CLOSE;
+       end = put_le_u32(cmdbuf + 1, ped);
+
+       xpwrite(perf_fd, cmdbuf, end - cmdbuf, 0);
+}
+
+/* Writes the "start" command, without a terminator, to the kprof control
+ * file descriptor.
+ */
+static void perf_enable_sampling(int kpctl_fd)
+{
+       static const char start_cmd[] = "start";
+
+       xwrite(kpctl_fd, start_cmd, sizeof(start_cmd) - 1);
+}
+
+/* Writes the "stop" command, without a terminator, to the kprof control
+ * file descriptor.
+ */
+static void perf_disable_sampling(int kpctl_fd)
+{
+       static const char stop_cmd[] = "stop";
+
+       xwrite(kpctl_fd, stop_cmd, sizeof(stop_cmd) - 1);
+}
+
+/* Allocates a zeroed perf context, opens the perf and kprof control files
+ * named by *cfg read/write, loads the CPU capabilities into the context and
+ * enables sampling. Release the result with perf_free_context().
+ */
+struct perf_context *perf_create_context(const struct perf_context_config *cfg)
+{
+       struct perf_context *ctx = xzmalloc(sizeof(*ctx));
+       int perf_fd = xopen(cfg->perf_file, O_RDWR, 0);
+       int kpctl_fd = xopen(cfg->kpctl_file, O_RDWR, 0);
+
+       ctx->perf_fd = perf_fd;
+       ctx->kpctl_fd = kpctl_fd;
+       perf_get_arch_info(perf_fd, &ctx->pai);
+       perf_enable_sampling(kpctl_fd);
+
+       return ctx;
+}
+
+/* Tears a context down: closes every submitted event in submission order,
+ * disables sampling, closes both file descriptors and frees the context.
+ */
+void perf_free_context(struct perf_context *pctx)
+{
+       int idx = 0;
+
+       while (idx < pctx->event_count) {
+               perf_close_event(pctx->perf_fd, pctx->events[idx].ped);
+               idx++;
+       }
+
+       perf_disable_sampling(pctx->kpctl_fd);
+       close(pctx->kpctl_fd);
+       close(pctx->perf_fd);
+       free(pctx);
+}
+
+/* Appends an event to the context and opens its counter on the perf device.
+ *
+ * Copies of *cores and *sel are stored in the new slot together with the
+ * descriptor returned by perf_open_event(). Exits with status 1 when the
+ * context event table is already full.
+ */
+void perf_context_event_submit(struct perf_context *pctx,
+                                                          const struct core_set *cores,
+                                                          const struct perf_eventsel *sel)
+{
+       struct perf_event *slot;
+
+       if (pctx->event_count >= COUNT_OF(pctx->events)) {
+               fprintf(stderr, "Too many open events: %d\n", pctx->event_count);
+               exit(1);
+       }
+
+       /* The slot is claimed before the counter is opened. */
+       slot = &pctx->events[pctx->event_count];
+       pctx->event_count++;
+
+       slot->cores = *cores;
+       slot->sel = *sel;
+       slot->ped = perf_open_event(pctx->perf_fd, cores, sel);
+}
+
+/* Prints, for every event submitted to the context, an "Event: <name>" line
+ * followed by a tab indented line holding the counter values read from the
+ * perf device, separated by spaces.
+ */
+void perf_context_show_values(struct perf_context *pctx, FILE *file)
+{
+       int idx = 0;
+
+       while (idx < pctx->event_count) {
+               struct perf_event *pevt = &pctx->events[idx];
+               /* Receives the selector reported by the device; not used here. */
+               struct perf_eventsel cur_sel;
+               size_t count;
+               uint64_t *vals = perf_get_event_values(pctx->perf_fd, pevt->ped,
+                                                                                          &cur_sel, &count);
+               char label[256];
+               size_t v = 0;
+
+               perf_get_event_string(&pevt->sel, label, sizeof(label));
+               fprintf(file, "Event: %s\n\t", label);
+               while (v < count) {
+                       fprintf(file, "%lu ", vals[v]);
+                       v++;
+               }
+               fprintf(file, "\n");
+
+               free(vals);
+               idx++;
+       }
+}
+
+/* Writes the flags of event @info to @file: "[precise] " when the event is
+ * marked precise, otherwise "None".
+ */
+static void perf_print_event_flags(const pfm_event_info_t *info, FILE *file)
+{
+       if (info->is_precise) {
+               fputs("[precise] ", file);
+               return;
+       }
+
+       fputs("None", file);
+}
+
+/* Writes the flags of attribute @info to @file: "[default] " and/or
+ * "[precise] " for the flags that are set, or "None " when neither is.
+ */
+static void perf_print_attr_flags(const pfm_event_attr_info_t *info, FILE *file)
+{
+       if (!info->is_dfl && !info->is_precise) {
+               fputs("None ", file);
+               return;
+       }
+
+       if (info->is_dfl)
+               fputs("[default] ", file);
+       if (info->is_precise)
+               fputs("[precise] ", file);
+}
+
+/* Prints a multi-line description of one libpfm4 event to @file: its index,
+ * owning PMU, name, equivalent event, flags, description and code, followed
+ * by one line per attribute.  Unit masks and modifiers are numbered
+ * independently of each other.
+ *
+ * @info:  event to describe
+ * @pinfo: PMU the event belongs to; only its name and desc are printed
+ * @file:  output stream
+ *
+ * Exits the process if libpfm4 cannot return the info of an attribute.  An
+ * attribute whose control source is out of range is reported on stderr and
+ * then shown with the "???" source.
+ */
+static void perf_show_event_info(const pfm_event_info_t *info,
+                                 const pfm_pmu_info_t *pinfo, FILE *file)
+{
+       static const char * const srcs[PFM_ATTR_CTRL_MAX] = {
+               [PFM_ATTR_CTRL_UNKNOWN] = "???",
+               [PFM_ATTR_CTRL_PMU] = "PMU",
+               [PFM_ATTR_CTRL_PERF_EVENT] = "perf_event",
+       };
+       pfm_event_attr_info_t ainfo;
+       unsigned int mod = 0, um = 0;
+       int i;
+
+       fprintf(file, "#-----------------------------\n"
+                       "IDX      : %d\n"
+                       "PMU name : %s (%s)\n"
+                       "Name     : %s\n"
+                       "Equiv    : %s\n",
+                       info->idx, pinfo->name, pinfo->desc,
+                       info->name, info->equiv ? info->equiv : "None");
+
+       fprintf(file, "Flags    : ");
+       perf_print_event_flags(info, file);
+       fputc('\n', file);
+
+       fprintf(file, "Desc     : %s\n", info->desc ? info->desc :
+                       "no description available");
+       fprintf(file, "Code     : 0x%"PRIx64"\n", info->code);
+
+       /* libpfm4 expects the caller to zero the struct and set its size. */
+       ZERO_DATA(ainfo);
+       ainfo.size = sizeof(ainfo);
+
+       pfm_for_each_event_attr(i, info) {
+               const char *src;
+               pfm_err_t err = pfm_get_event_attr_info(info->idx, i, PFM_OS_NONE,
+                                                       &ainfo);
+
+               if (err != PFM_SUCCESS) {
+                       fprintf(stderr, "Failed to get attribute info: %s\n",
+                                       pfm_strerror(err));
+                       exit(1);
+               }
+
+               /* Clamp unknown sources so the srcs[] lookup stays in bounds. */
+               if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX) {
+                       fprintf(stderr,
+                                       "event: %s has unsupported attribute source %d\n",
+                                       info->name, ainfo.ctrl);
+                       ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
+               }
+               src = srcs[ainfo.ctrl];
+               switch (ainfo.type) {
+                       case PFM_ATTR_UMASK:
+                               fprintf(file, "Umask-%02u : 0x%02"PRIx64" : %s : [%s] : ",
+                                               um, ainfo.code, src, ainfo.name);
+                               perf_print_attr_flags(&ainfo, file);
+                               fputc(':', file);
+                               if (ainfo.equiv)
+                                       fprintf(file, " Alias to %s", ainfo.equiv);
+                               else
+                                       fprintf(file, " %s", ainfo.desc);
+                               fputc('\n', file);
+                               um++;
+                               break;
+                       case PFM_ATTR_MOD_BOOL:
+                               fprintf(file, "Modif-%02u : 0x%02"PRIx64" : %s : [%s] : "
+                                               "%s (boolean)\n", mod, ainfo.code, src, ainfo.name,
+                                               ainfo.desc);
+                               mod++;
+                               break;
+                       case PFM_ATTR_MOD_INTEGER:
+                               fprintf(file, "Modif-%02u : 0x%02"PRIx64" : %s : [%s] : "
+                                               "%s (integer)\n", mod, ainfo.code, src, ainfo.name,
+                                               ainfo.desc);
+                               mod++;
+                               break;
+                       default:
+                               /* Same column order as the cases above: source first,
+                                * then the attribute name in brackets.
+                                */
+                               fprintf(file, "Attr-%02u  : 0x%02"PRIx64" : %s : [%s] : %s\n",
+                                               (unsigned int) i, ainfo.code, src, ainfo.name,
+                                               ainfo.desc);
+                               break;
+               }
+       }
+}
+
+/* Lists the events of every PMU that libpfm4 reports as present, printing a
+ * full description of each one to @file.
+ *
+ * @rx:   optional case-insensitive regular expression matched against the
+ *        "<pmu name>::<event name>" string; NULL selects every event
+ * @file: output stream
+ *
+ * Exits the process if @rx does not compile or if libpfm4 fails to return
+ * the info of an event.
+ */
+void perf_show_events(const char *rx, FILE *file)
+{
+       int pmu;
+       regex_t filter;
+       pfm_pmu_info_t pinfo;
+       pfm_event_info_t info;
+       char qualified[256];
+
+       if (rx && regcomp(&filter, rx, REG_ICASE)) {
+               fprintf(stderr, "Failed to compile event regex: '%s'\n", rx);
+               exit(1);
+       }
+
+       /* libpfm4 expects zeroed structs with their size filled in. */
+       ZERO_DATA(pinfo);
+       pinfo.size = sizeof(pinfo);
+       ZERO_DATA(info);
+       info.size = sizeof(info);
+
+       pfm_for_all_pmus(pmu) {
+               pfm_err_t err = pfm_get_pmu_info(pmu, &pinfo);
+               int eidx;
+
+               /* Skip PMUs that are unknown or not present. */
+               if (err != PFM_SUCCESS || !pinfo.is_present)
+                       continue;
+
+               eidx = pinfo.first_event;
+               while (eidx != -1) {
+                       err = pfm_get_event_info(eidx, PFM_OS_NONE, &info);
+                       if (err != PFM_SUCCESS) {
+                               fprintf(stderr, "Failed to get event info: %s\n",
+                                               pfm_strerror(err));
+                               exit(1);
+                       }
+                       snprintf(qualified, sizeof(qualified), "%s::%s", pinfo.name,
+                                        info.name);
+                       if (!rx || regexec(&filter, qualified, 0, NULL, 0) == 0)
+                               perf_show_event_info(&info, &pinfo, file);
+
+                       eidx = pfm_get_event_next(eidx);
+               }
+       }
+
+       if (rx)
+               regfree(&filter);
+}
+
+/* Formats a printable name for the event selector @sel into @sbuf, writing at
+ * most @size bytes.
+ *
+ * When @sel carries a non-negative event index that libpfm4 can resolve, the
+ * result is "<event>:<mask>" or just "<event>" if no mask name is found.
+ * Otherwise the raw event and mask numbers are printed as "0xEE:0xMM".
+ */
+void perf_get_event_string(const struct perf_eventsel *sel, char *sbuf,
+                           size_t size)
+{
+       pfm_event_info_t einfo;
+       const char *mask_name;
+
+       ZERO_DATA(einfo);
+       einfo.size = sizeof(einfo);
+
+       /* No usable libpfm4 entry: fall back to the raw numbers. */
+       if ((sel->eidx < 0) ||
+               (pfm_get_event_info(sel->eidx, PFM_OS_NONE, &einfo) != PFM_SUCCESS)) {
+               snprintf(sbuf, size, "0x%02x:0x%02x", sel->ev.u.b.event,
+                                sel->ev.u.b.mask);
+               return;
+       }
+
+       mask_name = perf_get_event_mask_name(&einfo, sel->ev.u.b.mask);
+       if (mask_name)
+               snprintf(sbuf, size, "%s:%s", einfo.name, mask_name);
+       else
+               snprintf(sbuf, size, "%s", einfo.name);
+}
diff --git a/tools/profile/perf/perf_core.h b/tools/profile/perf/perf_core.h
new file mode 100644 (file)
index 0000000..6dfbd5d
--- /dev/null
@@ -0,0 +1,69 @@
+/* Copyright (c) 2015 Google Inc
+ * Davide Libenzi <dlibenzi@google.com>
+ * See LICENSE for details.
+ */
+
+#pragma once
+
+#include <ros/arch/arch.h>
+#include <ros/arch/perfmon.h>
+#include <ros/common.h>
+#include <sys/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <limits.h>
+#include "akaros.h"
+
+/* Capacity of the events[] table in struct perf_context, i.e. the maximum
+ * number of events that can be submitted to one context.
+ */
+#define MAX_CPU_EVENTS 256
+
+/* Performance-monitoring capabilities, stored in struct perf_context and
+ * exposed through perf_context_get_arch_info().
+ *
+ * NOTE(review): the field meanings below are inferred from their names
+ * (perfmon version, number of architectural events, counter width and count
+ * for general and fixed counters) -- confirm against the code that fills
+ * this struct in.
+ */
+struct perf_arch_info {
+       uint32_t perfmon_version;
+       uint32_t proc_arch_events;
+       uint32_t bits_x_counter;
+       uint32_t counters_x_proc;
+       uint32_t bits_x_fix_counter;
+       uint32_t fix_counters_x_proc;
+};
+
+/* An event selection: the perfmon event encoding plus the libpfm4 event it
+ * was resolved from, if any.
+ */
+struct perf_eventsel {
+       /* Event encoding (see <ros/arch/perfmon.h>). */
+       struct perfmon_event ev;
+       /* libpfm4 event index used to look the event name up; a negative
+        * value makes perf_get_event_string() print the raw numbers instead.
+        */
+       int eidx;
+};
+
+/* One event submitted to a context via perf_context_event_submit(). */
+struct perf_event {
+       /* Copy of the core set the event was opened on. */
+       struct core_set cores;
+       /* Copy of the selector the event was opened with. */
+       struct perf_eventsel sel;
+       /* Event descriptor returned when the event was opened; used to read
+        * the event's values and to close it.
+        */
+       int ped;
+};
+
+/* State of one perf session; created by perf_create_context() and released
+ * by perf_free_context().
+ */
+struct perf_context {
+       /* Descriptor of the perf file, used for all event operations. */
+       int perf_fd;
+       /* Descriptor of the control file used to start/stop sampling. */
+       int kpctl_fd;
+       /* Arch info filled in when the context is created. */
+       struct perf_arch_info pai;
+       /* Number of valid entries at the start of events[]. */
+       int event_count;
+       struct perf_event events[MAX_CPU_EVENTS];
+};
+
+/* Paths of the files perf_create_context() opens (both read/write). */
+struct perf_context_config {
+       /* Path of the perf file. */
+       const char *perf_file;
+       /* Path of the sampling control file. */
+       const char *kpctl_file;
+};
+
+/* NOTE(review): perf_initialize/perf_finalize presumably set up and tear down
+ * the library-wide state, and perf_parse_event presumably fills @sel from the
+ * textual event description @str -- confirm against their definitions.
+ */
+void perf_initialize(int argc, const char * const *argv);
+void perf_finalize(void);
+void perf_parse_event(const char *str, struct perf_eventsel *sel);
+/* Opens the files named in @cfg, enables sampling and returns a new context. */
+struct perf_context *perf_create_context(const struct perf_context_config *cfg);
+/* Closes all events of @pctx, disables sampling and frees the context. */
+void perf_free_context(struct perf_context *pctx);
+/* Opens the event @sel on @cores and records it in @pctx; exits the process
+ * when the context already holds MAX_CPU_EVENTS events.
+ */
+void perf_context_event_submit(struct perf_context *pctx,
+                                                          const struct core_set *cores,
+                                                          const struct perf_eventsel *sel);
+/* Prints the current values of every event of @pctx to @file. */
+void perf_context_show_values(struct perf_context *pctx, FILE *file);
+/* Prints a description of the available events to @file; @rx is an optional
+ * case-insensitive regex on "<pmu>::<event>", NULL shows everything.
+ */
+void perf_show_events(const char *rx, FILE *file);
+/* Formats a printable name for @sel into @sbuf (at most @size bytes). */
+void perf_get_event_string(const struct perf_eventsel *sel, char *sbuf,
+                                                  size_t size);
+
+/* Returns a read-only pointer to the arch info embedded in @pctx.  The
+ * pointer refers to storage inside the context itself.
+ */
+static inline const struct perf_arch_info *perf_context_get_arch_info(
+       const struct perf_context *pctx)
+{
+       const struct perf_arch_info *pai = &pctx->pai;
+
+       return pai;
+}
diff --git a/tools/profile/perf/xlib.c b/tools/profile/perf/xlib.c
new file mode 100644 (file)
index 0000000..622f6f4
--- /dev/null
@@ -0,0 +1,100 @@
+/* Copyright (c) 2015 Google Inc
+ * Davide Libenzi <dlibenzi@google.com>
+ * See LICENSE for details.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "xlib.h"
+
+int xopen(const char *path, int flags, mode_t mode)
+{
+       int fd = open(path, flags, mode);
+
+       if (fd < 0) {
+               perror(path);
+               exit(1);
+       }
+
+       return fd;
+}
+
+/* Writes exactly @size bytes from @data to @fd with a single write(2) call.
+ *
+ * Exits with status 1 if the call fails or transfers fewer bytes than asked.
+ * The two cases are reported separately because errno is only meaningful
+ * when write() itself returned an error.
+ */
+void xwrite(int fd, const void *data, size_t size)
+{
+       ssize_t wcount = write(fd, data, size);
+
+       if (wcount < 0) {
+               perror("Writing file");
+               exit(1);
+       }
+       if ((size_t) wcount != size) {
+               fprintf(stderr, "Writing file: short write (%zd of %zu bytes)\n",
+                       wcount, size);
+               exit(1);
+       }
+}
+
+/* Reads exactly @size bytes from @fd into @data with a single read(2) call.
+ *
+ * Exits with status 1 if the call fails or returns fewer bytes than asked
+ * (including end of file).  The two cases are reported separately because
+ * errno is only meaningful when read() itself returned an error.
+ */
+void xread(int fd, void *data, size_t size)
+{
+       ssize_t rcount = read(fd, data, size);
+
+       if (rcount < 0) {
+               perror("Reading file");
+               exit(1);
+       }
+       if ((size_t) rcount != size) {
+               fprintf(stderr, "Reading file: short read (%zd of %zu bytes)\n",
+                       rcount, size);
+               exit(1);
+       }
+}
+
+/* Writes exactly @size bytes from @data to @fd at offset @off with a single
+ * pwrite(2) call.
+ *
+ * Exits with status 1 if the call fails or transfers fewer bytes than asked.
+ * The two cases are reported separately because errno is only meaningful
+ * when pwrite() itself returned an error.
+ */
+void xpwrite(int fd, const void *data, size_t size, off_t off)
+{
+       ssize_t wcount = pwrite(fd, data, size, off);
+
+       if (wcount < 0) {
+               perror("Writing file");
+               exit(1);
+       }
+       if ((size_t) wcount != size) {
+               fprintf(stderr, "Writing file: short write (%zd of %zu bytes)\n",
+                       wcount, size);
+               exit(1);
+       }
+}
+
+/* Reads exactly @size bytes from @fd at offset @off into @data with a single
+ * pread(2) call.
+ *
+ * Exits with status 1 if the call fails or returns fewer bytes than asked
+ * (including end of file).  The two cases are reported separately because
+ * errno is only meaningful when pread() itself returned an error.
+ */
+void xpread(int fd, void *data, size_t size, off_t off)
+{
+       ssize_t rcount = pread(fd, data, size, off);
+
+       if (rcount < 0) {
+               perror("Reading file");
+               exit(1);
+       }
+       if ((size_t) rcount != size) {
+               fprintf(stderr, "Reading file: short read (%zd of %zu bytes)\n",
+                       rcount, size);
+               exit(1);
+       }
+}
+
+/* malloc(3) wrapper that never returns NULL: when the allocation yields NULL
+ * it prints a diagnostic and exits with status 1.
+ *
+ * The returned block is uninitialized and must be released with free().
+ */
+void *xmalloc(size_t size)
+{
+       void *block = malloc(size);
+
+       if (block != NULL)
+               return block;
+
+       perror("Allocating memory block");
+       exit(1);
+}
+
+/* Like xmalloc(), but the returned block of @size bytes is filled with
+ * zeroes.  Release it with free().
+ */
+void *xzmalloc(size_t size)
+{
+       void *block = xmalloc(size);
+
+       /* memset() hands back the pointer it was given. */
+       return memset(block, 0, size);
+}
+
+/* strdup(3) wrapper that never returns NULL: on failure it prints a
+ * diagnostic and exits with status 1.
+ *
+ * The returned copy of @str must be released with free().
+ */
+char *xstrdup(const char *str)
+{
+       char *copy = strdup(str);
+
+       if (copy)
+               return copy;
+
+       perror("Duplicating a string");
+       exit(1);
+}
diff --git a/tools/profile/perf/xlib.h b/tools/profile/perf/xlib.h
new file mode 100644 (file)
index 0000000..4289557
--- /dev/null
@@ -0,0 +1,42 @@
+/* Copyright (c) 2015 Google Inc
+ * Davide Libenzi <dlibenzi@google.com>
+ * See LICENSE for details.
+ */
+
+#pragma once
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+/* "x" wrappers: each one performs the named libc call and, on failure,
+ * prints a diagnostic to stderr and exits with status 1 instead of
+ * returning an error to the caller.
+ */
+
+/* open(2); returns the new file descriptor. */
+int xopen(const char *path, int flags, mode_t mode);
+/* write(2)/read(2) of exactly @size bytes; a short transfer is fatal. */
+void xwrite(int fd, const void *data, size_t size);
+void xread(int fd, void *data, size_t size);
+/* pwrite(2)/pread(2) of exactly @size bytes at offset @off; a short transfer
+ * is fatal.
+ */
+void xpwrite(int fd, const void *data, size_t size, off_t off);
+void xpread(int fd, void *data, size_t size, off_t off);
+/* malloc(3); the block is uninitialized. */
+void *xmalloc(size_t size);
+/* xmalloc() followed by zero-filling the block. */
+void *xzmalloc(size_t size);
+/* strdup(3). */
+char *xstrdup(const char *str);
+
+/* Executes the x86 CPUID instruction with EAX = @ieax and ECX = @iecx and
+ * stores the resulting EAX/EBX/ECX/EDX values through @eaxp, @ebxp, @ecxp
+ * and @edxp.  Any of the output pointers may be NULL, in which case that
+ * register is discarded.
+ *
+ * The __asm__/__volatile__ spellings are used so that this header also
+ * compiles in strict ISO modes (e.g. -std=c11), where the plain `asm`
+ * keyword is not available.
+ */
+static inline void cpuid(uint32_t ieax, uint32_t iecx, uint32_t *eaxp,
+                         uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp)
+{
+       uint32_t eax, ebx, ecx, edx;
+
+       __asm__ __volatile__("cpuid"
+                                : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+                                : "a" (ieax), "c" (iecx));
+       if (eaxp)
+               *eaxp = eax;
+       if (ebxp)
+               *ebxp = ebx;
+       if (ecxp)
+               *ecxp = ecx;
+       if (edxp)
+               *edxp = edx;
+}