Add load, safe load, read xcr0 functions
[akaros.git] / kern / arch / x86 / devarch.c
index 2c188b2..ca742d6 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * This file is part of the UCB release of Plan 9. It is subject to the license
  * terms in the LICENSE file found in the top-level directory of this
  * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
@@ -7,31 +7,38 @@
  * in the LICENSE file.
  */
 
+#include <ros/memops.h>
 #include <vfs.h>
-#include <kfs.h>
-#include <slab.h>
 #include <kmalloc.h>
 #include <kref.h>
 #include <kthread.h>
 #include <string.h>
 #include <stdio.h>
 #include <assert.h>
-#include <error.h>
-#include <cpio.h>
+#include <err.h>
 #include <pmap.h>
 #include <umem.h>
 #include <smp.h>
 #include <ip.h>
 #include <time.h>
-#include <atomic.h>
+#include <bitops.h>
 #include <core_set.h>
-#include <completion.h>
 #include <address_range.h>
-#include <arch/uaccess.h>
+#include <arch/ros/perfmon.h>
+#include <arch/topology.h>
+#include <arch/perfmon.h>
+#include <arch/ros/msr-index.h>
+#include <arch/msr.h>
 #include <arch/devarch.h>
 
 #define REAL_MEM_SIZE (1024 * 1024)
 
+/* Per-open state for the #arch/perf file.  Allocated in archopen(), stored
+ * in chan->aux, freed in archclose().  Write commands fill resp/resp_size,
+ * which a subsequent read() on the same chan returns to userspace. */
+struct perf_context {
+       struct perfmon_session *ps;     /* perfmon session owning opened events */
+       size_t resp_size;               /* bytes valid in resp */
+       uint8_t *resp;                  /* kmalloc'ed reply of the last command */
+};
+
 struct io_map {
        struct io_map *next;
        int reserved;
@@ -57,6 +64,7 @@ enum {
        Qgdb,
        Qrealmem,
        Qmsr,
+       Qperf,
 
        Qmax,
 };
@@ -65,17 +73,6 @@ enum {
        Linelen = 31,
 };
 
-struct smp_read_values {
-       uint64_t *values;
-       uint32_t addr;
-       atomic_t err;
-};
-struct smp_write_values {
-       uint32_t addr;
-       uint64_t value;
-       atomic_t err;
-};
-
 struct dev archdevtab;
 static struct dirtab archdir[Qmax] = {
        {".", {Qdir, 0, QTDIR}, 0, 0555},
@@ -86,65 +83,28 @@ static struct dirtab archdir[Qmax] = {
        {"gdb", {Qgdb, 0}, 0, 0660},
        {"realmem", {Qrealmem, 0}, 0, 0444},
        {"msr", {Qmsr, 0}, 0, 0666},
+       {"perf", {Qperf, 0}, 0, 0666},
 };
+/* White list entries needs to be ordered by start address, and never overlap.
+ */
+#define MSR_MAX_VAR_COUNTERS 16
+#define MSR_MAX_FIX_COUNTERS 4
 
+/* Reads are allowed for the full 32 bit MSR address space. */
+static const struct address_range msr_rd_wlist[] = {
+       ADDRESS_RANGE(0x00000000, 0xffffffff),
+};
+/* Writes are restricted to the performance-monitoring MSRs: the variable
+ * counters and their event-select registers, IA32_PERF_CTL, and the fixed
+ * counters plus their control/global-status block. */
+static const struct address_range msr_wr_wlist[] = {
+       ADDRESS_RANGE(MSR_IA32_PERFCTR0,
+                                 MSR_IA32_PERFCTR0 + MSR_MAX_VAR_COUNTERS - 1),
+       ADDRESS_RANGE(MSR_ARCH_PERFMON_EVENTSEL0,
+                                 MSR_ARCH_PERFMON_EVENTSEL0 + MSR_MAX_VAR_COUNTERS - 1),
+       ADDRESS_RANGE(MSR_IA32_PERF_CTL, MSR_IA32_PERF_CTL),
+       ADDRESS_RANGE(MSR_CORE_PERF_FIXED_CTR0,
+                                 MSR_CORE_PERF_FIXED_CTR0 + MSR_MAX_FIX_COUNTERS - 1),
+       ADDRESS_RANGE(MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL),
+};
 int gdbactive = 0;
 
-static void cpu_read_msr(void *opaque)
-{
-       int err, cpu = core_id();
-       struct smp_read_values *srv = (struct smp_read_values *) opaque;
-
-       err = safe_read_msr(srv->addr, &srv->values[cpu]);
-       if (unlikely(err))
-               atomic_cas(&srv->err, 0, err);
-}
-
-uint64_t *coreset_read_msr(const struct core_set *cset, uint32_t addr,
-                                                  size_t *nvalues)
-{
-       int err;
-       struct smp_read_values srv;
-
-       srv.addr = addr;
-       atomic_init(&srv.err, 0);
-       srv.values = kzmalloc(num_cores * sizeof(*srv.values), 0);
-       if (unlikely(!srv.values))
-               return ERR_PTR(-ENOMEM);
-       smp_do_in_cores(cset, cpu_read_msr, &srv);
-       err = (int) atomic_read(&srv.err);
-       if (unlikely(err)) {
-               kfree(srv.values);
-               return ERR_PTR(err);
-       }
-       *nvalues = num_cores;
-
-       return srv.values;
-}
-
-static void cpu_write_msr(void *opaque)
-{
-       int err;
-       struct smp_write_values *swv = (struct smp_write_values *) opaque;
-
-       err = safe_write_msr(swv->addr, swv->value);
-       if (unlikely(err))
-               atomic_cas(&swv->err, 0, err);
-}
-
-int coreset_write_msr(const struct core_set *cset, uint32_t addr,
-                                          uint64_t value)
-{
-       struct smp_write_values swv;
-
-       swv.addr = addr;
-       swv.value = value;
-       atomic_init(&swv.err, 0);
-       smp_do_in_cores(cset, cpu_write_msr, &swv);
-
-       return (int) atomic_read(&swv.err);
-}
-
 //
 //  alloc some io port space and remember who it was
 //  alloced to.  if port < 0, find a free region.
@@ -337,7 +297,7 @@ static void checkport(int start, int end)
 
        if (iounused(start, end))
                return;
-       error(EPERM, NULL);
+       error(EPERM, ERROR_FIXME);
 }
 
 static struct chan *archattach(char *spec)
@@ -358,25 +318,196 @@ static int archstat(struct chan *c, uint8_t * dp, int n)
        return devstat(c, dp, n, archdir, Qmax, devgen);
 }
 
+/* Allocates and initializes the per-chan perf context for #arch/perf.
+ * Returns a zeroed context with a fresh perfmon session.  If
+ * perfmon_create_session() raises an error, the waserror() handler frees
+ * the half-built context and re-throws to the caller via nexterror(). */
+static struct perf_context *arch_create_perf_context(void)
+{
+       ERRSTACK(1);
+       struct perf_context *pc = kzmalloc(sizeof(struct perf_context),
+                                                                          KMALLOC_WAIT);
+
+       if (waserror()) {
+               kfree(pc);
+               nexterror();
+       }
+       pc->ps = perfmon_create_session();
+       poperror();
+
+       return pc;
+}
+
+/* Tears down a perf context created by arch_create_perf_context():
+ * closes the perfmon session, then frees the pending response buffer
+ * (kfree(NULL) is a no-op) and the context itself.  NULL-safe. */
+static void arch_free_perf_context(struct perf_context *pc)
+{
+       if (likely(pc)) {
+               perfmon_close_session(pc->ps);
+               kfree(pc->resp);
+               kfree(pc);
+       }
+}
+
+/* Decodes a core set from the wire format used by #arch/perf commands:
+ * a 32 bit LE byte count n followed by an n-byte bitmap (bit i set => core
+ * i selected).  Both the header and the bitmap are bounds-checked against
+ * ktop; a short buffer raises EBADMSG.  Cores beyond num_cores are
+ * silently ignored.  Returns the pointer just past the consumed bytes. */
+static const uint8_t *arch_read_core_set(struct core_set *cset,
+                                                                                const uint8_t *kptr,
+                                                                                const uint8_t *ktop)
+{
+       int i, nb;
+       uint32_t n;
+
+       error_assert(EBADMSG, (kptr + sizeof(uint32_t)) <= ktop);
+       kptr = get_le_u32(kptr, &n);
+       error_assert(EBADMSG, (kptr + n) <= ktop);
+       core_set_init(cset);
+       nb = MIN((int) n * 8, num_cores);
+       for (i = 0; i < nb; i++) {
+               if (test_bit(i, (const unsigned long *) kptr))
+                       core_set_setcpu(cset, i);
+       }
+
+       return kptr + n;
+}
+
+/* Executes one serialized perfmon command written to #arch/perf.
+ *
+ * The user buffer starts with a one-byte opcode (PERFMON_CMD_*) followed by
+ * little-endian arguments.  Commands that produce output leave a serialized
+ * reply in pc->resp/pc->resp_size for a later read() to return.  Returns the
+ * number of bytes consumed, or -1 if the copy-in from userspace failed;
+ * malformed input raises EBADMSG through error_assert(), and the waserror()
+ * handler frees the kernel copy before re-throwing. */
+static long arch_perf_write(struct perf_context *pc, const void *udata,
+                                                       long usize)
+{
+       ERRSTACK(1);
+       long ret;
+       void *kdata;
+       const uint8_t *kptr, *ktop;
+
+       /* Drop any response left over from the previous command. */
+       kfree(pc->resp);
+       pc->resp = NULL;
+       pc->resp_size = 0;
+
+       kdata = user_memdup_errno(current, udata, usize);
+       if (unlikely(!kdata))
+               return -1;
+       if (waserror()) {
+               kfree(kdata);
+               nexterror();
+       }
+       kptr = kdata;
+       ktop = kptr + usize;
+       error_assert(EBADMSG, (kptr + 1) <= ktop);
+       switch (*kptr++) {
+               case PERFMON_CMD_COUNTER_OPEN: {
+                       int ped;
+                       struct perfmon_event pev;
+                       struct core_set cset;
+
+                       error_assert(EBADMSG, (kptr + 3 * sizeof(uint64_t)) <= ktop);
+                       perfmon_init_event(&pev);
+                       kptr = get_le_u64(kptr, &pev.event);
+                       kptr = get_le_u64(kptr, &pev.flags);
+                       kptr = get_le_u64(kptr, &pev.trigger_count);
+                       kptr = arch_read_core_set(&cset, kptr, ktop);
+
+                       ped = perfmon_open_event(&cset, pc->ps, &pev);
+
+                       /* Reply: the event descriptor as one 32 bit LE value. */
+                       pc->resp_size = sizeof(uint32_t);
+                       pc->resp = kmalloc(pc->resp_size, KMALLOC_WAIT);
+                       put_le_u32(pc->resp, (uint32_t) ped);
+                       break;
+               }
+               case PERFMON_CMD_COUNTER_STATUS: {
+                       int i;
+                       uint32_t ped;
+                       uint8_t *rptr;
+                       uint64_t *mvalues;
+                       struct perfmon_status *pef;
+
+                       error_assert(EBADMSG, (kptr + sizeof(uint32_t)) <= ktop);
+                       kptr = get_le_u32(kptr, &ped);
+
+                       pef = perfmon_get_event_status(pc->ps, (int) ped);
+
+                       /* Size each element, not the pointer: the old
+                        * sizeof(mvalues) was only correct by luck on LP64. */
+                       mvalues = kzmalloc(num_cores * sizeof(*mvalues), KMALLOC_WAIT);
+                       for (i = 0; i < num_cores; i++)
+                               mvalues[i] = pef->cores_values[i];
+
+                       /* Reply: event, flags, trigger_count (u64 each), core
+                        * count (u32), then one u64 counter value per core. */
+                       pc->resp_size = 3 * sizeof(uint64_t) + sizeof(uint32_t) +
+                               num_cores * sizeof(uint64_t);
+                       pc->resp = kmalloc(pc->resp_size, KMALLOC_WAIT);
+
+                       rptr = put_le_u64(pc->resp, pef->ev.event);
+                       rptr = put_le_u64(rptr, pef->ev.flags);
+                       rptr = put_le_u64(rptr, pef->ev.trigger_count);
+                       rptr = put_le_u32(rptr, num_cores);
+                       for (i = 0; i < num_cores; i++)
+                               rptr = put_le_u64(rptr, mvalues[i]);
+                       kfree(mvalues);
+                       perfmon_free_event_status(pef);
+                       break;
+               }
+               case PERFMON_CMD_COUNTER_CLOSE: {
+                       uint32_t ped;
+
+                       error_assert(EBADMSG, (kptr + sizeof(uint32_t)) <= ktop);
+                       kptr = get_le_u32(kptr, &ped);
+
+                       perfmon_close_event(pc->ps, (int) ped);
+                       break;
+               }
+               case PERFMON_CMD_CPU_CAPS: {
+                       uint8_t *rptr;
+                       struct perfmon_cpu_caps pcc;
+
+                       /* NOTE(review): skips one byte without a bounds check;
+                        * presumably a pad byte the userspace encoder emits —
+                        * confirm against the perf tool's serializer. */
+                       kptr++;
+                       perfmon_get_cpu_caps(&pcc);
+
+                       /* Reply: six 32 bit LE fields describing the PMU. */
+                       pc->resp_size = 6 * sizeof(uint32_t);
+                       pc->resp = kmalloc(pc->resp_size, KMALLOC_WAIT);
+
+                       rptr = put_le_u32(pc->resp, pcc.perfmon_version);
+                       rptr = put_le_u32(rptr, pcc.proc_arch_events);
+                       rptr = put_le_u32(rptr, pcc.bits_x_counter);
+                       rptr = put_le_u32(rptr, pcc.counters_x_proc);
+                       rptr = put_le_u32(rptr, pcc.bits_x_fix_counter);
+                       rptr = put_le_u32(rptr, pcc.fix_counters_x_proc);
+                       break;
+               }
+               default:
+                       error(EINVAL, "Invalid perfmon command: 0x%x", kptr[-1]);
+       }
+       poperror();
+       /* Compute the consumed byte count before freeing the buffer that
+        * kptr points into (using kdata's value after kfree() is UB). */
+       ret = (long) (kptr - (const uint8_t *) kdata);
+       kfree(kdata);
+
+       return ret;
+}
+
 static struct chan *archopen(struct chan *c, int omode)
 {
-       return devopen(c, omode, archdir, Qmax, devgen);
+       c = devopen(c, omode, archdir, Qmax, devgen);
+       switch ((uint32_t) c->qid.path) {
+               case Qperf:
+                       /* Perf chans carry per-open state in c->aux; reject the
+                        * open outright if the CPU has no usable PMU. */
+                       if (!perfmon_supported())
+                               error(ENODEV, "perf is not supported");
+                       assert(!c->aux);
+                       c->aux = arch_create_perf_context();
+                       break;
+       }
+
+       return c;
 }
 
-static void archclose(struct chan *unused)
+/* Releases the per-open perf context attached in archopen(); NULLing
+ * c->aux guards against a double free on repeated close paths. */
+static void archclose(struct chan *c)
 {
+       switch ((uint32_t) c->qid.path) {
+               case Qperf:
+                       if (c->aux) {
+                               arch_free_perf_context((struct perf_context *) c->aux);
+                               c->aux = NULL;
+                       }
+                       break;
+       }
 }
 
 static long archread(struct chan *c, void *a, long n, int64_t offset)
 {
        char *buf, *p;
-       int port;
-       size_t nvalues;
+       int err, port;
        uint64_t *values;
        uint16_t *sp;
        uint32_t *lp;
        struct io_map *map;
        struct core_set cset;
+       struct msr_address msra;
+       struct msr_value msrv;
 
        switch ((uint32_t) c->qid.path) {
                case Qdir:
@@ -392,7 +523,7 @@ static long archread(struct chan *c, void *a, long n, int64_t offset)
                        return n;
                case Qiow:
                        if (n & 1)
-                               error(EINVAL, NULL);
+                               error(EINVAL, ERROR_FIXME);
                        checkport(offset, offset + n);
                        sp = a;
                        for (port = offset; port < offset + n; port += 2)
@@ -400,7 +531,7 @@ static long archread(struct chan *c, void *a, long n, int64_t offset)
                        return n;
                case Qiol:
                        if (n & 3)
-                               error(EINVAL, NULL);
+                               error(EINVAL, ERROR_FIXME);
                        checkport(offset, offset + n);
                        lp = a;
                        for (port = offset; port < offset + n; port += 4)
@@ -413,32 +544,53 @@ static long archread(struct chan *c, void *a, long n, int64_t offset)
                case Qmsr:
                        if (!address_range_find(msr_rd_wlist, ARRAY_SIZE(msr_rd_wlist),
                                                                        (uintptr_t) offset))
-                               error(EPERM, NULL);
+                               error(EPERM, ERROR_FIXME);
                        core_set_init(&cset);
                        core_set_fill_available(&cset);
-                       values = coreset_read_msr(&cset, (uint32_t) offset, &nvalues);
-                       if (likely(!IS_ERR(values))) {
-                               if (n >= nvalues * sizeof(uint64_t)) {
-                                       if (memcpy_to_user_errno(current, a, values,
-                                                                                        nvalues * sizeof(uint64_t)))
-                                               n = -1;
+                       msr_set_address(&msra, (uint32_t) offset);
+                       values = kzmalloc(num_cores * sizeof(uint64_t), KMALLOC_WAIT);
+                       if (!values)
+                               error(ENOMEM, ERROR_FIXME);
+                       msr_set_values(&msrv, values, num_cores);
+
+                       err = msr_cores_read(&cset, &msra, &msrv);
+
+                       if (likely(!err)) {
+                               if (n >= num_cores * sizeof(uint64_t)) {
+                                       if (!memcpy_to_user_errno(current, a, values,
+                                                                                         num_cores * sizeof(uint64_t)))
+                                               n = num_cores * sizeof(uint64_t);
                                        else
-                                               n = nvalues * sizeof(uint64_t);
+                                               n = -1;
                                } else {
                                        kfree(values);
-                                       error(ERANGE, NULL);
+                                       error(ERANGE, ERROR_FIXME);
                                }
-                               kfree(values);
                        } else {
-                               error(-PTR_ERR(values), NULL);
+                               n = -1;
                        }
+                       kfree(values);
+                       return n;
+               case Qperf: {
+                       struct perf_context *pc = (struct perf_context *) c->aux;
+
+                       assert(pc);
+                       if (pc->resp && ((size_t) offset < pc->resp_size)) {
+                               n = MIN(n, (long) pc->resp_size - (long) offset);
+                               if (memcpy_to_user_errno(current, a, pc->resp + offset, n))
+                                       n = -1;
+                       } else {
+                               n = 0;
+                       }
+
                        return n;
+               }
                default:
-                       error(EINVAL, NULL);
+                       error(EINVAL, ERROR_FIXME);
        }
 
        if ((buf = kzmalloc(n, 0)) == NULL)
-               error(ENOMEM, NULL);
+               error(ENOMEM, ERROR_FIXME);
        p = buf;
        n = n / Linelen;
        offset = offset / Linelen;
@@ -473,6 +625,8 @@ static long archwrite(struct chan *c, void *a, long n, int64_t offset)
        uint16_t *sp;
        uint32_t *lp;
        struct core_set cset;
+       struct msr_address msra;
+       struct msr_value msrv;
 
        switch ((uint32_t) c->qid.path) {
                case Qgdb:
@@ -494,7 +648,7 @@ static long archwrite(struct chan *c, void *a, long n, int64_t offset)
                        return n;
                case Qiow:
                        if (n & 1)
-                               error(EINVAL, NULL);
+                               error(EINVAL, ERROR_FIXME);
                        checkport(offset, offset + n);
                        sp = a;
                        for (port = offset; port < offset + n; port += 2)
@@ -502,7 +656,7 @@ static long archwrite(struct chan *c, void *a, long n, int64_t offset)
                        return n;
                case Qiol:
                        if (n & 3)
-                               error(EINVAL, NULL);
+                               error(EINVAL, ERROR_FIXME);
                        checkport(offset, offset + n);
                        lp = a;
                        for (port = offset; port < offset + n; port += 4)
@@ -511,19 +665,30 @@ static long archwrite(struct chan *c, void *a, long n, int64_t offset)
                case Qmsr:
                        if (!address_range_find(msr_wr_wlist, ARRAY_SIZE(msr_wr_wlist),
                                                                        (uintptr_t) offset))
-                               error(EPERM, NULL);
-                       core_set_init(&cset);
-                       core_set_fill_available(&cset);
+                               error(EPERM, ERROR_FIXME);
                        if (n != sizeof(uint64_t))
-                               error(EINVAL, NULL);
+                               error(EINVAL, ERROR_FIXME);
                        if (memcpy_from_user_errno(current, &value, a, sizeof(value)))
                                return -1;
-                       err = coreset_write_msr(&cset, (uint32_t) offset, value);
+
+                       core_set_init(&cset);
+                       core_set_fill_available(&cset);
+                       msr_set_address(&msra, (uint32_t) offset);
+                       msr_set_value(&msrv, value);
+
+                       err = msr_cores_write(&cset, &msra, &msrv);
                        if (unlikely(err))
-                               error(-err, NULL);
+                               error(-err, ERROR_FIXME);
                        return sizeof(uint64_t);
+               case Qperf: {
+                       struct perf_context *pc = (struct perf_context *) c->aux;
+
+                       assert(pc);
+
+                       return arch_perf_write(pc, a, n);
+               }
                default:
-                       error(EINVAL, NULL);
+                       error(EINVAL, ERROR_FIXME);
        }
        return 0;
 }