Completely restructured the profiler code, cutting all the unused code
author     Davide Libenzi <dlibenzi@google.com>
           Tue, 20 Oct 2015 22:53:08 +0000 (15:53 -0700)
committer  Barret Rhoden <brho@cs.berkeley.edu>
           Wed, 18 Nov 2015 17:51:42 +0000 (09:51 -0800)
Completely restructured the profiler code, cutting all the unused code
that was imported from Linux.
Moved the kernel/userspace detection logic into the profiler code
to open the door to userspace stack traces.

Signed-off-by: Davide Libenzi <dlibenzi@google.com>
[checkpatch touchups]
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
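
For readers skimming the patch: the kernel-facing surface now reduces to the
handful of calls declared in kern/include/profiler.h. Below is a minimal
sketch of a caller, mirroring what kprof.c does in this patch; the my_prof_*
wrapper names are hypothetical, only the profiler_* calls come from the
patch.

#include <profiler.h>

/* One-time setup: allocates the output queue and the per-cpu contexts. */
static int my_prof_attach(void)
{
	return profiler_init();
}

/* Gate sampling on or off, as kprof does for "opstart" / "opstop". */
static void my_prof_ctl(int onoff)
{
	profiler_control_trace(onoff);
}

/* Per-core timer tick: kernel vs. user PC detection now happens inside
 * profiler_add_hw_sample(), not in the caller. */
static void my_prof_tick(struct hw_trapframe *hw_tf)
{
	profiler_add_hw_sample(hw_tf);
}
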
kern/drivers/dev/kprof.c
kern/include/kdebug.h
kern/include/profiler.h [new file with mode: 0644]
kern/src/Kbuild
kern/src/oprofile/cpu_buffer.c [deleted file]
kern/src/oprofile/cpu_buffer.h [deleted file]
kern/src/profiler.c [new file with mode: 0644]

diff --git a/kern/drivers/dev/kprof.c b/kern/drivers/dev/kprof.c
index 5d173c3..69b05b1 100644
--- a/kern/drivers/dev/kprof.c
+++ b/kern/drivers/dev/kprof.c
@@ -36,7 +36,7 @@
 #include <pmap.h>
 #include <smp.h>
 #include <ip.h>
-#include <oprofile.h>
+#include <profiler.h>
 
 struct dev kprofdevtab;
 
@@ -101,10 +101,8 @@ static void oprof_alarm_handler(struct alarm_waiter *waiter,
 {
        int coreid = core_id();
        struct timer_chain *tchain = &per_cpu_info[coreid].tchain;
-       if (in_kernel(hw_tf))
-               oprofile_add_backtrace(get_hwtf_pc(hw_tf), get_hwtf_fp(hw_tf));
-       else
-               oprofile_add_userpc(get_hwtf_pc(hw_tf));
+
+       profiler_add_hw_sample(hw_tf);
        reset_alarm_rel(tchain, waiter, oprof_timer_period);
 }
 
@@ -186,8 +184,7 @@ static void kprofinit(void)
                kprof.buf_sz = n;
 
        /* no, i'm not sure how we should do this yet. */
-       int alloc_cpu_buffers(void);
-       alloc_cpu_buffers();
+       profiler_init();
        oprof_alarms = kzmalloc(sizeof(struct alarm_waiter) * num_cores,
                                KMALLOC_WAIT);
        if (!oprof_alarms)
@@ -212,8 +209,7 @@ static void kprofshutdown(void)
        kfree(oprof_alarms); oprof_alarms = NULL;
        kfree(kprof.buf); kprof.buf = NULL;
        qfree(kprof.systrace); kprof.systrace = NULL;
-       void free_cpu_buffers(void);
-       free_cpu_buffers();
+       profiler_cleanup();
 }
 
 static struct walkqid*
@@ -225,7 +221,7 @@ kprofwalk(struct chan *c, struct chan *nc, char **name, int nname)
 static int
 kprofstat(struct chan *c, uint8_t *db, int n)
 {
-       kproftab[Kprofoprofileqid].length = oproflen();
+       kproftab[Kprofoprofileqid].length = profiler_size();
        if (kprof.systrace)
                kproftab[Kptraceqid].length = qlen(kprof.systrace);
        else
@@ -407,7 +403,7 @@ kprofread(struct chan *c, void *va, long n, int64_t off)
                n = ret;
                break;
        case Kprofoprofileqid:
-               n = oprofread(va,n);
+               n = profiler_read(va, n);
                break;
        case Kptraceqid:
                if (kprof.systrace) {
@@ -511,9 +507,9 @@ kprofwrite(struct chan *c, void *a, long n, int64_t unused)
                                manage_oprof_timer(pcoreid, cb);
                        }
                } else if (!strcmp(cb->f[0], "opstart")) {
-                       oprofile_control_trace(1);
+                       profiler_control_trace(1);
                } else if (!strcmp(cb->f[0], "opstop")) {
-                       oprofile_control_trace(0);
+                       profiler_control_trace(0);
                } else {
                        error(EFAIL, ctlstring);
                }
@@ -524,7 +520,7 @@ kprofwrite(struct chan *c, void *a, long n, int64_t unused)
                 */
        case Kprofoprofileqid:
                pc = strtoul(a, 0, 0);
-               oprofile_add_trace(pc);
+               profiler_add_trace(pc);
                break;
        case Kprintxqid:
                if (!strncmp(a, "on", 2))
diff --git a/kern/include/kdebug.h b/kern/include/kdebug.h
index 307d44f..10f62a5 100644
--- a/kern/include/kdebug.h
+++ b/kern/include/kdebug.h
@@ -3,12 +3,15 @@
 #include <ros/common.h>
 #include <ros/trapframe.h>
 #include <arch/kdebug.h>
+#include <profiler.h>
 
 struct symtab_entry {
        char *name;
        uintptr_t addr;
 };
 
+#define TRACEME() profiler_add_backtrace(read_pc(), read_bp())
+
 void backtrace(void);
 void backtrace_frame(uintptr_t pc, uintptr_t fp);
 size_t backtrace_list(uintptr_t pc, uintptr_t fp, uintptr_t *pcs,
@@ -41,8 +44,5 @@ void set_printx(int mode);
 #define printx(args...) if (printx_on) printk(args)
 #define trace_printx(args...) if (printx_on) trace_printk(args)
 
-#include <oprofile.h>
-#define TRACEME() oprofile_add_backtrace(read_pc(), read_bp())
-
 void debug_addr_proc(struct proc *p, unsigned long addr);
 void debug_addr_pid(int pid, unsigned long addr);
diff --git a/kern/include/profiler.h b/kern/include/profiler.h
new file mode 100644
index 0000000..4d13ebc
--- /dev/null
+++ b/kern/include/profiler.h
@@ -0,0 +1,18 @@
+
+#ifndef ROS_KERN_INC_PROFILER_H
+#define ROS_KERN_INC_PROFILER_H
+
+#include <sys/types.h>
+#include <trap.h>
+
+int profiler_init(void);
+void profiler_cleanup(void);
+void profiler_add_backtrace(uintptr_t pc, uintptr_t fp);
+void profiler_add_userpc(uintptr_t pc);
+void profiler_add_trace(uintptr_t eip);
+void profiler_control_trace(int onoff);
+void profiler_add_hw_sample(struct hw_trapframe *hw_tf);
+int profiler_read(void *va, int n);
+int profiler_size(void);
+
+#endif /* ROS_KERN_INC_PROFILER_H */
diff --git a/kern/src/Kbuild b/kern/src/Kbuild
index d2a9d18..d7c4a8e 100644
--- a/kern/src/Kbuild
+++ b/kern/src/Kbuild
@@ -45,7 +45,7 @@ obj-y                                         += monitor.o
 obj-y                                          += multiboot.o
 obj-y                                          += net/
 obj-y                                          += ns/
-obj-y                                          += oprofile/
+obj-y                                          += profiler.o
 obj-y                                          += page_alloc.o
 obj-y                                          += pagemap.o
 obj-y                                          += pmap.o
diff --git a/kern/src/oprofile/cpu_buffer.c b/kern/src/oprofile/cpu_buffer.c
deleted file mode 100644
index 45f6e93..0000000
--- a/kern/src/oprofile/cpu_buffer.c
+++ /dev/null
@@ -1,760 +0,0 @@
-/**
- * @file cpu_buffer.c
- *
- * @remark Copyright 2002-2009 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
- * @author Barry Kasindorf <barry.kasindorf@amd.com>
- * @author Robert Richter <robert.richter@amd.com>
- *
- * Each CPU has a local buffer that stores PC value/event
- * pairs. We also log context switches when we notice them.
- * Eventually each CPU's buffer is processed into the global
- * event buffer by sync_buffer().
- *
- * We use a local buffer for two reasons: an NMI or similar
- * interrupt cannot synchronise, and high sampling rates
- * would lead to catastrophic global synchronisation if
- * a global buffer was used.
- */
-#include "event_buffer.h"
-#include "cpu_buffer.h"
-#include "buffer_sync.h"
-#include "oprof.h"
-
-#define OP_BUFFER_FLAGS        0
-
-/* we allocate an array of these and set the pointer in pcpui */
-struct oprofile_cpu_buffer *op_cpu_buffer;
-
-/* this one queue is used by #K to get all events. */
-static struct queue *opq;
-
-/* this is run from core 0 for all cpu buffers. */
-static void wq_sync_buffer(void);
-unsigned long oprofile_cpu_buffer_size = 65536;
-unsigned long oprofile_backtrace_depth = 16;
-
-#define DEFAULT_TIMER_EXPIRE (HZ / 10)
-static int work_enabled;
-
-/*
- * Resets the cpu buffer to a sane state.
- *
- * reset these to invalid values; the next sample collected will
- * populate the buffer with proper values to initialize the buffer
- */
-static inline void op_cpu_buffer_reset(int cpu)
-{
-       //print_func_entry();
-       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
-
-       cpu_buf->last_is_kernel = -1;
-       cpu_buf->last_proc = NULL;
-       //print_func_exit();
-}
-
-/* returns the remaining free size of data in the entry */
-static inline
-       int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
-{
-       //print_func_entry();
-       assert(entry->size >= 0);
-       if (!entry->size) {
-               //print_func_exit();
-               return 0;
-       }
-       *entry->data = val;
-       entry->size--;
-       entry->data++;
-       //print_func_exit();
-       return entry->size;
-}
-
-/* returns the size of data in the entry */
-static inline int op_cpu_buffer_get_size(struct op_entry *entry)
-{
-       //print_func_entry();
-       //print_func_exit();
-       return entry->size;
-}
-
-/* returns 0 if empty or the size of data including the current value */
-static inline
-       int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
-{
-       //print_func_entry();
-       int size = entry->size;
-       if (!size) {
-               //print_func_exit();
-               return 0;
-       }
-       *val = *entry->data;
-       entry->size--;
-       entry->data++;
-       //print_func_exit();
-       return size;
-}
-
-unsigned long oprofile_get_cpu_buffer_size(void)
-{
-       //print_func_entry();
-       //print_func_exit();
-       return oprofile_cpu_buffer_size;
-}
-
-void oprofile_cpu_buffer_inc_smpl_lost(void)
-{
-       //print_func_entry();
-       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
-
-       cpu_buf->sample_lost_overflow++;
-       //print_func_exit();
-}
-
-void free_cpu_buffers(void)
-{
-       //print_func_entry();
-       kfree(op_cpu_buffer);
-       /* we can just leave the queue set up; it will then always return EOF */
-       //print_func_exit();
-}
-
-#define RB_EVENT_HDR_SIZE 4
-
-int alloc_cpu_buffers(void)
-{
-       //print_func_entry();
-       /* should probably start using waserror() here. The fail stuff just gets
-        * ugly.
-        */
-       int i;
-       unsigned long buffer_size = oprofile_cpu_buffer_size;
-       unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
-                                                RB_EVENT_HDR_SIZE);
-       /* this can get called lots of times. Things might have been freed.
-        * So be careful.
-        */
-       /* what limit? No idea. */
-       if (!opq)
-               opq = qopen(1024, 0, NULL, NULL);
-       if (!opq)
-               goto fail;
-
-       /* we *really* don't want to block. Losing data is better. */
-       qdropoverflow(opq, 1);
-       if (!op_cpu_buffer) {
-               op_cpu_buffer =
-                       kzmalloc(sizeof(*op_cpu_buffer) * num_cores, KMALLOC_WAIT);
-               if (!op_cpu_buffer)
-                       goto fail;
-
-               for (i = 0; i < num_cores; i++) {
-                       struct oprofile_cpu_buffer *b = &op_cpu_buffer[i];
-                       b->last_proc = NULL;
-                       b->last_is_kernel = -1;
-                       b->tracing = 0;
-                       b->buffer_size = buffer_size;
-                       b->sample_received = 0;
-                       b->sample_lost_overflow = 0;
-                       b->backtrace_aborted = 0;
-                       b->sample_invalid_eip = 0;
-                       b->cpu = i;
-                       b->fullqueue = qopen(1024, Qmsg, NULL, NULL);
-                       b->emptyqueue = qopen(1024, Qmsg, NULL, NULL);
-                       spinlock_init_irqsave(&b->lock);
-               }
-       }
-
-       //print_func_exit();
-       return 0;
-
-fail:
-       free_cpu_buffers();
-       //print_func_exit();
-       return -ENOMEM;
-}
-
-void start_cpu_work(void)
-{
-       //print_func_entry();
-       int i;
-
-       work_enabled = 1;
-       /* task starts here.
-          schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
-        */
-       //print_func_exit();
-}
-
-void end_cpu_work(void)
-{
-       //print_func_entry();
-       work_enabled = 0;
-       //print_func_exit();
-}
-
-/* placeholder. Not used yet.
- */
-void flush_cpu_work(void)
-{
-       //print_func_entry();
-       int i;
-       struct oprofile_cpu_buffer *b = &op_cpu_buffer[core_id()];
-
-       //print_func_exit();
-}
-
-/* Not used since we're not doing per-cpu buffering yet.
- */
-
-struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
-{
-       //print_func_entry();
-       //print_func_exit();
-       return NULL;
-}
-
-static struct block *op_cpu_buffer_write_reserve(struct oprofile_cpu_buffer *cpu_buf,
-       struct op_entry *entry, int size)
-{
-       //print_func_entry();
-       struct block *b;
-       int totalsize = sizeof(struct op_sample) +
-               size * sizeof(entry->sample->data[0]);
-
-       b = cpu_buf->block;
-       /* we might have run out. */
-       if ((! b) || (b->lim - b->wp) < size) {
-               if (b){
-                       qibwrite(opq, b);
-               }
-               /* For now. Later, we will grab a block off the
-                * emptyblock queue.
-                */
-               cpu_buf->block = b = iallocb(oprofile_cpu_buffer_size);
-               if (!b) {
-                       printk("%s: fail\n", __func__);
-                       //print_func_exit();
-                       return NULL;
-               }
-       }
-       entry->sample = (void *)b->wp;
-       entry->size = size;
-       entry->data = entry->sample->data;
-
-       b->wp += totalsize;
-       //print_func_exit();
-       return b;
-
-}
-
-static int
-op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
-                       int is_kernel, struct proc *proc)
-{
-       //print_func_entry();
-       struct block *b;
-       struct op_entry entry;
-       struct op_sample *sample;
-       unsigned long flags;
-       int size;
-       ERRSTACK(1);
-
-       flags = 0;
-
-       if (waserror()) {
-               poperror();
-               printk("%s: failed\n", __func__);
-               //print_func_exit();
-               return 1;
-       }
-
-       if (backtrace)
-               flags |= TRACE_BEGIN;
-
-       /* notice a switch from user->kernel or vice versa */
-       is_kernel = ! !is_kernel;
-       if (cpu_buf->last_is_kernel != is_kernel) {
-               cpu_buf->last_is_kernel = is_kernel;
-               flags |= KERNEL_CTX_SWITCH;
-               if (is_kernel)
-                       flags |= IS_KERNEL;
-       }
-
-       /* notice a proc switch */
-       if (cpu_buf->last_proc != proc) {
-               cpu_buf->last_proc = proc;
-               flags |= USER_CTX_SWITCH;
-       }
-
-       if (!flags) {
-               poperror();
-               /* nothing to do */
-               //print_func_exit();
-               return 0;
-       }
-
-       if (flags & USER_CTX_SWITCH)
-               size = 1;
-       else
-               size = 0;
-
-       b = op_cpu_buffer_write_reserve(cpu_buf, &entry, size);
-
-       entry.sample->eip = ESCAPE_CODE;
-       entry.sample->event = flags;
-
-       if (size)
-               op_cpu_buffer_add_data(&entry, (unsigned long)proc);
-
-       poperror();
-       //print_func_exit();
-       return 0;
-}
-
-static inline int
-op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
-                         unsigned long pc, unsigned long event)
-{
-       //print_func_entry();
-       ERRSTACK(1);
-       struct op_entry entry;
-       struct op_sample *sample;
-       struct block *b;
-
-       if (waserror()) {
-               poperror();
-               printk("%s: failed\n", __func__);
-               //print_func_exit();
-               return 1;
-       }
-
-       b = op_cpu_buffer_write_reserve(cpu_buf, &entry, 0);
-
-       sample = entry.sample;
-       sample->eip = pc;
-       sample->event = event;
-       poperror();
-       //print_func_exit();
-       return 0;
-}
-
-/*
- * This must be safe from any context.
- *
- * is_kernel is needed because on some architectures you cannot
- * tell if you are in kernel or user space simply by looking at
- * pc. We tag this in the buffer by generating kernel enter/exit
- * events whenever is_kernel changes
- */
-static int
-log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
-                  unsigned long backtrace, int is_kernel, unsigned long event,
-                  struct proc *proc)
-{
-       //print_func_entry();
-       struct proc *tsk = proc ? proc : current;
-       cpu_buf->sample_received++;
-
-       if (pc == ESCAPE_CODE) {
-               cpu_buf->sample_invalid_eip++;
-               //print_func_exit();
-               return 0;
-       }
-
-       /* ah, so great. op_add* return 1 in event of failure.
-        * this function returns 0 in event of failure.
-        * what a cluster.
-        */
-       spin_lock_irqsave(&cpu_buf->lock);
-       if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
-               goto fail;
-
-       if (op_add_sample(cpu_buf, pc, event))
-               goto fail;
-       spin_unlock_irqsave(&cpu_buf->lock);
-
-       //print_func_exit();
-       return 1;
-
-fail:
-       cpu_buf->sample_lost_overflow++;
-       //print_func_exit();
-       return 0;
-}
-
-static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
-{
-       //print_func_entry();
-       cpu_buf->tracing = 1;
-       //print_func_exit();
-}
-
-static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
-{
-       //print_func_entry();
-       cpu_buf->tracing = 0;
-       //print_func_exit();
-}
-
-void oprofile_cpubuf_flushone(int core, int newbuf)
-{
-       //print_func_entry();
-       struct oprofile_cpu_buffer *cpu_buf;
-       cpu_buf = &op_cpu_buffer[core];
-       spin_lock_irqsave(&cpu_buf->lock);
-       if (cpu_buf->block) {
-               printk("Core %d has data\n", core);
-               qibwrite(opq, cpu_buf->block);
-               printk("After qibwrite in %s, opq len %d\n", __func__, qlen(opq));
-       }
-       if (newbuf)
-               cpu_buf->block = iallocb(oprofile_cpu_buffer_size);
-       else
-               cpu_buf->block = NULL;
-       spin_unlock_irqsave(&cpu_buf->lock);
-       //print_func_exit();
-}
-
-void oprofile_cpubuf_flushall(int alloc)
-{
-       //print_func_entry();
-       int core;
-
-       for(core = 0; core < num_cores; core++) {
-               oprofile_cpubuf_flushone(core, alloc);
-       }
-       //print_func_exit();
-}
-
-void oprofile_control_trace(int onoff)
-{
-       //print_func_entry();
-       int core;
-       struct oprofile_cpu_buffer *cpu_buf;
-
-       for(core = 0; core < num_cores; core++) {
-               cpu_buf = &op_cpu_buffer[core];
-               cpu_buf->tracing = onoff;
-
-               if (onoff) {
-                       printk("Enable tracing on %d\n", core);
-                       continue;
-               }
-
-               /* halting. Force out all buffers. */
-               oprofile_cpubuf_flushone(core, 0);
-       }
-       //print_func_exit();
-}
-
-static inline void
-__oprofile_add_ext_sample(unsigned long pc,
-                                                 void /*struct pt_regs */ *const regs,
-                                                 unsigned long event, int is_kernel, struct proc *proc)
-{
-       //print_func_entry();
-       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
-       unsigned long backtrace = oprofile_backtrace_depth;
-
-       /*
-        * if log_sample() fail we can't backtrace since we lost the
-        * source of this event
-        */
-       if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, proc))
-               /* failed */
-       {
-               //print_func_exit();
-               return;
-       }
-
-       if (!backtrace) {
-               //print_func_exit();
-               return;
-       }
-#if 0
-       oprofile_begin_trace(cpu_buf);
-       oprofile_ops.backtrace(regs, backtrace);
-       oprofile_end_trace(cpu_buf);
-#endif
-       //print_func_exit();
-}
-
-void oprofile_add_ext_hw_sample(unsigned long pc,
-                                                               void /*struct pt_regs */ *const regs,
-                                                               unsigned long event, int is_kernel,
-                                                               struct proc *proc)
-{
-       //print_func_entry();
-       __oprofile_add_ext_sample(pc, regs, event, is_kernel, proc);
-       //print_func_exit();
-}
-
-void oprofile_add_ext_sample(unsigned long pc,
-                                                        void /*struct pt_regs */ *const regs,
-                                                        unsigned long event, int is_kernel)
-{
-       //print_func_entry();
-       __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
-       //print_func_exit();
-}
-
-void oprofile_add_sample(void /*struct pt_regs */ *const regs,
-                                                unsigned long event)
-{
-       //print_func_entry();
-       int is_kernel;
-       unsigned long pc;
-
-       if (regs) {
-               is_kernel = 0;  // FIXME!user_mode(regs);
-               pc = 0; // FIXME profile_pc(regs);
-       } else {
-               is_kernel = 0;  /* This value will not be used */
-               pc = ESCAPE_CODE;       /* as this causes an early return. */
-       }
-
-       __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
-       //print_func_exit();
-}
-
-/*
- * Add samples with data to the ring buffer.
- *
- * Use oprofile_add_data(&entry, val) to add data and
- * oprofile_write_commit(&entry) to commit the sample.
- */
-void
-oprofile_write_reserve(struct op_entry *entry,
-                                          void /*struct pt_regs */ *const regs,
-                                          unsigned long pc, int code, int size)
-{
-       //print_func_entry();
-       ERRSTACK(1);
-       struct op_sample *sample;
-       struct block *b;
-       int is_kernel = 0;                      // FIXME!user_mode(regs);
-       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
-
-       if (waserror()) {
-               printk("%s: failed\n", __func__);
-               poperror();
-               goto fail;
-       }
-       cpu_buf->sample_received++;
-
-       /* no backtraces for samples with data */
-       if (op_add_code(cpu_buf, 0, is_kernel, current))
-               goto fail;
-
-       b = op_cpu_buffer_write_reserve(cpu_buf, entry, size + 2);
-       sample = entry->sample;
-       sample->eip = ESCAPE_CODE;
-       sample->event = 0;      /* no flags */
-
-       op_cpu_buffer_add_data(entry, code);
-       op_cpu_buffer_add_data(entry, pc);
-       poperror();
-       //print_func_exit();
-       return;
-fail:
-       entry->event = NULL;
-       cpu_buf->sample_lost_overflow++;
-       //print_func_exit();
-}
-
-int oprofile_add_data(struct op_entry *entry, unsigned long val)
-{
-       //print_func_entry();
-       if (!entry->event) {
-               //print_func_exit();
-               return 0;
-       }
-       //print_func_exit();
-       return op_cpu_buffer_add_data(entry, val);
-}
-
-int oprofile_add_data64(struct op_entry *entry, uint64_t val)
-{
-       //print_func_entry();
-       if (!entry->event) {
-               //print_func_exit();
-               return 0;
-       }
-       if (op_cpu_buffer_get_size(entry) < 2)
-               /*
-                * the function returns 0 to indicate a too small
-                * buffer, even if there is some space left
-                */
-       {
-               //print_func_exit();
-               return 0;
-       }
-       if (!op_cpu_buffer_add_data(entry, (uint32_t) val)) {
-               //print_func_exit();
-               return 0;
-       }
-       //print_func_exit();
-       return op_cpu_buffer_add_data(entry, (uint32_t) (val >> 32));
-}
-
-int oprofile_write_commit(struct op_entry *entry)
-{
-       //print_func_entry();
-       /* not much to do at present. In future, we might write the Block
-        * to opq.
-        */
-       //print_func_exit();
-       return 0;
-}
-
-void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
-{
-       //print_func_entry();
-       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
-       log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
-       //print_func_exit();
-}
-
-void oprofile_add_trace(unsigned long pc)
-{
-       if (! op_cpu_buffer)
-               return;
-       //print_func_entry();
-       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
-
-       if (!cpu_buf->tracing) {
-               //print_func_exit();
-               return;
-       }
-
-       /*
-        * broken frame can give an eip with the same value as an
-        * escape code, abort the trace if we get it
-        */
-       if (pc == ESCAPE_CODE)
-               goto fail;
-       if (op_add_sample(cpu_buf, pc, nsec()&~0xf))
-               goto fail;
-
-       //print_func_exit();
-       return;
-fail:
-       printk("%s: fail. Turning of tracing on cpu %d\n", core_id());
-       cpu_buf->tracing = 0;
-       cpu_buf->backtrace_aborted++;
-       //print_func_exit();
-       return;
-}
-
-/* Format for samples:
- * first word:
- * high 8 bits is ee, which is an invalid address on amd64. 
- * next 8 bits is protocol version
- * next 16 bits is unused, MBZ. Later, we can make it a packet type. 
- * next 16 bits is core id
- * next 8 bits is unused
- * next 8 bits is # PCs following. This should be at least 1, for one EIP.
- *
- * second word is time in ns.
- * 
- * Third and following words are PCs, there must be at least one of them. 
- */
-void oprofile_add_backtrace(uintptr_t pc, uintptr_t fp)
-{
-       /* version 1. */
-       uint64_t descriptor = 0xee01ULL<<48;
-       if (! op_cpu_buffer)
-               return;
-       //print_func_entry();
-       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
-
-       if (!cpu_buf->tracing) {
-               //print_func_exit();
-               return;
-       }
-
-       struct op_entry entry;
-       struct op_sample *sample;
-       struct block *b;
-       uint64_t event = nsec();
-
-       uintptr_t bt_pcs[oprofile_backtrace_depth];
-
-       int nr_pcs;
-       nr_pcs = backtrace_list(pc, fp, bt_pcs, oprofile_backtrace_depth);
-
-       /* write_reserve always assumes passed-in-size + 2.
-        * backtrace_depth should always be > 0.
-        */
-       b = op_cpu_buffer_write_reserve(cpu_buf, &entry, nr_pcs);
-
-       if (! b)
-               return;
-
-       /* we are changing the sample format, but not the struct
-        * member names yet. Later, assuming this works out.
-        */
-       descriptor |= (core_id() << 16) | nr_pcs;
-       sample = entry.sample;
-       sample->eip = descriptor;
-       sample->event = event;
-       memcpy(sample->data, bt_pcs, sizeof(uintptr_t) * nr_pcs);
-
-       //print_func_exit();
-       return;
-fail:
-       printk("%s: fail. Turning of tracing on cpu %d\n", core_id());
-       cpu_buf->tracing = 0;
-       cpu_buf->backtrace_aborted++;
-       //print_func_exit();
-       return;
-}
-
-void oprofile_add_userpc(uintptr_t pc)
-{
-       struct oprofile_cpu_buffer *cpu_buf;
-       uint32_t pcoreid = core_id();
-       struct op_entry entry;
-       struct block *b;
-       uint64_t descriptor = (0xee01ULL << 48) | (pcoreid << 16) | 1;
-
-       if (!op_cpu_buffer)
-               return;
-       cpu_buf = &op_cpu_buffer[pcoreid];
-       if (!cpu_buf->tracing)
-               return;
-       /* write_reserve always assumes passed-in-size + 2.  need room for 1 PC. */
-       b = op_cpu_buffer_write_reserve(cpu_buf, &entry, 1);
-       if (!b)
-               return;
-       entry.sample->eip = descriptor;
-       entry.sample->event = nsec();
-       /* entry.sample->data == entry.data */
-       assert(entry.sample->data == entry.data);
-       *entry.sample->data = pc;
-}
-
-int
-oproflen(void)
-{
-       return qlen(opq);
-}
-
-/* return # bytes read, or 0 if profiling is off, or block if profiling on and no data.
- */
-int
-oprofread(void *va, int n)
-{
-       int len = qlen(opq);
-       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
-       if (len == 0) {
-               if (cpu_buf->tracing == 0)
-                       return 0;
-       }
-
-       len = qread(opq, va, n);
-       return len;
-}
diff --git a/kern/src/oprofile/cpu_buffer.h b/kern/src/oprofile/cpu_buffer.h
deleted file mode 100644
index 7205f78..0000000
--- a/kern/src/oprofile/cpu_buffer.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/**
- * @file cpu_buffer.h
- *
- * @remark Copyright 2002-2009 OProfile authors
- * @remark Read the file COPYING
- *
- * @author John Levon <levon@movementarian.org>
- * @author Robert Richter <robert.richter@amd.com>
- */
-
-#pragma once
-#include <vfs.h>
-#include <kfs.h>
-#include <slab.h>
-#include <kmalloc.h>
-#include <kref.h>
-#include <string.h>
-#include <stdio.h>
-#include <assert.h>
-#include <error.h>
-#include <pmap.h>
-#include <smp.h>
-#include <oprofile.h>
-
-int alloc_cpu_buffers(void);
-void free_cpu_buffers(void);
-
-void start_cpu_work(void);
-void end_cpu_work(void);
-void flush_cpu_work(void);
-
-/* CPU buffer is composed of samples. 
- * As these are extracted from the buffer, they are encapsulated
- * in entries, which include additional info.
- */
-struct op_sample {
-       unsigned long eip;
-       unsigned long event;
-       unsigned long data[0];
-};
-
-struct op_entry;
-
-struct oprofile_cpu_buffer {
-       spinlock_t lock;
-       unsigned long buffer_size;
-       struct proc *last_proc;
-       int last_is_kernel;
-       int tracing;
-       unsigned long sample_received;
-       unsigned long sample_lost_overflow;
-       unsigned long backtrace_aborted;
-       unsigned long sample_invalid_eip;
-       int cpu;
-       struct block *block;
-       /* long term plan: when we fill the block,
-        * we write it to fullblock, and pull a
-        * freeblock from the emptyblock queue. 
-        * The thread that pulls fullbocks and
-        * allocates emptyblocks is timer-driven.
-        * Or, barret will make me use his queues,
-        * which is also fine; I just find the queue
-        * functions convenient because they interface to
-        * the dev code so easily.
-        */
-       struct queue *fullqueue, *emptyqueue;
-};
-
-/* extra data flags */
-#define KERNEL_CTX_SWITCH      (1UL << 0)
-#define IS_KERNEL              (1UL << 1)
-#define TRACE_BEGIN            (1UL << 2)
-#define USER_CTX_SWITCH                (1UL << 3)
diff --git a/kern/src/profiler.c b/kern/src/profiler.c
new file mode 100644
index 0000000..5b0f43a
--- /dev/null
+++ b/kern/src/profiler.c
@@ -0,0 +1,304 @@
+
+#include <ros/common.h>
+#include <smp.h>
+#include <trap.h>
+#include <kmalloc.h>
+#include <atomic.h>
+#include <sys/types.h>
+#include "profiler.h"
+
+struct op_sample {
+       uint64_t hdr;
+       uint64_t event;
+       uint64_t data[0];
+};
+
+struct op_entry {
+       struct op_sample *sample;
+       size_t size;
+       uint64_t *data;
+};
+
+struct profiler_cpu_context {
+       spinlock_t lock;
+       int tracing;
+       unsigned long sample_received;
+       unsigned long sample_lost_overflow;
+       unsigned long backtrace_aborted;
+       unsigned long sample_invalid_eip;
+       struct block *block;
+};
+
+static int profiler_queue_limit = 1024;
+static size_t profiler_cpu_buffer_size = 65536;
+static size_t profiler_backtrace_depth = 16;
+static struct profiler_cpu_context *profiler_percpu_ctx;
+static struct queue *profiler_queue;
+
+static inline struct profiler_cpu_context *profiler_get_cpu_ctx(int cpu)
+{
+       return profiler_percpu_ctx + cpu;
+}
+
+static inline uint64_t profiler_create_header(int cpu, size_t nbt)
+{
+       return (((uint64_t) 0xee01) << 48) | ((uint64_t) cpu << 16) |
+               (uint64_t) nbt;
+}
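+/* Example (hypothetical values): profiler_create_header(5, 3) yields
+ * 0xee01000000050003, i.e. the 0xee escape marker and version 01 in the
+ * top 16 bits, core id 5 in bits 16..31, and 3 PCs in the low bits.
+ */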
+
+static inline size_t profiler_cpu_buffer_add_data(struct op_entry *entry,
+                                                                                                 const uintptr_t *values,
+                                                                                                 size_t count)
+{
+       size_t i, n = entry->size;
+
+       /* copy at most the space left in the entry, never past it */
+       if (unlikely(count < n))
+               n = count;
+       for (i = 0; i < n; i++)
+               entry->data[i] = (uint64_t) values[i];
+       entry->size -= n;
+       entry->data += n;
+
+       return entry->size;
+}
+
+static void free_cpu_buffers(void)
+{
+       kfree(profiler_percpu_ctx);
+}
+
+static int alloc_cpu_buffers(void)
+{
+       if (!profiler_queue) {
+               profiler_queue = qopen(profiler_queue_limit, 0, NULL, NULL);
+               if (!profiler_queue)
+                       goto fail;
+       }
+
+       /* we *really* don't want to block. Losing data is better. */
+       qdropoverflow(profiler_queue, 1);
+       if (!profiler_percpu_ctx) {
+               int i;
+
+               profiler_percpu_ctx =
+                       kzmalloc(sizeof(*profiler_percpu_ctx) * num_cores, KMALLOC_WAIT);
+               if (!profiler_percpu_ctx)
+                       goto fail;
+
+               for (i = 0; i < num_cores; i++) {
+                       struct profiler_cpu_context *b = &profiler_percpu_ctx[i];
+
+                       b->tracing = 0;
+                       b->sample_received = 0;
+                       b->sample_lost_overflow = 0;
+                       b->backtrace_aborted = 0;
+                       b->sample_invalid_eip = 0;
+                       spinlock_init_irqsave(&b->lock);
+               }
+       }
+
+       return 0;
+
+fail:
+       free_cpu_buffers();
+       return -ENOMEM;
+}
+
+int profiler_init(void)
+{
+       return alloc_cpu_buffers();
+}
+
+void profiler_cleanup(void)
+{
+       free_cpu_buffers();
+}
+
+static struct block *profiler_cpu_buffer_write_reserve(
+       struct profiler_cpu_context *cpu_buf, struct op_entry *entry, size_t size)
+{
+       struct block *b = cpu_buf->block;
+       size_t totalsize = sizeof(struct op_sample) +
+               size * sizeof(entry->sample->data[0]);
+
+       /* compare against the full footprint in bytes, not the data count */
+       if (unlikely((!b) || (b->lim - b->wp) < totalsize)) {
+               if (b)
+                       qibwrite(profiler_queue, b);
+               /* For now. Later, we will grab a block off the
+                * emptyblock queue.
+                */
+               cpu_buf->block = b = iallocb(profiler_cpu_buffer_size);
+               if (unlikely(!b)) {
+                       printk("%s: fail\n", __func__);
+                       return NULL;
+               }
+       }
+       entry->sample = (struct op_sample *) b->wp;
+       entry->size = size;
+       entry->data = entry->sample->data;
+
+       b->wp += totalsize;
+
+       return b;
+}
+
+static inline int profiler_add_sample(struct profiler_cpu_context *cpu_buf,
+                                                                         uintptr_t pc, unsigned long event)
+{
+       ERRSTACK(1);
+       struct op_entry entry;
+       struct block *b;
+
+       if (waserror()) {
+               poperror();
+               printk("%s: failed\n", __func__);
+               return 1;
+       }
+
+       b = profiler_cpu_buffer_write_reserve(cpu_buf, &entry, 0);
+       if (likely(b)) {
+               entry.sample->hdr = profiler_create_header(core_id(), 1);
+               entry.sample->event = (uint64_t) event;
+               profiler_cpu_buffer_add_data(&entry, &pc, 1);
+       }
+       poperror();
+
+       return b == NULL;
+}
+
+static inline void profiler_begin_trace(struct profiler_cpu_context *cpu_buf)
+{
+       cpu_buf->tracing = 1;
+}
+
+static inline void profiler_end_trace(struct profiler_cpu_context *cpu_buf)
+{
+       cpu_buf->tracing = 0;
+}
+
+static void profiler_cpubuf_flushone(int core, int newbuf)
+{
+       struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core);
+
+       spin_lock_irqsave(&cpu_buf->lock);
+       if (cpu_buf->block) {
+               printk("Core %d has data\n", core);
+               qibwrite(profiler_queue, cpu_buf->block);
+               printk("After qibwrite in %s, profiler_queue len %d\n",
+                          __func__, qlen(profiler_queue));
+       }
+       if (newbuf)
+               cpu_buf->block = iallocb(profiler_cpu_buffer_size);
+       else
+               cpu_buf->block = NULL;
+       spin_unlock_irqsave(&cpu_buf->lock);
+}
+
+void profiler_control_trace(int onoff)
+{
+       int core;
+       struct profiler_cpu_context *cpu_buf;
+
+       for (core = 0; core < num_cores; core++) {
+               cpu_buf = profiler_get_cpu_ctx(core);
+               cpu_buf->tracing = onoff;
+
+               if (onoff) {
+                       printk("Enable tracing on %d\n", core);
+                       continue;
+               }
+
+               /* halting. Force out all buffers. */
+               profiler_cpubuf_flushone(core, 0);
+       }
+}
+
+void profiler_add_trace(uintptr_t pc)
+{
+       struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core_id());
+
+       if (profiler_percpu_ctx && cpu_buf->tracing)
+               profiler_add_sample(cpu_buf, pc, nsec());
+}
+
+/* Format for samples:
+ * first word:
+ * high 8 bits is ee, which is an invalid address on amd64.
+ * next 8 bits is protocol version
+ * next 16 bits is unused, MBZ. Later, we can make it a packet type.
+ * next 16 bits is core id
+ * next 8 bits is unused
+ * next 8 bits is # PCs following. This should be at least 1, for one EIP.
+ *
+ * second word is time in ns.
+ *
+ * Third and following words are PCs, there must be at least one of them.
+ */
+void profiler_add_backtrace(uintptr_t pc, uintptr_t fp)
+{
+       int cpu = core_id();
+       struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(cpu);
+
+       if (profiler_percpu_ctx && cpu_buf->tracing) {
+               struct op_entry entry;
+               struct block *b;
+               uintptr_t bt_pcs[profiler_backtrace_depth];
+               size_t n = backtrace_list(pc, fp, bt_pcs, profiler_backtrace_depth);
+
+               /* write_reserve always assumes passed-in-size + 2.
+                * backtrace_depth should always be > 0.
+                */
+               b = profiler_cpu_buffer_write_reserve(cpu_buf, &entry, n);
+               if (likely(b)) {
+                       entry.sample->hdr = profiler_create_header(cpu, n);
+                       entry.sample->event = nsec();
+                       profiler_cpu_buffer_add_data(&entry, bt_pcs, n);
+               }
+       }
+}
+
+void profiler_add_userpc(uintptr_t pc)
+{
+       int cpu = core_id();
+       struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(cpu);
+
+       if (profiler_percpu_ctx && cpu_buf->tracing) {
+               struct op_entry entry;
+               struct block *b = profiler_cpu_buffer_write_reserve(cpu_buf,
+                                                                                                                       &entry, 1);
+
+               if (likely(b)) {
+                       entry.sample->hdr = profiler_create_header(cpu, 1);
+                       entry.sample->event = nsec();
+                       profiler_cpu_buffer_add_data(&entry, &pc, 1);
+               }
+       }
+}
+
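+/* The kernel vs. user decision lives here now (it used to be in kprof's
+ * alarm handler), which is what opens the door to user stack traces. */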
+void profiler_add_hw_sample(struct hw_trapframe *hw_tf)
+{
+       if (in_kernel(hw_tf))
+               profiler_add_backtrace(get_hwtf_pc(hw_tf), get_hwtf_fp(hw_tf));
+       else
+               profiler_add_userpc(get_hwtf_pc(hw_tf));
+}
+
+int profiler_size(void)
+{
+       return qlen(profiler_queue);
+}
+
+int profiler_read(void *va, int n)
+{
+       struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core_id());
+
+       return cpu_buf->tracing ? qread(profiler_queue, va, n) : 0;
+}
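
A note on consuming the stream: profiler_read() returns raw op_sample
records in the format documented above profiler_add_backtrace(). A
self-contained userspace decoder sketch (hypothetical tooling, not part of
this patch), assuming the samples were read into an array of 64-bit words:

#include <stdint.h>
#include <stdio.h>

/* Walk a buffer of samples: each record is an 0xee01-tagged header word,
 * a nanosecond timestamp, then the PCs whose count sits in the header. */
static size_t decode_samples(const uint64_t *buf, size_t nwords)
{
	size_t i = 0, nsamples = 0;

	while (i + 2 <= nwords) {
		uint64_t hdr = buf[i];
		uint64_t ns = buf[i + 1];
		unsigned int core = (hdr >> 16) & 0xffff;
		unsigned int npcs = hdr & 0xff;

		/* stop on a corrupt or truncated record */
		if ((hdr >> 48) != 0xee01 || npcs < 1 || i + 2 + npcs > nwords)
			break;
		printf("core %u @ %llu ns:", core, (unsigned long long) ns);
		for (unsigned int j = 0; j < npcs; j++)
			printf(" %#llx", (unsigned long long) buf[i + 2 + j]);
		printf("\n");
		i += 2 + npcs;
		nsamples++;
	}
	return nsamples;
}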