First oprofile file to build
author Ronald G. Minnich <rminnich@google.com>
Wed, 7 May 2014 23:17:21 +0000 (16:17 -0700)
committer Ronald G. Minnich <rminnich@google.com>
Wed, 7 May 2014 23:21:29 +0000 (16:21 -0700)
This is the one on which all else builds. It takes events of different types
and wedges them into the opq queue (i.e., the queue struct that I'm about to
make available via #K).
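
For the curious, the consumer side in #K would presumably just drain opq,
roughly like this (a hypothetical sketch: the #K read path is not part of this
patch, and the function name is made up):

static long oprofile_read(void *va, long n)
{
	/* block until a producer qbwrites samples, then copy them out */
	struct block *b = qbread(opq, n);
	long len;

	if (!b)
		return 0;
	len = BLEN(b);
	memmove(va, b->rp, len);
	freeb(b);
	return len;
}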

This is not efficient. In fact it's quite stupid: it calls kzmalloc for each
sample! I have a simple idea for making it efficient: accumulate the cpu
samples into a struct block and, when that block runs out of room, qbwrite it.

If that works, and is fast enough, giant gobs of nasty stuff in the Linux
driver will vanish in a puff of smoke. I suspect it will be fast enough: qbwrite
is just linked-list manipulation, and if we make the block big enough
(64K will hold 16000 samples) I think we'll be able to just sail along.
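
Roughly, the fast path could look like the sketch below. This is only a sketch:
cpu_buf->block is a hypothetical field that struct oprofile_cpu_buffer does not
have yet, and it leans on the same allocb()/qbwrite() block plumbing the rest
of this patch already uses.

static void op_add_sample_fast(struct oprofile_cpu_buffer *cpu_buf,
                               unsigned long pc, unsigned long event)
{
	struct op_sample *s;

	/* lazily allocate the per-cpu block; 64K holds ~16000 samples */
	if (!cpu_buf->block)
		cpu_buf->block = allocb(65536);

	/* out of room: hand the whole block to #K's queue in one qbwrite
	 * and start filling a fresh one */
	if (cpu_buf->block->lim - cpu_buf->block->wp < (long)sizeof(*s)) {
		qbwrite(opq, cpu_buf->block);
		cpu_buf->block = allocb(65536);
	}

	/* append the sample in place; no kzmalloc per sample */
	s = (struct op_sample *)cpu_buf->block->wp;
	s->eip = pc;
	s->event = event;
	cpu_buf->block->wp += sizeof(*s);
}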

Signed-off-by: Ronald G. Minnich <rminnich@google.com>
kern/include/oprofile.h [new file with mode: 0644]
kern/src/Kbuild
kern/src/oprofile/Kbuild [new file with mode: 0644]
kern/src/oprofile/cpu_buffer.c
kern/src/oprofile/cpu_buffer.h
kern/src/oprofile/event_buffer.h

diff --git a/kern/include/oprofile.h b/kern/include/oprofile.h
new file mode 100644 (file)
index 0000000..ab68609
--- /dev/null
@@ -0,0 +1,154 @@
+/**
+ * @file oprofile.h
+ *
+ * API for machine-specific interrupts to interface
+ * to oprofile.
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#ifndef OPROFILE_H
+#define OPROFILE_H
+
+/* Each escaped entry is prefixed by ESCAPE_CODE
+ * then one of the following codes, then the
+ * relevant data.
+ * These #defines live in this file so that arch-specific
+ * buffer sync'ing code can access them.
+ */
+#define ESCAPE_CODE                    ~0UL
+#define CTX_SWITCH_CODE                        1
+#define CPU_SWITCH_CODE                        2
+#define COOKIE_SWITCH_CODE             3
+#define KERNEL_ENTER_SWITCH_CODE       4
+#define KERNEL_EXIT_SWITCH_CODE                5
+#define MODULE_LOADED_CODE             6
+#define CTX_TGID_CODE                  7
+#define TRACE_BEGIN_CODE               8
+#define TRACE_END_CODE                 9
+#define XEN_ENTER_SWITCH_CODE          10
+#define SPU_PROFILING_CODE             11
+#define SPU_CTX_SWITCH_CODE            12
+#define IBS_FETCH_CODE                 13
+#define IBS_OP_CODE                    14
+
+/* Operations structure to be filled in */
+struct oprofile_operations {
+       /* create any necessary configuration files in the oprofile fs.
+        * Optional. */
+       int (*create_files)(void* sb, void *root);
+       /* Do any necessary interrupt setup. Optional. */
+       int (*setup)(void);
+       /* Do any necessary interrupt shutdown. Optional. */
+       void (*shutdown)(void);
+       /* Start delivering interrupts. */
+       int (*start)(void);
+       /* Stop delivering interrupts. */
+       void (*stop)(void);
+       /* Arch-specific buffer sync functions.
+        * Return value = 0:  Success
+        * Return value = -1: Failure
+        * Return value = 1:  Run generic sync function
+        */
+       int (*sync_start)(void);
+       int (*sync_stop)(void);
+
+       /* Initiate a stack backtrace. Optional. */
+       void (*backtrace)(void * const regs, unsigned int depth);
+
+       /* Multiplex between different events. Optional. */
+       int (*switch_events)(void);
+       /* CPU identification string. */
+       char * cpu_type;
+};
+
+/**
+ * One-time initialisation. *ops must be set to a filled-in
+ * operations structure. This is called even in timer interrupt
+ * mode so an arch can set a backtrace callback.
+ *
+ * If an error occurs, the fields should be left untouched.
+ */
+int oprofile_arch_init(struct oprofile_operations * ops);
+/**
+ * One-time exit/cleanup for the arch.
+ */
+void oprofile_arch_exit(void);
+
+/**
+ * Add a sample. This may be called from any context.
+ */
+void oprofile_add_sample(void* const regs, unsigned long event);
+
+/**
+ * Add an extended sample.  Use this when the PC is not from the regs, and
+ * we cannot determine if we're in kernel mode from the regs.
+ *
+ * This function does perform a backtrace.
+ *
+ */
+void oprofile_add_ext_sample(unsigned long pc, void * const regs,
+                               unsigned long event, int is_kernel);
+
+/**
+ * Add an hardware sample.
+ */
+void oprofile_add_ext_hw_sample(unsigned long pc, /*struct pt_regs*/void * const regs,
+                               unsigned long event, int is_kernel,
+                               struct proc *proc);
+
+/* Use this instead when the PC value is not from the regs. Doesn't
+ * backtrace. */
+void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);
+
+/* add a backtrace entry, to be called from the ->backtrace callback */
+void oprofile_add_trace(unsigned long eip);
+
+
+/**
+ * Add the contents of a circular buffer to the event buffer.
+ */
+void oprofile_put_buff(unsigned long *buf, unsigned int start,
+                       unsigned int stop, unsigned int max);
+
+unsigned long oprofile_get_cpu_buffer_size(void);
+void oprofile_cpu_buffer_inc_smpl_lost(void);
+/* cpu buffer functions */
+
+struct op_sample;
+
+struct op_entry {
+       void *event;
+       struct op_sample *sample;
+       unsigned long size;
+       unsigned long *data;
+};
+
+void oprofile_write_reserve(struct op_entry *entry,
+                           void */*struct pt_regs **/ const regs,
+                           unsigned long pc, int code, int size);
+int oprofile_add_data(struct op_entry *entry, unsigned long val);
+int oprofile_add_data64(struct op_entry *entry, uint64_t val);
+int oprofile_write_commit(struct op_entry *entry);
+
+int oprofile_perf_init(struct oprofile_operations *ops);
+void oprofile_perf_exit(void);
+char *op_name_from_perf_id(void);
+
+#if 0
+make these weak functions.
+static inline int __init oprofile_perf_init(struct oprofile_operations *ops)
+{
+       pr_info("oprofile: hardware counters not available\n");
+       return -ENODEV;
+}
+static inline void oprofile_perf_exit(void) { }
+#endif
+
+#endif /* OPROFILE_H */
diff --git a/kern/src/Kbuild b/kern/src/Kbuild
index acfaf21..4cb25b3 100644 (file)
@@ -30,6 +30,7 @@ obj-y                                         += monitor.o
 obj-y                                          += multiboot.o
 obj-y                                          += net/
 obj-y                                          += ns/
+obj-y                                          += oprofile/
 obj-y                                          += page_alloc.o
 obj-y                                          += pagemap.o
 obj-y                                          += pmap.o
diff --git a/kern/src/oprofile/Kbuild b/kern/src/oprofile/Kbuild
new file mode 100644 (file)
index 0000000..e545568
--- /dev/null
@@ -0,0 +1 @@
+obj-y                                          += cpu_buffer.o
\ No newline at end of file
diff --git a/kern/src/oprofile/cpu_buffer.c b/kern/src/oprofile/cpu_buffer.c
index 38bb9a5..fe0ac25 100644 (file)
@@ -18,7 +18,6 @@
  * would lead to catastrophic global synchronisation if
  * a global buffer was used.
  */
-
 #include "event_buffer.h"
 #include "cpu_buffer.h"
 #include "buffer_sync.h"
 
 #define OP_BUFFER_FLAGS        0
 
-static struct ring_buffer *op_ring_buffer;
-DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);
+/* we allocate an array of these and set the pointer in pcpui */
+struct oprofile_cpu_buffer *op_cpu_buffer;
+
+/* this one queue is used by #K to get all events. */
+struct queue *opq;
 
-static void wq_sync_buffer(struct work_struct *work);
+/* this is run from core 0 for all cpu buffers. */
+static void wq_sync_buffer(void);
+unsigned long oprofile_cpu_buffer_size = 65536; 
+unsigned long oprofile_backtrace_depth = 8;
 
 #define DEFAULT_TIMER_EXPIRE (HZ / 10)
 static int work_enabled;
 
+/*
+ * Resets the cpu buffer to a sane state.
+ *
+ * reset these to invalid values; the next sample collected will
+ * populate the buffer with proper values to initialize the buffer
+ */
+static inline void op_cpu_buffer_reset(int cpu)
+{
+       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
+
+       cpu_buf->last_is_kernel = -1;
+       cpu_buf->last_proc = NULL;
+}
+
+/* returns the remaining free size of data in the entry */
+static inline
+int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
+{
+       assert(entry->size >= 0);
+       if (!entry->size)
+               return 0;
+       *entry->data = val;
+       entry->size--;
+       entry->data++;
+       return entry->size;
+}
+
+/* returns the size of data in the entry */
+static inline
+int op_cpu_buffer_get_size(struct op_entry *entry)
+{
+       return entry->size;
+}
+
+/* returns 0 if empty or the size of data including the current value */
+static inline
+int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
+{
+       int size = entry->size;
+       if (!size)
+               return 0;
+       *val = *entry->data;
+       entry->size--;
+       entry->data++;
+       return size;
+}
+
 unsigned long oprofile_get_cpu_buffer_size(void)
 {
        return oprofile_cpu_buffer_size;
@@ -41,16 +93,15 @@ unsigned long oprofile_get_cpu_buffer_size(void)
 
 void oprofile_cpu_buffer_inc_smpl_lost(void)
 {
-       struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
 
        cpu_buf->sample_lost_overflow++;
 }
 
 void free_cpu_buffers(void)
 {
-       if (op_ring_buffer)
-               ring_buffer_free(op_ring_buffer);
-       op_ring_buffer = NULL;
+       kfree(op_cpu_buffer);
+       /* we can just leave the queue set up; it will then always return EOF */
 }
 
 #define RB_EVENT_HDR_SIZE 4
@@ -63,14 +114,26 @@ int alloc_cpu_buffers(void)
        unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
                                                 RB_EVENT_HDR_SIZE);
 
-       op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
-       if (!op_ring_buffer)
+       /* what limit? No idea. */
+       opq = qopen(1024, Qmsg, NULL, NULL);
+       if (! opq)
                goto fail;
 
-       for_each_possible_cpu(i) {
-               struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
+       op_cpu_buffer = kzmalloc(sizeof(*op_cpu_buffer) * num_cpus, 0);
+       if (! op_cpu_buffer)
+               goto fail;
 
-               b->last_task = NULL;
+       for(i = 0; i < num_cpus; i++) {
+               struct oprofile_cpu_buffer *b = &op_cpu_buffer[i];
+               /* short term: for each event, we're going to kmalloc a
+                * sample and shove it into the opq.
+                * Long term: TBD. One option is to create a big damn Block and
+                * add to it as needed. Once the block is full we can push
+                * it onto the opq. That will actually be pretty fast and easy
+                * if we make the block page-sized. Far, far simpler than the
+                * Linux tracebuffer stuff. 
+                */
+               b->last_proc = NULL;
                b->last_is_kernel = -1;
                b->tracing = 0;
                b->buffer_size = buffer_size;
@@ -79,8 +142,8 @@ int alloc_cpu_buffers(void)
                b->backtrace_aborted = 0;
                b->sample_invalid_eip = 0;
                b->cpu = i;
-               INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
        }
+
        return 0;
 
 fail:
@@ -93,16 +156,9 @@ void start_cpu_work(void)
        int i;
 
        work_enabled = 1;
-
-       for_each_online_cpu(i) {
-               struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
-
-               /*
-                * Spread the work by 1 jiffy per cpu so they dont all
-                * fire at once.
-                */
-               schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
-       }
+       /* task starts here. 
+       schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
+       */
 }
 
 void end_cpu_work(void)
@@ -110,81 +166,58 @@ void end_cpu_work(void)
        work_enabled = 0;
 }
 
+/* placeholder. Not used yet.
+ */
 void flush_cpu_work(void)
 {
        int i;
+       struct oprofile_cpu_buffer *b = &op_cpu_buffer[core_id()];
 
-       for_each_online_cpu(i) {
-               struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
-
-               /* these works are per-cpu, no need for flush_sync */
-               flush_delayed_work(&b->work);
-       }
 }
 
-/*
- * This function prepares the cpu buffer to write a sample.
- *
- * Struct op_entry is used during operations on the ring buffer while
- * struct op_sample contains the data that is stored in the ring
- * buffer. Struct entry can be uninitialized. The function reserves a
- * data array that is specified by size. Use
- * op_cpu_buffer_write_commit() after preparing the sample. In case of
- * errors a null pointer is returned, otherwise the pointer to the
- * sample.
- *
+/* Not used since we're not doing per-cpu buffering yet.
  */
-struct op_sample
-*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
-{
-       entry->event = ring_buffer_lock_reserve
-               (op_ring_buffer, sizeof(struct op_sample) +
-                size * sizeof(entry->sample->data[0]));
-       if (!entry->event)
-               return NULL;
-       entry->sample = ring_buffer_event_data(entry->event);
-       entry->size = size;
-       entry->data = entry->sample->data;
-
-       return entry->sample;
-}
 
-int op_cpu_buffer_write_commit(struct op_entry *entry)
+struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
 {
-       return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
+       return NULL;
 }
 
-struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
+static struct block *op_cpu_buffer_write_reserve(struct op_entry *entry, int size)
 {
-       struct ring_buffer_event *e;
-       e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL);
-       if (!e)
+       struct block *b;
+       
+       b = allocb(sizeof(struct op_sample) +
+                  size * sizeof(entry->sample->data[0]));
+       if (!b)
                return NULL;
-
-       entry->event = e;
-       entry->sample = ring_buffer_event_data(e);
-       entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
-               / sizeof(entry->sample->data[0]);
+       entry->sample = (void *)b->wp;
+       entry->size = size;
        entry->data = entry->sample->data;
-       return entry->sample;
-}
 
-unsigned long op_cpu_buffer_entries(int cpu)
-{
-       return ring_buffer_entries_cpu(op_ring_buffer, cpu);
-}
+       b->wp += sizeof(struct op_sample) +
+               size * sizeof(entry->sample->data[0]);
+       return b;
 
+}
 static int
 op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
-           int is_kernel, struct task_struct *task)
+           int is_kernel, struct proc *proc)
 {
+       struct block *b;
        struct op_entry entry;
        struct op_sample *sample;
        unsigned long flags;
        int size;
+       ERRSTACK(1);
 
        flags = 0;
 
+       if (waserror()) {
+               poperror();
+               return 1;
+       }
+
        if (backtrace)
                flags |= TRACE_BEGIN;
 
@@ -197,33 +230,33 @@ op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
                        flags |= IS_KERNEL;
        }
 
-       /* notice a task switch */
-       if (cpu_buf->last_task != task) {
-               cpu_buf->last_task = task;
+       /* notice a proc switch */
+       if (cpu_buf->last_proc != proc) {
+               cpu_buf->last_proc = proc;
                flags |= USER_CTX_SWITCH;
        }
 
-       if (!flags)
+       if (!flags) {
+               poperror();
                /* nothing to do */
                return 0;
+       }
 
        if (flags & USER_CTX_SWITCH)
                size = 1;
        else
                size = 0;
 
-       sample = op_cpu_buffer_write_reserve(&entry, size);
-       if (!sample)
-               return -ENOMEM;
+       b = op_cpu_buffer_write_reserve(&entry, size);
 
-       sample->eip = ESCAPE_CODE;
-       sample->event = flags;
+       entry.sample->eip = ESCAPE_CODE;
+       entry.sample->event = flags;
 
        if (size)
-               op_cpu_buffer_add_data(&entry, (unsigned long)task);
-
-       op_cpu_buffer_write_commit(&entry);
+               op_cpu_buffer_add_data(&entry, (unsigned long)proc);
 
+       qbwrite(opq, b); /* note: out of our hands now. Don't free. */
+       poperror();
        return 0;
 }
 
@@ -231,17 +264,25 @@ static inline int
 op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
              unsigned long pc, unsigned long event)
 {
+       ERRSTACK(1);
        struct op_entry entry;
        struct op_sample *sample;
+       struct block *b;
+
+       if (waserror()) {
+               poperror();
+               return 1;
+       }
 
-       sample = op_cpu_buffer_write_reserve(&entry, 0);
-       if (!sample)
-               return -ENOMEM;
+       b = op_cpu_buffer_write_reserve(&entry, 0);
 
+       sample = entry.sample;
        sample->eip = pc;
        sample->event = event;
 
-       return op_cpu_buffer_write_commit(&entry);
+       qbwrite(opq, b);
+       poperror();
+       return 1;
 }
 
 /*
@@ -255,9 +296,9 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
 static int
 log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
           unsigned long backtrace, int is_kernel, unsigned long event,
-          struct task_struct *task)
+          struct proc *proc)
 {
-       struct task_struct *tsk = task ? task : current;
+       struct proc *tsk = proc ? proc : current;
        cpu_buf->sample_received++;
 
        if (pc == ESCAPE_CODE) {
@@ -265,6 +306,10 @@ log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
                return 0;
        }
 
+       /* ah, so great. op_add* return 1 in event of failure. 
+        * this function returns 0 in event of failure.
+        * what a cluster.
+        */
        if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
                goto fail;
 
@@ -289,50 +334,51 @@ static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
 }
 
 static inline void
-__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
+__oprofile_add_ext_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
                          unsigned long event, int is_kernel,
-                         struct task_struct *task)
+                         struct proc *proc)
 {
-       struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
        unsigned long backtrace = oprofile_backtrace_depth;
 
        /*
         * if log_sample() fail we can't backtrace since we lost the
         * source of this event
         */
-       if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task))
+       if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, proc))
                /* failed */
                return;
 
        if (!backtrace)
                return;
-
+#if 0
        oprofile_begin_trace(cpu_buf);
        oprofile_ops.backtrace(regs, backtrace);
        oprofile_end_trace(cpu_buf);
+#endif
 }
 
-void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
+void oprofile_add_ext_hw_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
                                unsigned long event, int is_kernel,
-                               struct task_struct *task)
+                               struct proc *proc)
 {
-       __oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
+       __oprofile_add_ext_sample(pc, regs, event, is_kernel, proc);
 }
 
-void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
+void oprofile_add_ext_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
                             unsigned long event, int is_kernel)
 {
        __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
 }
 
-void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
+void oprofile_add_sample(void /*struct pt_regs*/ * const regs, unsigned long event)
 {
        int is_kernel;
        unsigned long pc;
 
-       if (likely(regs)) {
-               is_kernel = !user_mode(regs);
-               pc = profile_pc(regs);
+       if (regs) {
+               is_kernel = 0; // FIXME!user_mode(regs);
+               pc = 0; // FIXME profile_pc(regs);
        } else {
                is_kernel = 0;    /* This value will not be used */
                pc = ESCAPE_CODE; /* as this causes an early return. */
@@ -348,30 +394,35 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
  * oprofile_write_commit(&entry) to commit the sample.
  */
 void
-oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
+oprofile_write_reserve(struct op_entry *entry, void /*struct pt_regs*/ * const regs,
                       unsigned long pc, int code, int size)
 {
+       ERRSTACK(1);
        struct op_sample *sample;
-       int is_kernel = !user_mode(regs);
-       struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+       struct block *b;
+       int is_kernel = 0; // FIXME!user_mode(regs);
+       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
 
+       if (waserror()){
+               poperror();
+               goto fail;
+       }
        cpu_buf->sample_received++;
 
        /* no backtraces for samples with data */
        if (op_add_code(cpu_buf, 0, is_kernel, current))
                goto fail;
 
-       sample = op_cpu_buffer_write_reserve(entry, size + 2);
-       if (!sample)
-               goto fail;
+       b = op_cpu_buffer_write_reserve(entry, size + 2);
+       sample = entry->sample;
        sample->eip = ESCAPE_CODE;
        sample->event = 0;              /* no flags */
 
        op_cpu_buffer_add_data(entry, code);
        op_cpu_buffer_add_data(entry, pc);
-
+       qbwrite(opq, b);
+       poperror();
        return;
-
 fail:
        entry->event = NULL;
        cpu_buf->sample_lost_overflow++;
@@ -384,7 +435,7 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val)
        return op_cpu_buffer_add_data(entry, val);
 }
 
-int oprofile_add_data64(struct op_entry *entry, u64 val)
+int oprofile_add_data64(struct op_entry *entry, uint64_t val)
 {
        if (!entry->event)
                return 0;
@@ -394,27 +445,28 @@ int oprofile_add_data64(struct op_entry *entry, u64 val)
                 * buffer, even if there is some space left
                 */
                return 0;
-       if (!op_cpu_buffer_add_data(entry, (u32)val))
+       if (!op_cpu_buffer_add_data(entry, (uint32_t)val))
                return 0;
-       return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
+       return op_cpu_buffer_add_data(entry, (uint32_t)(val >> 32));
 }
 
 int oprofile_write_commit(struct op_entry *entry)
 {
-       if (!entry->event)
-               return -EINVAL;
-       return op_cpu_buffer_write_commit(entry);
+       /* not much to do at present. In future, we might write the Block
+        * to opq.
+        */
+       return 0;
 }
 
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
 {
-       struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
        log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
 }
 
 void oprofile_add_trace(unsigned long pc)
 {
-       struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+       struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
 
        if (!cpu_buf->tracing)
                return;
@@ -436,24 +488,3 @@ fail:
        return;
 }
 
-/*
- * This serves to avoid cpu buffer overflow, and makes sure
- * the task mortuary progresses
- *
- * By using schedule_delayed_work_on and then schedule_delayed_work
- * we guarantee this will stay on the correct cpu
- */
-static void wq_sync_buffer(struct work_struct *work)
-{
-       struct oprofile_cpu_buffer *b =
-               container_of(work, struct oprofile_cpu_buffer, work.work);
-       if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) {
-               cancel_delayed_work(&b->work);
-               return;
-       }
-       sync_buffer(b->cpu);
-
-       /* don't re-add the work if we're shutting down */
-       if (work_enabled)
-               schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
-}
diff --git a/kern/src/oprofile/cpu_buffer.h b/kern/src/oprofile/cpu_buffer.h
index e1d097e..56e5907 100644 (file)
 
 #ifndef OPROFILE_CPU_BUFFER_H
 #define OPROFILE_CPU_BUFFER_H
-
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/workqueue.h>
-#include <linux/cache.h>
-#include <linux/sched.h>
-#include <linux/ring_buffer.h>
-
-struct task_struct;
+#include <vfs.h>
+#include <kfs.h>
+#include <slab.h>
+#include <kmalloc.h>
+#include <kref.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <error.h>
+#include <pmap.h>
+#include <smp.h>
+#include <oprofile.h>
 
 int alloc_cpu_buffers(void);
 void free_cpu_buffers(void);
@@ -27,8 +30,9 @@ void start_cpu_work(void);
 void end_cpu_work(void);
 void flush_cpu_work(void);
 
-/* CPU buffer is composed of such entries (which are
- * also used for context switch notes)
+/* CPU buffer is composed of samples. 
+ * As these are extracted from the buffer, they are encapsulated
+ * in entries, which include additional info.
  */
 struct op_sample {
        unsigned long eip;
@@ -40,7 +44,7 @@ struct op_entry;
 
 struct oprofile_cpu_buffer {
        unsigned long buffer_size;
-       struct task_struct *last_task;
+       struct proc *last_proc;
        int last_is_kernel;
        int tracing;
        unsigned long sample_received;
@@ -48,70 +52,11 @@ struct oprofile_cpu_buffer {
        unsigned long backtrace_aborted;
        unsigned long sample_invalid_eip;
        int cpu;
-       struct delayed_work work;
 };
 
-DECLARE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);
-
-/*
- * Resets the cpu buffer to a sane state.
- *
- * reset these to invalid values; the next sample collected will
- * populate the buffer with proper values to initialize the buffer
- */
-static inline void op_cpu_buffer_reset(int cpu)
-{
-       struct oprofile_cpu_buffer *cpu_buf = &per_cpu(op_cpu_buffer, cpu);
-
-       cpu_buf->last_is_kernel = -1;
-       cpu_buf->last_task = NULL;
-}
-
-/*
- * op_cpu_buffer_add_data() and op_cpu_buffer_write_commit() may be
- * called only if op_cpu_buffer_write_reserve() did not return NULL or
- * entry->event != NULL, otherwise entry->size or entry->event will be
- * used uninitialized.
- */
-
-struct op_sample
-*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size);
-int op_cpu_buffer_write_commit(struct op_entry *entry);
 struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu);
 unsigned long op_cpu_buffer_entries(int cpu);
 
-/* returns the remaining free size of data in the entry */
-static inline
-int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
-{
-       if (!entry->size)
-               return 0;
-       *entry->data = val;
-       entry->size--;
-       entry->data++;
-       return entry->size;
-}
-
-/* returns the size of data in the entry */
-static inline
-int op_cpu_buffer_get_size(struct op_entry *entry)
-{
-       return entry->size;
-}
-
-/* returns 0 if empty or the size of data including the current value */
-static inline
-int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
-{
-       int size = entry->size;
-       if (!size)
-               return 0;
-       *val = *entry->data;
-       entry->size--;
-       entry->data++;
-       return size;
-}
-
 /* extra data flags */
 #define KERNEL_CTX_SWITCH      (1UL << 0)
 #define IS_KERNEL              (1UL << 1)
diff --git a/kern/src/oprofile/event_buffer.h b/kern/src/oprofile/event_buffer.h
index 5042a68..b2cd2a2 100644 (file)
@@ -27,9 +27,4 @@ void wake_up_buffer_waiter(void);
 #define INVALID_COOKIE ~0UL
 #define NO_COOKIE 0UL
 
-/* mutex between sync_cpu_buffers() and the
- * file reading code.
- */
-extern struct mutex buffer_mutex;
-
 #endif /* EVENT_BUFFER_H */