oprofile: minimally working
[akaros.git] / kern / src / oprofile / cpu_buffer.c
/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Robert Richter <robert.richter@amd.com>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */
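
/*
 * Note on this Akaros port: per-cpu buffering and sync_buffer() are not
 * wired up yet.  Each op_add_*() below packs its op_sample(s) into a
 * freshly allocated Block and pushes that Block straight onto the single
 * #K queue (opq); op_cpu_buffer_read_entry() is a stub.  See the comments
 * in alloc_cpu_buffers() for the longer-term plan.
 */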
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

#define OP_BUFFER_FLAGS 0

/* we allocate an array of these and set the pointer in pcpui */
struct oprofile_cpu_buffer *op_cpu_buffer;

/* this one queue is used by #K to get all events. */
struct queue *opq;

/* this is run from core 0 for all cpu buffers. */
static void wq_sync_buffer(void);
unsigned long oprofile_cpu_buffer_size = 65536;
unsigned long oprofile_backtrace_depth = 8;

#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

/*
 * Resets the cpu buffer to a sane state.
 *
 * reset these to invalid values; the next sample collected will
 * populate the buffer with proper values to initialize the buffer
 */
static inline void op_cpu_buffer_reset(int cpu)
{
        struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[cpu];

        /* invalid values force op_add_code() to emit fresh kernel/user and
         * proc switch records on the next sample */
        cpu_buf->last_is_kernel = -1;
        cpu_buf->last_proc = NULL;
}

/* returns the remaining free size of data in the entry */
static inline
int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
{
        assert(entry->size >= 0);
        if (!entry->size) {
                return 0;
        }
        *entry->data = val;
        entry->size--;
        entry->data++;
        return entry->size;
}

/* returns the size of data in the entry */
static inline
int op_cpu_buffer_get_size(struct op_entry *entry)
{
        return entry->size;
}

/* returns 0 if empty or the size of data including the current value */
static inline
int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
{
        int size = entry->size;
        if (!size) {
                return 0;
        }
        *val = *entry->data;
        entry->size--;
        entry->data++;
        return size;
}
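
/*
 * The op_entry helpers above implement a simple cursor over the data[]
 * words of one op_sample.  A minimal producer-side sketch, assuming a
 * two-word payload (illustrative only; the real users are op_add_code()
 * and oprofile_write_reserve() below):
 *
 *	struct op_entry entry;
 *	struct block *b = op_cpu_buffer_write_reserve(&entry, 2);
 *
 *	if (b) {
 *		entry.sample->eip = ESCAPE_CODE;
 *		entry.sample->event = flags;
 *		op_cpu_buffer_add_data(&entry, code);	// entry.size: 2 -> 1
 *		op_cpu_buffer_add_data(&entry, pc);	// entry.size: 1 -> 0
 *		qbwrite(opq, b);			// Block now belongs to opq
 *	}
 */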

unsigned long oprofile_get_cpu_buffer_size(void)
{
        return oprofile_cpu_buffer_size;
}

void oprofile_cpu_buffer_inc_smpl_lost(void)
{
        struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];

        cpu_buf->sample_lost_overflow++;
}

void free_cpu_buffers(void)
{
        kfree(op_cpu_buffer);
        /* alloc_cpu_buffers() checks this pointer to decide whether to
         * reallocate, so don't leave it dangling */
        op_cpu_buffer = NULL;
        /* we can just leave the queue set up; it will then always return EOF */
}

#define RB_EVENT_HDR_SIZE 4

int alloc_cpu_buffers(void)
{
        int i;
        unsigned long buffer_size = oprofile_cpu_buffer_size;
        unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
                                                 RB_EVENT_HDR_SIZE);
        /* this can get called lots of times. Things might have been freed.
         * So be careful.
         */
        /* what limit? No idea. */
        if (! opq)
                opq = qopen(1024, Qmsg, NULL, NULL);
        if (! opq)
                goto fail;

        /* we *really* don't want to block. Losing data is better. */
        qnoblock(opq, 1);
        if (! op_cpu_buffer) {
                /* one buffer per cpu; kzmalloc takes (size, flags) */
                op_cpu_buffer = kzmalloc(sizeof(*op_cpu_buffer) * num_cpus, 0);
                if (! op_cpu_buffer)
                        goto fail;

                for (i = 0; i < num_cpus; i++) {
                        struct oprofile_cpu_buffer *b = &op_cpu_buffer[i];
                        /* short term: for each event, we're going to kmalloc a
                         * sample and shove it into the opq.
                         * Long term: TBD. One option is to create a big damn Block and
                         * add to it as needed. Once the block is full we can push
                         * it onto the opq. That will actually be pretty fast and easy
                         * if we make the block page-sized. Far, far simpler than the
                         * Linux tracebuffer stuff.
                         */
                        b->last_proc = NULL;
                        b->last_is_kernel = -1;
                        b->tracing = 1;
                        b->buffer_size = buffer_size;
                        b->sample_received = 0;
                        b->sample_lost_overflow = 0;
                        b->backtrace_aborted = 0;
                        b->sample_invalid_eip = 0;
                        b->cpu = i;
                }
        }

        return 0;

fail:
        free_cpu_buffers();
        return -ENOMEM;
}
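
/*
 * A rough sketch of the "page-sized Block" idea mentioned in the loop
 * above (not implemented in this commit; the b->block field and the
 * flushing policy are hypothetical):
 *
 *	struct block *blk = b->block;
 *
 *	if (!blk || blk->lim - blk->wp < sizeof(struct op_sample)) {
 *		if (blk)
 *			qbwrite(opq, blk);	// push the full Block to #K
 *		blk = b->block = allocb(PGSIZE);
 *	}
 *	memmove(blk->wp, sample, sizeof(struct op_sample));
 *	blk->wp += sizeof(struct op_sample);
 */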

void start_cpu_work(void)
{
        int i;

        work_enabled = 1;
        /* task starts here.
        schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
        */
}

void end_cpu_work(void)
{
        work_enabled = 0;
}

/* placeholder. Not used yet.
 */
void flush_cpu_work(void)
{
        int i;
        struct oprofile_cpu_buffer *b = &op_cpu_buffer[core_id()];

}

/* Not used since we're not doing per-cpu buffering yet.
 */

struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
        return NULL;
}

/* Reserve room in a new Block for one op_sample plus 'size' data words and
 * point 'entry' at it.  The caller fills in the sample and then qbwrites the
 * returned Block to opq (ownership passes to the queue). */
static struct block *op_cpu_buffer_write_reserve(struct op_entry *entry, int size)
{
        struct block *b;

        b = allocb(sizeof(struct op_sample) +
                   size * sizeof(entry->sample->data[0]));
        if (!b) {
                printk("%s: fail\n", __func__);
                return NULL;
        }
        entry->sample = (void *)b->wp;
        entry->size = size;
        entry->data = entry->sample->data;

        b->wp += sizeof(struct op_sample) +
                size * sizeof(entry->sample->data[0]);
        return b;
}

static int
op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
            int is_kernel, struct proc *proc)
{
        struct block *b;
        struct op_entry entry;
        struct op_sample *sample;
        unsigned long flags;
        int size;
        ERRSTACK(1);

        flags = 0;

        if (waserror()) {
                poperror();
                printk("%s: failed\n", __func__);
                return 1;
        }

        if (backtrace)
                flags |= TRACE_BEGIN;

        /* notice a switch from user->kernel or vice versa */
        is_kernel = !!is_kernel;
        if (cpu_buf->last_is_kernel != is_kernel) {
                cpu_buf->last_is_kernel = is_kernel;
                flags |= KERNEL_CTX_SWITCH;
                if (is_kernel)
                        flags |= IS_KERNEL;
        }

        /* notice a proc switch */
        if (cpu_buf->last_proc != proc) {
                cpu_buf->last_proc = proc;
                flags |= USER_CTX_SWITCH;
        }

        if (!flags) {
                poperror();
                /* nothing to do */
                return 0;
        }

        if (flags & USER_CTX_SWITCH)
                size = 1;
        else
                size = 0;

        b = op_cpu_buffer_write_reserve(&entry, size);
        if (!b) {
                poperror();
                return 1;
        }

        entry.sample->eip = ESCAPE_CODE;
        entry.sample->event = flags;

        if (size)
                op_cpu_buffer_add_data(&entry, (unsigned long)proc);

        qbwrite(opq, b); /* note: out of our hands now. Don't free. */
        poperror();
        return 0;
}

static inline int
op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
              unsigned long pc, unsigned long event)
{
        ERRSTACK(1);
        struct op_entry entry;
        struct op_sample *sample;
        struct block *b;

        if (waserror()) {
                poperror();
                printk("%s: failed\n", __func__);
                return 1;
        }

        b = op_cpu_buffer_write_reserve(&entry, 0);
        if (!b) {
                poperror();
                return 1;
        }

        sample = entry.sample;
        sample->eip = pc;
        sample->event = event;
        qbwrite(opq, b);
        poperror();
        return 0;
}

/*
 * This must be safe from any context.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes
 */
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
           unsigned long backtrace, int is_kernel, unsigned long event,
           struct proc *proc)
{
        struct proc *tsk = proc ? proc : current;
        cpu_buf->sample_received++;

        if (pc == ESCAPE_CODE) {
                cpu_buf->sample_invalid_eip++;
                return 0;
        }

        /* ah, so great. op_add* return 1 in event of failure.
         * this function returns 0 in event of failure.
         * what a cluster.
         */
        if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
                goto fail;

        if (op_add_sample(cpu_buf, pc, event))
                goto fail;

        return 1;

fail:
        cpu_buf->sample_lost_overflow++;
        return 0;
}
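
/*
 * For reference: on success, log_sample() pushes at most two op_samples
 * onto opq via the helpers above,
 *
 *	{ .eip = ESCAPE_CODE, .event = flags [, .data[0] = proc] }
 *	{ .eip = pc,          .event = event }
 *
 * where the first (context) record only appears when flags are needed
 * (TRACE_BEGIN, a kernel/user switch, or a proc switch) and carries the
 * proc pointer only on a proc switch.
 */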

static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
        cpu_buf->tracing = 1;
}

static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
        cpu_buf->tracing = 0;
}

static inline void
__oprofile_add_ext_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
                          unsigned long event, int is_kernel,
                          struct proc *proc)
{
        struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
        unsigned long backtrace = oprofile_backtrace_depth;

        /*
         * if log_sample() fails we can't backtrace since we lost the
         * source of this event
         */
        if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, proc))
                return;

        if (!backtrace) {
                return;
        }
#if 0
        oprofile_begin_trace(cpu_buf);
        oprofile_ops.backtrace(regs, backtrace);
        oprofile_end_trace(cpu_buf);
#endif
}

void oprofile_add_ext_hw_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
                                unsigned long event, int is_kernel,
                                struct proc *proc)
{
        __oprofile_add_ext_sample(pc, regs, event, is_kernel, proc);
}

void oprofile_add_ext_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
                             unsigned long event, int is_kernel)
{
        __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

void oprofile_add_sample(void /*struct pt_regs*/ * const regs, unsigned long event)
{
        int is_kernel;
        unsigned long pc;

        if (regs) {
                is_kernel = 0; // FIXME!user_mode(regs);
                pc = 0; // FIXME profile_pc(regs);
        } else {
                is_kernel = 0;    /* This value will not be used */
                pc = ESCAPE_CODE; /* as this causes an early return. */
        }

        __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

/*
 * Add samples with data to the ring buffer.
 *
 * Use oprofile_add_data(&entry, val) to add data and
 * oprofile_write_commit(&entry) to commit the sample.
 */
void
oprofile_write_reserve(struct op_entry *entry, void /*struct pt_regs*/ * const regs,
                       unsigned long pc, int code, int size)
{
        ERRSTACK(1);
        struct op_sample *sample;
        struct block *b;
        int is_kernel = 0; // FIXME!user_mode(regs);
        struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];

        if (waserror()) {
                printk("%s: failed\n", __func__);
                poperror();
                goto fail;
        }
        cpu_buf->sample_received++;

        /* no backtraces for samples with data */
        if (op_add_code(cpu_buf, 0, is_kernel, current)) {
                poperror();
                goto fail;
        }

        b = op_cpu_buffer_write_reserve(entry, size + 2);
        if (!b) {
                poperror();
                goto fail;
        }
        sample = entry->sample;
        sample->eip = ESCAPE_CODE;
        sample->event = 0;              /* no flags */

        op_cpu_buffer_add_data(entry, code);
        op_cpu_buffer_add_data(entry, pc);
        qbwrite(opq, b);
        poperror();
        return;
fail:
        entry->event = NULL;
        cpu_buf->sample_lost_overflow++;
}

int oprofile_add_data(struct op_entry *entry, unsigned long val)
{
        if (!entry->event) {
                return 0;
        }
        return op_cpu_buffer_add_data(entry, val);
}

int oprofile_add_data64(struct op_entry *entry, uint64_t val)
{
        if (!entry->event) {
                return 0;
        }
        /*
         * the function returns 0 to indicate a too small
         * buffer, even if there is some space left
         */
        if (op_cpu_buffer_get_size(entry) < 2)
                return 0;
        if (!op_cpu_buffer_add_data(entry, (uint32_t)val)) {
                return 0;
        }
        return op_cpu_buffer_add_data(entry, (uint32_t)(val >> 32));
}

int oprofile_write_commit(struct op_entry *entry)
{
        /* not much to do at present. In future, we might write the Block
         * to opq.
         */
        return 0;
}
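
/*
 * Illustrative use of the reserve/add/commit trio (a sketch, not called
 * anywhere in this file; SOME_CODE and the values are placeholders):
 *
 *	struct op_entry entry;
 *
 *	oprofile_write_reserve(&entry, regs, pc, SOME_CODE, 3);
 *	oprofile_add_data(&entry, val);		// one data word
 *	oprofile_add_data64(&entry, val64);	// two data words
 *	oprofile_write_commit(&entry);
 *
 * Note that in this commit the Block is already qbwritten to opq by
 * oprofile_write_reserve(); oprofile_write_commit() is currently a no-op.
 */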

void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
        struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
        log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}

void oprofile_add_trace(unsigned long pc)
{
        struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];

        if (!cpu_buf->tracing) {
                return;
        }

        /*
         * broken frame can give an eip with the same value as an
         * escape code, abort the trace if we get it
         */
        if (pc == ESCAPE_CODE)
                goto fail;

        if (op_add_sample(cpu_buf, pc, 0))
                goto fail;

        return;
fail:
        printk("%s: fail. Turning off tracing on cpu %d\n", __func__, core_id());
        cpu_buf->tracing = 0;
        cpu_buf->backtrace_aborted++;
        return;
}