[akaros.git] / kern / src / oprofile / cpu_buffer.c
/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Robert Richter <robert.richter@amd.com>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

#define OP_BUFFER_FLAGS 0

/* we allocate an array of these and set the pointer in pcpui */
struct oprofile_cpu_buffer *op_cpu_buffer;

/* this one queue is used by #K to get all events. */
struct queue *opq;

/* this is run from core 0 for all cpu buffers. */
static void wq_sync_buffer(void);
unsigned long oprofile_cpu_buffer_size = 65536;
unsigned long oprofile_backtrace_depth = 8;

#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

/*
 * Resets the cpu buffer to a sane state.
 *
 * reset these to invalid values; the next sample collected will
 * populate the buffer with proper values to initialize the buffer
 */
static inline void op_cpu_buffer_reset(int cpu)
{
	struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[cpu];

	cpu_buf->last_is_kernel = -1;
	cpu_buf->last_proc = NULL;
}

/* returns the remaining free size of data in the entry */
static inline
int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val)
{
	assert(entry->size >= 0);
	if (!entry->size)
		return 0;
	*entry->data = val;
	entry->size--;
	entry->data++;
	return entry->size;
}

/* returns the size of data in the entry */
static inline
int op_cpu_buffer_get_size(struct op_entry *entry)
{
	return entry->size;
}

/* returns 0 if empty or the size of data including the current value */
static inline
int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val)
{
	int size = entry->size;

	if (!size)
		return 0;
	*val = *entry->data;
	entry->size--;
	entry->data++;
	return size;
}

unsigned long oprofile_get_cpu_buffer_size(void)
{
	return oprofile_cpu_buffer_size;
}

void oprofile_cpu_buffer_inc_smpl_lost(void)
{
	struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];

	cpu_buf->sample_lost_overflow++;
}

void free_cpu_buffers(void)
{
	kfree(op_cpu_buffer);
	/* NULL it so a later alloc_cpu_buffers() knows to reallocate */
	op_cpu_buffer = NULL;
	/* we can just leave the queue set up; it will then always return EOF */
}

#define RB_EVENT_HDR_SIZE 4

int alloc_cpu_buffers(void)
{
	int i;
	unsigned long buffer_size = oprofile_cpu_buffer_size;
	unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
						 RB_EVENT_HDR_SIZE);
	/* this can get called lots of times. Things might have been freed.
	 * So be careful.
	 */
	/* what limit? No idea. */
	if (!opq)
		opq = qopen(1024, Qmsg, NULL, NULL);
	if (!opq)
		goto fail;

	/* we *really* don't want to block. Losing data is better. */
	qnoblock(opq, 1);
	if (!op_cpu_buffer) {
		/* one zeroed buffer struct per cpu */
		op_cpu_buffer = kzmalloc(sizeof(*op_cpu_buffer) * num_cpus, 0);
		if (!op_cpu_buffer)
			goto fail;

		for (i = 0; i < num_cpus; i++) {
			struct oprofile_cpu_buffer *b = &op_cpu_buffer[i];
			/* short term: for each event, we're going to kmalloc a
			 * sample and shove it into the opq.
			 * Long term: TBD. One option is to create a big damn Block and
			 * add to it as needed. Once the block is full we can push
			 * it onto the opq. That will actually be pretty fast and easy
			 * if we make the block page-sized. Far, far simpler than the
			 * Linux tracebuffer stuff.  (A sketch of that idea follows
			 * this function.)
			 */
			b->last_proc = NULL;
			b->last_is_kernel = -1;
			b->tracing = 0;
			b->buffer_size = buffer_size;
			b->sample_received = 0;
			b->sample_lost_overflow = 0;
			b->backtrace_aborted = 0;
			b->sample_invalid_eip = 0;
			b->cpu = i;
		}
	}

	return 0;

fail:
	free_cpu_buffers();
	return -ENOMEM;
}

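/*
 * A minimal sketch, not wired in, of the "big damn Block" idea from the
 * comment in alloc_cpu_buffers() above: stage samples in a page-sized Block
 * and only push it onto opq once it fills up.  The staging pointer (a single
 * static here; per-cpu in practice) and the helper name are hypothetical,
 * not part of the current code.
 */
#if 0	/* illustrative only */
static struct block *op_staging_block;

static void op_stage_sample(struct op_sample *samp)
{
	struct block *b = op_staging_block;

	if (!b) {
		b = allocb(PGSIZE);
		if (!b)
			return;	/* dropping a sample beats blocking */
		op_staging_block = b;
	}
	memcpy(b->wp, samp, sizeof(*samp));
	b->wp += sizeof(*samp);
	/* no room for another sample: hand the block to the #K queue */
	if (b->lim - b->wp < sizeof(*samp)) {
		qbwrite(opq, b);
		op_staging_block = NULL;
	}
}
#endif
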
void start_cpu_work(void)
{
	int i;

	work_enabled = 1;
	/* task starts here.
	schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
	*/
}

void end_cpu_work(void)
{
	work_enabled = 0;
}

/* placeholder. Not used yet.
 */
void flush_cpu_work(void)
{
	int i;
	struct oprofile_cpu_buffer *b = &op_cpu_buffer[core_id()];

}

/* Not used since we're not doing per-cpu buffering yet.
 */

struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
	return NULL;
}

/* Allocates a block big enough for one op_sample plus 'size' extra data
 * words, points the entry at it, and advances the block's write pointer.
 * Returns NULL if the block allocation fails. */
static struct block *op_cpu_buffer_write_reserve(struct op_entry *entry, int size)
{
	struct block *b;

	b = allocb(sizeof(struct op_sample) +
		   size * sizeof(entry->sample->data[0]));
	if (!b)
		return NULL;
	entry->sample = (void *)b->wp;
	entry->size = size;
	entry->data = entry->sample->data;

	b->wp += sizeof(struct op_sample) +
		size * sizeof(entry->sample->data[0]);
	return b;
}

static int
op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
	    int is_kernel, struct proc *proc)
{
	struct block *b;
	struct op_entry entry;
	unsigned long flags;
	int size;
	ERRSTACK(1);

	flags = 0;

	if (waserror()) {
		poperror();
		return 1;
	}

	if (backtrace)
		flags |= TRACE_BEGIN;

	/* notice a switch from user->kernel or vice versa */
	is_kernel = !!is_kernel;
	if (cpu_buf->last_is_kernel != is_kernel) {
		cpu_buf->last_is_kernel = is_kernel;
		flags |= KERNEL_CTX_SWITCH;
		if (is_kernel)
			flags |= IS_KERNEL;
	}

	/* notice a proc switch */
	if (cpu_buf->last_proc != proc) {
		cpu_buf->last_proc = proc;
		flags |= USER_CTX_SWITCH;
	}

	if (!flags) {
		poperror();
		/* nothing to do */
		return 0;
	}

	/* a USER_CTX_SWITCH record carries the proc pointer as one data word */
	if (flags & USER_CTX_SWITCH)
		size = 1;
	else
		size = 0;

	b = op_cpu_buffer_write_reserve(&entry, size);
	if (!b) {
		poperror();
		return 1;
	}

	entry.sample->eip = ESCAPE_CODE;
	entry.sample->event = flags;

	if (size)
		op_cpu_buffer_add_data(&entry, (unsigned long)proc);

	qbwrite(opq, b);	/* note: out of our hands now. Don't free. */
	poperror();
	return 0;
}


static inline int
op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
	      unsigned long pc, unsigned long event)
{
	ERRSTACK(1);
	struct op_entry entry;
	struct op_sample *sample;
	struct block *b;

	if (waserror()) {
		poperror();
		return 1;
	}

	b = op_cpu_buffer_write_reserve(&entry, 0);
	if (!b) {
		poperror();
		return 1;
	}

	sample = entry.sample;
	sample->eip = pc;
	sample->event = event;

	qbwrite(opq, b);
	poperror();
	/* like op_add_code(), return 0 on success and nonzero on failure */
	return 0;
}

/*
 * This must be safe from any context.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes
 */
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
	   unsigned long backtrace, int is_kernel, unsigned long event,
	   struct proc *proc)
{
	struct proc *tsk = proc ? proc : current;

	cpu_buf->sample_received++;

	if (pc == ESCAPE_CODE) {
		cpu_buf->sample_invalid_eip++;
		return 0;
	}

	/* ah, so great. op_add* return 1 in event of failure.
	 * this function returns 0 in event of failure.
	 * what a cluster.
	 */
	if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
		goto fail;

	if (op_add_sample(cpu_buf, pc, event))
		goto fail;

	return 1;

fail:
	cpu_buf->sample_lost_overflow++;
	return 0;
}
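
/*
 * A consumer-side sketch, for illustration only, of how the escape records
 * emitted by op_add_code() could be decoded by whatever drains opq (the #K
 * device or sync_buffer()).  The function name is hypothetical; the flag
 * and layout assumptions come straight from op_add_code() above: eip ==
 * ESCAPE_CODE, flags in ->event, and a USER_CTX_SWITCH record followed by
 * one data word holding the proc pointer.
 */
#if 0	/* illustrative only */
static void op_decode_escape(struct op_sample *samp)
{
	if (samp->eip != ESCAPE_CODE)
		return;	/* an ordinary pc/event sample, not an escape record */
	if (samp->event & KERNEL_CTX_SWITCH)
		printk("now sampling %s space\n",
		       (samp->event & IS_KERNEL) ? "kernel" : "user");
	if (samp->event & USER_CTX_SWITCH)
		printk("switched to proc %p\n", (struct proc *)samp->data[0]);
	if (samp->event & TRACE_BEGIN)
		printk("backtrace samples follow\n");
}
#endif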

static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 1;
}

static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 0;
}

static inline void
__oprofile_add_ext_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
			  unsigned long event, int is_kernel,
			  struct proc *proc)
{
	struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];
	unsigned long backtrace = oprofile_backtrace_depth;

	/*
	 * if log_sample() fails we can't backtrace since we lost the
	 * source of this event
	 */
	if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, proc))
		/* failed */
		return;

	if (!backtrace)
		return;
#if 0
	oprofile_begin_trace(cpu_buf);
	oprofile_ops.backtrace(regs, backtrace);
	oprofile_end_trace(cpu_buf);
#endif
}

void oprofile_add_ext_hw_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
				unsigned long event, int is_kernel,
				struct proc *proc)
{
	__oprofile_add_ext_sample(pc, regs, event, is_kernel, proc);
}

void oprofile_add_ext_sample(unsigned long pc, void /*struct pt_regs*/ * const regs,
			     unsigned long event, int is_kernel)
{
	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

void oprofile_add_sample(void /*struct pt_regs*/ * const regs, unsigned long event)
{
	int is_kernel;
	unsigned long pc;

	if (regs) {
		is_kernel = 0;	// FIXME!user_mode(regs);
		pc = 0;		// FIXME profile_pc(regs);
	} else {
		is_kernel = 0;		/* This value will not be used */
		pc = ESCAPE_CODE;	/* as this causes an early return. */
	}

	__oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

/*
 * Add samples with data to the ring buffer.
 *
 * Use oprofile_add_data(&entry, val) to add data and
 * oprofile_write_commit(&entry) to commit the sample.
 * (A usage sketch follows oprofile_write_commit() below.)
 */
void
oprofile_write_reserve(struct op_entry *entry, void /*struct pt_regs*/ * const regs,
		       unsigned long pc, int code, int size)
{
	ERRSTACK(1);
	struct op_sample *sample;
	struct block *b;
	int is_kernel = 0;	// FIXME!user_mode(regs);
	struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];

	if (waserror()) {
		poperror();
		goto fail;
	}
	cpu_buf->sample_received++;

	/* no backtraces for samples with data */
	if (op_add_code(cpu_buf, 0, is_kernel, current)) {
		poperror();
		goto fail;
	}

	/* reserve room for the escape header (code + pc) plus the data words */
	b = op_cpu_buffer_write_reserve(entry, size + 2);
	if (!b) {
		poperror();
		goto fail;
	}
	sample = entry->sample;
	sample->eip = ESCAPE_CODE;
	sample->event = 0;		/* no flags */
	/* mark the entry valid so oprofile_add_data() will accept more words */
	entry->event = (void *)b;

	op_cpu_buffer_add_data(entry, code);
	op_cpu_buffer_add_data(entry, pc);
	qbwrite(opq, b);
	poperror();
	return;
fail:
	entry->event = NULL;
	cpu_buf->sample_lost_overflow++;
}


int oprofile_add_data(struct op_entry *entry, unsigned long val)
{
	if (!entry->event)
		return 0;
	return op_cpu_buffer_add_data(entry, val);
}

int oprofile_add_data64(struct op_entry *entry, uint64_t val)
{
	if (!entry->event)
		return 0;
	if (op_cpu_buffer_get_size(entry) < 2)
		/*
		 * the function returns 0 to indicate a too small
		 * buffer, even if there is some space left
		 */
		return 0;
	if (!op_cpu_buffer_add_data(entry, (uint32_t)val))
		return 0;
	return op_cpu_buffer_add_data(entry, (uint32_t)(val >> 32));
}
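
/*
 * Consumer-side sketch, for illustration only: a value written with
 * oprofile_add_data64() occupies two data words, low 32 bits first, and
 * would be re-assembled like this.  The helper name is hypothetical.
 */
#if 0	/* illustrative only */
static inline uint64_t op_read_data64(const unsigned long *data)
{
	return (uint64_t)(uint32_t)data[0] |
	       ((uint64_t)(uint32_t)data[1] << 32);
}
#endif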

int oprofile_write_commit(struct op_entry *entry)
{
	/* not much to do at present. In future, we might write the Block
	 * to opq.
	 */
	return 0;
}
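
/*
 * A minimal usage sketch, for illustration only, of the reserve/add/commit
 * API described above oprofile_write_reserve().  The function name and the
 * two-word payload are hypothetical; it relies on oprofile_write_reserve()
 * clearing entry->event when the reservation fails.
 */
#if 0	/* illustrative only */
static void example_log_two_words(unsigned long pc, int code,
				  unsigned long a, unsigned long b)
{
	struct op_entry entry;

	/* reserve an escape record with room for two extra data words */
	oprofile_write_reserve(&entry, NULL /* regs */, pc, code, 2);
	if (!entry.event)
		return;		/* reserve failed; the sample was dropped */
	oprofile_add_data(&entry, a);
	oprofile_add_data(&entry, b);
	oprofile_write_commit(&entry);
}
#endif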

void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
	struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];

	log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}

void oprofile_add_trace(unsigned long pc)
{
	struct oprofile_cpu_buffer *cpu_buf = &op_cpu_buffer[core_id()];

	if (!cpu_buf->tracing)
		return;

	/*
	 * broken frame can give an eip with the same value as an
	 * escape code, abort the trace if we get it
	 */
	if (pc == ESCAPE_CODE)
		goto fail;

	if (op_add_sample(cpu_buf, pc, 0))
		goto fail;

	return;
fail:
	cpu_buf->tracing = 0;
	cpu_buf->backtrace_aborted++;
	return;
}