Do not race when multiple inits happen at the same time
[akaros.git] / kern/src/profiler.c
#include <ros/common.h>
#include <smp.h>
#include <trap.h>
#include <kthread.h>
#include <kmalloc.h>
#include <atomic.h>
#include <sys/types.h>
#include "profiler.h"

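/* One sample as written into the output queue: hdr packs the core id and the
 * number of PCs, event is a timestamp in ns, and data[] holds the PCs.  See
 * the format comment above profiler_add_backtrace() for the exact layout. */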
struct op_sample {
	uint64_t hdr;
	uint64_t event;
	uint64_t data[0];
};

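/* Write cursor into a reserved sample: data and size track how much room is
 * left for PCs in the current op_sample. */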
struct op_entry {
	struct op_sample *sample;
	size_t size;
	uint64_t *data;
};

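/* Per-core state: the block currently being filled and whether this core is
 * recording samples. */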
struct profiler_cpu_context {
	spinlock_t lock;
	int tracing;
	struct block *block;
};

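/* The shared queue and the per-core contexts are allocated by the first
 * profiler_init() and freed when the last user calls profiler_cleanup();
 * mtx serializes those paths and protects profiler_users. */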
static int profiler_queue_limit = 1024;
static size_t profiler_cpu_buffer_size = 65536;
static size_t profiler_backtrace_depth = 16;
static struct semaphore mtx = SEMAPHORE_INITIALIZER(mtx, 1);
static int profiler_users = 0;
static struct profiler_cpu_context *profiler_percpu_ctx;
static struct queue *profiler_queue;

static inline struct profiler_cpu_context *profiler_get_cpu_ctx(int cpu)
{
	return profiler_percpu_ctx + cpu;
}

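/* Builds the first word of a sample: the 0xee01 marker/version in the top 16
 * bits, the core id in bits 31:16, and the number of PCs that follow in the
 * low bits. */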
static inline uint64_t profiler_create_header(int cpu, size_t nbt)
{
	return (((uint64_t) 0xee01) << 48) | ((uint64_t) cpu << 16) |
		(uint64_t) nbt;
}

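/* Copies up to count PCs into the reserved sample, clamped to the space that
 * was reserved; returns the space remaining. */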
static inline size_t profiler_cpu_buffer_add_data(struct op_entry *entry,
						  const uintptr_t *values,
						  size_t count)
{
	size_t i;

	if (unlikely(count > entry->size))
		count = entry->size;
	for (i = 0; i < count; i++)
		entry->data[i] = (uint64_t) values[i];
	entry->size -= count;
	entry->data += count;

	return entry->size;
}

static void free_cpu_buffers(void)
{
	kfree(profiler_percpu_ctx);
	profiler_percpu_ctx = NULL;

	qclose(profiler_queue);
	profiler_queue = NULL;
}

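/* Sets up the shared output queue (non-blocking, drop on overflow) and the
 * per-core contexts.  Called under mtx from profiler_init(). */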
static int alloc_cpu_buffers(void)
{
	int i;

	profiler_queue = qopen(profiler_queue_limit, 0, NULL, NULL);
	if (!profiler_queue)
		return -ENOMEM;

	qdropoverflow(profiler_queue, 1);
	qnonblock(profiler_queue, 1);

	profiler_percpu_ctx =
		kzmalloc(sizeof(*profiler_percpu_ctx) * num_cores, KMALLOC_WAIT);
	if (!profiler_percpu_ctx)
		goto fail;

	for (i = 0; i < num_cores; i++) {
		struct profiler_cpu_context *b = &profiler_percpu_ctx[i];

		b->tracing = 0;
		spinlock_init_irqsave(&b->lock);
	}

	return 0;

fail:
	qclose(profiler_queue);
	profiler_queue = NULL;
	return -ENOMEM;
}

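/* Takes a reference on the profiler, allocating the buffers on first use.
 * Typical sequence, based on the API below: profiler_init(), then
 * profiler_control_trace(1), feed samples (e.g. profiler_add_hw_sample()
 * from the sampling interrupt path), profiler_control_trace(0), drain with
 * profiler_read(), and finally profiler_cleanup(). */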
int profiler_init(void)
{
	int error = 0;

	sem_down(&mtx);
	if (!profiler_queue)
		error = alloc_cpu_buffers();
	/* Only count users whose init actually succeeded, so a failed init
	 * cannot leave a dangling reference that keeps the buffers alive. */
	if (!error)
		profiler_users++;
	sem_up(&mtx);

	return error;
}

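/* Drops a reference; the buffers are freed when the last user goes away. */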
void profiler_cleanup(void)
{
	sem_down(&mtx);
	profiler_users--;
	if (profiler_users == 0)
		free_cpu_buffers();
	sem_up(&mtx);
}

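/* Reserves room in the core's current block for a sample with 'size' PCs,
 * pushing the block to profiler_queue and grabbing a fresh one when it is
 * full.  Returns NULL if no block could be allocated. */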
static struct block *profiler_cpu_buffer_write_reserve(
	struct profiler_cpu_context *cpu_buf, struct op_entry *entry, size_t size)
{
	struct block *b = cpu_buf->block;
	size_t totalsize = sizeof(struct op_sample) +
		size * sizeof(entry->sample->data[0]);

	if (unlikely((!b) || (b->lim - b->wp) < totalsize)) {
		if (b)
			qibwrite(profiler_queue, b);
		/* For now. Later, we will grab a block off the
		 * emptyblock queue.
		 */
		cpu_buf->block = b = iallocb(profiler_cpu_buffer_size);
		if (unlikely(!b)) {
			printk("%s: fail\n", __func__);
			return NULL;
		}
	}
	entry->sample = (struct op_sample *) b->wp;
	entry->size = size;
	entry->data = entry->sample->data;

	b->wp += totalsize;

	return b;
}

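/* Records a single-PC sample with the given event word (the timestamp).
 * Returns 0 on success, 1 if the sample could not be reserved. */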
static inline int profiler_add_sample(struct profiler_cpu_context *cpu_buf,
				      uintptr_t pc, unsigned long event)
{
	ERRSTACK(1);
	struct op_entry entry;
	struct block *b;

	if (waserror()) {
		poperror();
		printk("%s: failed\n", __func__);
		return 1;
	}

	/* Reserve room for one PC; the header below advertises one. */
	b = profiler_cpu_buffer_write_reserve(cpu_buf, &entry, 1);
	if (likely(b)) {
		entry.sample->hdr = profiler_create_header(core_id(), 1);
		entry.sample->event = (uint64_t) event;
		profiler_cpu_buffer_add_data(&entry, &pc, 1);
	}
	poperror();

	return b == NULL;
}

static inline void profiler_begin_trace(struct profiler_cpu_context *cpu_buf)
{
	cpu_buf->tracing = 1;
}

static inline void profiler_end_trace(struct profiler_cpu_context *cpu_buf)
{
	cpu_buf->tracing = 0;
}

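/* Pushes the core's partially filled block (if any) to profiler_queue and,
 * if newbuf is set, hands the core a fresh block. */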
static void profiler_cpubuf_flushone(int core, int newbuf)
{
	struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core);

	spin_lock_irqsave(&cpu_buf->lock);
	if (cpu_buf->block) {
		printk("Core %d has data\n", core);
		qibwrite(profiler_queue, cpu_buf->block);
		printk("After qibwrite in %s, profiler_queue len %d\n",
		       __func__, qlen(profiler_queue));
	}
	if (newbuf)
		cpu_buf->block = iallocb(profiler_cpu_buffer_size);
	else
		cpu_buf->block = NULL;
	spin_unlock_irqsave(&cpu_buf->lock);
}

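/* Turns sampling on or off for every core; on the way off, each core's
 * pending block is flushed so the reader sees the tail of the trace. */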
void profiler_control_trace(int onoff)
{
	int core;

	for (core = 0; core < num_cores; core++) {
		struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core);

		cpu_buf->tracing = onoff;
		if (onoff) {
			printk("Enable tracing on %d\n", core);
		} else {
			printk("Disable tracing on %d\n", core);
			profiler_cpubuf_flushone(core, 0);
		}
	}
}

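/* Records the given PC on the calling core, timestamped with nsec(). */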
void profiler_add_trace(uintptr_t pc)
{
	struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(core_id());

	if (profiler_percpu_ctx && cpu_buf->tracing)
		profiler_add_sample(cpu_buf, pc, nsec());
}

/* Format for samples:
 * first word:
 * high 8 bits is ee, which is an invalid address on amd64.
 * next 8 bits is protocol version
 * next 16 bits is unused, MBZ. Later, we can make it a packet type.
 * next 16 bits is core id
 * next 8 bits is unused
 * next 8 bits is # PCs following. This should be at least 1, for one EIP.
 *
 * second word is time in ns.
 *
 * Third and following words are PCs, there must be at least one of them.
 */
void profiler_add_backtrace(uintptr_t pc, uintptr_t fp)
{
	int cpu = core_id();
	struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(cpu);

	if (profiler_percpu_ctx && cpu_buf->tracing) {
		struct op_entry entry;
		struct block *b;
		uintptr_t bt_pcs[profiler_backtrace_depth];
		size_t n = backtrace_list(pc, fp, bt_pcs, profiler_backtrace_depth);

		b = profiler_cpu_buffer_write_reserve(cpu_buf, &entry, n);
		if (likely(b)) {
			entry.sample->hdr = profiler_create_header(cpu, n);
			entry.sample->event = nsec();
			profiler_cpu_buffer_add_data(&entry, bt_pcs, n);
		}
	}
}

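/* Records a single user-space PC for the calling core. */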
void profiler_add_userpc(uintptr_t pc)
{
	int cpu = core_id();
	struct profiler_cpu_context *cpu_buf = profiler_get_cpu_ctx(cpu);

	if (profiler_percpu_ctx && cpu_buf->tracing) {
		struct op_entry entry;
		struct block *b = profiler_cpu_buffer_write_reserve(cpu_buf,
								    &entry, 1);

		if (likely(b)) {
			entry.sample->hdr = profiler_create_header(cpu, 1);
			entry.sample->event = nsec();
			profiler_cpu_buffer_add_data(&entry, &pc, 1);
		}
	}
}

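/* Entry point for the sampling interrupt path: kernel hits get a full
 * backtrace, user hits just the PC. */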
void profiler_add_hw_sample(struct hw_trapframe *hw_tf)
{
	if (in_kernel(hw_tf))
		profiler_add_backtrace(get_hwtf_pc(hw_tf), get_hwtf_fp(hw_tf));
	else
		profiler_add_userpc(get_hwtf_pc(hw_tf));
}

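/* Reader side: profiler_size() reports how much data is queued and
 * profiler_read() copies up to n bytes of it into va. */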
int profiler_size(void)
{
	return qlen(profiler_queue);
}

int profiler_read(void *va, int n)
{
	return qread(profiler_queue, va, n);
}