akaros/kern/src/smp.c
/*
 * Copyright (c) 2009 The Regents of the University of California
 * Barret Rhoden <brho@cs.berkeley.edu>
 * See LICENSE for details.
 */

#include <arch/arch.h>
#include <atomic.h>
#include <smp.h>
#include <error.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <pmap.h>
#include <process.h>
#include <schedule.h>
#include <trap.h>
#include <trace.h>
#include <kdebug.h>
#include <kmalloc.h>
#include <core_set.h>
#include <completion.h>
#include <rcu.h>

struct all_cpu_work {
        struct completion comp;
        void (*func)(void *);
        void *opaque;
};

struct per_cpu_info per_cpu_info[MAX_NUM_CORES];

// tracks number of global waits on smp_calls, must be <= NUM_HANDLER_WRAPPERS
atomic_t outstanding_calls = 0;

/* Helper for running a proc (if we should).  Lots of repetition with
 * proc_restartcore */
static void try_run_proc(void)
{
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];

        /* There was a process running here, and we should return to it. */
        if (pcpui->owning_proc) {
                assert(!pcpui->cur_kthread->sysc);
                assert(pcpui->cur_ctx);
                __proc_startcore(pcpui->owning_proc, pcpui->cur_ctx);
                assert(0);
        } else {
                /* Make sure we have abandoned core.  It's possible to have an
                 * owner without a current (smp_idle, __startcore, __death).
                 *
                 * If we had a current process, we might trigger __proc_free,
                 * which could send us a KMSG.  Since we're called after PRKM,
                 * let's just restart the idle loop. */
                if (abandon_core())
                        smp_idle();
        }
}

/* All cores end up calling this whenever there is nothing left to do or they
 * don't know explicitly what to do.  Non-zero cores call it when they are done
 * booting.  Other cases include after getting a DEATH IPI.
 *
 * All cores attempt to run the context of any owning proc.  Barring that, they
 * halt and wake up when interrupted, do any work on their work queue, then halt
 * again.  In between, the ksched gets a chance to tell it to do something else,
 * or perhaps to halt in another manner. */
static void __attribute__((noreturn)) __smp_idle(void *arg)
{
        struct per_cpu_info *pcpui = &per_cpu_info[core_id()];

        pcpui->cur_kthread->flags = KTH_DEFAULT_FLAGS;
        while (1) {
                /* This might wake a kthread (the gp ktask), so be sure to run
                 * PRKM after reporting the quiescent state. */
                rcu_report_qs();
                /* If this runs an RKM, we'll call smp_idle from the top. */
                process_routine_kmsg();
                try_run_proc();
                cpu_bored();            /* call out to the ksched */
                /* cpu_halt() atomically turns on interrupts and halts the core.
                 * Important to do this, since we could have a RKM come in via
                 * an interrupt right while PRKM is returning, and we wouldn't
                 * catch it.  When it returns, IRQs are back off. */
                __set_cpu_state(pcpui, CPU_STATE_IDLE);
                cpu_halt();
                __set_cpu_state(pcpui, CPU_STATE_KERNEL);
        }
        assert(0);
}

void smp_idle(void)
{
        disable_irq();
        __reset_stack_pointer(0, get_stack_top(), __smp_idle);
}

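/* Illustrative sketch (not part of the original file): routine kernel messages
 * (KMSG_ROUTINE) sent to another core are run from the idle loop above via
 * process_routine_kmsg(), or at other safe points.  The handler signature
 * matches smp_do_core_work() below; the handler and payload here are made up
 * for the example. */
#if 0
static void example_rkm_handler(uint32_t srcid, long a0, long a1, long a2)
{
        printk("Core %d got an RKM from core %d, arg0 %ld\n", core_id(), srcid,
               a0);
}

static void example_send_rkm(uint32_t dst_core)
{
        send_kernel_message(dst_core, example_rkm_handler, 42, 0, 0,
                            KMSG_ROUTINE);
}
#endif
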
/* Arch-independent per-cpu initialization.  This will call the arch dependent
 * init first. */
void smp_percpu_init(void)
{
        uint32_t coreid = core_id();
        struct per_cpu_info *pcpui = &per_cpu_info[coreid];
        void *trace_buf;
        struct kthread *kthread;
        /* Don't initialize __ctx_depth here; it is already 1 (at least on
         * x86), since this runs in irq context. */
        /* Do this first */
        __arch_pcpu_init(coreid);
        /* init our kthread (tracks our currently running context) */
        kthread = __kthread_zalloc();
        /* assumes we're on the 1st page */
        kthread->stacktop = get_stack_top();
        pcpui->cur_kthread = kthread;
        /* Treat the startup threads as ktasks.  This will last until smp_idle
         * when they clear it, either in anticipation of being a user-backing
         * kthread or to handle an RKM. */
        kthread->flags = KTH_KTASK_FLAGS;
        per_cpu_info[coreid].spare = 0;
        /* Init relevant lists */
        spinlock_init_irqsave(&per_cpu_info[coreid].immed_amsg_lock);
        STAILQ_INIT(&per_cpu_info[coreid].immed_amsgs);
        spinlock_init_irqsave(&per_cpu_info[coreid].routine_amsg_lock);
        STAILQ_INIT(&per_cpu_info[coreid].routine_amsgs);
        init_timer_chain(&this_pcpui_var(tchain), set_pcpu_alarm_interrupt);
        /* Init generic tracing ring */
        trace_buf = kpage_alloc_addr();
        assert(trace_buf);
        trace_ring_init(&pcpui->traces, trace_buf, PGSIZE,
                        sizeof(struct pcpu_trace_event));
        for (int i = 0; i < NR_CPU_STATES; i++)
                pcpui->state_ticks[i] = 0;
        pcpui->last_tick_cnt = read_tsc();
        /* Core 0 is in the KERNEL state, called from smp_boot.  The other cores
         * are too, at least on x86, where we were called from asm (woken by
         * POKE). */
        pcpui->cpu_state = CPU_STATE_KERNEL;
        /* Enable full lock debugging, after all pcpui work is done */
        pcpui->__lock_checking_enabled = 1;
}

/* It's actually okay to set the state to the existing state.  Originally, it
 * was a bug in the state tracking, but it is possible, at least on x86, to have
 * a halted core (state IDLE) get woken up by an IRQ that does not trigger the
 * IRQ handling state.  For example, there is the I_POKE_CORE IPI.  smp_idle
 * will just sleep again, and reset the state from IDLE to IDLE. */
void __set_cpu_state(struct per_cpu_info *pcpui, int state)
{
        uint64_t now_ticks;

        assert(!irq_is_enabled());
        /* TODO: could put in an option to enable/disable state tracking. */
        now_ticks = read_tsc();
        pcpui->state_ticks[pcpui->cpu_state] += now_ticks -
                                                pcpui->last_tick_cnt;
        /* TODO: if the state was user, we could account for the vcore's time,
         * similar to the total_ticks in struct vcore.  The difference is that
         * the total_ticks tracks the vcore's virtual time, while this tracks
         * user time.  Something like vcore->user_ticks. */
        pcpui->cpu_state = state;
        pcpui->last_tick_cnt = now_ticks;
}

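/* Illustrative sketch (not part of the original file): an arch IRQ path might
 * bracket interrupt handling with state transitions so the per-state tick
 * accounting above stays accurate.  The function below is hypothetical; only
 * __set_cpu_state() and the CPU_STATE_* values are assumed from smp.h. */
#if 0
static void example_irq_accounting(struct per_cpu_info *pcpui)
{
        /* Charges the time since the last transition to the previous state
         * (KERNEL, USER, or IDLE), then starts accounting IRQ time. */
        __set_cpu_state(pcpui, CPU_STATE_IRQ);
        /* ... dispatch the actual IRQ handlers here ... */
        __set_cpu_state(pcpui, CPU_STATE_KERNEL);
}
#endif
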
void reset_cpu_state_ticks(int coreid)
{
        struct per_cpu_info *pcpui = &per_cpu_info[coreid];
        uint64_t now_ticks;

        if (coreid >= num_cores)
                return;
        /* need to update last_tick_cnt, so the current value doesn't get added
         * in next time we update */
        now_ticks = read_tsc();
        for (int i = 0; i < NR_CPU_STATES; i++) {
                pcpui->state_ticks[i] = 0;
                pcpui->last_tick_cnt = now_ticks;
        }
}

/* PCPUI Trace Rings: */

static void pcpui_trace_kmsg_handler(void *event, void *data)
{
        struct pcpu_trace_event *te = (struct pcpu_trace_event*)event;
        uintptr_t addr;

        addr = te->arg1;
        printk("\tKMSG %p: %s\n", addr, get_fn_name(addr));
}

static void pcpui_trace_locks_handler(void *event, void *data)
{
        struct pcpu_trace_event *te = (struct pcpu_trace_event*)event;
        const char *func_name;
        uintptr_t lock_addr = te->arg1;

        if (lock_addr > KERN_LOAD_ADDR)
                func_name = get_fn_name(lock_addr);
        else
                func_name = "Dynamic lock";
        print_lock();
        printk("Time %uus, lock %p (%s)\n", te->arg0, lock_addr, func_name);
        printk("\t");
        spinlock_debug((spinlock_t*)lock_addr);
        print_unlock();
}

/* Add specific trace handlers here: */
trace_handler_t pcpui_tr_handlers[PCPUI_NR_TYPES] = {
                                  0,
                                  pcpui_trace_kmsg_handler,
                                  pcpui_trace_locks_handler,
                                  };

/* Generic handler for the pcpui ring.  Will switch out to the appropriate
 * type's handler */
static void pcpui_trace_fn(void *event, void *data)
{
        struct pcpu_trace_event *te = (struct pcpu_trace_event*)event;
        int desired_type = (int)(long)data;

        if (te->type >= PCPUI_NR_TYPES) {
                printk("Bad trace type %d\n", te->type);
                /* Don't index pcpui_tr_handlers with an out-of-range type. */
                return;
        }
        /* desired_type == 0 means all types */
        if (desired_type && desired_type != te->type)
                return;
        if (pcpui_tr_handlers[te->type])
                pcpui_tr_handlers[te->type](event, data);
}

void pcpui_tr_foreach(int coreid, int type)
{
        struct trace_ring *tr = &per_cpu_info[coreid].traces;

        assert(tr);
        printk("\n\nTrace Ring on Core %d\n--------------\n", coreid);
        trace_ring_foreach(tr, pcpui_trace_fn, (void*)(long)type);
}

void pcpui_tr_foreach_all(int type)
{
        for (int i = 0; i < num_cores; i++)
                pcpui_tr_foreach(i, type);
}

void pcpui_tr_reset_all(void)
{
        for (int i = 0; i < num_cores; i++)
                trace_ring_reset(&per_cpu_info[i].traces);
}

void pcpui_tr_reset_and_clear_all(void)
{
        for (int i = 0; i < num_cores; i++)
                trace_ring_reset_and_clear(&per_cpu_info[i].traces);
}

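/* Illustrative sketch (not part of the original file): dumping and resetting
 * the per-core trace rings from a debugging context such as the kernel
 * monitor.  A type of 0 prints events of every type; a specific PCPUI type
 * index prints only that kind of event. */
#if 0
static void example_dump_all_traces(void)
{
        /* Print every recorded event on every core, then reset the rings so
         * old events are not reported again. */
        pcpui_tr_foreach_all(0);
        pcpui_tr_reset_all();
}
#endif
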
static void smp_do_core_work(uint32_t srcid, long a0, long a1, long a2)
{
        struct all_cpu_work *acw = (struct all_cpu_work *) a0;

        acw->func(acw->opaque);
        completion_complete(&acw->comp, 1);
}

void smp_do_in_cores(const struct core_set *cset, void (*func)(void *),
                     void *opaque)
{
        int cpu = core_id();
        struct all_cpu_work acw;

        memset(&acw, 0, sizeof(acw));
        completion_init(&acw.comp, core_set_remote_count(cset));
        acw.func = func;
        acw.opaque = opaque;

        for (int i = 0; i < num_cores; i++) {
                if (core_set_getcpu(cset, i)) {
                        if (i == cpu)
                                func(opaque);
                        else
                                send_kernel_message(i, smp_do_core_work,
                                                    (long)&acw, 0, 0,
                                                    KMSG_ROUTINE);
                }
        }
        completion_wait(&acw.comp);
}

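/* Illustrative sketch (not part of the original file): running a function on
 * every available core with smp_do_in_cores().  core_set_init() and
 * core_set_fill_available() are assumed from core_set.h; the calling core runs
 * func directly, while the other cores in the set run it as a routine KMSG. */
#if 0
static void example_say_hello(void *opaque)
{
        printk("Hello from core %d (%s)\n", core_id(), (char*)opaque);
}

static void example_all_core_hello(void)
{
        struct core_set cset;

        core_set_init(&cset);
        core_set_fill_available(&cset);
        smp_do_in_cores(&cset, example_say_hello, "smp_do_in_cores demo");
}
#endif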