#endif
#include <ros/bcq.h>
+#include <event.h>
#include <arch/arch.h>
-#include <arch/bitmask.h>
+#include <bitmask.h>
#include <process.h>
#include <atomic.h>
#include <smp.h>
#include <manager.h>
#include <stdio.h>
#include <assert.h>
-#include <timing.h>
+#include <time.h>
#include <hashtable.h>
#include <slab.h>
#include <sys/queue.h>
#include <frontend.h>
#include <monitor.h>
-#include <resource.h>
#include <elf.h>
#include <arsc_server.h>
#include <devfs.h>
-/* Process Lists */
-struct proc_list proc_runnablelist = TAILQ_HEAD_INITIALIZER(proc_runnablelist);
-spinlock_t runnablelist_lock = SPINLOCK_INITIALIZER;
struct kmem_cache *proc_cache;
-/* Tracks which cores are idle, similar to the vcoremap. Each value is the
- * physical coreid of an unallocated core. */
-spinlock_t idle_lock = SPINLOCK_INITIALIZER;
-uint32_t LCKD(&idle_lock) (RO idlecoremap)[MAX_NUM_CPUS];
-uint32_t LCKD(&idle_lock) num_idlecores = 0;
-uint32_t num_mgmtcores = 1;
-
-/* Helper function to return a core to the idlemap. It causes some more lock
- * acquisitions (like in a for loop), but it's a little easier. Plus, one day
- * we might be able to do this without locks (for the putting). */
-void put_idle_core(uint32_t coreid)
-{
- spin_lock(&idle_lock);
- idlecoremap[num_idlecores++] = coreid;
- spin_unlock(&idle_lock);
-}
-
/* Other helpers, implemented later. */
static void __proc_startcore(struct proc *p, trapframe_t *tf);
-static uint32_t get_free_vcoreid(struct proc *SAFE p, uint32_t prev);
-static uint32_t get_busy_vcoreid(struct proc *SAFE p, uint32_t prev);
static bool is_mapped_vcore(struct proc *p, uint32_t pcoreid);
static uint32_t get_vcoreid(struct proc *p, uint32_t pcoreid);
+static uint32_t try_get_pcoreid(struct proc *p, uint32_t vcoreid);
static uint32_t get_pcoreid(struct proc *p, uint32_t vcoreid);
static void __proc_free(struct kref *kref);
+static bool scp_is_vcctx_ready(struct preempt_data *vcpd);
/* PID management. */
#define PID_MAX 32767 // goes from 0 to 32767, with 0 reserved
* RBS -> RGS
* RGS -> RBS
* RGS -> W
+ * RGM -> W
* W -> RBS
+ * W -> RBM
* RGS -> RBM
* RBM -> RGM
* RGM -> RBM
panic("Invalid State Transition! PROC_RUNNING_S to %02x", state);
break;
case PROC_WAITING:
- if (state != PROC_RUNNABLE_S)
+ if (!(state & (PROC_RUNNABLE_S | PROC_RUNNABLE_M)))
panic("Invalid State Transition! PROC_WAITING to %02x", state);
break;
case PROC_DYING:
panic("Invalid State Transition! PROC_RUNNABLE_M to %02x", state);
break;
case PROC_RUNNING_M:
- if (!(state & (PROC_RUNNABLE_S | PROC_RUNNABLE_M | PROC_DYING)))
+ if (!(state & (PROC_RUNNABLE_S | PROC_RUNNABLE_M | PROC_WAITING |
+ PROC_DYING)))
panic("Invalid State Transition! PROC_RUNNING_M to %02x", state);
break;
}
struct proc *pid2proc(pid_t pid)
{
spin_lock(&pid_hash_lock);
- struct proc *p = hashtable_search(pid_hash, (void*)pid);
+ struct proc *p = hashtable_search(pid_hash, (void*)(long)pid);
if (p)
- if (!kref_get_not_zero(&p->kref, 1))
+ if (!kref_get_not_zero(&p->p_kref, 1))
p = 0;
spin_unlock(&pid_hash_lock);
return p;
* any process related function. */
void proc_init(void)
{
+ /* Catch issues with the vcoremap and TAILQ_ENTRY sizes */
+ static_assert(sizeof(TAILQ_ENTRY(vcore)) == sizeof(void*) * 2);
proc_cache = kmem_cache_create("proc", sizeof(struct proc),
MAX(HW_CACHE_ALIGN, __alignof__(struct proc)), 0, 0, 0);
/* Init PID mask and hash. pid 0 is reserved. */
pid_hash = create_hashtable(100, __generic_hash, __generic_eq);
spin_unlock(&pid_hash_lock);
schedule_init();
- /* Init idle cores. Core 0 is the management core. */
- spin_lock(&idle_lock);
-#ifdef __CONFIG_DISABLE_SMT__
- /* assumes core0 is the only management core (NIC and monitor functionality
- * are run there too. it just adds the odd cores to the idlecoremap */
- assert(!(num_cpus % 2));
- // TODO: consider checking x86 for machines that actually hyperthread
- num_idlecores = num_cpus >> 1;
-#ifdef __CONFIG_ARSC_SERVER__
- // Dedicate one core (core 2) to sysserver, might be able to share wit NIC
- num_mgmtcores++;
- assert(num_cpus >= num_mgmtcores);
- send_kernel_message(2, (amr_t)arsc_server, 0,0,0, KMSG_ROUTINE);
-#endif
- for (int i = 0; i < num_idlecores; i++)
- idlecoremap[i] = (i * 2) + 1;
-#else
- // __CONFIG_DISABLE_SMT__
- #ifdef __CONFIG_NETWORKING__
- num_mgmtcores++; // Next core is dedicated to the NIC
- assert(num_cpus >= num_mgmtcores);
- #endif
- #ifdef __CONFIG_APPSERVER__
- #ifdef __CONFIG_DEDICATED_MONITOR__
- num_mgmtcores++; // Next core dedicated to running the kernel monitor
- assert(num_cpus >= num_mgmtcores);
- // Need to subtract 1 from the num_mgmtcores # to get the cores index
- send_kernel_message(num_mgmtcores-1, (amr_t)monitor, 0,0,0, KMSG_ROUTINE);
- #endif
- #endif
-#ifdef __CONFIG_ARSC_SERVER__
- // Dedicate one core (core 2) to sysserver, might be able to share wit NIC
- num_mgmtcores++;
- assert(num_cpus >= num_mgmtcores);
- send_kernel_message(num_mgmtcores-1, (amr_t)arsc_server, 0,0,0, KMSG_ROUTINE);
-#endif
- num_idlecores = num_cpus - num_mgmtcores;
- for (int i = 0; i < num_idlecores; i++)
- idlecoremap[i] = i + num_mgmtcores;
-#endif /* __CONFIG_DISABLE_SMT__ */
- spin_unlock(&idle_lock);
atomic_init(&num_envs, 0);
}
-void
-proc_init_procinfo(struct proc* p)
+/* Be sure you init'd the vcore lists before calling this. */
+static void proc_init_procinfo(struct proc* p)
{
+ p->procinfo->pid = p->pid;
+ p->procinfo->ppid = p->ppid;
+ p->procinfo->max_vcores = max_vcores(p);
+ p->procinfo->tsc_freq = system_timing.tsc_freq;
+ p->procinfo->heap_bottom = (void*)UTEXT;
+ /* 0'ing the arguments. Some higher function will need to set them */
+ memset(p->procinfo->argp, 0, sizeof(p->procinfo->argp));
+ memset(p->procinfo->argbuf, 0, sizeof(p->procinfo->argbuf));
+ memset(p->procinfo->res_grant, 0, sizeof(p->procinfo->res_grant));
+ /* 0'ing the vcore/pcore map. Will link the vcores later. */
memset(&p->procinfo->vcoremap, 0, sizeof(p->procinfo->vcoremap));
memset(&p->procinfo->pcoremap, 0, sizeof(p->procinfo->pcoremap));
p->procinfo->num_vcores = 0;
+ p->procinfo->is_mcp = FALSE;
p->procinfo->coremap_seqctr = SEQCTR_INITIALIZER;
- // TODO: change these too
- p->procinfo->pid = p->pid;
- p->procinfo->ppid = p->ppid;
- p->procinfo->tsc_freq = system_timing.tsc_freq;
- // TODO: maybe do something smarter here
-#ifdef __CONFIG_DISABLE_SMT__
- p->procinfo->max_vcores = num_cpus >> 1;
-#else
- p->procinfo->max_vcores = MAX(1,num_cpus-num_mgmtcores);
-#endif /* __CONFIG_DISABLE_SMT__ */
+ /* For now, we'll go up to the max num_cpus (at runtime). In the future,
+ * there may be cases where we can have more vcores than num_cpus, but for
+ * now we'll leave it like this. */
+ for (int i = 0; i < num_cpus; i++) {
+ TAILQ_INSERT_TAIL(&p->inactive_vcs, &p->procinfo->vcoremap[i], list);
+ }
+}
+
+static void proc_init_procdata(struct proc *p)
+{
+ memset(p->procdata, 0, sizeof(struct procdata));
+ /* processes can't go into vc context on vc 0 til they unset this. This is
+ * for processes that block before initing uthread code (like rtld). */
+ atomic_set(&p->procdata->vcore_preempt_data[0].flags, VC_SCP_NOVCCTX);
}
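/* A minimal userspace-side sketch (not part of the patch) of the
 * VC_SCP_NOVCCTX handshake described above: once the uthread/2LS code is
 * ready, the process clears the flag on vcore 0 so the kernel may start it in
 * vcore context.  '__procdata' and 'atomic_and()' are assumed names here. */
extern struct procdata __procdata;	/* assumed userspace mapping of procdata */

static void example_scp_allow_vc_ctx(void)
{
	struct preempt_data *vcpd = &__procdata.vcore_preempt_data[0];
	/* after this, the kernel's scp_is_vcctx_ready() will return TRUE */
	atomic_and(&vcpd->flags, ~VC_SCP_NOVCCTX);
}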
/* Allocates and initializes a process, with the given parent. Currently
{ INITSTRUCT(*p)
- /* one reference for the proc existing, and one for the ref we pass back. */
- kref_init(&p->kref, __proc_free, 2);
+ /* only one ref, which we pass back. the old 'existence' ref is managed by
+ * the ksched */
+ kref_init(&p->p_kref, __proc_free, 1);
// Setup the default map of where to get cache colors from
p->cache_colors_map = global_cache_colors_map;
p->next_cache_color = 0;
}
/* Set the basic status variables. */
spinlock_init(&p->proc_lock);
- p->exitcode = 0;
+ p->exitcode = 1337; /* so we can see processes killed by the kernel */
+ init_sem(&p->state_change, 0);
p->ppid = parent ? parent->pid : 0;
p->state = PROC_CREATED; /* shouldn't go through state machine for init */
p->env_flags = 0;
p->env_entry = 0; // cheating. this really gets set later
- p->procinfo->heap_bottom = (void*)UTEXT;
- p->heap_top = (void*)UTEXT;
- memset(&p->resources, 0, sizeof(p->resources));
+ p->heap_top = (void*)UTEXT; /* heap_bottom set in proc_init_procinfo */
memset(&p->env_ancillary_state, 0, sizeof(p->env_ancillary_state));
memset(&p->env_tf, 0, sizeof(p->env_tf));
+ spinlock_init(&p->mm_lock);
TAILQ_INIT(&p->vm_regions); /* could init this in the slab */
-
- /* Initialize the contents of the e->procinfo structure */
+	/* Initialize the vcore lists. We'll build the inactive list so that it
+	 * includes all vcores when we initialize procinfo. Do this before initing
+	 * procinfo. */
+ TAILQ_INIT(&p->online_vcs);
+ TAILQ_INIT(&p->bulk_preempted_vcs);
+ TAILQ_INIT(&p->inactive_vcs);
+ /* Init procinfo/procdata. Procinfo's argp/argb are 0'd */
proc_init_procinfo(p);
- /* Initialize the contents of the e->procdata structure */
-
- /* Initialize the generic syscall ring buffer */
- SHARED_RING_INIT(&p->procdata->syscallring);
- /* Initialize the backend of the syscall ring buffer */
- BACK_RING_INIT(&p->syscallbackring,
- &p->procdata->syscallring,
- SYSCALLRINGSIZE);
+ proc_init_procdata(p);
/* Initialize the generic sysevent ring buffer */
SHARED_RING_INIT(&p->procdata->syseventring);
p->open_files.max_fdset = NR_FILE_DESC_DEFAULT;
p->open_files.fd = p->open_files.fd_array;
p->open_files.open_fds = (struct fd_set*)&p->open_files.open_fds_init;
+ /* Init the ucq hash lock */
+ p->ucq_hashlock = (struct hashlock*)&p->ucq_hl_noref;
+ hashlock_init(p->ucq_hashlock, HASHLOCK_DEFAULT_SZ);
atomic_inc(&num_envs);
frontend_proc_init(p);
* push setting the state to CREATED into here. */
void __proc_ready(struct proc *p)
{
+ /* Tell the ksched about us */
+ register_proc(p);
spin_lock(&pid_hash_lock);
- hashtable_insert(pid_hash, (void*)p->pid, p);
+ hashtable_insert(pid_hash, (void*)(long)p->pid, p);
spin_unlock(&pid_hash_lock);
}
* address space and deallocate any other used memory. */
static void __proc_free(struct kref *kref)
{
- struct proc *p = container_of(kref, struct proc, kref);
+ struct proc *p = container_of(kref, struct proc, p_kref);
physaddr_t pa;
printd("[PID %d] freeing proc: %d\n", current ? current->pid : 0, p->pid);
// All parts of the kernel should have decref'd before __proc_free is called
- assert(kref_refcnt(&p->kref) == 0);
+ assert(kref_refcnt(&p->p_kref) == 0);
- close_all_files(&p->open_files, FALSE);
kref_put(&p->fs_env.root->d_kref);
kref_put(&p->fs_env.pwd->d_kref);
destroy_vmrs(p);
}
/* Remove us from the pid_hash and give our PID back (in that order). */
spin_lock(&pid_hash_lock);
- if (!hashtable_remove(pid_hash, (void*)p->pid))
+ if (!hashtable_remove(pid_hash, (void*)(long)p->pid))
panic("Proc not in the pid table in %s", __FUNCTION__);
spin_unlock(&pid_hash_lock);
put_free_pid(p->pid);
return ((actor == target) || (target->ppid == actor->pid));
}
-/* Dispatches a process to run, either on the current core in the case of a
- * RUNNABLE_S, or on its partition in the case of a RUNNABLE_M. This should
- * never be called to "restart" a core. This expects that the "instructions"
- * for which core(s) to run this on will be in the vcoremap, which needs to be
- * set externally.
+/* Helper to incref by val. Using the helper to help debug/interpose on proc
+ * ref counting. Note that pid2proc doesn't use this interface. */
+void proc_incref(struct proc *p, unsigned int val)
+{
+ kref_get(&p->p_kref, val);
+}
+
+/* Helper to decref for debugging. Don't directly kref_put() for now. */
+void proc_decref(struct proc *p)
+{
+ kref_put(&p->p_kref);
+}
+
+/* Helper, makes p the 'current' process, dropping the old current/cr3. This no
+ * longer assumes the passed in reference already counted 'current'. It will
+ * incref internally when needed. */
+static void __set_proc_current(struct proc *p)
+{
+ /* We use the pcpui to access 'current' to cut down on the core_id() calls,
+	 * though who knows how expensive/painful they are. */
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ /* If the process wasn't here, then we need to load its address space. */
+ if (p != pcpui->cur_proc) {
+ proc_incref(p, 1);
+ lcr3(p->env_cr3);
+ /* This is "leaving the process context" of the previous proc. The
+ * previous lcr3 unloaded the previous proc's context. This should
+ * rarely happen, since we usually proactively leave process context,
+ * but this is the fallback. */
+ if (pcpui->cur_proc)
+ proc_decref(pcpui->cur_proc);
+ pcpui->cur_proc = p;
+ }
+}
+
+/* Flag says if vcore context is not ready, which is set in proc_init_procdata(). The
+ * process must turn off this flag on vcore0 at some point. It's off by default
+ * on all other vcores. */
+static bool scp_is_vcctx_ready(struct preempt_data *vcpd)
+{
+ return !(atomic_read(&vcpd->flags) & VC_SCP_NOVCCTX);
+}
+
+/* Dispatches a _S process to run on the current core. This should never be
+ * called to "restart" a core.
*
- * When a process goes from RUNNABLE_M to RUNNING_M, its vcoremap will be
- * "packed" (no holes in the vcore->pcore mapping), vcore0 will continue to run
- * it's old core0 context, and the other cores will come in at the entry point.
- * Including in the case of preemption.
+ * This will always return, regardless of whether or not the calling core is
+ * being given to a process. (it used to pop the tf directly, before we had
+ * cur_tf).
*
- * This won't return if the current core is going to be one of the processes
- * cores (either for _S mode or for _M if it's in the vcoremap). proc_run will
- * eat your reference if it does not return. */
-void proc_run(struct proc *p)
+ * Since it always returns, it will never "eat" your reference (old
+ * documentation talks about this a bit). */
+void proc_run_s(struct proc *p)
{
- bool self_ipi_pending = FALSE;
+ int8_t state = 0;
+ uint32_t coreid = core_id();
+ struct per_cpu_info *pcpui = &per_cpu_info[coreid];
+ struct preempt_data *vcpd = &p->procdata->vcore_preempt_data[0];
spin_lock(&p->proc_lock);
-
switch (p->state) {
case (PROC_DYING):
spin_unlock(&p->proc_lock);
- printk("Process %d not starting due to async death\n", p->pid);
- // if we're a worker core, smp_idle, o/w return
- if (!management_core())
- smp_idle(); // this never returns
+ printk("[kernel] _S %d not starting due to async death\n", p->pid);
return;
case (PROC_RUNNABLE_S):
- assert(current != p);
__proc_set_state(p, PROC_RUNNING_S);
/* We will want to know where this process is running, even if it is
* only in RUNNING_S. can use the vcoremap, which makes death easy.
* Also, this is the signal used in trap.c to know to save the tf in
* env_tf. */
__seq_start_write(&p->procinfo->coremap_seqctr);
- p->procinfo->num_vcores = 0;
- __map_vcore(p, 0, core_id()); // sort of. this needs work.
+ p->procinfo->num_vcores = 0; /* TODO (VC#) */
+ /* TODO: For now, we won't count this as an active vcore (on the
+ * lists). This gets unmapped in resource.c and yield_s, and needs
+ * work. */
+ __map_vcore(p, 0, coreid); /* not treated like a true vcore */
__seq_end_write(&p->procinfo->coremap_seqctr);
- /* __proc_startcore assumes the reference we give it is for current.
- * Decref if current is already properly set. */
- if (p == current)
- kref_put(&p->kref);
- /* We don't want to process routine messages here, since it's a bit
- * different than when we perform a syscall in this process's
- * context. We want interrupts disabled so that if there was a
- * routine message on the way, we'll get the interrupt once we pop
- * back to userspace. */
+ /* incref, since we're saving a reference in owning proc later */
+ proc_incref(p, 1);
+ /* disable interrupts to protect cur_tf, owning_proc, and current */
+ disable_irqsave(&state);
+ /* wait til ints are disabled before unlocking, in case someone else
+ * grabs the lock and IPIs us before we get set up in cur_tf */
spin_unlock(&p->proc_lock);
- disable_irq();
+ /* redundant with proc_startcore, might be able to remove that one*/
+ __set_proc_current(p);
+ /* set us up as owning_proc. ksched bug if there is already one,
+ * for now. can simply clear_owning if we want to. */
+ assert(!pcpui->owning_proc);
+ pcpui->owning_proc = p;
+ pcpui->owning_vcoreid = 0; /* TODO (VC#) */
+ /* TODO: (HSS) set silly state here (__startcore does it instantly) */
+ /* similar to the old __startcore, start them in vcore context if
+ * they have notifs and aren't already in vcore context. o/w, start
+ * them wherever they were before (could be either vc ctx or not) */
+ if (!vcpd->notif_disabled && vcpd->notif_pending
+ && scp_is_vcctx_ready(vcpd)) {
+ vcpd->notif_disabled = TRUE;
+ /* save the _S's tf in the notify slot, build and pop a new one
+ * in actual/cur_tf. */
+ vcpd->notif_tf = p->env_tf;
+ pcpui->cur_tf = &pcpui->actual_tf;
+ memset(pcpui->cur_tf, 0, sizeof(struct trapframe));
+ proc_init_trapframe(pcpui->cur_tf, 0, p->env_entry,
+ vcpd->transition_stack);
+ } else {
+ /* If they have no transition stack, then they can't receive
+ * events. The most they are getting is a wakeup from the
+ * kernel. They won't even turn off notif_pending, so we'll do
+ * that for them. */
+ if (!scp_is_vcctx_ready(vcpd))
+ vcpd->notif_pending = FALSE;
+ /* this is one of the few times cur_tf != &actual_tf */
+ pcpui->cur_tf = &p->env_tf;
+ }
+ enable_irqsave(&state);
+ /* When the calling core idles, it'll call restartcore and run the
+ * _S process's context. */
+ return;
+ default:
+ spin_unlock(&p->proc_lock);
+ panic("Invalid process state %p in %s()!!", p->state, __FUNCTION__);
+ }
+}
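/* Caller sketch (the call site is assumed, e.g. somewhere in the ksched):
 * since proc_run_s() always returns, the calling core only runs the _S
 * context once it idles and proc_restartcore() pops cur_tf.  proc_run_s(),
 * proc_decref(), and smp_idle() are from this file. */
static void example_schedule_scp(struct proc *p)
{
	proc_run_s(p);		/* sets owning_proc/cur_tf, takes its own ref */
	proc_decref(p);		/* done with our ref; proc_run_s() never eats it */
	smp_idle();		/* eventually pops cur_tf via proc_restartcore() */
}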
- __proc_startcore(p, &p->env_tf);
- break;
+/* Helper: sends preempt messages to all vcores on the bulk preempt list, and
+ * moves them to the inactive list. */
+static void __send_bulkp_events(struct proc *p)
+{
+ struct vcore *vc_i, *vc_temp;
+ struct event_msg preempt_msg = {0};
+ /* Send preempt messages for any left on the BP list. No need to set any
+ * flags, it all was done on the real preempt. Now we're just telling the
+ * process about any that didn't get restarted and are still preempted. */
+ TAILQ_FOREACH_SAFE(vc_i, &p->bulk_preempted_vcs, list, vc_temp) {
+ /* Note that if there are no active vcores, send_k_e will post to our
+ * own vcore, the last of which will be put on the inactive list and be
+ * the first to be started. We could have issues with deadlocking,
+ * since send_k_e() could grab the proclock (if there are no active
+ * vcores) */
+ preempt_msg.ev_type = EV_VCORE_PREEMPT;
+ preempt_msg.ev_arg2 = vcore2vcoreid(p, vc_i); /* arg2 is 32 bits */
+ send_kernel_event(p, &preempt_msg, 0);
+ /* TODO: we may want a TAILQ_CONCAT_HEAD, or something that does that.
+ * We need a loop for the messages, but not necessarily for the list
+ * changes. */
+ TAILQ_REMOVE(&p->bulk_preempted_vcs, vc_i, list);
+ TAILQ_INSERT_HEAD(&p->inactive_vcs, vc_i, list);
+ }
+}
+
+/* Run an _M. Can be called safely on one that is already running. Hold the
+ * lock before calling. Other than state checks, this just starts up the _M's
+ * vcores, much like the second part of give_cores_running. More specifically,
+ * give_cores_runnable puts cores on the online list, which this then sends
+ * messages to. give_cores_running immediately puts them on the list and sends
+ * the message. the two-step style may go out of fashion soon.
+ *
+ * This expects that the "instructions" for which core(s) to run this on will be
+ * in the vcoremap, which needs to be set externally (give_cores()). */
+void __proc_run_m(struct proc *p)
+{
+ struct vcore *vc_i;
+ switch (p->state) {
+ case (PROC_WAITING):
+ case (PROC_DYING):
+ warn("ksched tried to run proc %d in state %s\n", p->pid,
+ procstate2str(p->state));
+ return;
case (PROC_RUNNABLE_M):
/* vcoremap[i] holds the coreid of the physical core allocated to
* this process. It is set outside proc_run. For the kernel
* message, a0 = struct proc*, a1 = struct trapframe*. */
if (p->procinfo->num_vcores) {
+ __send_bulkp_events(p);
__proc_set_state(p, PROC_RUNNING_M);
- /* Up the refcnt, since num_vcores are going to start using this
- * process and have it loaded in their 'current'. */
- kref_get(&p->kref, p->procinfo->num_vcores);
- /* If the core we are running on is in the vcoremap, we will get
- * an IPI (once we reenable interrupts) and never return. */
- if (is_mapped_vcore(p, core_id()))
- self_ipi_pending = TRUE;
- for (int i = 0; i < p->procinfo->num_vcores; i++)
- send_kernel_message(get_pcoreid(p, i), __startcore, p, 0,
- 0, KMSG_ROUTINE);
+ /* Up the refcnt, to avoid the n refcnt upping on the
+ * destination cores. Keep in sync with __startcore */
+ proc_incref(p, p->procinfo->num_vcores * 2);
+ /* Send kernel messages to all online vcores (which were added
+ * to the list and mapped in __proc_give_cores()), making them
+ * turn online */
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list) {
+ send_kernel_message(vc_i->pcoreid, __startcore, (long)p,
+ (long)vcore2vcoreid(p, vc_i), 0,
+ KMSG_IMMEDIATE);
+ }
} else {
warn("Tried to proc_run() an _M with no vcores!");
}
- /* Unlock and decref/wait for the IPI if one is pending. This will
- * eat the reference if we aren't returning.
- *
- * There a subtle race avoidance here. __proc_startcore can handle
- * a death message, but we can't have the startcore come after the
- * death message. Otherwise, it would look like a new process. So
- * we hold the lock til after we send our message, which prevents a
- * possible death message.
+		/* There is a subtle race avoidance here (when we unlock after sending
+ * the message). __proc_startcore can handle a death message, but
+ * we can't have the startcore come after the death message.
+ * Otherwise, it would look like a new process. So we hold the lock
+ * til after we send our message, which prevents a possible death
+ * message.
* - Note there is no guarantee this core's interrupts were on, so
* it may not get the message for a while... */
- spin_unlock(&p->proc_lock);
- __proc_kmsg_pending(p, self_ipi_pending);
- break;
+ return;
+ case (PROC_RUNNING_M):
+ return;
default:
+ /* unlock just so the monitor can call something that might lock*/
spin_unlock(&p->proc_lock);
- panic("Invalid process state %p in proc_run()!!", p->state);
+ panic("Invalid process state %p in %s()!!", p->state, __FUNCTION__);
}
}
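/* Sketch of the two-step usage described above (the wrapper is assumed; the
 * __proc_*() calls are from this file): map the granted pcores, then send the
 * __startcore messages, all while holding the proc_lock. */
static void example_give_and_run_m(struct proc *p, uint32_t *pc_arr,
                                   uint32_t num)
{
	spin_lock(&p->proc_lock);
	__proc_give_cores(p, pc_arr, num);	/* maps vcores, adds to online_vcs */
	__proc_run_m(p);			/* sends __startcore to each online vcore */
	spin_unlock(&p->proc_lock);
}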
static void __proc_startcore(struct proc *p, trapframe_t *tf)
{
assert(!irq_is_enabled());
- /* If the process wasn't here, then we need to load its address space. */
- if (p != current) {
- /* Do not incref here. We were given the reference to current,
- * pre-upped. */
- lcr3(p->env_cr3);
- /* This is "leaving the process context" of the previous proc. The
- * previous lcr3 unloaded the previous proc's context. This should
- * rarely happen, since we usually proactively leave process context,
- * but is the fallback. */
- if (current)
- kref_put(¤t->kref);
- set_current_proc(p);
- }
+ __set_proc_current(p);
/* need to load our silly state, preferably somewhere other than here so we
* can avoid the case where the context was just running here. it's not
* sufficient to do it in the "new process" if-block above (could be things
* __startcore. */
if (p->state == PROC_RUNNING_S)
env_pop_ancillary_state(p);
-
+ /* Clear the current_tf, since it is no longer used */
+ current_tf = 0; /* TODO: might not need this... */
env_pop_tf(tf);
}
-/* Restarts the given context (trapframe) of process p on the core this code
- * executes on. Calls an internal function to do the work.
+/* Restarts/runs the current_tf, which must be for the current process, on the
+ * core this code executes on. Calls an internal function to do the work.
*
* In case there are pending routine messages, like __death, __preempt, or
* __notify, we need to run them. Alternatively, if there are any, we could
* Refcnting: this will not return, and it assumes that you've accounted for
* your reference as if it was the ref for "current" (which is what happens when
* returning from local traps and such. */
-void proc_restartcore(struct proc *p, trapframe_t *tf)
+void proc_restartcore(void)
{
- if (current_tf != tf) {
- printk("Current_tf: %08p, tf: %08p\n", current_tf, tf);
- panic("Current TF is jacked...");
- }
-
- /* Need ints disabled when we return from processing (race) */
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ assert(!pcpui->cur_sysc);
+ /* TODO: can probably remove this enable_irq. it was an optimization for
+ * RKMs */
+ /* Try and get any interrupts before we pop back to userspace. If we didn't
+ * do this, we'd just get them in userspace, but this might save us some
+ * effort/overhead. */
+ enable_irq();
+ /* Need ints disabled when we return from processing (race on missing
+ * messages/IPIs) */
disable_irq();
- process_routine_kmsg();
- __proc_startcore(p, tf);
+ process_routine_kmsg(pcpui->cur_tf);
+ /* If there is no owning process, just idle, since we don't know what to do.
+ * This could be because the process had been restarted a long time ago and
+ * has since left the core, or due to a KMSG like __preempt or __death. */
+ if (!pcpui->owning_proc) {
+ abandon_core();
+ smp_idle();
+ }
+ assert(pcpui->cur_tf);
+ __proc_startcore(pcpui->owning_proc, pcpui->cur_tf);
}
-/*
- * Destroys the given process. This may be called from another process, a light
- * kernel thread (no real process context), asynchronously/cross-core, or from
- * the process on its own core.
+/* Destroys the process. This should be called by the ksched, which needs to
+ * hold the lock. It will destroy the process and return any cores allocated to
+ * the proc via pc_arr and nr_revoked. It's up to the caller to have enough
+ * space for pc_arr. This will return TRUE if we successfully killed it, FALSE
+ * otherwise. Failure isn't a big deal either - it can happen due to concurrent
+ * calls to proc_destroy.
*
* Here's the way process death works:
* 0. grab the lock (protects state transition and core map)
* Note that some cores can be processing async calls, but will eventually
* decref. Should think about this more, like some sort of callback/revocation.
*
- * This will eat your reference if it won't return. Note that this function
- * needs to change anyways when we make __death more like __preempt. (TODO) */
-void proc_destroy(struct proc *p)
+ * This function will now always return (it used to not return if the calling
+ * core was dying). However, when it returns, a kernel message will eventually
+ * come in, making you abandon_core, as if you weren't running. It may be that
+ * the only reference to p is the one you passed in, and when you decref, it'll
+ * get __proc_free()d. */
+bool __proc_destroy(struct proc *p, uint32_t *pc_arr, uint32_t *nr_revoked)
{
- bool self_ipi_pending = FALSE;
-
- spin_lock(&p->proc_lock);
- /* TODO: (DEATH) look at this again when we sort the __death IPI */
- if (current == p)
- self_ipi_pending = TRUE;
-
+ struct kthread *sleeper;
switch (p->state) {
case PROC_DYING: // someone else killed this already.
- spin_unlock(&p->proc_lock);
- __proc_kmsg_pending(p, self_ipi_pending);
- return;
+ return FALSE;
case PROC_RUNNABLE_M:
/* Need to reclaim any cores this proc might have, even though it's
* not running yet. */
- __proc_take_allcores(p, NULL, NULL, NULL, NULL);
+ *nr_revoked = __proc_take_allcores(p, pc_arr, FALSE);
// fallthrough
case PROC_RUNNABLE_S:
- // Think about other lists, like WAITING, or better ways to do this
- deschedule_proc(p);
+ /* might need to pull from lists, though i'm currently a fan of the
+ * model where external refs notice DYING (if it matters to them)
+ * and decref when they are done. the ksched will notice the proc
+ * is dying and handle it accordingly (which delay the reaping til
+ * the next call to schedule()) */
break;
case PROC_RUNNING_S:
#if 0
// here's how to do it manually
if (current == p) {
lcr3(boot_cr3);
- kref_put(&p->kref); /* this decref is for the cr3 */
+ proc_decref(p); /* this decref is for the cr3 */
current = NULL;
}
#endif
send_kernel_message(get_pcoreid(p, 0), __death, 0, 0, 0,
- KMSG_ROUTINE);
+ KMSG_IMMEDIATE);
__seq_start_write(&p->procinfo->coremap_seqctr);
// TODO: might need to sort num_vcores too later (VC#)
/* vcore is unmapped on the receive side */
__seq_end_write(&p->procinfo->coremap_seqctr);
- #if 0
- /* right now, RUNNING_S only runs on a mgmt core (0), not cores
- * managed by the idlecoremap. so don't do this yet. */
- put_idle_core(get_pcoreid(p, 0));
- #endif
+ /* If we ever have RUNNING_S run on non-mgmt cores, we'll need to
+ * tell the ksched about this now-idle core (after unlocking) */
break;
case PROC_RUNNING_M:
/* Send the DEATH message to every core running this process, and
* deallocate the cores.
* The rule is that the vcoremap is set before proc_run, and reset
* within proc_destroy */
- __proc_take_allcores(p, __death, (void *SNT)0, (void *SNT)0,
- (void *SNT)0);
+ *nr_revoked = __proc_take_allcores(p, pc_arr, FALSE);
break;
case PROC_CREATED:
break;
default:
- panic("Weird state(%s) in %s()", procstate2str(p->state),
- __FUNCTION__);
+ warn("Weird state(%s) in %s()", procstate2str(p->state),
+ __FUNCTION__);
+ return FALSE;
}
+ /* At this point, a death IPI should be on its way, either from the
+ * RUNNING_S one, or from proc_take_cores with a __death. in general,
+ * interrupts should be on when you call proc_destroy locally, but currently
+ * aren't for all things (like traphandlers). */
__proc_set_state(p, PROC_DYING);
- /* This kref_put() is for the process's existence. */
- kref_put(&p->kref);
- /* Unlock and possible decref and wait. A death IPI should be on its way,
- * either from the RUNNING_S one, or from proc_take_cores with a __death.
- * in general, interrupts should be on when you call proc_destroy locally,
- * but currently aren't for all things (like traphandlers). */
- spin_unlock(&p->proc_lock);
- /* at this point, we normally have one ref to be eaten in kmsg_pending and
- * one for every 'current'. and maybe one for a parent */
- __proc_kmsg_pending(p, self_ipi_pending);
- return;
+ /* This prevents processes from accessing their old files while dying, and
+ * will help if these files (or similar objects in the future) hold
+ * references to p (preventing a __proc_free()). */
+ close_all_files(&p->open_files, FALSE);
+ /* Signal our state change. Assuming we only have one waiter right now. */
+ sleeper = __up_sem(&p->state_change, TRUE);
+ if (sleeper)
+ kthread_runnable(sleeper);
+ return TRUE;
}
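/* Sketch of a ksched-side wrapper (the wrapper itself is assumed;
 * __proc_destroy(), put_idle_cores(), and proc_decref() are from this patch).
 * pc_arr must be big enough for every core the proc could hold; num_cpus is a
 * safe bound. */
static void example_destroy(struct proc *p)
{
	uint32_t nr_cores_revoked = 0;
	uint32_t pc_arr[num_cpus];
	spin_lock(&p->proc_lock);
	if (!__proc_destroy(p, pc_arr, &nr_cores_revoked)) {
		/* lost the race with another killer; nothing to clean up */
		spin_unlock(&p->proc_lock);
		return;
	}
	spin_unlock(&p->proc_lock);
	if (nr_cores_revoked)
		put_idle_cores(p, pc_arr, nr_cores_revoked);
	proc_decref(p);		/* drop our ref; may end up in __proc_free() */
}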
-/* Helper function. Starting from prev, it will find the next free vcoreid,
- * which is the next vcore that is not valid.
- * You better hold the lock before calling this. */
-static uint32_t get_free_vcoreid(struct proc *SAFE p, uint32_t prev)
+/* Turns *p into an MCP. Needs to be called from a local syscall of a RUNNING_S
+ * process. Returns 0 if it succeeded, an error code otherwise. You should
+ * hold the lock before calling. */
+int __proc_change_to_m(struct proc *p)
{
- uint32_t i;
- for (i = prev; i < MAX_NUM_CPUS; i++)
- if (!p->procinfo->vcoremap[i].valid)
+ int8_t state = 0;
+ /* in case userspace erroneously tries to change more than once */
+ if (__proc_is_mcp(p))
+ return -EINVAL;
+ switch (p->state) {
+ case (PROC_RUNNING_S):
+ /* issue with if we're async or not (need to preempt it)
+ * either of these should trip it. TODO: (ACR) async core req
+ * TODO: relies on vcore0 being the caller (VC#) */
+ if ((current != p) || (get_pcoreid(p, 0) != core_id()))
+ panic("We don't handle async RUNNING_S core requests yet.");
+ /* save the tf so userspace can restart it. Like in __notify,
+ * this assumes a user tf is the same as a kernel tf. We save
+ * it in the preempt slot so that we can also save the silly
+ * state. */
+ struct preempt_data *vcpd = &p->procdata->vcore_preempt_data[0];
+ disable_irqsave(&state); /* protect cur_tf */
+ /* Note this won't play well with concurrent proc kmsgs, but
+ * since we're _S and locked, we shouldn't have any. */
+ assert(current_tf);
+ /* Copy uthread0's context to the notif slot */
+ vcpd->notif_tf = *current_tf;
+ clear_owning_proc(core_id()); /* so we don't restart */
+ save_fp_state(&vcpd->preempt_anc);
+ enable_irqsave(&state);
+ /* Userspace needs to not fuck with notif_disabled before
+ * transitioning to _M. */
+ if (vcpd->notif_disabled) {
+ printk("[kernel] user bug: notifs disabled for vcore 0\n");
+ vcpd->notif_disabled = FALSE;
+ }
+ /* in the async case, we'll need to remotely stop and bundle
+ * vcore0's TF. this is already done for the sync case (local
+ * syscall). */
+ /* this process no longer runs on its old location (which is
+ * this core, for now, since we don't handle async calls) */
+ __seq_start_write(&p->procinfo->coremap_seqctr);
+ // TODO: (VC#) might need to adjust num_vcores
+ // TODO: (ACR) will need to unmap remotely (receive-side)
+ __unmap_vcore(p, 0); /* VC# keep in sync with proc_run_s */
+ __seq_end_write(&p->procinfo->coremap_seqctr);
+ /* change to runnable_m (it's TF is already saved) */
+ __proc_set_state(p, PROC_RUNNABLE_M);
+ p->procinfo->is_mcp = TRUE;
break;
- if (i + 1 >= MAX_NUM_CPUS)
- warn("At the end of the vcorelist. Might want to check that out.");
- return i;
+ case (PROC_RUNNABLE_S):
+ /* Issues: being on the runnable_list, proc_set_state not liking
+ * it, and not clearly thinking through how this would happen.
+ * Perhaps an async call that gets serviced after you're
+ * descheduled? */
+ warn("Not supporting RUNNABLE_S -> RUNNABLE_M yet.\n");
+ return -EINVAL;
+ case (PROC_DYING):
+ warn("Dying, core request coming from %d\n", core_id());
+ return -EINVAL;
+ default:
+ return -EINVAL;
+ }
+ return 0;
}
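/* Sketch of the expected call pattern (the syscall-side wrapper is assumed;
 * __proc_change_to_m() and the proc_lock are from this file).  This must be a
 * local call made by the RUNNING_S proc itself. */
static int example_change_to_m(struct proc *p)
{
	int retval;
	spin_lock(&p->proc_lock);
	retval = __proc_change_to_m(p);	/* 0 on success, -EINVAL otherwise */
	spin_unlock(&p->proc_lock);
	/* on success, p is RUNNABLE_M and runs once the ksched grants cores */
	return retval;
}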
-/* Helper function. Starting from prev, it will find the next busy vcoreid,
- * which is the next vcore that is valid.
- * You better hold the lock before calling this. */
-static uint32_t get_busy_vcoreid(struct proc *SAFE p, uint32_t prev)
+/* Old code to turn a RUNNING_M to a RUNNING_S, with the calling context
+ * becoming the new 'thread0'. Don't use this. Caller needs to send in a
+ * pc_arr big enough for all vcores. Will return the number of cores given up
+ * by the proc. */
+uint32_t __proc_change_to_s(struct proc *p, uint32_t *pc_arr)
{
- uint32_t i;
- for (i = prev; i < MAX_NUM_CPUS; i++)
- if (p->procinfo->vcoremap[i].valid)
- break;
- if (i + 1 >= MAX_NUM_CPUS)
- warn("At the end of the vcorelist. Might want to check that out.");
- return i;
+ int8_t state = 0;
+ uint32_t num_revoked;
+ printk("[kernel] trying to transition _M -> _S (deprecated)!\n");
+ assert(p->state == PROC_RUNNING_M); // TODO: (ACR) async core req
+ /* save the context, to be restarted in _S mode */
+ disable_irqsave(&state); /* protect cur_tf */
+ assert(current_tf);
+ p->env_tf = *current_tf;
+ clear_owning_proc(core_id()); /* so we don't restart */
+ enable_irqsave(&state);
+ env_push_ancillary_state(p); // TODO: (HSS)
+ /* sending death, since it's not our job to save contexts or anything in
+ * this case. */
+ num_revoked = __proc_take_allcores(p, pc_arr, FALSE);
+ __proc_set_state(p, PROC_RUNNABLE_S);
+ return num_revoked;
}
/* Helper function. Is the given pcore a mapped vcore? No locking involved, be
return p->procinfo->pcoremap[pcoreid].vcoreid;
}
+/* Helper function. Try to find the pcoreid for a given virtual core id for
+ * proc p. No locking involved, be careful. Use this when you can tolerate a
+ * stale or otherwise 'wrong' answer. */
+static uint32_t try_get_pcoreid(struct proc *p, uint32_t vcoreid)
+{
+ return p->procinfo->vcoremap[vcoreid].pcoreid;
+}
+
/* Helper function. Find the pcoreid for a given virtual core id for proc p.
* No locking involved, be careful. Panics on failure. */
static uint32_t get_pcoreid(struct proc *p, uint32_t vcoreid)
{
- assert(p->procinfo->vcoremap[vcoreid].valid);
- return p->procinfo->vcoremap[vcoreid].pcoreid;
+ assert(vcore_is_mapped(p, vcoreid));
+ return try_get_pcoreid(p, vcoreid);
}
-/* Helper function: yields / wraps up current_tf and schedules the _S */
-void __proc_yield_s(struct proc *p, struct trapframe *tf)
+/* Helper: saves the SCP's tf state and unmaps vcore 0. In the future, we'll
+ * probably use vc0's space for env_tf and the silly state. */
+void __proc_save_context_s(struct proc *p, struct trapframe *tf)
{
- assert(p->state == PROC_RUNNING_S);
p->env_tf= *tf;
env_push_ancillary_state(p); /* TODO: (HSS) */
- __proc_set_state(p, PROC_RUNNABLE_S);
- schedule_proc(p);
+ __unmap_vcore(p, 0); /* VC# keep in sync with proc_run_s */
}
/* Yields the calling core. Must be called locally (not async) for now.
- * - If RUNNING_S, you just give up your time slice and will eventually return.
+ * - If RUNNING_S, you just give up your time slice and will eventually return,
+ * possibly after WAITING on an event.
* - If RUNNING_M, you give up the current vcore (which never returns), and
* adjust the amount of cores wanted/granted.
- * - If you have only one vcore, you switch to RUNNABLE_M. When you run again,
- * you'll have one guaranteed core, starting from the entry point.
+ * - If you have only one vcore, you switch to WAITING. There's no 'classic
+ * yield' for MCPs (at least not now). When you run again, you'll have one
+ * guaranteed core, starting from the entry point.
*
- * - RES_CORES amt_wanted will be the amount running after taking away the
- * yielder, unless there are none left, in which case it will be 1.
+ * If the call is being nice, it means different things for SCPs and MCPs. For
+ * MCPs, it means that it is in response to a preemption (which needs to be
+ * checked). If there is no preemption pending, just return. For SCPs, it
+ * means the proc wants to give up the core, but still has work to do. If not,
+ * the proc is trying to wait on an event. It's not being nice to others, it
+ * just has no work to do.
*
- * If the call is being nice, it means that it is in response to a preemption
- * (which needs to be checked). If there is no preemption pending, just return.
- * No matter what, don't adjust the number of cores wanted.
+ * This usually does not return (smp_idle()), so it will eat your reference.
+ * Also note that it needs a non-current/edible reference, since it will abandon
+ * and continue to use the *p (current == 0, no cr3, etc).
*
- * This usually does not return (abandon_core()), so it will eat your reference.
- * */
+ * We disable interrupts for most of it too, since we need to protect current_tf
+ * and not race with __notify (which doesn't play well with concurrent
+ * yielders). */
void proc_yield(struct proc *SAFE p, bool being_nice)
{
- uint32_t vcoreid = get_vcoreid(p, core_id());
- struct vcore *vc = &p->procinfo->vcoremap[vcoreid];
-
- /* no reason to be nice, return */
- if (being_nice && !vc->preempt_pending)
- return;
-
+ uint32_t vcoreid, pcoreid = core_id();
+ struct vcore *vc;
+ struct preempt_data *vcpd;
+ int8_t state = 0;
+ /* Need to disable before even reading vcoreid, since we could be unmapped
+ * by a __preempt or __death. _S also needs ints disabled, so we'll just do
+ * it immediately. */
+ disable_irqsave(&state);
+ /* Need to lock before checking the vcoremap to find out who we are, in case
+ * we're getting __preempted and __startcored, from a remote core (in which
+ * case we might have come in thinking we were vcore X, but had X preempted
+ * and Y restarted on this pcore, and we suddenly are the wrong vcore
+ * yielding). Arguably, this is incredibly rare, since you'd need to
+ * preempt the core, then decide to give it back with another grant in
+ * between. */
spin_lock(&p->proc_lock); /* horrible scalability. =( */
-
- /* fate is sealed, return and take the preempt message on the way out.
- * we're making this check while holding the lock, since the preemptor
- * should hold the lock when sending messages. */
- if (vc->preempt_served) {
- spin_unlock(&p->proc_lock);
- return;
- }
- /* no need to preempt later, since we are yielding (nice or otherwise) */
- if (vc->preempt_pending)
- vc->preempt_pending = 0;
-
switch (p->state) {
case (PROC_RUNNING_S):
- __proc_yield_s(p, current_tf);
- break;
- case (PROC_RUNNING_M):
- printd("[K] Process %d (%p) is yielding on vcore %d\n", p->pid, p,
- get_vcoreid(p, core_id()));
- /* TODO: (RMS) the Scheduler cannot handle the Runnable Ms (RMS), so
- * don't yield the last vcore. */
- if (p->procinfo->num_vcores == 1) {
- spin_unlock(&p->proc_lock);
- return;
- }
- __seq_start_write(&p->procinfo->coremap_seqctr);
- // give up core
- __unmap_vcore(p, get_vcoreid(p, core_id()));
- p->resources[RES_CORES].amt_granted = --(p->procinfo->num_vcores);
- if (!being_nice)
- p->resources[RES_CORES].amt_wanted = p->procinfo->num_vcores;
- __seq_end_write(&p->procinfo->coremap_seqctr);
- // add to idle list
- put_idle_core(core_id());
- // last vcore? then we really want 1, and to yield the gang
- // TODO: (RMS) will actually do this.
- if (p->procinfo->num_vcores == 0) {
- p->resources[RES_CORES].amt_wanted = 1;
- __proc_set_state(p, PROC_RUNNABLE_M);
- schedule_proc(p);
+ if (!being_nice) {
+ /* waiting for an event to unblock us */
+ vcpd = &p->procdata->vcore_preempt_data[0];
+ /* this check is an early optimization (check, signal, check
+ * again pattern). We could also lock before spamming the
+ * vcore in event.c */
+ if (vcpd->notif_pending) {
+ /* they can't handle events, just need to prevent a yield.
+ * (note the notif_pendings are collapsed). */
+ if (!scp_is_vcctx_ready(vcpd))
+ vcpd->notif_pending = FALSE;
+ goto out_failed;
+ }
+ /* syncing with event's SCP code. we set waiting, then check
+ * pending. they set pending, then check waiting. it's not
+ * possible for us to miss the notif *and* for them to miss
+ * WAITING. one (or both) of us will see and make sure the proc
+ * wakes up. */
+ __proc_set_state(p, PROC_WAITING);
+ wrmb(); /* don't let the state write pass the notif read */
+ if (vcpd->notif_pending) {
+ __proc_set_state(p, PROC_RUNNING_S);
+ if (!scp_is_vcctx_ready(vcpd))
+ vcpd->notif_pending = FALSE;
+ goto out_failed;
+ }
+ /* if we're here, we want to sleep. a concurrent event that
+ * hasn't already written notif_pending will have seen WAITING,
+ * and will be spinning while we do this. */
+ __proc_save_context_s(p, current_tf);
+ spin_unlock(&p->proc_lock); /* note irqs are not enabled yet */
+ } else {
+ /* yielding to allow other processes to run. we're briefly
+ * WAITING, til we are woken up */
+ __proc_set_state(p, PROC_WAITING);
+ __proc_save_context_s(p, current_tf);
+ spin_unlock(&p->proc_lock); /* note irqs are not enabled yet */
+ /* immediately wake up the proc (makes it runnable) */
+ proc_wakeup(p);
}
- break;
+ goto out_yield_core;
+ case (PROC_RUNNING_M):
+ break; /* will handle this stuff below */
+ case (PROC_DYING): /* incoming __death */
+ case (PROC_RUNNABLE_M): /* incoming (bulk) preempt/myield TODO:(BULK) */
+ goto out_failed;
default:
- // there are races that can lead to this (async death, preempt, etc)
panic("Weird state(%s) in %s()", procstate2str(p->state),
__FUNCTION__);
}
+ /* If we're already unmapped (__preempt or a __death hit us), bail out.
+ * Note that if a __death hit us, we should have bailed when we saw
+ * PROC_DYING. */
+ if (!is_mapped_vcore(p, pcoreid))
+ goto out_failed;
+ vcoreid = get_vcoreid(p, pcoreid);
+ vc = vcoreid2vcore(p, vcoreid);
+ vcpd = &p->procdata->vcore_preempt_data[vcoreid];
+ /* no reason to be nice, return */
+ if (being_nice && !vc->preempt_pending)
+ goto out_failed;
+ /* Fate is sealed, return and take the preempt message when we enable_irqs.
+ * Note this keeps us from mucking with our lists, since we were already
+ * removed from the online_list. We have a similar concern with __death,
+ * but we check for DYING to handle that. */
+ if (vc->preempt_served)
+ goto out_failed;
+ /* At this point, AFAIK there should be no preempt/death messages on the
+ * way, and we're on the online list. So we'll go ahead and do the yielding
+ * business. */
+ /* If there's a preempt pending, we don't need to preempt later since we are
+ * yielding (nice or otherwise). If not, this is just a regular yield. */
+ if (vc->preempt_pending) {
+ vc->preempt_pending = 0;
+ } else {
+ /* Optional: on a normal yield, check to see if we are putting them
+ * below amt_wanted (help with user races) and bail. */
+ if (p->procdata->res_req[RES_CORES].amt_wanted >=
+ p->procinfo->num_vcores)
+ goto out_failed;
+ }
+ /* Don't let them yield if they are missing a notification. Userspace must
+ * not leave vcore context without dealing with notif_pending. pop_ros_tf()
+ * handles leaving via uthread context. This handles leaving via a yield.
+ *
+ * This early check is an optimization. The real check is below when it
+ * works with the online_vcs list (syncing with event.c and INDIR/IPI
+ * posting). */
+ if (vcpd->notif_pending)
+ goto out_failed;
+ /* Now we'll actually try to yield */
+	printd("[K] Process %d (%p) is yielding on vcore %d\n", p->pid, p,
+	       vcoreid);
+ /* Remove from the online list, add to the yielded list, and unmap
+ * the vcore, which gives up the core. */
+ TAILQ_REMOVE(&p->online_vcs, vc, list);
+ /* Now that we're off the online list, check to see if an alert made
+ * it through (event.c sets this) */
+ wrmb(); /* prev write must hit before reading notif_pending */
+ /* Note we need interrupts disabled, since a __notify can come in
+ * and set pending to FALSE */
+ if (vcpd->notif_pending) {
+ /* We lost, put it back on the list and abort the yield */
+ TAILQ_INSERT_TAIL(&p->online_vcs, vc, list); /* could go HEAD */
+ goto out_failed;
+ }
+ /* We won the race with event sending, we can safely yield */
+ TAILQ_INSERT_HEAD(&p->inactive_vcs, vc, list);
+ /* Note this protects stuff userspace should look at, which doesn't
+ * include the TAILQs. */
+ __seq_start_write(&p->procinfo->coremap_seqctr);
+ /* Next time the vcore starts, it starts fresh */
+ vcpd->notif_disabled = FALSE;
+ __unmap_vcore(p, vcoreid);
+ p->procinfo->num_vcores--;
+ p->procinfo->res_grant[RES_CORES] = p->procinfo->num_vcores;
+ __seq_end_write(&p->procinfo->coremap_seqctr);
+ /* No more vcores? Then we wait on an event */
+ if (p->procinfo->num_vcores == 0) {
+ /* consider a ksched op to tell it about us WAITING */
+ __proc_set_state(p, PROC_WAITING);
+ }
+ spin_unlock(&p->proc_lock);
+ /* Hand the now-idle core to the ksched */
+ put_idle_core(p, pcoreid);
+ goto out_yield_core;
+out_failed:
+ /* for some reason we just want to return, either to take a KMSG that cleans
+ * us up, or because we shouldn't yield (ex: notif_pending). */
spin_unlock(&p->proc_lock);
- kref_put(&p->kref); /* need to eat the ref passed in */
- /* TODO: (RMS) If there was a change to the idle cores, try and give our
- * core to someone who was preempted. */
- /* Clean up the core and idle. For mgmt cores, they will ultimately call
- * manager, which will call schedule() and will repick the yielding proc. */
+ enable_irqsave(&state);
+ return;
+out_yield_core: /* successfully yielded the core */
+ proc_decref(p); /* need to eat the ref passed in */
+ /* Clean up the core and idle. Need to do this before enabling interrupts,
+ * since once we put_idle_core() and unlock, we could get a startcore. */
+ clear_owning_proc(pcoreid); /* so we don't restart */
abandon_core();
+ smp_idle(); /* will reenable interrupts */
}
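/* Caller sketch (the syscall-side wrapper is assumed; the refcounting rules
 * come from the comment above proc_yield()): pass in an edible, non-'current'
 * reference.  If proc_yield() returns, the yield was aborted and we still own
 * that reference. */
static void example_yield(struct proc *p, bool being_nice)
{
	proc_incref(p, 1);		/* the ref proc_yield() may eat */
	proc_yield(p, being_nice);	/* usually does not return */
	proc_decref(p);			/* reached only on the abort paths */
}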
-/* If you expect to notify yourself, cleanup state and process_routine_kmsg() */
-void do_notify(struct proc *p, uint32_t vcoreid, unsigned int notif,
- struct notif_event *ne)
+/* Sends a notification (aka active notification, aka IPI) to p's vcore. We
+ * only send a notification if notifs are enabled. There's a bunch of weird
+ * cases with this, and how pending / enabled are signals between the user and
+ * kernel - check the documentation. Note that pending is more about messages.
+ * The process needs to be in vcore_context, and the reason is usually a
+ * message. We set pending here in case we were called to prod them into vcore
+ * context (like via a sys_self_notify). Also note that this works for _S
+ * procs, if you send to vcore 0 (and the proc is running). */
+void proc_notify(struct proc *p, uint32_t vcoreid)
{
- printd("sending notif %d to proc %p\n", notif, p);
- assert(notif < MAX_NR_NOTIF);
- if (ne)
- assert(notif == ne->ne_type);
-
- struct notif_method *nm = &p->procdata->notif_methods[notif];
struct preempt_data *vcpd = &p->procdata->vcore_preempt_data[vcoreid];
-
- printd("nm = %p, vcpd = %p\n", nm, vcpd);
- /* enqueue notif message or toggle bits */
- if (ne && nm->flags & NOTIF_MSG) {
- if (bcq_enqueue(&vcpd->notif_evts, ne, NR_PERCORE_EVENTS, 4)) {
- atomic_inc((atomic_t)&vcpd->event_overflows); // careful here
- SET_BITMASK_BIT_ATOMIC(vcpd->notif_bmask, notif);
+ vcpd->notif_pending = TRUE;
+ wrmb(); /* must write notif_pending before reading notif_disabled */
+ if (!vcpd->notif_disabled) {
+ /* GIANT WARNING: we aren't using the proc-lock to protect the
+ * vcoremap. We want to be able to use this from interrupt context,
+ * and don't want the proc_lock to be an irqsave. Spurious
+ * __notify() kmsgs are okay (it checks to see if the right receiver
+ * is current). */
+ if (vcore_is_mapped(p, vcoreid)) {
+ printd("[kernel] sending notif to vcore %d\n", vcoreid);
+ /* This use of try_get_pcoreid is racy, might be unmapped */
+ send_kernel_message(try_get_pcoreid(p, vcoreid), __notify, (long)p,
+ 0, 0, KMSG_IMMEDIATE);
}
- } else {
- SET_BITMASK_BIT_ATOMIC(vcpd->notif_bmask, notif);
}
+}
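/* The order above pairs with how userspace leaves vcore context (pop_ros_tf()
 * in the user library, mentioned in proc_yield()): the kernel writes
 * notif_pending then reads notif_disabled, while userspace clears
 * notif_disabled then re-checks notif_pending, so at least one side notices.
 * A rough sketch of the user-side half (only the vcpd fields are from this
 * patch; everything else is assumed):
 *
 *	vcpd->notif_disabled = FALSE;
 *	wrmb();
 *	if (vcpd->notif_pending)
 *		reenter_vcore_context_or_handle_events();
 */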
- /* Active notification */
- /* TODO: Currently, there is a race for notif_pending, and multiple senders
- * can send an IPI. Worst thing is that the process gets interrupted
- * briefly and the kernel immediately returns back once it realizes notifs
- * are masked. To fix it, we'll need atomic_swapb() (right answer), or not
- * use a bool. (wrong answer). */
- if (nm->flags & NOTIF_IPI && !vcpd->notif_pending) {
- vcpd->notif_pending = TRUE;
- if (vcpd->notif_enabled) {
- /* GIANT WARNING: we aren't using the proc-lock to protect the
- * vcoremap. We want to be able to use this from interrupt context,
- * and don't want the proc_lock to be an irqsave.
- */
- if ((p->state & PROC_RUNNING_M) && // TODO: (VC#) (_S state)
- (p->procinfo->vcoremap[vcoreid].valid)) {
- printd("[kernel] sending notif to vcore %d\n", vcoreid);
- send_kernel_message(get_pcoreid(p, vcoreid), __notify, p, 0, 0,
- KMSG_ROUTINE);
- } else { // TODO: think about this, fallback, etc
- warn("Vcore unmapped, not receiving an active notif");
- }
+/* Makes sure p is runnable. May be spammed, via the ksched. Called only by
+ * the ksched when it holds the ksched lock (or whatever). We need to lock both
+ * the ksched and the proc at some point, so we need to start this call in the
+ * ksched (lock ordering).
+ *
+ * Will call back to the ksched via one of the __sched_.cp_wakeup() calls. */
+void __proc_wakeup(struct proc *p)
+{
+ spin_lock(&p->proc_lock);
+ if (__proc_is_mcp(p)) {
+ /* we only wake up WAITING mcps */
+ if (p->state != PROC_WAITING)
+ goto out_unlock;
+ if (!p->procdata->res_req[RES_CORES].amt_wanted)
+ p->procdata->res_req[RES_CORES].amt_wanted = 1;
+ __proc_set_state(p, PROC_RUNNABLE_M);
+ spin_unlock(&p->proc_lock);
+ __sched_mcp_wakeup(p);
+ goto out;
+ } else {
+ /* SCPs can wake up for a variety of reasons. the only times we need
+		 * to do something are when it was waiting or just created. other cases
+ * are either benign (just go out), or potential bugs (_Ms) */
+ switch (p->state) {
+ case (PROC_CREATED):
+ case (PROC_WAITING):
+ __proc_set_state(p, PROC_RUNNABLE_S);
+ break;
+ case (PROC_RUNNABLE_S):
+ case (PROC_RUNNING_S):
+ case (PROC_DYING):
+ goto out_unlock;
+ case (PROC_RUNNABLE_M):
+ case (PROC_RUNNING_M):
+ warn("Weird state(%s) in %s()", procstate2str(p->state),
+ __FUNCTION__);
+ goto out_unlock;
}
+ printd("[kernel] FYI, waking up an _S proc\n"); /* thanks, past brho! */
+ spin_unlock(&p->proc_lock);
+ __sched_scp_wakeup(p);
+ goto out;
}
+out_unlock:
+ spin_unlock(&p->proc_lock);
+out:
+ return;
}
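/* Sketch of the lock ordering described in the comment above __proc_wakeup()
 * (the ksched lock name and the wrapper are assumptions; __proc_wakeup() and
 * the __sched_*_wakeup() callbacks are from this patch): the ksched takes its
 * own lock first, then __proc_wakeup() grabs the proc_lock. */
extern spinlock_t sched_lock;	/* assumed name for the ksched's lock */

static void example_ksched_wakeup(struct proc *p)
{
	spin_lock(&sched_lock);
	__proc_wakeup(p);	/* may call back __sched_mcp/scp_wakeup() */
	spin_unlock(&sched_lock);
}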
-/* Sends notification number notif to proc p. Meant for generic notifications /
- * reference implementation. do_notify does the real work. This one mostly
- * just determines where the notif should be sent, other checks, etc.
- * Specifically, it handles the parameters of notif_methods. If you happen to
- * notify yourself, make sure you process routine kmsgs. */
-void proc_notify(struct proc *p, unsigned int notif, struct notif_event *ne)
+/* Is the process in multi_mode / is an MCP or not? */
+bool __proc_is_mcp(struct proc *p)
{
- assert(notif < MAX_NR_NOTIF); // notifs start at 0
- struct notif_method *nm = &p->procdata->notif_methods[notif];
- struct notif_event local_ne;
-
- /* Caller can opt to not send an NE, in which case we use the notif */
- if (!ne) {
- ne = &local_ne;
- ne->ne_type = notif;
- }
-
- if (!(nm->flags & NOTIF_WANTED))
- return;
- do_notify(p, nm->vcoreid, ne->ne_type, ne);
+ /* in lieu of using the amount of cores requested, or having a bunch of
+ * states (like PROC_WAITING_M and _S), I'll just track it with a bool. */
+ return p->procinfo->is_mcp;
}
/************************ Preemption Functions ******************************
* (or local traps) may not yet be ready to handle seeing their future state.
* But they should be, so fix those when they pop up.
*
- * TODO: (RMS) we need to actually make the scheduler handle RUNNABLE_Ms and
- * then schedule these, or change proc_destroy to not assume they need to be
- * descheduled.
- *
* Another thing to do would be to make the _core functions take a pcorelist,
* and not just one pcoreid. */
* about locking, do it before calling. Takes a vcoreid! */
void __proc_preempt_warn(struct proc *p, uint32_t vcoreid, uint64_t when)
{
+ struct event_msg local_msg = {0};
/* danger with doing this unlocked: preempt_pending is set, but never 0'd,
* since it is unmapped and not dealt with (TODO)*/
p->procinfo->vcoremap[vcoreid].preempt_pending = when;
- /* notify, if they want to hear about this event. regardless of how they
- * want it, we can send this as a bit. Subject to change. */
- if (p->procdata->notif_methods[NE_PREEMPT_PENDING].flags | NOTIF_WANTED)
- do_notify(p, vcoreid, NE_PREEMPT_PENDING, 0);
+
+ /* Send the event (which internally checks to see how they want it) */
+ local_msg.ev_type = EV_PREEMPT_PENDING;
+ local_msg.ev_arg1 = vcoreid;
+ send_kernel_event(p, &local_msg, vcoreid);
+
/* TODO: consider putting in some lookup place for the alarm to find it.
* til then, it'll have to scan the vcoremap (O(n) instead of O(m)) */
}
* care about the mapping (and you should). */
void __proc_preempt_warnall(struct proc *p, uint64_t when)
{
- uint32_t active_vcoreid = 0;
- for (int i = 0; i < p->procinfo->num_vcores; i++) {
- active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
- __proc_preempt_warn(p, active_vcoreid, when);
- active_vcoreid++;
- }
+ struct vcore *vc_i;
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list)
+ __proc_preempt_warn(p, vcore2vcoreid(p, vc_i), when);
/* TODO: consider putting in some lookup place for the alarm to find it.
* til then, it'll have to scan the vcoremap (O(n) instead of O(m)) */
}
// TODO: function to set an alarm, if none is outstanding
-/* Raw function to preempt a single core. Returns TRUE if the calling core will
- * get a kmsg. If you care about locking, do it before calling. */
-bool __proc_preempt_core(struct proc *p, uint32_t pcoreid)
+/* Raw function to preempt a single core. If you care about locking, do it
+ * before calling. */
+void __proc_preempt_core(struct proc *p, uint32_t pcoreid)
{
uint32_t vcoreid = get_vcoreid(p, pcoreid);
-
+ struct event_msg preempt_msg = {0};
p->procinfo->vcoremap[vcoreid].preempt_served = TRUE;
// expects a pcorelist. assumes pcore is mapped and running_m
- return __proc_take_cores(p, &pcoreid, 1, __preempt, p, 0, 0);
+ __proc_take_corelist(p, &pcoreid, 1, TRUE);
+ /* Send a message about the preemption. */
+ preempt_msg.ev_type = EV_VCORE_PREEMPT;
+ preempt_msg.ev_arg2 = vcoreid;
+ send_kernel_event(p, &preempt_msg, 0);
}
-/* Raw function to preempt every vcore. Returns TRUE if the calling core will
- * get a kmsg. If you care about locking, do it before calling. */
-bool __proc_preempt_all(struct proc *p)
+/* Raw function to preempt every vcore. If you care about locking, do it before
+ * calling. */
+uint32_t __proc_preempt_all(struct proc *p, uint32_t *pc_arr)
{
/* instead of doing this, we could just preempt_served all possible vcores,
* and not just the active ones. We would need to sort out a way to deal
* with stale preempt_serveds first. This might be just as fast anyways. */
- uint32_t active_vcoreid = 0;
- for (int i = 0; i < p->procinfo->num_vcores; i++) {
- active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
- p->procinfo->vcoremap[active_vcoreid].preempt_served = TRUE;
- active_vcoreid++;
- }
- return __proc_take_allcores(p, __preempt, p, 0, 0);
+ struct vcore *vc_i;
+ /* TODO:(BULK) PREEMPT - don't bother with this, set a proc wide flag, or
+ * just make us RUNNABLE_M. */
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list)
+ vc_i->preempt_served = TRUE;
+ return __proc_take_allcores(p, pc_arr, TRUE);
}
/* Warns and preempts a vcore from p. No delaying / alarming, or anything. The
* warning will be for u usec from now. */
void proc_preempt_core(struct proc *p, uint32_t pcoreid, uint64_t usec)
{
- bool self_ipi_pending = FALSE;
- uint64_t warn_time = read_tsc() + usec * 1000000 / system_timing.tsc_freq;
-
+ uint64_t warn_time = read_tsc() + usec2tsc(usec);
+ bool preempted = FALSE;
/* DYING could be okay */
if (p->state != PROC_RUNNING_M) {
warn("Tried to preempt from a non RUNNING_M proc!");
return;
}
spin_lock(&p->proc_lock);
+ /* TODO: this is racy, could be messages in flight that haven't unmapped
+ * yet, so we need to do something more complicated */
if (is_mapped_vcore(p, pcoreid)) {
__proc_preempt_warn(p, get_vcoreid(p, pcoreid), warn_time);
- self_ipi_pending = __proc_preempt_core(p, pcoreid);
+ __proc_preempt_core(p, pcoreid);
+ preempted = TRUE;
} else {
warn("Pcore doesn't belong to the process!!");
}
- /* TODO: (RMS) do this once a scheduler can handle RUNNABLE_M, and make sure
- * to schedule it */
- #if 0
if (!p->procinfo->num_vcores) {
__proc_set_state(p, PROC_RUNNABLE_M);
- schedule_proc(p);
}
- #endif
spin_unlock(&p->proc_lock);
- __proc_kmsg_pending(p, self_ipi_pending);
+ if (preempted)
+ put_idle_core(p, pcoreid);
}
/* Warns and preempts all from p. No delaying / alarming, or anything. The
* warning will be for u usec from now. */
void proc_preempt_all(struct proc *p, uint64_t usec)
{
- bool self_ipi_pending = FALSE;
- uint64_t warn_time = read_tsc() + usec * 1000000 / system_timing.tsc_freq;
-
+ uint64_t warn_time = read_tsc() + usec2tsc(usec);
+ uint32_t num_revoked = 0;
spin_lock(&p->proc_lock);
+ /* storage for pc_arr is alloced at decl, which is after grabbing the lock */
+ uint32_t pc_arr[p->procinfo->num_vcores];
/* DYING could be okay */
if (p->state != PROC_RUNNING_M) {
warn("Tried to preempt from a non RUNNING_M proc!");
+ spin_unlock(&p->proc_lock);
return;
}
__proc_preempt_warnall(p, warn_time);
- self_ipi_pending = __proc_preempt_all(p);
+ num_revoked = __proc_preempt_all(p, pc_arr);
assert(!p->procinfo->num_vcores);
- /* TODO: (RMS) do this once a scheduler can handle RUNNABLE_M, and make sure
- * to schedule it */
- #if 0
__proc_set_state(p, PROC_RUNNABLE_M);
- schedule_proc(p);
- #endif
spin_unlock(&p->proc_lock);
- __proc_kmsg_pending(p, self_ipi_pending);
+ /* Return the cores to the ksched */
+ if (num_revoked)
+ put_idle_cores(p, pc_arr, num_revoked);
}
/* Give the specific pcore to proc p. Lots of assumptions, so don't really use
* free, etc. */
void proc_give(struct proc *p, uint32_t pcoreid)
{
- bool self_ipi_pending = FALSE;
-
+ warn("Your idlecoremap is now screwed up"); /* TODO (IDLE) */
spin_lock(&p->proc_lock);
// expects a pcorelist, we give it a list of one
- self_ipi_pending = __proc_give_cores(p, &pcoreid, 1);
+ __proc_give_cores(p, &pcoreid, 1);
spin_unlock(&p->proc_lock);
- __proc_kmsg_pending(p, self_ipi_pending);
}
/* Global version of the helper, for sys_get_vcoreid (might phase that syscall
* out). */
uint32_t proc_get_vcoreid(struct proc *SAFE p, uint32_t pcoreid)
{
- uint32_t vcoreid;
- // TODO: the code currently doesn't track the vcoreid properly for _S (VC#)
- spin_lock(&p->proc_lock);
- switch (p->state) {
- case PROC_RUNNING_S:
- spin_unlock(&p->proc_lock);
- return 0; // TODO: here's the ugly part
- case PROC_RUNNING_M:
- vcoreid = get_vcoreid(p, pcoreid);
- spin_unlock(&p->proc_lock);
- return vcoreid;
- case PROC_DYING: // death message is on the way
- spin_unlock(&p->proc_lock);
- return 0;
- default:
- spin_unlock(&p->proc_lock);
- panic("Weird state(%s) in %s()", procstate2str(p->state),
- __FUNCTION__);
+ struct per_cpu_info *pcpui = &per_cpu_info[pcoreid];
+ return pcpui->owning_vcoreid;
+}
+
+/* TODO: make all of these static inlines when we gut the env crap */
+bool vcore_is_mapped(struct proc *p, uint32_t vcoreid)
+{
+ return p->procinfo->vcoremap[vcoreid].valid;
+}
+
+/* Can do this, or just create a new field and save it in the vcoremap */
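+/* (This relies on vcoremap being a flat array of struct vcore, so the pointer
+ * difference is the vcoreid.) */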
+uint32_t vcore2vcoreid(struct proc *p, struct vcore *vc)
+{
+ return (vc - p->procinfo->vcoremap);
+}
+
+struct vcore *vcoreid2vcore(struct proc *p, uint32_t vcoreid)
+{
+ return &p->procinfo->vcoremap[vcoreid];
+}
+
+/********** Core granting (bulk and single) ***********/
+
+/* Helper: gives pcore to the process, mapping it to the next available vcore
+ * from list vc_list. Returns TRUE if we succeeded (non-empty). If you pass in
+ * **vc, we'll tell you which vcore it was. */
+static bool __proc_give_a_pcore(struct proc *p, uint32_t pcore,
+ struct vcore_tailq *vc_list, struct vcore **vc)
+{
+ struct vcore *new_vc;
+ new_vc = TAILQ_FIRST(vc_list);
+ if (!new_vc)
+ return FALSE;
+ printd("setting vcore %d to pcore %d\n", vcore2vcoreid(p, new_vc),
+ pcorelist[i]);
+ TAILQ_REMOVE(vc_list, new_vc, list);
+ TAILQ_INSERT_TAIL(&p->online_vcs, new_vc, list);
+ __map_vcore(p, vcore2vcoreid(p, new_vc), pcore);
+ if (vc)
+ *vc = new_vc;
+ return TRUE;
+}
+
+static void __proc_give_cores_runnable(struct proc *p, uint32_t *pc_arr,
+ uint32_t num)
+{
+ assert(p->state == PROC_RUNNABLE_M);
+ assert(num); /* catch bugs */
+ /* add new items to the vcoremap */
+ __seq_start_write(&p->procinfo->coremap_seqctr); /* unnecessary if offline */
+ p->procinfo->num_vcores += num;
+ for (int i = 0; i < num; i++) {
+ /* Try from the bulk list first */
+ if (__proc_give_a_pcore(p, pc_arr[i], &p->bulk_preempted_vcs, 0))
+ continue;
+ /* o/w, try from the inactive list. At one point I thought there might
+ * be a legit way for the inactive list to be empty; until that is clear,
+ * catch it with an assert. */
+ assert(__proc_give_a_pcore(p, pc_arr[i], &p->inactive_vcs, 0));
}
+ __seq_end_write(&p->procinfo->coremap_seqctr);
}
-/* Gives process p the additional num cores listed in pcorelist. You must be
- * RUNNABLE_M or RUNNING_M before calling this. If you're RUNNING_M, this will
- * startup your new cores at the entry point with their virtual IDs (or restore
- * a preemption). If you're RUNNABLE_M, you should call proc_run after this so
- * that the process can start to use its cores.
+static void __proc_give_cores_running(struct proc *p, uint32_t *pc_arr,
+ uint32_t num)
+{
+ struct vcore *vc_i;
+ /* Up the refcnt, since num cores are going to start using this
+ * process and have it loaded in their owning_proc and 'current'. */
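+ /* (Each pair of refs is consumed in __startcore: one backs owning_proc, the
+ * other is either installed as cur_proc or dropped there.) */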
+ proc_incref(p, num * 2); /* keep in sync with __startcore */
+ __seq_start_write(&p->procinfo->coremap_seqctr);
+ p->procinfo->num_vcores += num;
+ assert(TAILQ_EMPTY(&p->bulk_preempted_vcs));
+ for (int i = 0; i < num; i++) {
+ assert(__proc_give_a_pcore(p, pc_arr[i], &p->inactive_vcs, &vc_i));
+ send_kernel_message(pc_arr[i], __startcore, (long)p,
+ (long)vcore2vcoreid(p, vc_i), 0, KMSG_IMMEDIATE);
+ }
+ __seq_end_write(&p->procinfo->coremap_seqctr);
+}
+
+/* Gives process p the additional num cores listed in pcorelist. If the proc is
+ * not RUNNABLE_M or RUNNING_M, this will fail and allocate none of the cores
+ * (and return -1). If you're RUNNING_M, this will startup your new cores at
+ * the entry point with their virtual IDs (or restore a preemption). If you're
+ * RUNNABLE_M, you should call __proc_run_m after this so that the process can
+ * start to use its cores. In either case, this returns 0.
*
* If you're *_S, make sure your core0's TF is set (which is done when coming in
* via arch/trap.c and we are RUNNING_S), change your state, then call this.
- * Then call proc_run().
+ * Then call __proc_run_m().
*
* The reason I didn't bring the _S cases from core_request over here is so we
* can keep this family of calls dealing with only *_Ms, to avoiding caring if
- * this is called from another core, and to avoid the need_to_idle business.
- * The other way would be to have this function have the side effect of changing
- * state, and finding another way to do the need_to_idle.
- *
- * The returned bool signals whether or not a stack-crushing IPI will come in
- * once you unlock after this function.
+ * this is called from another core, and to avoid the _S -> _M transition.
*
* WARNING: You must hold the proc_lock before calling this! */
-bool __proc_give_cores(struct proc *SAFE p, uint32_t *pcorelist, size_t num)
-{ TRUSTEDBLOCK
- bool self_ipi_pending = FALSE;
- uint32_t free_vcoreid = 0;
+int __proc_give_cores(struct proc *p, uint32_t *pc_arr, uint32_t num)
+{
+ /* should never happen: */
+ assert(num + p->procinfo->num_vcores <= MAX_NUM_CPUS);
switch (p->state) {
case (PROC_RUNNABLE_S):
case (PROC_RUNNING_S):
- panic("Don't give cores to a process in a *_S state!\n");
- break;
+ warn("Don't give cores to a process in a *_S state!\n");
+ return -1;
case (PROC_DYING):
- panic("Attempted to give cores to a DYING process.\n");
- break;
+ case (PROC_WAITING):
+ /* can't accept, just fail */
+ return -1;
case (PROC_RUNNABLE_M):
- // set up vcoremap. list should be empty, but could be called
- // multiple times before proc_running (someone changed their mind?)
- if (p->procinfo->num_vcores) {
- printk("[kernel] Yaaaaaarrrrr! Giving extra cores, are we?\n");
- // debugging: if we aren't packed, then there's a problem
- // somewhere, like someone forgot to take vcores after
- // preempting.
- for (int i = 0; i < p->procinfo->num_vcores; i++)
- assert(p->procinfo->vcoremap[i].valid);
- }
- // add new items to the vcoremap
- __seq_start_write(&p->procinfo->coremap_seqctr);
- for (int i = 0; i < num; i++) {
- // find the next free slot, which should be the next one
- free_vcoreid = get_free_vcoreid(p, free_vcoreid);
- printd("setting vcore %d to pcore %d\n", free_vcoreid,
- pcorelist[i]);
- __map_vcore(p, free_vcoreid, pcorelist[i]);
- p->procinfo->num_vcores++;
- }
- __seq_end_write(&p->procinfo->coremap_seqctr);
+ __proc_give_cores_runnable(p, pc_arr, num);
break;
case (PROC_RUNNING_M):
- /* Up the refcnt, since num cores are going to start using this
- * process and have it loaded in their 'current'. */
- kref_get(&p->kref, num);
- __seq_start_write(&p->procinfo->coremap_seqctr);
- for (int i = 0; i < num; i++) {
- free_vcoreid = get_free_vcoreid(p, free_vcoreid);
- printd("setting vcore %d to pcore %d\n", free_vcoreid,
- pcorelist[i]);
- __map_vcore(p, free_vcoreid, pcorelist[i]);
- p->procinfo->num_vcores++;
- send_kernel_message(pcorelist[i], __startcore, p, 0, 0,
- KMSG_ROUTINE);
- if (pcorelist[i] == core_id())
- self_ipi_pending = TRUE;
- }
- __seq_end_write(&p->procinfo->coremap_seqctr);
+ __proc_give_cores_running(p, pc_arr, num);
break;
default:
panic("Weird state(%s) in %s()", procstate2str(p->state),
__FUNCTION__);
}
- p->resources[RES_CORES].amt_granted += num;
- return self_ipi_pending;
+ /* TODO: considering moving to the ksched (hard, due to yield) */
+ p->procinfo->res_grant[RES_CORES] += num;
+ return 0;
}
-/* Makes process p's coremap look like pcorelist (add, remove, etc). Caller
- * needs to know what cores are free after this call (removed, failed, etc).
- * This info will be returned via corelist and *num. This will send message to
- * any cores that are getting removed.
- *
- * Before implementing this, we should probably think about when this will be
- * used. Implies preempting for the message. The more that I think about this,
- * the less I like it. For now, don't use this, and think hard before
- * implementing it.
- *
- * WARNING: You must hold the proc_lock before calling this! */
-bool __proc_set_allcores(struct proc *SAFE p, uint32_t *pcorelist,
- size_t *num, amr_t message,TV(a0t) arg0,
- TV(a1t) arg1, TV(a2t) arg2)
+/********** Core revocation (bulk and single) ***********/
+
+/* Revokes a single vcore from a process (unmaps or sends a KMSG to unmap). */
+static void __proc_revoke_core(struct proc *p, uint32_t vcoreid, bool preempt)
{
- panic("Set all cores not implemented.\n");
+ uint32_t pcoreid = get_pcoreid(p, vcoreid);
+ struct preempt_data *vcpd;
+ if (preempt) {
+ /* Lock the vcore's state (necessary for preemption recovery) */
+ vcpd = &p->procdata->vcore_preempt_data[vcoreid];
+ atomic_or(&vcpd->flags, VC_K_LOCK);
+ send_kernel_message(pcoreid, __preempt, (long)p, 0, 0, KMSG_IMMEDIATE);
+ } else {
+ send_kernel_message(pcoreid, __death, 0, 0, 0, KMSG_IMMEDIATE);
+ }
+}
+
+/* Revokes all cores from the process (unmaps or sends a KMSGS). */
+static void __proc_revoke_allcores(struct proc *p, bool preempt)
+{
+ struct vcore *vc_i;
+ /* TODO: if we ever get broadcast messaging, use it here (still need to lock
+ * the vcores' states for preemption) */
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list)
+ __proc_revoke_core(p, vcore2vcoreid(p, vc_i), preempt);
}
-/* Takes from process p the num cores listed in pcorelist, using the given
- * message for the kernel message (__death, __preempt, etc). Like the others
- * in this function group, bool signals whether or not an IPI is pending.
+/* Might be faster to scan the vcoremap than to walk the list... */
+static void __proc_unmap_allcores(struct proc *p)
+{
+ struct vcore *vc_i;
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list)
+ __unmap_vcore(p, vcore2vcoreid(p, vc_i));
+}
+
+/* Takes (revoke via kmsg or unmap) from process p the num cores listed in
+ * pc_arr. Will preempt if 'preempt' is set. o/w, no state will be saved, etc.
+ * Don't use this for taking all of a process's cores.
*
- * WARNING: You must hold the proc_lock before calling this! */
-bool __proc_take_cores(struct proc *SAFE p, uint32_t *pcorelist,
- size_t num, amr_t message, TV(a0t) arg0,
- TV(a1t) arg1, TV(a2t) arg2)
-{ TRUSTEDBLOCK
- uint32_t vcoreid, pcoreid;
- bool self_ipi_pending = FALSE;
- switch (p->state) {
- case (PROC_RUNNABLE_M):
- assert(!message);
- break;
- case (PROC_RUNNING_M):
- assert(message);
- break;
- default:
- panic("Weird state(%s) in %s()", procstate2str(p->state),
- __FUNCTION__);
- }
- spin_lock(&idle_lock);
- assert((num <= p->procinfo->num_vcores) &&
- (num_idlecores + num <= num_cpus));
- spin_unlock(&idle_lock);
+ * Make sure you hold the lock when you call this, and make sure that the pcore
+ * actually belongs to the proc, non-trivial due to other __preempt messages. */
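+/* (See proc_preempt_core() above for the usual single-core pattern: check
+ * is_mapped_vcore() under the lock, call this via __proc_preempt_core() with
+ * preempt set, then hand the pcore back to the ksched with put_idle_core()
+ * after unlocking.) */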
+void __proc_take_corelist(struct proc *p, uint32_t *pc_arr, uint32_t num,
+ bool preempt)
+{
+ struct vcore *vc;
+ uint32_t vcoreid;
__seq_start_write(&p->procinfo->coremap_seqctr);
for (int i = 0; i < num; i++) {
- vcoreid = get_vcoreid(p, pcorelist[i]);
- // while ugly, this is done to facilitate merging with take_all_cores
- pcoreid = get_pcoreid(p, vcoreid);
- assert(pcoreid == pcorelist[i]);
- if (message) {
- if (pcoreid == core_id())
- self_ipi_pending = TRUE;
- send_kernel_message(pcoreid, message, arg0, arg1, arg2,
- KMSG_ROUTINE);
+ vcoreid = get_vcoreid(p, pc_arr[i]);
+ /* Sanity check */
+ assert(pc_arr[i] == get_pcoreid(p, vcoreid));
+ /* Revoke / unmap core */
+ if (p->state == PROC_RUNNING_M) {
+ __proc_revoke_core(p, vcoreid, preempt);
} else {
- /* if there was a msg, the vcore is unmapped on the receive side.
- * o/w, we need to do it here. */
+ assert(p->state == PROC_RUNNABLE_M);
__unmap_vcore(p, vcoreid);
}
- // give the pcore back to the idlecoremap
- put_idle_core(pcoreid);
+ /* Change lists for the vcore. Note, the messages are already in flight
+ * (or the vcore is already unmapped), if applicable. The only code
+ * that looks at the lists without holding the lock is event code, and
+ * it doesn't care if the vcore was unmapped (it handles that) */
+ vc = vcoreid2vcore(p, vcoreid);
+ TAILQ_REMOVE(&p->online_vcs, vc, list);
+ /* even for single preempts, we use the inactive list. bulk preempt is
+ * only used for when we take everything. */
+ TAILQ_INSERT_HEAD(&p->inactive_vcs, vc, list);
}
p->procinfo->num_vcores -= num;
__seq_end_write(&p->procinfo->coremap_seqctr);
- p->resources[RES_CORES].amt_granted -= num;
- return self_ipi_pending;
+ p->procinfo->res_grant[RES_CORES] -= num;
}
-/* Takes all cores from a process, which must be in an _M state. Cores are
- * placed back in the idlecoremap. If there's a message, such as __death or
- * __preempt, it will be sent to the cores. The bool signals whether or not an
- * IPI is coming in once you unlock.
+/* Takes all cores from a process (revoke via kmsg or unmap), putting them on
+ * the appropriate vcore list, and fills pc_arr with the pcores revoked, and
+ * returns the number of entries in pc_arr.
*
- * WARNING: You must hold the proc_lock before calling this! */
-bool __proc_take_allcores(struct proc *SAFE p, amr_t message,
- TV(a0t) arg0, TV(a1t) arg1, TV(a2t) arg2)
+ * Make sure pc_arr is big enough to handle num_vcores().
+ * Make sure you hold the lock when you call this. */
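+/* (proc_preempt_all() above shows the usual usage: a pc_arr sized by
+ * p->procinfo->num_vcores, filled in here, then handed back to the ksched
+ * with put_idle_cores() after unlocking.) */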
+uint32_t __proc_take_allcores(struct proc *p, uint32_t *pc_arr, bool preempt)
{
- uint32_t active_vcoreid = 0, pcoreid;
- bool self_ipi_pending = FALSE;
- switch (p->state) {
- case (PROC_RUNNABLE_M):
- assert(!message);
- break;
- case (PROC_RUNNING_M):
- assert(message);
- break;
- default:
- panic("Weird state(%s) in %s()", procstate2str(p->state),
- __FUNCTION__);
- }
- spin_lock(&idle_lock);
- assert(num_idlecores + p->procinfo->num_vcores <= num_cpus); // sanity
- spin_unlock(&idle_lock);
+ struct vcore *vc_i, *vc_temp;
+ uint32_t num = 0;
__seq_start_write(&p->procinfo->coremap_seqctr);
- for (int i = 0; i < p->procinfo->num_vcores; i++) {
- // find next active vcore
- active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
- pcoreid = get_pcoreid(p, active_vcoreid);
- if (message) {
- if (pcoreid == core_id())
- self_ipi_pending = TRUE;
- send_kernel_message(pcoreid, message, arg0, arg1, arg2,
- KMSG_ROUTINE);
- } else {
- /* if there was a msg, the vcore is unmapped on the receive side.
- * o/w, we need to do it here. */
- __unmap_vcore(p, active_vcoreid);
- }
- // give the pcore back to the idlecoremap
- put_idle_core(pcoreid);
- active_vcoreid++; // for the next loop, skip the one we just used
+ /* Write out which pcores we're going to take */
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list)
+ pc_arr[num++] = vc_i->pcoreid;
+ /* Revoke if they are running, o/w unmap. Both of these need the online
+ * list to not be changed yet. */
+ if (p->state == PROC_RUNNING_M) {
+ __proc_revoke_allcores(p, preempt);
+ } else {
+ assert(p->state == PROC_RUNNABLE_M);
+ __proc_unmap_allcores(p);
}
+ /* Move the vcores from online to the head of the appropriate list */
+ TAILQ_FOREACH_SAFE(vc_i, &p->online_vcs, list, vc_temp) {
+ /* TODO: we may want a TAILQ_CONCAT_HEAD, or something that does that */
+ TAILQ_REMOVE(&p->online_vcs, vc_i, list);
+ /* Put the cores on the appropriate list */
+ if (preempt)
+ TAILQ_INSERT_HEAD(&p->bulk_preempted_vcs, vc_i, list);
+ else
+ TAILQ_INSERT_HEAD(&p->inactive_vcs, vc_i, list);
+ }
+ assert(TAILQ_EMPTY(&p->online_vcs));
+ assert(num == p->procinfo->num_vcores);
p->procinfo->num_vcores = 0;
__seq_end_write(&p->procinfo->coremap_seqctr);
- p->resources[RES_CORES].amt_granted = 0;
- return self_ipi_pending;
-}
-
-/* Helper, to be used when a proc management kmsg should be on its way. This
- * used to also unlock and then handle the message, back when the proc_lock was
- * an irqsave, and we had an IPI pending. Now we use routine kmsgs. If a msg
- * is pending, this needs to decref (to eat the reference of the caller) and
- * then process the message. Unlock before calling this, since you might not
- * return.
- *
- * There should already be a kmsg waiting for us, since when we checked state to
- * see a message was coming, the message had already been sent before unlocking.
- * Note we do not need interrupts enabled for this to work (you can receive a
- * message before its IPI by polling), though in most cases they will be.
- *
- * TODO: consider inlining this, so __FUNCTION__ works (will require effort in
- * core_request(). */
-void __proc_kmsg_pending(struct proc *p, bool ipi_pending)
-{
- if (ipi_pending) {
- kref_put(&p->kref);
- process_routine_kmsg();
- panic("stack-killing kmsg not found in %s!!!", __FUNCTION__);
- }
+ p->procinfo->res_grant[RES_CORES] = 0;
+ return num;
}
/* Helper to do the vcore->pcore and inverse mapping. Hold the lock when
* calling. */
void __map_vcore(struct proc *p, uint32_t vcoreid, uint32_t pcoreid)
{
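+ /* Spin until any racing __unmap_vcore() (e.g. from a __preempt still in
+ * flight on another core) has cleared 'valid' for this slot. The wmb()s
+ * below order the id writes before the 'valid' writes, presumably so that
+ * lockless readers who check 'valid' first see a consistent mapping;
+ * __unmap_vcore() clears the flags in the reverse order. */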
+ while (p->procinfo->vcoremap[vcoreid].valid)
+ cpu_relax();
p->procinfo->vcoremap[vcoreid].pcoreid = pcoreid;
+ wmb();
p->procinfo->vcoremap[vcoreid].valid = TRUE;
p->procinfo->pcoremap[pcoreid].vcoreid = vcoreid;
+ wmb();
p->procinfo->pcoremap[pcoreid].valid = TRUE;
}
* calling. */
void __unmap_vcore(struct proc *p, uint32_t vcoreid)
{
- p->procinfo->vcoremap[vcoreid].valid = FALSE;
p->procinfo->pcoremap[p->procinfo->vcoremap[vcoreid].pcoreid].valid = FALSE;
+ wmb();
+ p->procinfo->vcoremap[vcoreid].valid = FALSE;
}
-/* Stop running whatever context is on this core, load a known-good cr3, and
- * 'idle'. Note this leaves no trace of what was running. This "leaves the
- * process's context. */
+/* Stop running whatever context is on this core and load a known-good cr3.
+ * Note this leaves no trace of what was running. This "leaves the process's
+ * context. Also, we want interrupts disabled, to not conflict with kmsgs
+ * (__launch_kthread, proc mgmt, etc).
+ *
+ * This does not clear the owning proc. Use the other helper for that. */
void abandon_core(void)
{
- if (current)
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ assert(!irq_is_enabled());
+ /* Syscalls that don't return will ultimately call abandon_core(), so we need
+ * to make sure we don't think we are still working on a syscall. */
+ pcpui->cur_sysc = 0;
+ if (pcpui->cur_proc)
__abandon_core();
- smp_idle();
+}
+
+/* Helper to clear the core's owning processor and manage refcnting. Pass in
+ * core_id() to save a couple core_id() calls. */
+void clear_owning_proc(uint32_t coreid)
+{
+ struct per_cpu_info *pcpui = &per_cpu_info[coreid];
+ struct proc *p = pcpui->owning_proc;
+ assert(!irq_is_enabled());
+ pcpui->owning_proc = 0;
+ pcpui->owning_vcoreid = 0xdeadbeef;
+ pcpui->cur_tf = 0; /* catch bugs for now (will go away soon) */
+ if (p)
+ proc_decref(p);
+}
+
+/* Switches to the address space/context of new_p, doing nothing if we are
+ * already in new_p. This won't add extra refcnts or anything, and needs to be
+ * paired with switch_back() at the end of whatever function you are in. Don't
+ * migrate cores in the middle of a pair. Specifically, the uncounted refs are
+ * one for the old_proc, which is passed back to the caller, and new_p is
+ * getting placed in cur_proc. */
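+/* Sketch of the intended pairing (target_p stands in for whatever proc we
+ * need to poke at):
+ *     struct proc *old_proc = switch_to(target_p);
+ *     ... access target_p's address space ...
+ *     switch_back(target_p, old_proc);
+ */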
+struct proc *switch_to(struct proc *new_p)
+{
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ struct proc *old_proc;
+ int8_t irq_state = 0;
+ disable_irqsave(&irq_state);
+ old_proc = pcpui->cur_proc; /* uncounted ref */
+ /* If we aren't the proc already, then switch to it */
+ if (old_proc != new_p) {
+ pcpui->cur_proc = new_p; /* uncounted ref */
+ lcr3(new_p->env_cr3);
+ }
+ enable_irqsave(&irq_state);
+ return old_proc;
+}
+
+/* This switches back to old_proc from new_p. Pair it with switch_to(), and
+ * pass in its return value for old_proc. */
+void switch_back(struct proc *new_p, struct proc *old_proc)
+{
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ int8_t irq_state = 0;
+ if (old_proc != new_p) {
+ disable_irqsave(&irq_state);
+ pcpui->cur_proc = old_proc;
+ if (old_proc)
+ lcr3(old_proc->env_cr3);
+ else
+ lcr3(boot_cr3);
+ enable_irqsave(&irq_state);
+ }
}
/* Will send a TLB shootdown message to every vcore in the main address space
* shootdown and batching our messages. Should do the sanity about rounding up
* and down in this function too.
*
- * Hold the proc_lock before calling this.
- *
* Would be nice to have a broadcast kmsg at this point. Note this may send a
* message to the calling core (interrupting it, possibly while holding the
* proc_lock). We don't need to process routine messages since it's an
* immediate message. */
-void __proc_tlbshootdown(struct proc *p, uintptr_t start, uintptr_t end)
+void proc_tlbshootdown(struct proc *p, uintptr_t start, uintptr_t end)
{
- uint32_t active_vcoreid = 0;
+ struct vcore *vc_i;
+ /* TODO: we might be able to avoid locking here in the future (we must hit
+ * all online, and we can check __mapped). it'll be complicated. */
+ spin_lock(&p->proc_lock);
switch (p->state) {
case (PROC_RUNNING_S):
tlbflush();
break;
case (PROC_RUNNING_M):
/* TODO: (TLB) sanity checks and rounding on the ranges */
- for (int i = 0; i < p->procinfo->num_vcores; i++) {
- /* find next active vcore */
- active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
- send_kernel_message(get_pcoreid(p, active_vcoreid),
- __tlbshootdown, (void*)start, (void*)end,
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list) {
+ send_kernel_message(vc_i->pcoreid, __tlbshootdown, start, end,
0, KMSG_IMMEDIATE);
- active_vcoreid++; /* next loop, skip the one we just used */
}
break;
+ case (PROC_DYING):
+ /* if it is dying, death messages are already on the way to all
+ * cores, including ours, which will clear the TLB. */
+ break;
default:
/* will probably get this when we have the short handlers */
- warn("Unexpected case in %s\n", __FUNCTION__);
+ warn("Unexpected case %s in %s", procstate2str(p->state),
+ __FUNCTION__);
}
+ spin_unlock(&p->proc_lock);
}
-/* Kernel message handler to start a process's context on this core. Tightly
- * coupled with proc_run(). Interrupts are disabled. */
-void __startcore(trapframe_t *tf, uint32_t srcid, void *a0, void *a1, void *a2)
+/* Helper, used by __startcore and change_to_vcore, which sets up cur_tf to run
+ * a given process's vcore. Caller needs to set up things like owning_proc and
+ * whatnot. Note that we might not have p loaded as current. */
+static void __set_curtf_to_vcoreid(struct proc *p, uint32_t vcoreid)
{
- uint32_t pcoreid = core_id(), vcoreid;
- struct proc *p_to_run = (struct proc *CT(1))a0;
- struct trapframe local_tf;
- struct preempt_data *vcpd;
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ struct preempt_data *vcpd = &p->procdata->vcore_preempt_data[vcoreid];
- assert(p_to_run);
- /* the sender of the amsg increfed, thinking we weren't running current. */
- if (p_to_run == current)
- kref_put(&p_to_run->kref);
- vcoreid = get_vcoreid(p_to_run, pcoreid);
- vcpd = &p_to_run->procdata->vcore_preempt_data[vcoreid];
+ /* We could let userspace do this, though they come into vcore entry many
+ * times, and we just need this to happen when the core comes online the
+ * first time. That, and they want this turned on as soon as we know a
+ * vcore *WILL* be online. We could also do this earlier, when we map the
+ * vcore to its pcore, though we don't always have current loaded or
+ * otherwise mess with the VCPD in those code paths. */
+ vcpd->can_rcv_msg = TRUE;
+ /* Mark this vcore as no longer preempted. No danger of clobbering
+ * other writes, since this would get turned on in __preempt (which can't be
+ * concurrent with this function on this core), and the atomic is just
+ * toggling the one bit (a concurrent VC_K_LOCK will work) */
+ atomic_and(&vcpd->flags, ~VC_PREEMPTED);
printd("[kernel] startcore on physical core %d for process %d's vcore %d\n",
- pcoreid, p_to_run->pid, vcoreid);
-
- if (seq_is_locked(vcpd->preempt_tf_valid)) {
- __seq_end_write(&vcpd->preempt_tf_valid); /* mark tf as invalid */
+ core_id(), p->pid, vcoreid);
+ /* If notifs are disabled, the vcore was in vcore context and we need to
+ * restart the preempt_tf. o/w, we give them a fresh vcore (which is also
+ * what happens the first time a vcore comes online). No matter what,
+ * they'll restart in vcore context. It's just a matter of whether or not
+ * it is the old, interrupted vcore context. */
+ if (vcpd->notif_disabled) {
restore_fp_state(&vcpd->preempt_anc);
- /* notif_pending and enabled means the proc wants to receive the IPI,
- * but might have missed it. copy over the tf so they can restart it
- * later, and give them a fresh vcore. */
- if (vcpd->notif_pending && vcpd->notif_enabled) {
- vcpd->notif_tf = vcpd->preempt_tf; // could memset
- proc_init_trapframe(&local_tf, vcoreid, p_to_run->env_entry,
- vcpd->transition_stack);
- vcpd->notif_enabled = FALSE;
- vcpd->notif_pending = FALSE;
- } else {
- /* copy-in the tf we'll pop, then set all security-related fields */
- local_tf = vcpd->preempt_tf;
- proc_secure_trapframe(&local_tf);
- }
+ /* copy-in the tf we'll pop, then set all security-related fields */
+ pcpui->actual_tf = vcpd->preempt_tf;
+ proc_secure_trapframe(&pcpui->actual_tf);
} else { /* not restarting from a preemption, use a fresh vcore */
- proc_init_trapframe(&local_tf, vcoreid, p_to_run->env_entry,
+ assert(vcpd->transition_stack);
+ /* TODO: consider 0'ing the FP state. We're probably leaking. */
+ proc_init_trapframe(&pcpui->actual_tf, vcoreid, p->env_entry,
vcpd->transition_stack);
/* Disable/mask active notifications for fresh vcores */
- vcpd->notif_enabled = FALSE;
+ vcpd->notif_disabled = TRUE;
}
- __proc_startcore(p_to_run, &local_tf); // TODO: (HSS) pass silly state *?
+ /* cur_tf was built above (in actual_tf), now use it */
+ pcpui->cur_tf = &pcpui->actual_tf;
+ /* this cur_tf will get run when the kernel returns / idles */
}
-/* Bail out if it's the wrong process, or if they no longer want a notif. Make
- * sure that you are passing in a user tf (otherwise, it's a bug). Try not to
- * grab locks or write access to anything that isn't per-core in here. */
-void __notify(trapframe_t *tf, uint32_t srcid, void *a0, void *a1, void *a2)
+/* Changes the calling vcore to be new_vcoreid. enable_my_notif tells us what
+ * state the calling vcore wants to be left in: if it is not set, it will look
+ * like caller_vcoreid was preempted. Note we don't care about notif_pending. */
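+/* Rough flow (a sketch of what the code below does): the caller's vcore is
+ * unmapped and moved to the inactive list, new_vcoreid is mapped onto this
+ * pcore and restarted via cur_tf, and an EV_VCORE_PREEMPT (or EV_CHECK_MSGS,
+ * when enable_my_notif) event is sent so userspace can recover the old
+ * context later. */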
+void proc_change_to_vcore(struct proc *p, uint32_t new_vcoreid,
+ bool enable_my_notif)
{
- struct user_trapframe local_tf;
+ uint32_t caller_vcoreid, pcoreid = core_id();
+ struct per_cpu_info *pcpui = &per_cpu_info[pcoreid];
+ struct preempt_data *caller_vcpd;
+ struct vcore *caller_vc, *new_vc;
+ struct event_msg preempt_msg = {0};
+ int8_t state = 0;
+ /* Need to disable before even reading caller_vcoreid, since we could be
+ * unmapped by a __preempt or __death, like in yield. */
+ disable_irqsave(&state);
+ /* Need to lock before reading the vcoremap, like in yield */
+ spin_lock(&p->proc_lock);
+ /* new_vcoreid is already running, abort */
+ if (vcore_is_mapped(p, new_vcoreid))
+ goto out_failed;
+ /* Need to make sure our vcore is allowed to switch. We might have a
+ * __preempt, __death, etc, coming in. Similar to yield. */
+ switch (p->state) {
+ case (PROC_RUNNING_M):
+ break; /* the only case we can proceed */
+ case (PROC_RUNNING_S): /* user bug, just return */
+ case (PROC_DYING): /* incoming __death */
+ case (PROC_RUNNABLE_M): /* incoming (bulk) preempt/myield TODO:(BULK) */
+ goto out_failed;
+ default:
+ panic("Weird state(%s) in %s()", procstate2str(p->state),
+ __FUNCTION__);
+ }
+ /* Make sure we're still mapped in the proc. */
+ if (!is_mapped_vcore(p, pcoreid))
+ goto out_failed;
+ /* Get all our info */
+ caller_vcoreid = get_vcoreid(p, pcoreid); /* holding lock, we can check */
+ assert(caller_vcoreid == pcpui->owning_vcoreid);
+ caller_vcpd = &p->procdata->vcore_preempt_data[caller_vcoreid];
+ caller_vc = vcoreid2vcore(p, caller_vcoreid);
+ /* Should only call from vcore context */
+ if (!caller_vcpd->notif_disabled) {
+ printk("[kernel] You tried to change vcores from uthread ctx\n");
+ goto out_failed;
+ }
+ /* Return and take the preempt message when we enable_irqs. */
+ if (caller_vc->preempt_served)
+ goto out_failed;
+ /* Ok, we're clear to do the switch. Lets figure out who the new one is */
+ new_vc = vcoreid2vcore(p, new_vcoreid);
+ printd("[kernel] changing vcore %d to vcore %d\n", caller_vcoreid,
+ new_vcoreid);
+ /* enable_my_notif signals how we'll be restarted */
+ if (enable_my_notif) {
+ /* if they set this flag, then the vcore can just restart from scratch,
+ * and we don't care about either the notif_tf or the preempt_tf. */
+ caller_vcpd->notif_disabled = FALSE;
+ } else {
+ /* need to set up the calling vcore's tf so that it'll get restarted by
+ * __startcore, to make the caller look like it was preempted. */
+ caller_vcpd->preempt_tf = *current_tf;
+ save_fp_state(&caller_vcpd->preempt_anc);
+ /* Mark our core as preempted (for userspace recovery). */
+ atomic_or(&caller_vcpd->flags, VC_PREEMPTED);
+ }
+ /* Either way, unmap and offline our current vcore */
+ /* Move the caller from online to inactive */
+ TAILQ_REMOVE(&p->online_vcs, caller_vc, list);
+ /* We don't bother with the notif_pending race. note that notif_pending
+ * could still be set. this was a preempted vcore, and userspace will need
+ * to deal with missed messages (preempt_recover() will handle that) */
+ TAILQ_INSERT_HEAD(&p->inactive_vcs, caller_vc, list);
+ /* Move the new one from inactive to online */
+ TAILQ_REMOVE(&p->inactive_vcs, new_vc, list);
+ TAILQ_INSERT_TAIL(&p->online_vcs, new_vc, list);
+ /* Change the vcore map (TODO: might get rid of this seqctr) */
+ __seq_start_write(&p->procinfo->coremap_seqctr);
+ __unmap_vcore(p, caller_vcoreid);
+ __map_vcore(p, new_vcoreid, pcoreid);
+ __seq_end_write(&p->procinfo->coremap_seqctr);
+ /* So this core knows which vcore is here: */
+ pcpui->owning_vcoreid = new_vcoreid;
+ /* Send either a PREEMPT msg or a CHECK_MSGS msg. If they said to
+ * enable_my_notif, then all userspace needs is to check messages, not a
+ * full preemption recovery. */
+ preempt_msg.ev_type = (enable_my_notif ? EV_CHECK_MSGS : EV_VCORE_PREEMPT);
+ preempt_msg.ev_arg2 = caller_vcoreid; /* arg2 is 32 bits */
+ send_kernel_event(p, &preempt_msg, new_vcoreid);
+ /* Change cur_tf so we'll be the new vcoreid */
+ __set_curtf_to_vcoreid(p, new_vcoreid);
+ /* Fall through to exit (we didn't fail) */
+out_failed:
+ spin_unlock(&p->proc_lock);
+ enable_irqsave(&state);
+}
+
+/* Kernel message handler to start a process's context on this core, when the
+ * core next considers running a process. Tightly coupled with __proc_run_m().
+ * Interrupts are disabled. */
+void __startcore(struct trapframe *tf, uint32_t srcid, long a0, long a1, long a2)
+{
+ uint32_t vcoreid = (uint32_t)a1;
+ uint32_t coreid = core_id();
+ struct per_cpu_info *pcpui = &per_cpu_info[coreid];
+ struct proc *p_to_run = (struct proc *CT(1))a0;
+
+ assert(p_to_run);
+ /* Can not be any TF from a process here already */
+ assert(!pcpui->owning_proc);
+ /* the sender of the amsg increfed already for this saved ref to p_to_run */
+ pcpui->owning_proc = p_to_run;
+ pcpui->owning_vcoreid = vcoreid;
+ /* sender increfed again, assuming we'd install to cur_proc. only do this
+ * if no one else is there. this is an optimization, since we expect to
+ * send these __startcores to idle cores, and this saves a scramble to
+ * incref when all of the cores restartcore/startcore later. Keep in sync
+ * with __proc_give_cores() and __proc_run_m(). */
+ if (!pcpui->cur_proc) {
+ pcpui->cur_proc = p_to_run; /* install the ref to cur_proc */
+ lcr3(p_to_run->env_cr3); /* load the page tables to match cur_proc */
+ } else {
+ proc_decref(p_to_run); /* can't install, decref the extra one */
+ }
+ /* Note we are not necessarily in the cr3 of p_to_run */
+ /* Now that we sorted refcnts and know p / which vcore it should be, set up
+ * pcpui->cur_tf so that it will run that particular vcore */
+ __set_curtf_to_vcoreid(p_to_run, vcoreid);
+}
+
+/* Bail out if it's the wrong process, or if they no longer want a notif. Don't
+ * use the TF we passed in, we care about cur_tf. Try not to grab locks or
+ * write access to anything that isn't per-core in here. */
+void __notify(struct trapframe *tf, uint32_t srcid, long a0, long a1, long a2)
+{
+ uint32_t vcoreid, coreid = core_id();
+ struct per_cpu_info *pcpui = &per_cpu_info[coreid];
struct preempt_data *vcpd;
- uint32_t vcoreid;
struct proc *p = (struct proc*)a0;
- if (p != current)
+ /* Not the right proc */
+ if (p != pcpui->owning_proc)
return;
- assert(!in_kernel(tf));
- /* We shouldn't need to lock here, since unmapping happens on the pcore and
- * mapping would only happen if the vcore was free, which it isn't until
- * after we unmap. */
- vcoreid = get_vcoreid(p, core_id());
+ /* Common cur_tf sanity checks. Note cur_tf could be an _S's env_tf */
+ assert(pcpui->cur_tf);
+ assert(!in_kernel(pcpui->cur_tf));
+ vcoreid = pcpui->owning_vcoreid;
+ assert(vcoreid == get_vcoreid(p, coreid));
vcpd = &p->procdata->vcore_preempt_data[vcoreid];
+ /* for SCPs that haven't (and might never) call vc_event_init, like rtld.
+ * It is harmless for MCPs to check this too. */
+ if (!scp_is_vcctx_ready(vcpd))
+ return;
printd("received active notification for proc %d's vcore %d on pcore %d\n",
- p->procinfo->pid, vcoreid, core_id());
+ p->procinfo->pid, vcoreid, coreid);
/* sort signals. notifs are now masked, like an interrupt gate */
- if (!vcpd->notif_enabled)
+ if (vcpd->notif_disabled)
return;
- vcpd->notif_enabled = FALSE;
- vcpd->notif_pending = FALSE; // no longer pending - it made it here
+ vcpd->notif_disabled = TRUE;
/* save the old tf in the notify slot, build and pop a new one. Note that
* silly state isn't our business for a notification. */
- // TODO: this is assuming the struct user_tf is the same as a regular TF
- vcpd->notif_tf = *tf;
- memset(&local_tf, 0, sizeof(local_tf));
- proc_init_trapframe(&local_tf, vcoreid, p->env_entry,
+ vcpd->notif_tf = *pcpui->cur_tf;
+ memset(pcpui->cur_tf, 0, sizeof(struct trapframe));
+ proc_init_trapframe(pcpui->cur_tf, vcoreid, p->env_entry,
vcpd->transition_stack);
- __proc_startcore(p, &local_tf);
+ /* this cur_tf will get run when the kernel returns / idles */
}
-void __preempt(trapframe_t *tf, uint32_t srcid, void *a0, void *a1, void *a2)
+void __preempt(struct trapframe *tf, uint32_t srcid, long a0, long a1, long a2)
{
- struct preempt_data *vcpd;
uint32_t vcoreid, coreid = core_id();
+ struct per_cpu_info *pcpui = &per_cpu_info[coreid];
+ struct preempt_data *vcpd;
struct proc *p = (struct proc*)a0;
- if (p != current)
- panic("__preempt arrived for a process (%p) that was not current (%p)!",
- p, current);
- assert(!in_kernel(tf));
- /* We shouldn't need to lock here, since unmapping happens on the pcore and
- * mapping would only happen if the vcore was free, which it isn't until
- * after we unmap. */
- vcoreid = get_vcoreid(p, coreid);
+ assert(p);
+ if (p != pcpui->owning_proc) {
+ panic("__preempt arrived for a process (%p) that was not owning (%p)!",
+ p, pcpui->owning_proc);
+ }
+ /* Common cur_tf sanity checks */
+ assert(pcpui->cur_tf);
+ assert(pcpui->cur_tf == &pcpui->actual_tf);
+ assert(!in_kernel(pcpui->cur_tf));
+ vcoreid = pcpui->owning_vcoreid;
+ assert(vcoreid == get_vcoreid(p, coreid));
p->procinfo->vcoremap[vcoreid].preempt_served = FALSE;
/* either __preempt or proc_yield() ends the preempt phase. */
p->procinfo->vcoremap[vcoreid].preempt_pending = 0;
vcpd = &p->procdata->vcore_preempt_data[vcoreid];
printd("[kernel] received __preempt for proc %d's vcore %d on pcore %d\n",
- p->procinfo->pid, vcoreid, core_id());
-
- /* save the old tf in the preempt slot, save the silly state, and signal the
- * state is a valid tf. when it is 'written,' it is valid. Using the
- * seq_ctrs so userspace can tell between different valid versions. If the
- * TF was already valid, it will panic (if CONFIGed that way). */
- // TODO: this is assuming the struct user_tf is the same as a regular TF
- vcpd->preempt_tf = *tf;
+ p->procinfo->pid, vcoreid, coreid);
+ /* if notifs are disabled, the vcore is in vcore context (as far as we're
+ * concerned), and we save it in the preempt slot. o/w, we save the
+ * process's cur_tf in the notif slot, and it'll appear to the vcore when it
+ * comes back up that it just took a notification. */
+ if (vcpd->notif_disabled)
+ vcpd->preempt_tf = *pcpui->cur_tf;
+ else
+ vcpd->notif_tf = *pcpui->cur_tf;
+ /* either way, we save the silly state (FP) */
save_fp_state(&vcpd->preempt_anc);
- __seq_start_write(&vcpd->preempt_tf_valid);
+ /* Mark the vcore as preempted and unlock (was locked by the sender). */
+ atomic_or(&vcpd->flags, VC_PREEMPTED);
+ atomic_and(&vcpd->flags, ~VC_K_LOCK);
+ wmb(); /* make sure everything else hits before we unmap */
__unmap_vcore(p, vcoreid);
- abandon_core();
+ /* We won't restart the process later. current gets cleared later when we
+ * notice there is no owning_proc and we have nothing to do (smp_idle,
+ * restartcore, etc) */
+ clear_owning_proc(coreid);
}
/* Kernel message handler to clean up the core when a process is dying.
* Note this leaves no trace of what was running.
* It's okay if death comes to a core that's already idling and has no current.
* It could happen if a process decref'd before __proc_startcore could incref. */
-void __death(trapframe_t *tf, uint32_t srcid, void *SNT a0, void *SNT a1,
- void *SNT a2)
+void __death(struct trapframe *tf, uint32_t srcid, long a0, long a1, long a2)
{
uint32_t vcoreid, coreid = core_id();
- if (current) {
- vcoreid = get_vcoreid(current, coreid);
+ struct per_cpu_info *pcpui = &per_cpu_info[coreid];
+ struct proc *p = pcpui->owning_proc;
+ if (p) {
+ vcoreid = pcpui->owning_vcoreid;
+ assert(vcoreid == get_vcoreid(p, coreid));
printd("[kernel] death on physical core %d for process %d's vcore %d\n",
- coreid, current->pid, vcoreid);
- __unmap_vcore(current, vcoreid);
+ coreid, p->pid, vcoreid);
+ __unmap_vcore(p, vcoreid);
+ /* We won't restart the process later. current gets cleared later when
+ * we notice there is no owning_proc and we have nothing to do
+ * (smp_idle, restartcore, etc) */
+ clear_owning_proc(coreid);
}
- abandon_core();
}
/* Kernel message handler, usually sent IMMEDIATE, to shoot down virtual
* addresses from a0 to a1. */
-void __tlbshootdown(struct trapframe *tf, uint32_t srcid, void *a0, void *a1,
- void *a2)
+void __tlbshootdown(struct trapframe *tf, uint32_t srcid, long a0, long a1,
+ long a2)
{
/* TODO: (TLB) something more intelligent with the range */
tlbflush();
}
-void print_idlecoremap(void)
-{
- spin_lock(&idle_lock);
- printk("There are %d idle cores.\n", num_idlecores);
- for (int i = 0; i < num_idlecores; i++)
- printk("idlecoremap[%d] = %d\n", i, idlecoremap[i]);
- spin_unlock(&idle_lock);
-}
-
void print_allpids(void)
{
- spin_lock(&pid_hash_lock);
- if (hashtable_count(pid_hash)) {
- hashtable_itr_t *phtable_i = hashtable_iterator(pid_hash);
- printk("PID STATE \n");
- printk("------------------\n");
- do {
- struct proc *p = hashtable_iterator_value(phtable_i);
- printk("%8d %s\n", hashtable_iterator_key(phtable_i),
- p ? procstate2str(p->state) : "(null)");
- } while (hashtable_iterator_advance(phtable_i));
+ void print_proc_state(void *item)
+ {
+ struct proc *p = (struct proc*)item;
+ assert(p);
+ printk("%8d %s\n", p->pid, procstate2str(p->state));
}
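+ /* (hash_for_each() presumably walks the pid hash and calls the visitor on
+ * each stored proc; the nested function above is a GCC extension used as
+ * that callback.) */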
+ printk("PID STATE \n");
+ printk("------------------\n");
+ spin_lock(&pid_hash_lock);
+ hash_for_each(pid_hash, print_proc_state);
spin_unlock(&pid_hash_lock);
}
{
int j = 0;
struct proc *p = pid2proc(pid);
+ struct vcore *vc_i;
if (!p) {
printk("Bad PID.\n");
return;
printk("struct proc: %p\n", p);
printk("PID: %d\n", p->pid);
printk("PPID: %d\n", p->ppid);
- printk("State: 0x%08x\n", p->state);
- printk("Refcnt: %d\n", atomic_read(&p->kref.refcount) - 1);
+ printk("State: %s (%p)\n", procstate2str(p->state), p->state);
+ printk("Refcnt: %d\n", atomic_read(&p->p_kref.refcount) - 1);
printk("Flags: 0x%08x\n", p->env_flags);
printk("CR3(phys): 0x%08x\n", p->env_cr3);
printk("Num Vcores: %d\n", p->procinfo->num_vcores);
- printk("Vcoremap:\n");
- for (int i = 0; i < p->procinfo->num_vcores; i++) {
- j = get_busy_vcoreid(p, j);
- printk("\tVcore %d: Pcore %d\n", j, get_pcoreid(p, j));
- j++;
- }
- printk("Resources:\n");
+ printk("Vcore Lists (may be in flux w/o locking):\n----------------------\n");
+ printk("Online:\n");
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list)
+ printk("\tVcore %d -> Pcore %d\n", vcore2vcoreid(p, vc_i), vc_i->pcoreid);
+ printk("Bulk Preempted:\n");
+ TAILQ_FOREACH(vc_i, &p->bulk_preempted_vcs, list)
+ printk("\tVcore %d\n", vcore2vcoreid(p, vc_i));
+ printk("Inactive / Yielded:\n");
+ TAILQ_FOREACH(vc_i, &p->inactive_vcs, list)
+ printk("\tVcore %d\n", vcore2vcoreid(p, vc_i));
+ printk("Resources:\n------------------------\n");
for (int i = 0; i < MAX_NUM_RESOURCES; i++)
printk("\tRes type: %02d, amt wanted: %08d, amt granted: %08d\n", i,
- p->resources[i].amt_wanted, p->resources[i].amt_granted);
+ p->procdata->res_req[i].amt_wanted, p->procinfo->res_grant[i]);
printk("Open Files:\n");
struct files_struct *files = &p->open_files;
spin_lock(&files->lock);
//print_trapframe(&p->env_tf);
/* no locking / unlocking or refcnting */
// spin_unlock(&p->proc_lock);
- kref_put(&p->kref);
+ proc_decref(p);
+}
+
+/* Debugging function, checks what (process, vcore) is supposed to run on this
+ * pcore. Meant to be called from smp_idle() before halting. */
+void check_my_owner(void)
+{
+ struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
+ void shazbot(void *item)
+ {
+ struct proc *p = (struct proc*)item;
+ struct vcore *vc_i;
+ assert(p);
+ spin_lock(&p->proc_lock);
+ TAILQ_FOREACH(vc_i, &p->online_vcs, list) {
+ /* this isn't true, a __startcore could be on the way and we're
+ * already "online" */
+ if (vc_i->pcoreid == core_id()) {
+ /* Immediate message was sent, we should get it when we enable
+ * interrupts, which should cause us to skip cpu_halt() */
+ if (!STAILQ_EMPTY(&pcpui->immed_amsgs))
+ continue;
+ printk("Owned pcore (%d) has no owner, by %08p, vc %d!\n",
+ core_id(), p, vcore2vcoreid(p, vc_i));
+ spin_unlock(&p->proc_lock);
+ spin_unlock(&pid_hash_lock);
+ monitor(0);
+ }
+ }
+ spin_unlock(&p->proc_lock);
+ }
+ assert(!irq_is_enabled());
+ extern int booting;
+ if (!booting && !pcpui->owning_proc) {
+ spin_lock(&pid_hash_lock);
+ hash_for_each(pid_hash, shazbot);
+ spin_unlock(&pid_hash_lock);
+ }
}