Vcore management uses the lists
[akaros.git] / kern / src / process.c
index 4262ee1..32b3201 100644 (file)
@@ -54,8 +54,6 @@ void put_idle_core(uint32_t coreid)
 
 /* Other helpers, implemented later. */
 static void __proc_startcore(struct proc *p, trapframe_t *tf);
-static uint32_t get_free_vcoreid(struct proc *SAFE p, uint32_t prev);
-static uint32_t get_busy_vcoreid(struct proc *SAFE p, uint32_t prev);
 static bool is_mapped_vcore(struct proc *p, uint32_t pcoreid);
 static uint32_t get_vcoreid(struct proc *p, uint32_t pcoreid);
 static uint32_t get_pcoreid(struct proc *p, uint32_t vcoreid);
@@ -241,22 +239,26 @@ void proc_init(void)
 }
 
 /* Be sure you init'd the vcore lists before calling this. */
-void proc_init_procinfo(struct proc* p)
+static void proc_init_procinfo(struct proc* p)
 {
-       memset(&p->procinfo->vcoremap, 0, sizeof(p->procinfo->vcoremap));
-       memset(&p->procinfo->pcoremap, 0, sizeof(p->procinfo->pcoremap));
-       p->procinfo->num_vcores = 0;
-       p->procinfo->coremap_seqctr = SEQCTR_INITIALIZER;
-       // TODO: change these too
        p->procinfo->pid = p->pid;
        p->procinfo->ppid = p->ppid;
-       p->procinfo->tsc_freq = system_timing.tsc_freq;
        // TODO: maybe do something smarter here
 #ifdef __CONFIG_DISABLE_SMT__
        p->procinfo->max_vcores = num_cpus >> 1;
 #else
        p->procinfo->max_vcores = MAX(1,num_cpus-num_mgmtcores);
 #endif /* __CONFIG_DISABLE_SMT__ */
+       p->procinfo->tsc_freq = system_timing.tsc_freq;
+       p->procinfo->heap_bottom = (void*)UTEXT;
+       /* 0'ing the arguments.  Some higher function will need to set them */
+       memset(p->procinfo->argp, 0, sizeof(p->procinfo->argp));
+       memset(p->procinfo->argbuf, 0, sizeof(p->procinfo->argbuf));
+       /* 0'ing the vcore/pcore map.  Will link the vcores later. */
+       memset(&p->procinfo->vcoremap, 0, sizeof(p->procinfo->vcoremap));
+       memset(&p->procinfo->pcoremap, 0, sizeof(p->procinfo->pcoremap));
+       p->procinfo->num_vcores = 0;
+       p->procinfo->coremap_seqctr = SEQCTR_INITIALIZER;
        /* For now, we'll go up to the max num_cpus (at runtime).  In the future,
         * there may be cases where we can have more vcores than num_cpus, but for
         * now we'll leave it like this. */
@@ -265,6 +267,11 @@ void proc_init_procinfo(struct proc* p)
        }
 }
 
+static void proc_init_procdata(struct proc *p)
+{
+       memset(p->procdata, 0, sizeof(struct procdata)); /* zero it all; proc_alloc() inits the syseventring afterwards */
+}
+
 /* Allocates and initializes a process, with the given parent.  Currently
  * writes the *p into **pp, and returns 0 on success, < 0 for an error.
  * Errors include:
@@ -301,8 +308,7 @@ error_t proc_alloc(struct proc **pp, struct proc *parent)
        p->state = PROC_CREATED; /* shouldn't go through state machine for init */
        p->env_flags = 0;
        p->env_entry = 0; // cheating.  this really gets set later
-       p->procinfo->heap_bottom = (void*)UTEXT;
-       p->heap_top = (void*)UTEXT;
+       p->heap_top = (void*)UTEXT;     /* heap_bottom set in proc_init_procinfo */
        memset(&p->resources, 0, sizeof(p->resources));
        memset(&p->env_ancillary_state, 0, sizeof(p->env_ancillary_state));
        memset(&p->env_tf, 0, sizeof(p->env_tf));
@@ -312,9 +318,9 @@ error_t proc_alloc(struct proc **pp, struct proc *parent)
        TAILQ_INIT(&p->online_vcs);
        TAILQ_INIT(&p->bulk_preempted_vcs);
        TAILQ_INIT(&p->inactive_vcs);
-       /* Initialize the contents of the e->procinfo structure */
+       /* Init procinfo/procdata.  Procinfo's argp/argbuf are 0'd */
        proc_init_procinfo(p);
-       /* Initialize the contents of the e->procdata structure */
+       proc_init_procdata(p);
 
        /* Initialize the generic sysevent ring buffer */
        SHARED_RING_INIT(&p->procdata->syseventring);
@@ -482,6 +488,7 @@ static void __set_proc_current(struct proc *p)
 void proc_run(struct proc *p)
 {
        bool self_ipi_pending = FALSE;
+       struct vcore *vc_i;
        spin_lock(&p->proc_lock);
 
        switch (p->state) {
@@ -500,7 +507,9 @@ void proc_run(struct proc *p)
                         * Also, this is the signal used in trap.c to know to save the tf in
                         * env_tf. */
                        __seq_start_write(&p->procinfo->coremap_seqctr);
-                       p->procinfo->num_vcores = 0;
+                       p->procinfo->num_vcores = 0;    /* TODO (VC#) */
+                       /* TODO: For now, we won't count this as an active vcore (on the
+                        * lists).  This gets unmapped in resource.c, and needs work. */
                        __map_vcore(p, 0, core_id()); // sort of.  this needs work.
                        __seq_end_write(&p->procinfo->coremap_seqctr);
                        /* __set_proc_current assumes the reference we give it is for
@@ -530,9 +539,13 @@ void proc_run(struct proc *p)
                                 * an IPI (once we reenable interrupts) and never return. */
                                if (is_mapped_vcore(p, core_id()))
                                        self_ipi_pending = TRUE;
-                               for (int i = 0; i < p->procinfo->num_vcores; i++)
-                                       send_kernel_message(get_pcoreid(p, i), __startcore, (long)p,
+                               /* Send kernel messages to all online vcores (which were added
+                                * to the list and mapped in __proc_give_cores()), making them
+                                * turn online */
+                               TAILQ_FOREACH(vc_i, &p->online_vcs, list) {
+                                       send_kernel_message(vc_i->pcoreid, __startcore, (long)p,
                                                            0, 0, KMSG_ROUTINE);
+                               }
                        } else {
                                warn("Tried to proc_run() an _M with no vcores!");
                        }
@@ -715,34 +728,6 @@ void proc_destroy(struct proc *p)
        return;
 }
 
-/* Helper function.  Starting from prev, it will find the next free vcoreid,
- * which is the next vcore that is not valid.
- * You better hold the lock before calling this. */
-static uint32_t get_free_vcoreid(struct proc *SAFE p, uint32_t prev)
-{
-       uint32_t i;
-       for (i = prev; i < MAX_NUM_CPUS; i++)
-               if (!vcore_is_mapped(p, i))
-                       break;
-       if (i + 1 >= MAX_NUM_CPUS)
-               warn("At the end of the vcorelist.  Might want to check that out.");
-       return i;
-}
-
-/* Helper function.  Starting from prev, it will find the next busy vcoreid,
- * which is the next vcore that is valid.
- * You better hold the lock before calling this. */
-static uint32_t get_busy_vcoreid(struct proc *SAFE p, uint32_t prev)
-{
-       uint32_t i;
-       for (i = prev; i < MAX_NUM_CPUS; i++)
-               if (vcore_is_mapped(p, i))
-                       break;
-       if (i + 1 >= MAX_NUM_CPUS)
-               warn("At the end of the vcorelist.  Might want to check that out.");
-       return i;
-}
-
 /* Helper function.  Is the given pcore a mapped vcore?  No locking involved, be
  * careful. */
 static bool is_mapped_vcore(struct proc *p, uint32_t pcoreid)
@@ -795,7 +780,7 @@ void __proc_yield_s(struct proc *p, struct trapframe *tf)
 void proc_yield(struct proc *SAFE p, bool being_nice)
 {
        uint32_t vcoreid = get_vcoreid(p, core_id());
-       struct vcore *vc = &p->procinfo->vcoremap[vcoreid];
+       struct vcore *vc = vcoreid2vcore(p, vcoreid);
 
        /* no reason to be nice, return */
        if (being_nice && !vc->preempt_pending)
@@ -828,8 +813,12 @@ void proc_yield(struct proc *SAFE p, bool being_nice)
                                return;
                        }
                        __seq_start_write(&p->procinfo->coremap_seqctr);
-                       // give up core
-                       __unmap_vcore(p, get_vcoreid(p, core_id()));
+                       /* Remove from the online list, add to the inactive list, and unmap
+                        * the vcore, which gives up the core. */
+                       TAILQ_REMOVE(&p->online_vcs, vc, list);
+                       TAILQ_INSERT_HEAD(&p->inactive_vcs, vc, list);
+                       __unmap_vcore(p, vcoreid);
+                       /* Adjust implied resource desires */
                        p->resources[RES_CORES].amt_granted = --(p->procinfo->num_vcores);
                        if (!being_nice)
                                p->resources[RES_CORES].amt_wanted = p->procinfo->num_vcores;
@@ -933,12 +922,9 @@ void __proc_preempt_warn(struct proc *p, uint32_t vcoreid, uint64_t when)
  * care about the mapping (and you should). */
 void __proc_preempt_warnall(struct proc *p, uint64_t when)
 {
-       uint32_t active_vcoreid = 0;
-       for (int i = 0; i < p->procinfo->num_vcores; i++) {
-               active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
-               __proc_preempt_warn(p, active_vcoreid, when);
-               active_vcoreid++;
-       }
+       struct vcore *vc_i;
+       TAILQ_FOREACH(vc_i, &p->online_vcs, list)
+               __proc_preempt_warn(p, vcore2vcoreid(p, vc_i), when);
        /* TODO: consider putting in some lookup place for the alarm to find it.
         * til then, it'll have to scan the vcoremap (O(n) instead of O(m)) */
 }
@@ -963,12 +949,9 @@ bool __proc_preempt_all(struct proc *p)
        /* instead of doing this, we could just preempt_served all possible vcores,
         * and not just the active ones.  We would need to sort out a way to deal
         * with stale preempt_serveds first.  This might be just as fast anyways. */
-       uint32_t active_vcoreid = 0;
-       for (int i = 0; i < p->procinfo->num_vcores; i++) {
-               active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
-               p->procinfo->vcoremap[active_vcoreid].preempt_served = TRUE;
-               active_vcoreid++;
-       }
+       struct vcore *vc_i;
+       TAILQ_FOREACH(vc_i, &p->online_vcs, list)
+               vc_i->preempt_served = TRUE;
        return __proc_take_allcores(p, __preempt, (long)p, 0, 0);
 }
 
@@ -1107,9 +1090,9 @@ struct vcore *vcoreid2vcore(struct proc *p, uint32_t vcoreid)
  *
  * WARNING: You must hold the proc_lock before calling this! */
 bool __proc_give_cores(struct proc *SAFE p, uint32_t *pcorelist, size_t num)
-{ TRUSTEDBLOCK
+{
        bool self_ipi_pending = FALSE;
-       uint32_t free_vcoreid = 0;
+       struct vcore *new_vc;
        switch (p->state) {
                case (PROC_RUNNABLE_S):
                case (PROC_RUNNING_S):
@@ -1131,12 +1114,16 @@ bool __proc_give_cores(struct proc *SAFE p, uint32_t *pcorelist, size_t num)
                        }
                        // add new items to the vcoremap
                        __seq_start_write(&p->procinfo->coremap_seqctr);
+                       /* TODO: consider bulk preemption */
                        for (int i = 0; i < num; i++) {
-                               // find the next free slot, which should be the next one
-                               free_vcoreid = get_free_vcoreid(p, free_vcoreid);
-                               printd("setting vcore %d to pcore %d\n", free_vcoreid,
+                               new_vc = TAILQ_FIRST(&p->inactive_vcs);
+                               /* there are cases where this isn't true; deal with it later */
+                               assert(new_vc);
+                               printd("setting vcore %d to pcore %d\n", vcore2vcoreid(p, new_vc),
                                       pcorelist[i]);
-                               __map_vcore(p, free_vcoreid, pcorelist[i]);
+                               TAILQ_REMOVE(&p->inactive_vcs, new_vc, list);
+                               TAILQ_INSERT_TAIL(&p->online_vcs, new_vc, list);
+                               __map_vcore(p, vcore2vcoreid(p, new_vc), pcorelist[i]);
                                p->procinfo->num_vcores++;
                        }
                        __seq_end_write(&p->procinfo->coremap_seqctr);
@@ -1147,10 +1134,14 @@ bool __proc_give_cores(struct proc *SAFE p, uint32_t *pcorelist, size_t num)
                        proc_incref(p, num);
                        __seq_start_write(&p->procinfo->coremap_seqctr);
                        for (int i = 0; i < num; i++) {
-                               free_vcoreid = get_free_vcoreid(p, free_vcoreid);
-                               printd("setting vcore %d to pcore %d\n", free_vcoreid,
+                               new_vc = TAILQ_FIRST(&p->inactive_vcs);
+                               /* there are cases where this isn't true; deal with it later */
+                               assert(new_vc);
+                               printd("setting vcore %d to pcore %d\n", vcore2vcoreid(p, new_vc),
                                       pcorelist[i]);
-                               __map_vcore(p, free_vcoreid, pcorelist[i]);
+                               TAILQ_REMOVE(&p->inactive_vcs, new_vc, list);
+                               TAILQ_INSERT_TAIL(&p->online_vcs, new_vc, list);
+                               __map_vcore(p, vcore2vcoreid(p, new_vc), pcorelist[i]);
                                p->procinfo->num_vcores++;
                                send_kernel_message(pcorelist[i], __startcore, (long)p, 0, 0,
                                                    KMSG_ROUTINE);
@@ -1216,6 +1207,11 @@ bool __proc_take_cores(struct proc *p, uint32_t *pcorelist, size_t num,
                // while ugly, this is done to facilitate merging with take_all_cores
                pcoreid = get_pcoreid(p, vcoreid);
                assert(pcoreid == pcorelist[i]);
+               /* Change lists for the vcore.  We do this before either unmapping or
+                * sending the message, so the lists represent what will be very soon
+                * (before we unlock, the messages are in flight). */
+               TAILQ_REMOVE(&p->online_vcs, vcoreid2vcore(p, vcoreid), list);
+               TAILQ_INSERT_HEAD(&p->inactive_vcs, vcoreid2vcore(p, vcoreid), list);
                if (message) {
                        if (pcoreid == core_id())
                                self_ipi_pending = TRUE;
@@ -1244,7 +1240,7 @@ bool __proc_take_cores(struct proc *p, uint32_t *pcorelist, size_t num,
 bool __proc_take_allcores(struct proc *p, amr_t message, long arg0, long arg1,
                           long arg2)
 {
-       uint32_t active_vcoreid = 0, pcoreid;
+       struct vcore *vc_i, *vc_temp;
        bool self_ipi_pending = FALSE;
        switch (p->state) {
                case (PROC_RUNNABLE_M):
@@ -1261,23 +1257,24 @@ bool __proc_take_allcores(struct proc *p, amr_t message, long arg0, long arg1,
        assert(num_idlecores + p->procinfo->num_vcores <= num_cpus); // sanity
        spin_unlock(&idle_lock);
        __seq_start_write(&p->procinfo->coremap_seqctr);
-       for (int i = 0; i < p->procinfo->num_vcores; i++) {
-               // find next active vcore
-               active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
-               pcoreid = get_pcoreid(p, active_vcoreid);
+       TAILQ_FOREACH_SAFE(vc_i, &p->online_vcs, list, vc_temp) {
+               /* Change lists for the vcore.  We do this before either unmapping or
+                * sending the message, so the lists represent what will be very soon
+                * (before we unlock, the messages are in flight). */
+               TAILQ_REMOVE(&p->online_vcs, vc_i, list);
+               TAILQ_INSERT_HEAD(&p->inactive_vcs, vc_i, list);
                if (message) {
-                       if (pcoreid == core_id())
+                       if (vc_i->pcoreid == core_id())
                                self_ipi_pending = TRUE;
-                       send_kernel_message(pcoreid, message, arg0, arg1, arg2,
+                       send_kernel_message(vc_i->pcoreid, message, arg0, arg1, arg2,
                                            KMSG_ROUTINE);
                } else {
                        /* if there was a msg, the vcore is unmapped on the receive side.
                         * o/w, we need to do it here. */
-                       __unmap_vcore(p, active_vcoreid);
+                       __unmap_vcore(p, vcore2vcoreid(p, vc_i));
                }
-               // give the pcore back to the idlecoremap
-               put_idle_core(pcoreid);
-               active_vcoreid++; // for the next loop, skip the one we just used
+               /* give the pcore back to the idlecoremap */
+               put_idle_core(vc_i->pcoreid);
        }
        p->procinfo->num_vcores = 0;
        __seq_end_write(&p->procinfo->coremap_seqctr);
@@ -1387,20 +1384,16 @@ void switch_back(struct proc *new_p, struct proc *old_proc)
  * immediate message. */
 void __proc_tlbshootdown(struct proc *p, uintptr_t start, uintptr_t end)
 {
-       uint32_t active_vcoreid = 0;
+       struct vcore *vc_i;
        switch (p->state) {
                case (PROC_RUNNING_S):
                        tlbflush();
                        break;
                case (PROC_RUNNING_M):
                        /* TODO: (TLB) sanity checks and rounding on the ranges */
-                       for (int i = 0; i < p->procinfo->num_vcores; i++) {
-                               /* find next active vcore */
-                               active_vcoreid = get_busy_vcoreid(p, active_vcoreid);
-                               send_kernel_message(get_pcoreid(p, active_vcoreid),
-                                                   __tlbshootdown, start, end,
+                       TAILQ_FOREACH(vc_i, &p->online_vcs, list) {
+                               send_kernel_message(vc_i->pcoreid, __tlbshootdown, start, end,
                                                    0, KMSG_IMMEDIATE);
-                               active_vcoreid++; /* next loop, skip the one we just used */
                        }
                        break;
                case (PROC_DYING):
@@ -1605,13 +1598,7 @@ void print_proc_info(pid_t pid)
        printk("Flags: 0x%08x\n", p->env_flags);
        printk("CR3(phys): 0x%08x\n", p->env_cr3);
        printk("Num Vcores: %d\n", p->procinfo->num_vcores);
-       printk("Vcoremap (old style):\n");
-       for (int i = 0; i < p->procinfo->num_vcores; i++) {
-               j = get_busy_vcoreid(p, j);
-               printk("\tVcore %d: Pcore %d\n", j, get_pcoreid(p, j));
-               j++;
-       }
-       printk("Vcore Lists:\n----------------------\n");
+       printk("Vcore Lists (may be in flux w/o locking):\n----------------------\n");
        printk("Online:\n");
        TAILQ_FOREACH(vc_i, &p->online_vcs, list)
                printk("\tVcore %d -> Pcore %d\n", vcore2vcoreid(p, vc_i), vc_i->pcoreid);