/* Copyright (c) 2011 The Regents of the University of California
 * Barret Rhoden <brho@cs.berkeley.edu>
 * See LICENSE for details.
 *
 * Kernel utility functions for sending events and notifications (IPIs) to
 * processes. */
20 /* Userspace could give us a vcoreid that causes us to compute a vcpd that is
21 * outside procdata. If we hit UWLIM, then we've gone farther than we should.
22 * We check the vcoreid, instead of the resulting address, to avoid issues like
23 * address wrap-around. */
24 static bool vcoreid_is_safe(uint32_t vcoreid)
26 /* MAX_NUM_VCORES == MAX_NUM_CORES (check procinfo/procdata) */
27 return vcoreid < MAX_NUM_CORES;
30 /* Note these three helpers return the user address of the mbox, not the KVA.
31 * Load current to access this, and it will work for any process. */
32 static struct event_mbox *get_vcpd_mbox_priv(uint32_t vcoreid)
34 return &__procdata.vcore_preempt_data[vcoreid].ev_mbox_private;
37 static struct event_mbox *get_vcpd_mbox_pub(uint32_t vcoreid)
39 return &__procdata.vcore_preempt_data[vcoreid].ev_mbox_public;
42 static struct event_mbox *get_vcpd_mbox(uint32_t vcoreid, int ev_flags)
44 if (ev_flags & EVENT_VCORE_PRIVATE)
45 return get_vcpd_mbox_priv(vcoreid);
47 return get_vcpd_mbox_pub(vcoreid);
50 /* Can we message the vcore? (Will it check its messages). Note this checks
51 * procdata via the user pointer. */
52 static bool can_msg_vcore(uint32_t vcoreid)
54 struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid];
55 return atomic_read(&vcpd->flags) & VC_CAN_RCV_MSG;
58 /* Says a vcore can be messaged. Only call this once you are sure this is true
59 * (holding the proc_lock, etc). */
60 static void set_vcore_msgable(uint32_t vcoreid)
62 struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid];
63 atomic_or(&vcpd->flags, VC_CAN_RCV_MSG);
66 /* Posts a message to the mbox, subject to flags. Feel free to send 0 for the
67 * flags if you don't want to give them the option of EVENT_NOMSG (which is what
68 * we do when sending an indirection event). Make sure that if mbox is a user
69 * pointer, that you've checked it *and* have that processes address space
70 * loaded. This can get called with a KVA for mbox. */
71 static void post_ev_msg(struct proc *p, struct event_mbox *mbox,
72 struct event_msg *msg, int ev_flags)
74 printd("[kernel] Sending event type %d to mbox %p\n", msg->ev_type, mbox);
77 /* If they just want a bit (NOMSG), just set the bit */
78 if (ev_flags & EVENT_NOMSG) {
79 SET_BITMASK_BIT_ATOMIC(mbox->ev_bitmap, msg->ev_type);
81 mbox->ev_check_bits = TRUE;
83 send_ucq_msg(&mbox->ev_msgs, p, msg);
87 /* Helper: use this when sending a message to a VCPD mbox. It just posts to the
88 * ev_mbox and sets notif pending. Note this uses a userspace address for the
89 * VCPD (though not a user's pointer). */
90 static void post_vc_msg(struct proc *p, uint32_t vcoreid,
91 struct event_mbox *ev_mbox, struct event_msg *ev_msg,
94 struct preempt_data *vcpd = &__procdata.vcore_preempt_data[vcoreid];
95 post_ev_msg(p, ev_mbox, ev_msg, ev_flags);
96 /* Set notif pending so userspace doesn't miss the message while yielding */
97 wmb(); /* Ensure ev_msg write is before notif_pending */
98 /* proc_notify() also sets this, but the ev_q might not have requested an
99 * IPI, so we have to do it here too. */
100 vcpd->notif_pending = TRUE;
103 /* Helper: will IPI / proc_notify if the flags say so. We also check to make
104 * sure it is mapped (slight optimization) */
105 static void try_notify(struct proc *p, uint32_t vcoreid, int ev_flags)
107 /* Note this is an unlocked-peek at the vcoremap */
108 if ((ev_flags & EVENT_IPI) && vcore_is_mapped(p, vcoreid))
109 proc_notify(p, vcoreid);
112 /* Helper: sends the message and an optional IPI to the vcore. Sends to the
113 * public mbox. This is meant for spammy messages. */
114 static void spam_vcore(struct proc *p, uint32_t vcoreid,
115 struct event_msg *ev_msg, int ev_flags)
117 post_vc_msg(p, vcoreid, get_vcpd_mbox_pub(vcoreid), ev_msg, ev_flags);
118 try_notify(p, vcoreid, ev_flags);
121 /* Attempts to message a vcore that may or may not have VC_CAN_RCV_MSG set. If
122 * so, we'll post the message and the message will eventually get dealt with
123 * (when the vcore runs or when it is preempte-recovered). */
124 static bool try_spam_vcore(struct proc *p, uint32_t vcoreid,
125 struct event_msg *ev_msg, int ev_flags)
127 /* Not sure if we can or not, so check before spamming. Technically, the
128 * only critical part is that we __alert, then check can_alert. */
129 if (can_msg_vcore(vcoreid)) {
130 spam_vcore(p, vcoreid, ev_msg, ev_flags);
131 wrmb(); /* prev write (notif_pending) must come before following reads*/
132 if (can_msg_vcore(vcoreid))
138 /* Helper: will try to message (INDIR/IPI) a list member (lists of vcores). We
139 * use this on the online and bulk_preempted vcore lists. If this succeeds in
140 * alerting a vcore on the list, it'll return TRUE. We need to be careful here,
141 * since we're reading a list that could be concurrently modified. The
142 * important thing is that we can always fail if we're unsure (such as with
143 * lists being temporarily empty). The caller will be able to deal with it via
144 * the ultimate fallback. */
145 static bool spam_list_member(struct vcore_tailq *list, struct proc *p,
146 struct event_msg *ev_msg, int ev_flags)
148 struct vcore *vc, *vc_first;
151 vc = TAILQ_FIRST(list);
152 /* If the list appears empty, we'll bail out (failing) after the loop. */
154 vcoreid = vcore2vcoreid(p, vc);
155 /* post the alert. Not using the try_spam_vcore() helper since I want
156 * something more customized for the lists. */
157 spam_vcore(p, vcoreid, ev_msg, ev_flags);
158 wrmb(); /* prev write (notif_pending) must come before following reads*/
159 /* I used to check can_msg_vcore(vcoreid) here, but that would make
160 * spamming list members unusable for MUST_RUN scenarios.
162 * Regardless, if they are still the first on the list, then they are
163 * still going to get the message. For the online list, proc_yield()
164 * will return them to userspace (where they will get the message)
165 * because __alert_vcore() set notif_pending. For the BP list, they
166 * will either be turned on later, or have a preempt message sent about
169 * We race on list membership (and not exclusively VC_CAN_RCV_MSG, so
170 * that when it fails we can get a new vcore to try (or know WHP there
172 vc_first = TAILQ_FIRST(list);
175 /* At this point, the list has changed and the vcore we tried yielded,
176 * so we try the *new* list head. Track loops for sanity reasons. */
178 warn("Too many (%d) attempts to find a vcore, failing!", loops);
179 return FALSE; /* always safe to fail! */
181 /* Get set up for your attack run! */
187 /* This makes sure ev_msg is sent to some vcore, preferring vcoreid.
189 * One of the goals of SPAM_INDIR (and this func) is to allow processes to yield
190 * cores without fear of losing messages. Even when yielding and getting
191 * preempted, if your message is spammed, it will get to some vcore. If
192 * MUST_RUN is set, it'll get to a running vcore. Messages that you send like
193 * this must be able to handle spurious reads, since more than one vcore is
194 * likely to get the message and handle it.
196 * We try the desired vcore, using VC_CAN_RCV_MSG. Failing that, we'll search
197 * the online and then the bulk_preempted lists. These lists serve as a way to
198 * find likely messageable vcores. spam_list_member() helps us with them,
199 * failing if anything seems to go wrong. At which point we just lock and try
200 * to deal with things. In that scenario, we most likely would need to lock
201 * anyway to wake up the process (was WAITING).
203 * One tricky thing with sending to the bulk_preempt list is that we may want to
204 * send a message about a (bulk) preemption to someone on that list. This works
205 * since a given vcore that was preempted will be removed from that list before
206 * we try to send_event() (in theory, there isn't code that can send that event
207 * yet). Someone else will get the event and wake up the preempted vcore. */
208 static void spam_public_msg(struct proc *p, struct event_msg *ev_msg,
209 uint32_t vcoreid, int ev_flags)
212 if (ev_flags & EVENT_VCORE_MUST_RUN) {
213 /* Could check for waiting and skip these spams, which will fail. Could
214 * also skip trying for vcoreid, and just spam any old online VC. */
215 if (vcore_is_mapped(p, vcoreid)) { /* check, signal, check again */
216 spam_vcore(p, vcoreid, ev_msg, ev_flags);
217 wrmb(); /* notif_pending write must come before following read */
218 if (vcore_is_mapped(p, vcoreid))
221 if (spam_list_member(&p->online_vcs, p, ev_msg, ev_flags))
223 goto ultimate_fallback;
225 /* First, try posting to the desired vcore */
226 if (try_spam_vcore(p, vcoreid, ev_msg, ev_flags))
228 /* If the process is WAITING, let's just jump to the fallback */
229 if (p->state == PROC_WAITING)
230 goto ultimate_fallback;
231 /* If we're here, the desired vcore is unreachable, but the process is
232 * probably RUNNING_M (online_vs) or RUNNABLE_M (bulk preempted or recently
233 * woken up), so we'll need to find another vcore. */
234 if (spam_list_member(&p->online_vcs, p, ev_msg, ev_flags))
236 if (spam_list_member(&p->bulk_preempted_vcs, p, ev_msg, ev_flags))
238 /* Last chance, let's check the head of the inactives. It might be
239 * alertable (the kernel set it earlier due to an event, or it was a
240 * bulk_preempt that didn't restart), and we can avoid grabbing the
242 vc = TAILQ_FIRST(&p->inactive_vcs);
243 if (vc) { /* might be none in rare circumstances */
244 if (try_spam_vcore(p, vcore2vcoreid(p, vc), ev_msg, ev_flags)) {
245 /* Need to ensure the proc wakes up, but only if it was WAITING.
246 * One way for this to happen is if a normal vcore was preempted
247 * right as another vcore was yielding, and the preempted
248 * message was sent after the last vcore yielded (which caused
249 * us to be WAITING */
250 if (p->state == PROC_WAITING)
251 proc_wakeup(p); /* internally, this double-checks WAITING */
256 /* At this point, we can't find one. This could be due to a (hopefully
257 * rare) weird yield/request storm, or more commonly because the lists were
258 * empty and the process is simply WAITING (yielded all of its vcores and is
259 * waiting on an event). Time for the ultimate fallback: locking. Note
260 * that when we __alert_vcore(), there is a chance we need to mmap, which
261 * grabs the vmr_lock and pte_lock. */
262 spin_lock(&p->proc_lock);
263 if (p->state != PROC_WAITING) {
264 /* We need to check the online and bulk_preempt lists again, now that we are
265 * sure no one is messing with them. If we're WAITING, we can skip
266 * these (or assert they are empty!). */
267 vc = TAILQ_FIRST(&p->online_vcs);
269 /* there's an online vcore, so just alert it (we know it isn't going
270 * anywhere), and return */
271 spam_vcore(p, vcore2vcoreid(p, vc), ev_msg, ev_flags);
272 spin_unlock(&p->proc_lock);
275 vc = TAILQ_FIRST(&p->bulk_preempted_vcs);
277 /* the process is bulk preempted, similar deal to above */
278 spam_vcore(p, vcore2vcoreid(p, vc), ev_msg, ev_flags);
279 spin_unlock(&p->proc_lock);
283 /* At this point, we're sure all vcores are yielded, though we might not be
284 * WAITING. Post to the first on the inactive list (which is the one that
285 * will definitely be woken up) */
286 vc = TAILQ_FIRST(&p->inactive_vcs);
288 spam_vcore(p, vcore2vcoreid(p, vc), ev_msg, ev_flags);
289 /* Set the vcore's alertable flag, to short circuit our last ditch effort
291 set_vcore_msgable(vcore2vcoreid(p, vc));
292 /* The first event to catch the process with no online/bp vcores will need
293 * to wake it up. (We could be RUNNABLE_M here if another event already woke
294 * us.) and we didn't get lucky with the penultimate fallback.
295 * proc_wakeup (and __proc_wakeup()) will check for WAITING. */
296 spin_unlock(&p->proc_lock);
301 /* Helper: sends an indirection event for an ev_q, preferring vcoreid */
302 static void send_indir(struct proc *p, struct event_queue *ev_q,
305 struct event_msg local_msg = {0};
306 /* If an alert is already pending and they don't want repeats, just return.
307 * One of the few uses of NOTHROTTLE will be for preempt_msg ev_qs. Ex: an
308 * INDIR was already sent to the preempted vcore, then alert throttling
309 * would stop another vcore from getting the message about the original
311 if (!(ev_q->ev_flags & EVENT_NOTHROTTLE) && (ev_q->ev_alert_pending))
313 /* We'll eventually get an INDIR through, so don't send any more til
314 * userspace toggles this. Regardless of other writers to this flag, we
315 * eventually send an alert that causes userspace to turn throttling off
316 * again (before handling all of the ev_q's events).
318 * This will also squelch IPIs, since there's no reason to send the IPI if
319 * the INDIR is still un-acknowledged. The vcore is either in vcore
320 * context, attempting to deal with the INDIR, or offline. This statement
321 * is probably true. */
322 ev_q->ev_alert_pending = TRUE;
323 wmb(); /* force this write to happen before any event writes */
324 local_msg.ev_type = EV_EVENT;
325 local_msg.ev_arg3 = ev_q;
326 /* If we're not spamming indirs, just send and be done with it.
328 * It's possible that the user does not want to poll their evq and wants an
329 * INDIR, but also doesn't care about sleeping or otherwise not getting the
330 * message right away. The INDIR could sit in the VCPD of a vcore that
331 * doesn't run for a while. Perhaps if the app always made sure VC 0 was
332 * on when it was running at all, and sent the INDIR there. Or there was a
333 * per-vc evq that only needed to be handled when the VC turned on. This
334 * gets at another aspect of INDIRs, other than it's need for "only once"
335 * operation: maybe the mbox type isn't a UCQ (like the VCPD mboxes). */
336 if (!(ev_q->ev_flags & EVENT_SPAM_INDIR)) {
337 spam_vcore(p, vcoreid, &local_msg, ev_q->ev_flags);
340 /* At this point, we actually want to send and spam an INDIR.
341 * This will guarantee the message makes it to some vcore. For flags, we
342 * only want to send flags relevant to spamming messages. */
343 spam_public_msg(p, &local_msg, vcoreid, ev_q->ev_flags & EVENT_SPAM_FLAGS);
346 /* Send an event to ev_q, based on the parameters in ev_q's flag. We don't
347 * accept null ev_qs, since the caller ought to be checking before bothering to
348 * make a msg and send it to the event_q. Vcoreid is who the kernel thinks the
349 * message ought to go to (for IPIs). Appropriate for things like
350 * EV_PREEMPT_PENDING, where we tell the affected vcore. To have the message go
351 * where the kernel suggests, set EVENT_VCORE_APPRO(priate). */
352 void send_event(struct proc *p, struct event_queue *ev_q, struct event_msg *msg,
355 struct proc *old_proc;
356 struct event_mbox *ev_mbox = 0;
357 assert(!in_irq_ctx(&per_cpu_info[core_id()]));
359 if (p->state == PROC_DYING)
361 printd("[kernel] sending msg to proc %p, ev_q %p\n", p, ev_q);
363 warn("[kernel] Null ev_q - kernel code should check before sending!");
366 if (!is_user_rwaddr(ev_q, sizeof(struct event_queue))) {
367 /* Ought to kill them, just warn for now */
368 printk("[kernel] Illegal addr for ev_q\n");
371 /* This should be caught by "future technology" that can tell when the
372 * kernel PFs on the user's behalf. For now, we catch common userspace bugs
373 * (had this happen a few times). */
374 if (!PTE_ADDR(ev_q)) {
375 printk("[kernel] Bad addr %p for ev_q\n", ev_q);
378 /* ev_q is a user pointer, so we need to make sure we're in the right
380 old_proc = switch_to(p);
381 /* If we're an _S, just spam vcore0, and wake up if necessary. */
382 if (!__proc_is_mcp(p)) {
383 spam_vcore(p, 0, msg, ev_q->ev_flags);
384 wrmb(); /* don't let the notif_pending write pass the state read */
385 /* using the same pattern as in spam_public (which can have multiple
386 * unblock callbacks */
387 if (p->state == PROC_WAITING)
391 /* Get the vcoreid that we'll message (if appropriate). For INDIR and
392 * SPAMMING, this is the first choice of a vcore, but other vcores might get
393 * it. Common case is !APPRO and !ROUNDROBIN. Note we are clobbering the
394 * vcoreid parameter. */
395 if (!(ev_q->ev_flags & EVENT_VCORE_APPRO))
396 vcoreid = ev_q->ev_vcore; /* use the ev_q's vcoreid */
397 /* Note that RR overwrites APPRO */
398 if (ev_q->ev_flags & EVENT_ROUNDROBIN) {
399 /* Pick a vcore, round-robin style. Assuming ev_vcore was the previous
400 * one used. Note that round-robin overrides the passed-in vcoreid.
401 * Also note this may be 'wrong' if num_vcores changes. */
402 vcoreid = (ev_q->ev_vcore + 1) % p->procinfo->num_vcores;
403 ev_q->ev_vcore = vcoreid;
405 if (!vcoreid_is_safe(vcoreid)) {
406 /* Ought to kill them, just warn for now */
407 printk("[kernel] Vcoreid %d unsafe! (too big?)\n", vcoreid);
410 /* If we're a SPAM_PUBLIC, they just want us to spam the message. Note we
411 * don't care about the mbox, since it'll go to VCPD public mboxes, and
412 * we'll prefer to send it to whatever vcoreid we determined at this point
413 * (via APPRO or whatever). */
414 if (ev_q->ev_flags & EVENT_SPAM_PUBLIC) {
415 spam_public_msg(p, msg, vcoreid, ev_q->ev_flags & EVENT_SPAM_FLAGS);
418 /* We aren't spamming and we know the default vcore, and now we need to
419 * figure out which mbox to use. If they provided an mbox, we'll use it.
420 * If not, we'll use a VCPD mbox (public or private, depending on the
422 ev_mbox = ev_q->ev_mbox;
424 ev_mbox = get_vcpd_mbox(vcoreid, ev_q->ev_flags);
425 /* At this point, we ought to have the right mbox to send the msg to, and
426 * which vcore to alert (IPI/INDIR) (if applicable). The mbox could be the
427 * vcore's vcpd ev_mbox. */
429 /* This shouldn't happen any more, this is more for sanity's sake */
430 warn("[kernel] ought to have an mbox by now!");
433 /* Even if we're using an mbox in procdata (VCPD), we want a user pointer */
434 if (!is_user_rwaddr(ev_mbox, sizeof(struct event_mbox))) {
435 /* Ought to kill them, just warn for now */
436 printk("[kernel] Illegal addr for ev_mbox\n");
439 /* We used to support no msgs, but quit being lazy and send a 'msg'. If the
440 * ev_q is a NOMSG, we won't actually memcpy or anything, it'll just be a
441 * vehicle for sending the ev_type. */
443 post_ev_msg(p, ev_mbox, msg, ev_q->ev_flags);
444 wmb(); /* ensure ev_msg write is before alerting the vcore */
445 /* Prod/alert a vcore with an IPI or INDIR, if desired. INDIR will also
446 * call try_notify (IPI) later */
447 if (ev_q->ev_flags & EVENT_INDIR) {
448 send_indir(p, ev_q, vcoreid);
450 /* they may want an IPI despite not wanting an INDIR */
451 try_notify(p, vcoreid, ev_q->ev_flags);
455 /* Return to the old address space. */
456 switch_back(p, old_proc);
459 /* Send an event for the kernel event ev_num. These are the "one sided" kernel
460 * initiated events, that require a lookup of the ev_q in procdata. This is
461 * roughly equivalent to the old "proc_notify()" */
462 void send_kernel_event(struct proc *p, struct event_msg *msg, uint32_t vcoreid)
464 uint16_t ev_num = msg->ev_type;
465 assert(ev_num < MAX_NR_EVENT); /* events start at 0 */
466 struct event_queue *ev_q = p->procdata->kernel_evts[ev_num];
467 /* linux would put a rmb_depends() here too, i think. */
469 send_event(p, ev_q, msg, vcoreid);
472 /* Writes the msg to the vcpd mbox of the vcore. If you want the private mbox,
473 * send in the ev_flag EVENT_VCORE_PRIVATE. If not, the message could
474 * be received by other vcores if the given vcore is offline/preempted/etc.
475 * Whatever other flags you pass in will get sent to post_ev_msg. Currently,
476 * the only one that will get looked at is NO_MSG (set a bit).
478 * This needs to load current (switch_to), but doesn't need to care about what
479 * the process wants. Note this isn't commonly used - just the monitor and
480 * sys_self_notify(). */
481 void post_vcore_event(struct proc *p, struct event_msg *msg, uint32_t vcoreid,
484 /* Need to set p as current to post the event */
485 struct per_cpu_info *pcpui = &per_cpu_info[core_id()];
486 struct proc *old_proc = switch_to(p);
487 /* *ev_mbox is the user address of the vcpd mbox */
488 post_vc_msg(p, vcoreid, get_vcpd_mbox(vcoreid, ev_flags), msg, ev_flags);
489 switch_back(p, old_proc);
492 /* Attempts to send a posix signal to the process. If they do not have an ev_q
493 * registered for EV_POSIX_SIGNAL, then nothing will happen. */
494 void send_posix_signal(struct proc *p, int sig_nr)
496 struct event_msg local_msg = {0};
497 local_msg.ev_type = EV_POSIX_SIGNAL;
498 local_msg.ev_arg1 = sig_nr;
499 send_kernel_event(p, &local_msg, 0);