1 /* Copyright (c) 2013 The Regents of the University of California
2 * Copyright (c) 2016 Google Inc.
3 * Barret Rhoden <brho@cs.berkeley.edu>
4 * See LICENSE for details.
6 * #alarm: a device for registering per-process alarms.
8 * Allows a process to set up alarms, which they can tap to get events at a certain TSC time.
11 * Every process has their own alarm sets and view of #alarm; gen and friends
12 * look at current's alarmset when it is time to gen or open a file.
14 * To use, first open #alarm/clone, and that gives you an alarm directory aN,
15 * where N is the ID of the alarm. The FD you get from clone points to 'ctl'.
17 * 'ctl' takes no commands. You can read it to get the ID. That's it.
19 * 'timer' takes the hex string value (in absolute tsc time) to fire the alarm.
20 * Writing 0 disables the alarm. You can read 'timer' to get the next time it
21 * will fire, in TSC time. 0 means it is disabled. To find out about the timer
22 * firing, put an FD tap on 'timer' for FDTAP_FILT_WRITTEN.
24 * 'period' takes the hex string value (in TSC ticks) for the period of the
25 * alarm. If non-zero, the alarm will rearm when it fires. You can read the period.
28 * Reading the 'count' file will return the number of times the alarm has
29 * expired since the last read or the last write to 'timer'. If this is 0, then
30 * read() will block or EAGAIN. You cannot write 'count'. You can tap it for
31 * FDTAP_FILT_READABLE.
33 * While each process has a separate view of #alarm, it is possible to post a
34 * chan to Qctl or Qtimer to #srv. If another proc has your Qtimer, it can set
35 * it in the past, thereby triggering an immediate event. More clever than useful.
38 * Notes on refcnting (the trickier parts here):
39 * - the proc_alarms have counted references to their proc
40 * proc won't free til all alarms are closed, which is fine. we close
41 * all files in destroy. if a proc drops a chan in srv, the proc will stay
42 * alive because the alarm is alive - til that chan is closed (srvremove)
44 * other shady ways to keep a chan alive: cd to it! if it is ., we'd
45 * keep a ref around. however, only alarmdir *files* grab refs, not directories.
48 * - proc_alarms are kref'd, since there can be multiple chans per alarm
49 * the only thing that keeps an alarm alive is a chan on a CTL or TIMER (or
50 * other file). when you cloned, you got back an open CTL, which keeps the
51 * alarm (and the dir) alive.
53 * we need to be careful generating krefs, in case alarms are concurrently
54 * released and removed from the lists. just like with procs and pid2proc,
55 * we need to sync with the source of the kref. */
63 #include <sys/queue.h>
/* Forward declaration of this device's devtab entry (defined at the bottom of
 * the file), so that devname() can read its name field. */
71 struct dev alarmdevtab;
/* Returns the name stored in alarmdevtab; used when building directory entries
 * and error messages. */
73 static char *devname(void)
75 return alarmdevtab.name;
/* qid path type of an alarm's 'timer' file. */
83 #define Qtimer 5 /* Qctl + 1 */
87 /* This paddr/kaddr is a bit dangerous. it'll work so long as we don't need all
88 * 64 bits for a physical address (48 is the current norm on x86_64). */
/* A qid path packs two fields: the low ADDR_SHIFT bits hold the file type and
 * the bits above them hold PADDR() of the proc_alarm.
 *   QID2A(q)       - the proc_alarm pointer stored in q (KADDR of the upper
 *                    bits)
 *   TYPE(q)        - the type stored in the low ADDR_SHIFT bits of q
 *   QID(ptr, type) - builds a path from a proc_alarm pointer and a type;
 *                    'type' is expanded unparenthesized, so pass a simple
 *                    expression */
90 #define QID2A(q) ((struct proc_alarm*)KADDR(((q).path >> ADDR_SHIFT)))
91 #define TYPE(q) ((q).path & ((1 << ADDR_SHIFT) - 1))
92 #define QID(ptr, type) ((PADDR(ptr) << ADDR_SHIFT) | type)
/* eve.name is passed to every devdir() call in this file (presumably as the
 * owner of the entry - confirm against devdir()). */
93 extern struct username eve;
/* kref release method for a proc_alarm (installed by kref_init() in
 * alarmopen).  Unlinks the alarm from its proc's alarmset list under the
 * alarmset lock, then unsets its waiter on the proc's timer chain.
 * NOTE(review): any dropping of the proc reference or freeing of 'a' is not
 * visible in the lines shown here - confirm. */
95 static void alarm_release(struct kref *kref)
97 struct proc_alarm *a = container_of(kref, struct proc_alarm, kref);
98 struct proc *p = a->proc;
101 spin_lock(&p->alarmset.lock);
102 TAILQ_REMOVE(&p->alarmset.list, a, link);
103 spin_unlock(&p->alarmset.lock);
104 /* When this returns, the alarm has either fired or it never will */
105 unset_alarm(p->alarmset.tchain, &a->a_waiter);
/* Calls fire_tap() with 'filter' on every fd_tap on a's tap list.  The list is
 * walked without taking a lock here; the visible caller is proc_alarm_handler
 * (presumably with a->cv held - confirm). */
110 static void alarm_fire_taps(struct proc_alarm *a, int filter)
112 struct fd_tap *tap_i;
114 SLIST_FOREACH(tap_i, &a->fd_taps, link)
115 fire_tap(tap_i, filter);
/* Alarm callback for a proc_alarm's waiter (registered by init_awaiter() in
 * alarmopen).  The code shown either clears wake_up_time or advances it by
 * a->period and re-arms the waiter on the proc's timer chain, then broadcasts
 * on a->cv and fires the alarm's taps.
 * NOTE(review): the condition choosing between the two paths is not visible;
 * presumably it is "period == 0", matching the file header ("If non-zero, the
 * alarm will rearm when it fires"). */
118 static void proc_alarm_handler(struct alarm_waiter *a_waiter)
120 struct proc_alarm *a = container_of(a_waiter, struct proc_alarm,
126 a_waiter->wake_up_time = 0;
128 /* TODO: use an alarm helper, once we switch over to nsec */
129 a_waiter->wake_up_time += a->period;
130 set_alarm(a->proc->alarmset.tchain, a_waiter);
132 __cv_broadcast(&a->cv);
133 /* Fires taps for both Qtimer and Qcount. */
134 alarm_fire_taps(a, FDTAP_FILT_WRITTEN | FDTAP_FILT_READABLE);
/* Initializes p's alarmset: an empty alarm list, its spinlock, the timer chain
 * the alarms run on, and an id counter starting at 0 (alarmopen hands out ids
 * from this counter). */
138 void devalarm_init(struct proc *p)
140 TAILQ_INIT(&p->alarmset.list);
141 spinlock_init(&p->alarmset.lock);
142 /* Just running all the proc alarms on core 0. */
143 p->alarmset.tchain = &per_cpu_info[0].tchain;
144 p->alarmset.id_counter = 0;
/* Directory generator for #alarm; handed to devwalk(), devstat() and
 * devdirread() by the other ops in this file.  Builds the entry for slot 's'
 * of the directory named by c->qid into 'dp' via devdir():
 *   - DEVDOTDOT always yields the top directory;
 *   - the top level offers "clone" plus one "a<id>" directory per alarm on
 *     current's alarmset list;
 *   - an alarm directory offers "ctl", "timer", "period" and "count";
 *   - a file qid generates an entry for itself, reusing c->qid.
 * entry_name, the dirtab pointer and its count are not referenced in the code
 * shown here. */
147 static int alarmgen(struct chan *c, char *entry_name, struct dirtab *unused,
148 int unused_nr_dirtab, int s, struct dir *dp)
151 struct proc_alarm *a_i;
152 struct proc *p = current;
154 /* Whether we're in one dir or at the top, .. still takes us to the top.
156 if (s == DEVDOTDOT) {
157 mkqid(&q, Qtopdir, 0, QTDIR);
158 devdir(c, q, devname(), 0, eve.name, 0555, dp);
161 switch (TYPE(c->qid)) {
163 /* Generate elements for the top level dir. We support a clone
164 * and alarm dirs at the top level */
166 mkqid(&q, Qclone, 0, QTFILE);
167 devdir(c, q, "clone", 0, eve.name, 0666, dp);
170 s--; /* 1 -> 0th element, 2 -> 1st element, etc */
171 /* Gets the s-th element (0 index)
173 * I would like to take advantage of the state machine and our
174 * previous answer to get the sth element of the list. We can
175 * get at our previous run of gen from dp (struct dir), and use
176 * that to get the next item. I'd like to do something like:
178 * if (dp->qid.path >> ADDR_SHIFT)
179 * a_i = TAILQ_NEXT(QID2A(dp->qid), link);
181 * Dev would give us a 0'd dp path on the first run, so if we
182 * have a path, we know we're on an iterative run. However, the
183 * problem is that we could have lost the element dp refers to
184 * (QID2A(dp->qid)) since our previous run, so we can't even
185 * access that memory to check for refcnts or anything. We need
186 * a new model for how gen works (probably a gen_start and
187 * gen_stop devop, passed as parameters to devwalk), so that we
188 * can have some invariants between gen runs.
190 * Til then, we're stuck with arrays like in #ip (though we can
191 * use Linux style fdsets) or lousy O(n^2) linked lists (like
194 * Note that we won't always start a gen loop with s == 0
195 * (devdirread, for instance) */
/* The list walk and the use of a_i both happen with the alarmset lock held;
 * a_i is only safe to dereference until the unlock. */
196 spin_lock(&p->alarmset.lock);
197 TAILQ_FOREACH(a_i, &p->alarmset.list, link) {
201 /* As soon as we unlock, someone could free a_i */
203 spin_unlock(&p->alarmset.lock);
206 snprintf(get_cur_genbuf(), GENBUF_SZ, "a%d", a_i->id);
207 mkqid(&q, QID(a_i, Qalarmdir), 0, QTDIR);
208 devdir(c, q, get_cur_genbuf(), 0, eve.name, 0555, dp);
209 spin_unlock(&p->alarmset.lock);
212 /* Gen the contents of the alarm dirs */
213 s += Qctl; /* first time through, start on Qctl */
216 mkqid(&q, QID(QID2A(c->qid), Qctl), 0, QTFILE);
217 devdir(c, q, "ctl", 0, eve.name, 0666, dp);
220 mkqid(&q, QID(QID2A(c->qid), Qtimer), 0, QTFILE);
221 devdir(c, q, "timer", 0, eve.name, 0666, dp);
224 mkqid(&q, QID(QID2A(c->qid), Qperiod), 0, QTFILE);
225 devdir(c, q, "period", 0, eve.name, 0666, dp);
228 mkqid(&q, QID(QID2A(c->qid), Qcount), 0, QTFILE);
229 devdir(c, q, "count", 0, eve.name, 0666, dp);
233 /* Need to also provide a direct hit for Qclone and all other
234 * files (at all levels of the hierarchy). Every file is both
235 * generated (via the s increments in their respective
236 * directories) and directly gen-able. devstat() will call gen
237 * with a specific path in the qid. In these cases, we make a
238 * dir for whatever they are asking for. Note the qid stays the
239 * same. I think this is what the old plan9 comments above
240 * devgen were talking about for (ii).
242 * We don't need to do this for the directories - devstat will
243 * look for the a directory by path and fail. Then it will
244 * manually build the stat output (check the -1 case in
247 devdir(c, c->qid, "clone", 0, eve.name, 0666, dp);
250 devdir(c, c->qid, "ctl", 0, eve.name, 0666, dp);
253 devdir(c, c->qid, "timer", 0, eve.name, 0666, dp);
256 devdir(c, c->qid, "period", 0, eve.name, 0666, dp);
259 devdir(c, c->qid, "count", 0, eve.name, 0666, dp);
/* Presumably the device's init op.  NOTE(review): only the signature is
 * visible here - confirm whether the body does any work. */
265 static void alarminit(void)
/* Attach op: obtains a chan for this device from devattach() (passing 'spec'
 * through) and points its qid at the top-level directory (Qtopdir, QTDIR). */
269 static struct chan *alarmattach(char *spec)
271 struct chan *c = devattach(devname(), spec);
273 mkqid(&c->qid, Qtopdir, 0, QTDIR);
/* Walk op: defers to devwalk() with no static dirtab (0, 0), using alarmgen
 * to generate the entries. */
277 static struct walkqid *alarmwalk(struct chan *c, struct chan *nc, char **name,
280 return devwalk(c, nc, name, nname, 0, 0, alarmgen);
/* Stat op: defers to devstat() with no static dirtab (0, 0), using alarmgen
 * to generate the entry for c; returns whatever devstat() returns. */
283 static size_t alarmstat(struct chan *c, uint8_t *db, size_t n)
285 return devstat(c, db, n, 0, 0, alarmgen);
/* Open op.  In the code shown here it:
 *   - raises EPERM when omode has O_REMCLO, and EISDIR under a condition that
 *     is not visible;
 *   - allocates a zeroed proc_alarm holding one kref (released through
 *     alarm_release), gives it the next id from current's alarmset, appends
 *     it to that alarmset's list and repoints c->qid at the new alarm's Qctl;
 *   - for an already-existing alarm, rescans current's list under the
 *     alarmset lock and takes a reference with kref_get_not_zero(), failing
 *     with "Unable to open alarm, concurrent closing" when it cannot;
 *   - records openmode(omode) in c->mode.
 * NOTE(review): the case labels selecting each path are not visible here;
 * which qid types reach which path is inferred from the comments. */
288 /* It shouldn't matter if p = current is DYING. We'll eventually fail to insert
289 * the open chan into p's fd table, then decref the chan. */
290 static struct chan *alarmopen(struct chan *c, int omode)
292 struct proc *p = current;
293 struct proc_alarm *a, *a_i;
294 switch (TYPE(c->qid)) {
297 if (omode & O_REMCLO)
298 error(EPERM, ERROR_FIXME);
300 error(EISDIR, ERROR_FIXME);
/* New alarm: starts with a single reference, owned by the chan being opened. */
303 a = kzmalloc(sizeof(struct proc_alarm), MEM_WAIT);
304 kref_init(&a->kref, alarm_release, 1);
305 SLIST_INIT(&a->fd_taps);
307 qlock_init(&a->qlock);
308 init_awaiter(&a->a_waiter, proc_alarm_handler);
/* The id is assigned and the alarm published on the list under one hold of
 * the alarmset lock. */
309 spin_lock(&p->alarmset.lock);
310 a->id = p->alarmset.id_counter++;
313 TAILQ_INSERT_TAIL(&p->alarmset.list, a, link);
314 spin_unlock(&p->alarmset.lock);
315 mkqid(&c->qid, QID(a, Qctl), 0, QTFILE);
321 /* the purpose of opening is to hold a kref on the proc_alarm */
324 /* this isn't a valid pointer yet, since our chan doesn't have a
325 * ref. since the time that walk gave our chan the qid, the
326 * chan could have been closed, and the alarm decref'd and
327 * freed. the qid is essentially an uncounted reference, and we
328 * need to go to the source to attempt to get a real ref.
329 * Unfortunately, this is another scan of the list, same as
331 spin_lock(&p->alarmset.lock);
332 TAILQ_FOREACH(a_i, &p->alarmset.list, link) {
334 assert(a->proc == current);
335 /* it's still possible we're not getting the
336 * ref, racing with the release method */
337 if (!kref_get_not_zero(&a->kref, 1)) {
338 /* lost the race; error out later */
344 spin_unlock(&p->alarmset.lock);
347 "Unable to open alarm, concurrent closing");
350 c->mode = openmode(omode);
351 /* Assumes c is unique (can't be closed concurrently) */
/* Close op.  Does nothing for chans that were never opened (COPEN clear);
 * otherwise dispatches on the qid type and drops the chan's reference on the
 * proc_alarm with kref_put().
 * NOTE(review): the case labels are not visible; presumably only the alarm
 * files (which take a kref in alarmopen) reach the kref_put(). */
357 static void alarmclose(struct chan *c)
359 /* There are more closes than opens. For instance, sysstat doesn't
360 * open, but it will close the chan it got from namec. We only want to
361 * clean up/decref chans that were actually open. */
362 if (!(c->flag & COPEN))
364 switch (TYPE(c->qid)) {
369 kref_put(&QID2A(c->qid)->kref);
/* Helper for Qcount to encapsulate timerfd.
 *
 * Copies the alarm's expiration count out to 'ubuf' as an unsigned long and
 * returns sizeof(unsigned long).  'n' is the size of the user buffer.
 *
 * Errors raised via error():
 *   EINVAL - n is smaller than sizeof(unsigned long).  The copy-out below
 *            always writes that many bytes, so a shorter buffer must be
 *            refused rather than overrun; a larger buffer is fine.
 *   EAGAIN - the chan is O_NONBLOCK and there is nothing to report
 *            ("#alarm count was 0").
 *   EINTR  - should_abort() reported that the syscall was aborted.
 *   EFAULT - copy_to_user() failed. */
375 static long read_qcount(struct chan *c, void *ubuf, size_t n)
378 struct proc_alarm *a = QID2A(c->qid);
379 struct cv_lookup_elm cle;
380 unsigned long old_count;
/* Reject buffers that cannot hold the whole counter. */
382 if (n < sizeof(old_count))
383 error(EINVAL, "timerfd buffer is too small (%llu)", n);
384 /* TODO: have easily abortable CVs that don't require this mechanism. */
386 __reg_abortable_cv(&cle, &a->cv);
389 dereg_abortable_cv(&cle);
393 if (c->flag & O_NONBLOCK)
394 error(EAGAIN, "#alarm count was 0");
395 if (should_abort(&cle))
396 error(EINTR, "syscall aborted");
399 old_count = a->count;
402 dereg_abortable_cv(&cle);
/* Always copies exactly sizeof(old_count) bytes, regardless of n. */
404 if (copy_to_user(ubuf, &old_count, sizeof(old_count)))
405 error(EFAULT, "timerfd copy_to_user failed");
406 return sizeof(old_count);
/* Read op; dispatches on the qid type.  In the code shown here:
 *   - directory reads go through devdirread() with alarmgen;
 *   - the alarm's id is returned via readnum() as a NUMSIZE32 field;
 *   - the waiter's wake_up_time and the period are returned via readnum()
 *     (the period as NUMSIZE64);
 *   - the count is read through read_qcount(), which ignores 'offset';
 *   - any other qid type panics.
 * NOTE(review): the case labels are not visible; presumably id/wake_up_time/
 * period/count correspond to ctl/timer/period/count per the file header. */
409 static size_t alarmread(struct chan *c, void *ubuf, size_t n, off64_t offset)
411 struct proc_alarm *p_alarm;
413 switch (TYPE(c->qid)) {
416 return devdirread(c, ubuf, n, 0, 0, alarmgen);
418 p_alarm = QID2A(c->qid);
419 /* simple reads from p_alarm shouldn't need a lock */
420 return readnum(offset, ubuf, n, p_alarm->id, NUMSIZE32);
422 p_alarm = QID2A(c->qid);
423 return readnum(offset, ubuf, n, p_alarm->a_waiter.wake_up_time,
426 p_alarm = QID2A(c->qid);
427 return readnum(offset, ubuf, n, p_alarm->period, NUMSIZE64);
429 return read_qcount(c, ubuf, n); /* ignore offset */
431 panic("Bad QID %p in devalarm", c->qid.path);
/* Helper, sets the procalarm to hexval (abs TSC ticks). 0 disarms.
 *
 * a:      the alarm to (re)program; its proc's timer chain is used.
 * hexval: absolute wake-up time handed to set_awaiter_abs().
 *
 * The code shown always unsets the waiter first, then sets the new absolute
 * time and arms it.  NOTE(review): the locking calls and the test that skips
 * re-arming when hexval is 0 are not visible here - confirm against the
 * comment in the body. */
437 static void set_proc_alarm(struct proc_alarm *a, uint64_t hexval)
439 /* Due to how we have to maintain 'count', we need to strictly account
440 * for the firings of the alarm. Easiest thing is to disarm it, reset
441 * everything, then rearm it. Note that if someone is blocked on count
442 * = 0, they may still be blocked until the next time the alarm fires.
444 * unset waits on the handler, which grabs the cv lock, so we don't grab
445 * the cv lock. However, we still need to protect ourselves from
446 * multiple setters trying to run this at once. Unset actually can
447 * handle being called concurrently, but alarm setters can't, nor can it
448 * handle the unsets and sets getting out of sync. For instance, two
449 * unsets followed by two sets would be a bug. Likewise, setting the
450 * awaiter value while it is on a tchain is a bug. The qlock prevents
453 unset_alarm(a->proc->alarmset.tchain, &a->a_waiter);
457 set_awaiter_abs(&a->a_waiter, hexval);
458 set_alarm(a->proc->alarmset.tchain, &a->a_waiter);
464 /* Note that in read and write we have an open chan, which means we have an
465 * active kref on the p_alarm. Also note that we make no assumptions about
466 * current here - we find the proc (and the tchain) via the ref stored in the proc_alarm. */
/* Write op; dispatches on the qid type.  In the code shown here:
 *   - some types are not writable and raise EPERM;
 *   - one path parses the user buffer as a base-16 number and programs the
 *     alarm with set_proc_alarm();
 *   - another parses it as base 16 and stores it in the alarm's period while
 *     holding the alarm's cv lock;
 *   - any other qid type panics.
 * The offset argument is unused.
 * NOTE(review): the case labels and the return value are not visible;
 * presumably the two paths are 'timer' and 'period' per the file header. */
468 static size_t alarmwrite(struct chan *c, void *ubuf, size_t n, off64_t unused)
470 struct proc_alarm *p_alarm;
472 switch (TYPE(c->qid)) {
477 error(EPERM, ERROR_FIXME);
479 set_proc_alarm(QID2A(c->qid), strtoul_from_ubuf(ubuf, n, 16));
482 p_alarm = QID2A(c->qid);
483 /* racing with the handler which checks the val repeatedly */
484 cv_lock(&p_alarm->cv);
485 p_alarm->period = strtoul_from_ubuf(ubuf, n, 16);
486 cv_unlock(&p_alarm->cv);
489 panic("Bad QID %p in devalarm", c->qid.path);
/* We use the same tap list, regardless of Qtimer or Qcount.
 *
 * Adds 'tap' to, or removes it from, a's tap list according to 'cmd'
 * (FDTAP_CMD_ADD / FDTAP_CMD_REM).  A tap whose filter has any bit outside
 * 'legal_filter' is refused with ENOSYS, as is any other command.
 * NOTE(review): the return values and any locking around the list update are
 * not visible in the lines shown here. */
495 static int tap_alarm(struct proc_alarm *a, struct fd_tap *tap, int cmd,
500 if (tap->filter & ~legal_filter) {
501 set_error(ENOSYS, "Unsupported #%s tap %p, must be %p",
502 devname(), tap->filter, legal_filter);
507 case (FDTAP_CMD_ADD):
508 SLIST_INSERT_HEAD(&a->fd_taps, tap, link);
511 case (FDTAP_CMD_REM):
512 SLIST_REMOVE(&a->fd_taps, tap, fd_tap, link);
516 set_error(ENOSYS, "Unsupported #%s tap command %p",
/* Tap op (installed as .tapfd in alarmdevtab).  Dispatches on the qid type
 * and forwards to tap_alarm() with the filter mask legal for that file:
 * ALARM_LEGAL_TIMER_TAPS or ALARM_LEGAL_COUNT_TAPS.  Other file types are
 * refused with ENOSYS.
 * NOTE(review): the case labels are not visible; presumably the two masks are
 * used for Qtimer and Qcount respectively. */
524 static int alarm_tapfd(struct chan *c, struct fd_tap *tap, int cmd)
526 struct proc_alarm *a = QID2A(c->qid);
528 /* We don't actually support HANGUP, but epoll implies it. */
529 #define ALARM_LEGAL_TIMER_TAPS (FDTAP_FILT_WRITTEN | FDTAP_FILT_HANGUP)
530 #define ALARM_LEGAL_COUNT_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
532 switch (TYPE(c->qid)) {
534 return tap_alarm(a, tap, cmd, ALARM_LEGAL_TIMER_TAPS);
536 return tap_alarm(a, tap, cmd, ALARM_LEGAL_COUNT_TAPS);
538 set_error(ENOSYS, "Can't tap #%s file type %d", devname(),
/* Chaninfo op (installed as .chaninfo in alarmdevtab).  For alarm chans the
 * format string describes the alarm: id, whether any taps are attached, the
 * expiry time both as a timespec and as the raw wake_up_time, the period and
 * the count.  Other chans fall back to devchaninfo().
 * NOTE(review): the case labels, the destination of the formatted text
 * (presumably 'ret', bounded by 'ret_l') and the return on the alarm path are
 * not visible in the lines shown here. */
544 static char *alarm_chaninfo(struct chan *ch, char *ret, size_t ret_l)
546 struct proc_alarm *a;
549 switch (TYPE(ch->qid)) {
555 ts = tsc2timespec(a->a_waiter.wake_up_time);
557 "Id %d, %s, expires [%7d.%09d] (%p), period %llu, count %llu",
559 SLIST_EMPTY(&a->fd_taps) ? "untapped" : "tapped",
560 ts.tv_sec, ts.tv_nsec, a->a_waiter.wake_up_time,
561 a->period, a->count);
564 return devchaninfo(ch, ret, ret_l);
569 struct dev alarmdevtab __devtab = {
574 .shutdown = devshutdown,
575 .attach = alarmattach,
588 .chaninfo = alarm_chaninfo,
589 .tapfd = alarm_tapfd,