1 /* Copyright (c) 2015 Google Inc
2 * Barret Rhoden <brho@cs.berkeley.edu>
3 * See LICENSE for details.
5 * FD taps. Allows the user to receive events when certain things happen to an
6 * FD's underlying device file/qid. */
/* kref release method that does not call into the device.  add_fd_tap()
 * switches a tap to this method when registration with the device failed, and
 * tap_full_release() finishes through it after asking the device to remove the
 * tap.
 *
 * @kref: the kref embedded in the struct fd_tap being released. */
15 static void tap_min_release(struct kref *kref)
/* Recover the enclosing fd_tap from its embedded kref member. */
17 struct fd_tap *tap = container_of(kref, struct fd_tap, kref);
/* kref release method installed by kref_init() in add_fd_tap().  Asks the
 * device behind the tap's chan to remove the tap, then finishes through
 * tap_min_release().
 *
 * @kref: the kref embedded in the struct fd_tap being released. */
22 static void tap_full_release(struct kref *kref)
24 struct fd_tap *tap = container_of(kref, struct fd_tap, kref);
/* Deregister the tap via the device's tapfd op, looked up by chan type. */
25 devtab[tap->chan->type].tapfd(tap->chan, tap, FDTAP_CMD_REM);
/* Then run the minimal release on the same kref. */
26 tap_min_release(kref);
29 /* Adds a tap with the file/qid of the underlying device for the requested FD.
30 * The FD must be a chan, and the device must support the filter requested.
32 * Returns -1 or some other device-specific non-zero number on failure, 0 on
 * success. */
34 int add_fd_tap(struct proc *p, struct fd_tap_req *tap_req)
36 struct fd_table *fdt = &p->open_files;
/* Allocate the tap and copy the caller's request fields into it. */
46 tap = kzmalloc(sizeof(struct fd_tap), MEM_WAIT);
49 tap->filter = tap_req->filter;
50 tap->ev_q = tap_req->ev_q;
51 tap->ev_id = tap_req->ev_id;
52 tap->data = tap_req->data;
/* Reject the request when ev_q fails the user read/write address check for a
 * region the size of a struct event_queue. */
53 if (!is_user_rwaddr(tap->ev_q, sizeof(struct event_queue))) {
54 set_error(EINVAL, "Tap request with bad event_queue %p", tap->ev_q);
/* The FD table checks and the fd_tap slot update below are done under the
 * table lock. */
58 spin_lock(&fdt->lock);
/* NOTE(review): only the upper bound on fd is checked in the visible code; if
 * fd is signed, confirm that negative values are rejected. */
59 if (fd >= fdt->max_fdset) {
/* fd's bit must be set in the table's open_fds bitmask. */
63 if (!GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) {
/* The FD must be backed by a chan. */
67 if (!fdt->fd[fd].fd_chan) {
68 set_error(EINVAL, "Can't tap a VFS file");
71 chan = fdt->fd[fd].fd_chan;
/* An FD slot holds at most one tap. */
72 if (fdt->fd[fd].fd_tap) {
73 set_error(EBUSY, "FD %d already has a tap", fd);
/* The chan's device must provide a tapfd op. */
76 if (!devtab[chan->type].tapfd) {
77 set_error(ENOSYS, "Device %s does not handle taps",
78 devtab[chan->type].name);
81 /* need to keep chan alive for our call to the device. someone else
82 * could come in and close the FD and the chan, once we unlock */
85 /* One for the FD table, one for us to keep the removal of *this* tap from
86 * happening until we've attempted to register with the device. */
87 kref_init(&tap->kref, tap_full_release, 2);
88 fdt->fd[fd].fd_tap = tap;
89 /* As soon as we unlock, another thread can come in and remove our old tap
90 * from the table and decref it. Our ref keeps us from removing it yet,
91 * as well as keeps the memory safe. However, a new tap can be installed
92 * and registered with the device before we even attempt to register. The
93 * devices should be able to handle multiple, distinct taps, even if they
94 * happen to have the same {proc, fd} tuple. */
95 spin_unlock(&fdt->lock);
96 /* For refcnting fans, the tap ref is weak/uncounted. We'll protect the
97 * memory and call the device when tap is being released. */
98 ret = devtab[chan->type].tapfd(chan, tap, FDTAP_CMD_ADD);
100 /* we failed, so we need to make sure *our* tap is removed. We haven't
101 * decreffed, so we know our tap pointer is unique. */
102 spin_lock(&fdt->lock);
/* Only clear the slot if it still holds this tap; another thread may already
 * have removed or replaced it while the lock was dropped. */
103 if (fdt->fd[fd].fd_tap == tap) {
104 fdt->fd[fd].fd_tap = 0;
105 /* normally we can't decref a tap while holding a lock, but we
106 * know we have another reference so this won't trigger a release */
107 kref_put(&tap->kref);
109 spin_unlock(&fdt->lock);
110 /* Regardless of whether someone else removed it or not, *we* are the
111 * only ones that know that registration failed and that we shouldn't
112 * remove it. Since we still hold a ref, we can change the release
113 * method to skip the device dereg. */
114 tap->kref.release = tap_min_release;
/* Drop the reference this function took in kref_init() above. */
116 kref_put(&tap->kref);
/* NOTE(review): presumably the shared exit for the failures detected while the
 * table lock was held; the labels and gotos are not visible here - confirm. */
119 spin_unlock(&fdt->lock);
124 /* Removes the FD tap associated with FD. Returns 0 on success, -1 with
125 * errno/errstr on failure. */
126 int remove_fd_tap(struct proc *p, int fd)
128 struct fd_table *fdt = &p->open_files;
/* Detach whatever tap is in the FD's slot while holding the table lock.
 *
 * NOTE(review): fd indexes fdt->fd[] here without the max_fdset and open_fds
 * checks that add_fd_tap() performs before touching the slot.  Confirm that
 * callers validate fd, otherwise this reads and writes outside the table. */
131 spin_lock(&fdt->lock);
132 tap = fdt->fd[fd].fd_tap;
133 fdt->fd[fd].fd_tap = 0;
134 spin_unlock(&fdt->lock);
/* Drop the FD table's reference after unlocking.  If it is the last one, the
 * tap's release method runs, and tap_full_release() calls into the device. */
136 kref_put(&tap->kref);
/* NOTE(review): presumably reached when the slot held no tap; the branch
 * condition is not visible here - confirm. */
139 set_error(EBADF, "FD %d was not tapped", fd);
144 /* Fires off tap, with the events of filter having occurred. Returns -1 on
145 * error, though this needs a little more thought.
147 * Some callers may require this to not block. */
148 int fire_tap(struct fd_tap *tap, int filter)
/* Start from a zeroed message; only ev_type, ev_arg2 and ev_arg3 are set in
 * the visible code. */
151 struct event_msg ev_msg = {0};
/* Keep only the bits set in both the tap's filter and the filter argument. */
152 int fire_filt = tap->filter & filter;
157 /* The process owning the tap could trigger a kernel PF, as with any
158 * send_event() call. Eventually we'll catch that with waserror. */
159 warn("Tap for proc %d, fd %d, threw %s", tap->proc->pid, tap->fd,
164 ev_msg.ev_type = tap->ev_id; /* e.g. CEQ idx */
165 ev_msg.ev_arg2 = fire_filt; /* e.g. CEQ coalesce */
166 ev_msg.ev_arg3 = tap->data; /* e.g. CEQ data */
/* Send the message to the tap's event queue on behalf of the tap's process. */
167 send_event(tap->proc, tap->ev_q, &ev_msg, 0);