1 /* Copyright (c) 2015 Google Inc
2 * Barret Rhoden <brho@cs.berkeley.edu>
3 * See LICENSE for details.
5 * FD taps. Allows the user to receive events when certain things happen to an
6 * FD's underlying device file/qid. */
/* kref release callback that skips the device deregistration step.
 * tap_full_release() finishes by calling this, and add_fd_tap() installs it
 * as the release method when registering with the device failed.  The cleanup
 * done on the tap itself is outside this excerpt. */
16 static void tap_min_release(struct kref *kref)
/* Recover the fd_tap that embeds this kref. */
18 struct fd_tap *tap = container_of(kref, struct fd_tap, kref);
/* kref release callback installed by add_fd_tap() via kref_init().  Asks the
 * device backing the tap's chan to remove the tap, then hands off to
 * tap_min_release() for the rest of the teardown. */
23 static void tap_full_release(struct kref *kref)
25 struct fd_tap *tap = container_of(kref, struct fd_tap, kref);
/* Deregister from the device, looked up in devtab by the chan's type. */
26 devtab[tap->chan->type].tapfd(tap->chan, tap, FDTAP_CMD_REM);
27 tap_min_release(kref);
/* Overview of the visible steps in add_fd_tap():
 *  - allocate the tap with kzmalloc(MEM_WAIT) and copy filter, ev_q, ev_id
 *    and data from the request;
 *  - reject an ev_q that fails is_user_rwaddr() with EINVAL;
 *  - with fdt->lock held, check that fd is below max_fdset, has its bit set
 *    in open_fds, is backed by a chan (EINVAL otherwise), has no tap yet
 *    (EBUSY), and that the chan's device implements tapfd (ENOSYS);
 *  - install the tap in the FD table holding two refs, drop the lock, and
 *    register with the device through FDTAP_CMD_ADD;
 *  - if registration fails, take our tap back out of the table (if it is
 *    still there), switch the release method to tap_min_release, and drop
 *    our ref.
 * NOTE(review): declarations, braces, returns and labels are not visible in
 * this excerpt; confirm the exact control flow against the full file. */
30 /* Adds a tap with the file/qid of the underlying device for the requested FD.
31 * The FD must be a chan, and the device must support the filter requested.
33 * Returns -1 or some other device-specific non-zero number on failure, 0 on
35 int add_fd_tap(struct proc *p, struct fd_tap_req *tap_req)
37 struct fd_table *fdt = &p->open_files;
47 tap = kzmalloc(sizeof(struct fd_tap), MEM_WAIT);
50 tap->filter = tap_req->filter;
51 tap->ev_q = tap_req->ev_q;
52 tap->ev_id = tap_req->ev_id;
53 tap->data = tap_req->data;
54 if (!is_user_rwaddr(tap->ev_q, sizeof(struct event_queue))) {
55 set_error(EINVAL, "Tap request with bad event_queue %p", tap->ev_q);
59 spin_lock(&fdt->lock);
60 if (fd >= fdt->max_fdset) {
64 if (!GET_BITMASK_BIT(fdt->open_fds->fds_bits, fd)) {
68 if (!fdt->fd[fd].fd_chan) {
69 set_error(EINVAL, "Can't tap a VFS file");
72 chan = fdt->fd[fd].fd_chan;
73 if (fdt->fd[fd].fd_tap) {
74 set_error(EBUSY, "FD %d already has a tap", fd);
77 if (!devtab[chan->type].tapfd) {
78 set_error(ENOSYS, "Device %s does not handle taps",
79 devtab[chan->type].name);
82 /* need to keep chan alive for our call to the device. someone else
83 * could come in and close the FD and the chan, once we unlock */
86 /* One for the FD table, one for us to keep the removal of *this* tap from
87 * happening until we've attempted to register with the device. */
88 kref_init(&tap->kref, tap_full_release, 2);
89 fdt->fd[fd].fd_tap = tap;
90 /* As soon as we unlock, another thread can come in and remove our old tap
91 * from the table and decref it. Our ref keeps us from removing it yet,
92 * as well as keeps the memory safe. However, a new tap can be installed
93 * and registered with the device before we even attempt to register. The
94 * devices should be able to handle multiple, distinct taps, even if they
95 * happen to have the same {proc, fd} tuple. */
96 spin_unlock(&fdt->lock);
97 /* For refcnting fans, the tap ref is weak/uncounted. We'll protect the
98 * memory and call the device when tap is being released. */
99 ret = devtab[chan->type].tapfd(chan, tap, FDTAP_CMD_ADD);
101 /* we failed, so we need to make sure *our* tap is removed. We haven't
102 * decreffed, so we know our tap pointer is unique. */
103 spin_lock(&fdt->lock);
104 if (fdt->fd[fd].fd_tap == tap) {
105 fdt->fd[fd].fd_tap = 0;
106 /* normally we can't decref a tap while holding a lock, but we
107 * know we have another reference so this won't trigger a release */
108 kref_put(&tap->kref);
110 spin_unlock(&fdt->lock);
111 /* Regardless of whether someone else removed it or not, *we* are the
112 * only ones that know that registration failed and that we shouldn't
113 * remove it. Since we still hold a ref, we can change the release
114 * method to skip the device dereg. */
115 tap->kref.release = tap_min_release;
/* Drop the reference this function held (the "one for us" from kref_init). */
117 kref_put(&tap->kref);
/* NOTE(review): presumably the exit path for the checks that fail while
 * fdt->lock is held; the label and the remaining cleanup are not visible
 * in this excerpt. */
120 spin_unlock(&fdt->lock);
125 /* Removes the FD tap associated with FD. Returns 0 on success, -1 with
126 * errno/errstr on failure. */
/* NOTE(review): no range check on fd is visible before fdt->fd[fd] is
 * indexed, whereas add_fd_tap() compares fd against fdt->max_fdset first;
 * confirm that callers validate fd or that a check exists in the lines not
 * shown here. */
127 int remove_fd_tap(struct proc *p, int fd)
129 struct fd_table *fdt = &p->open_files;
/* Detach the tap from the table while holding the lock.  The table's ref is
 * dropped only after unlocking, since a final decref runs the release
 * method, and add_fd_tap() notes that a tap normally can't be decreffed
 * while holding a lock. */
132 spin_lock(&fdt->lock);
133 tap = fdt->fd[fd].fd_tap;
134 fdt->fd[fd].fd_tap = 0;
135 spin_unlock(&fdt->lock);
137 kref_put(&tap->kref);
/* Error report for an FD whose slot held no tap (the branch itself is not
 * visible in this excerpt). */
140 set_error(EBADF, "FD %d was not tapped", fd);
145 /* Fires off tap, with the events of filter having occurred. Returns -1 on
146 * error, though this needs a little more thought.
148 * Some callers may require this to not block. */
149 int fire_tap(struct fd_tap *tap, int filter)
152 struct event_msg ev_msg = {0};
/* Only the events that both occurred and were requested by the tap. */
153 int fire_filt = tap->filter & filter;
158 /* The process owning the tap could trigger a kernel PF, as with any
159 * send_event() call. Eventually we'll catch that with waserror. */
160 warn("Tap for proc %d, fd %d, threw %s", tap->proc->pid, tap->fd,
165 ev_msg.ev_type = tap->ev_id; /* e.g. CEQ idx */
166 ev_msg.ev_arg2 = fire_filt; /* e.g. CEQ coalesce */
167 ev_msg.ev_arg3 = tap->data; /* e.g. CEQ data */
/* Deliver the message to the tap's process on the event queue it supplied. */
168 send_event(tap->proc, tap->ev_q, &ev_msg, 0);