qio: Track the amount of bytes read
[akaros.git] / kern / drivers / dev / pipe.c
1 /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
2  * Portions Copyright © 1997-1999 Vita Nuova Limited
3  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
4  *                                (www.vitanuova.com)
5  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
6  *
7  * Modified for the Akaros operating system:
8  * Copyright (c) 2013-2014 The Regents of the University of California
9  * Copyright (c) 2013-2015 Google Inc.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included in
19  * all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  * SOFTWARE. */
28
29 #include <vfs.h>
30 #include <kfs.h>
31 #include <slab.h>
32 #include <kmalloc.h>
33 #include <kref.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <assert.h>
37 #include <error.h>
38 #include <cpio.h>
39 #include <pmap.h>
40 #include <smp.h>
41 #include <ip.h>
42
43 struct dev pipedevtab;
44
45 static char *devname(void)
46 {
47         return pipedevtab.name;
48 }
49
50 typedef struct Pipe Pipe;
51 struct Pipe {
52         qlock_t qlock;
53         Pipe *next;
54         struct kref ref;
55         uint32_t path;
56         struct queue *q[2];
57         int qref[2];
58         struct dirtab *pipedir;
59         char *user;
60         struct fdtap_slist data_taps;
61         spinlock_t tap_lock;
62 };
63
64 static struct {
65         spinlock_t lock;
66         uint32_t path;
67         int pipeqsize;
68 } pipealloc;
69
/* Qid types of the files in a pipe directory.  Each value equals the position
 * of the matching entry in pipedir[]. */
enum {
	Qdir = 0,
	Qctl = 1,
	Qdata0 = 2,
	Qdata1 = 3,
};
76
77 static
78 struct dirtab pipedir[] = {
79         {".", {Qdir, 0, QTDIR}, 0, DMDIR | 0500},
80         {"ctl", {Qctl}, 0, 0660},
81         {"data", {Qdata0}, 0, 0660},
82         {"data1", {Qdata1}, 0, 0660},
83 };
84
85 static void freepipe(Pipe * p)
86 {
87         if (p != NULL) {
88                 kfree(p->user);
89                 kfree(p->q[0]);
90                 kfree(p->q[1]);
91                 kfree(p->pipedir);
92                 kfree(p);
93         }
94 }
95
96 static void pipe_release(struct kref *kref)
97 {
98         Pipe *pipe = container_of(kref, Pipe, ref);
99         freepipe(pipe);
100 }
101
102 static void pipeinit(void)
103 {
104         pipealloc.pipeqsize = 32 * 1024;
105 }
106
107 /*
108  *  create a pipe, no streams are created until an open
109  */
110 static struct chan *pipeattach(char *spec)
111 {
112         ERRSTACK(2);
113         Pipe *p;
114         struct chan *c;
115
116         c = devattach(devname(), spec);
117         p = kzmalloc(sizeof(Pipe), 0);
118         if (p == 0)
119                 error(ENOMEM, ERROR_FIXME);
120         if (waserror()) {
121                 freepipe(p);
122                 nexterror();
123         }
124         p->pipedir = kzmalloc(sizeof(pipedir), 0);
125         if (p->pipedir == 0)
126                 error(ENOMEM, ERROR_FIXME);
127         memmove(p->pipedir, pipedir, sizeof(pipedir));
128         kstrdup(&p->user, current->user.name);
129         kref_init(&p->ref, pipe_release, 1);
130         qlock_init(&p->qlock);
131
132         p->q[0] = qopen(pipealloc.pipeqsize, Qcoalesce, 0, 0);
133         if (p->q[0] == 0)
134                 error(ENOMEM, ERROR_FIXME);
135         p->q[1] = qopen(pipealloc.pipeqsize, Qcoalesce, 0, 0);
136         if (p->q[1] == 0)
137                 error(ENOMEM, ERROR_FIXME);
138         poperror();
139
140         spin_lock(&(&pipealloc)->lock);
141         p->path = ++pipealloc.path;
142         spin_unlock(&(&pipealloc)->lock);
143
144         c->qid.path = NETQID(2 * p->path, Qdir);
145         c->qid.vers = 0;
146         c->qid.type = QTDIR;
147         c->aux = p;
148         c->dev = 0;
149
150         /* taps. */
151         SLIST_INIT(&p->data_taps);      /* already = 0; set to be futureproof */
152         spinlock_init(&p->tap_lock);
153         return c;
154 }
155
156 static int
157 pipegen(struct chan *c, char *unused,
158                 struct dirtab *tab, int ntab, int i, struct dir *dp)
159 {
160         int id, len;
161         struct qid qid;
162         Pipe *p;
163
164         if (i == DEVDOTDOT) {
165                 devdir(c, c->qid, devname(), 0, eve.name, 0555, dp);
166                 return 1;
167         }
168         i++;    /* skip . */
169         if (tab == 0 || i >= ntab)
170                 return -1;
171         tab += i;
172         p = c->aux;
173         switch (NETTYPE(tab->qid.path)) {
174                 case Qdata0:
175                         len = qlen(p->q[0]);
176                         break;
177                 case Qdata1:
178                         len = qlen(p->q[1]);
179                         break;
180                 default:
181                         len = tab->length;
182                         break;
183         }
184         id = NETID(c->qid.path);
185         qid.path = NETQID(id, tab->qid.path);
186         qid.vers = 0;
187         qid.type = QTFILE;
188         devdir(c, qid, tab->name, len, eve.name, tab->perm, dp);
189         return 1;
190 }
191
192 static struct walkqid *pipewalk(struct chan *c, struct chan *nc, char **name,
193                                                                 int nname)
194 {
195         struct walkqid *wq;
196         Pipe *p;
197
198         p = c->aux;
199         wq = devwalk(c, nc, name, nname, p->pipedir, ARRAY_SIZE(pipedir), pipegen);
200         if (wq != NULL && wq->clone != NULL && wq->clone != c) {
201                 qlock(&p->qlock);
202                 kref_get(&p->ref, 1);
203                 if (c->flag & COPEN) {
204                         switch (NETTYPE(c->qid.path)) {
205                                 case Qdata0:
206                                         p->qref[0]++;
207                                         break;
208                                 case Qdata1:
209                                         p->qref[1]++;
210                                         break;
211                         }
212                 }
213                 qunlock(&p->qlock);
214         }
215         return wq;
216 }
217
218 static int pipestat(struct chan *c, uint8_t * db, int n)
219 {
220         Pipe *p;
221         struct dir dir;
222         struct dirtab *tab;
223         int perm;
224         int type = NETTYPE(c->qid.path);
225
226         p = c->aux;
227         tab = p->pipedir;
228
229         switch (type) {
230                 case Qdir:
231                 case Qctl:
232                         devdir(c, c->qid, tab[type].name, tab[type].length, eve.name,
233                                tab[type].perm, &dir);
234                         break;
235                 case Qdata0:
236                         perm = tab[1].perm;
237                         perm |= qreadable(p->q[0]) ? DMREADABLE : 0;
238                         perm |= qwritable(p->q[1]) ? DMWRITABLE : 0;
239                         devdir(c, c->qid, tab[1].name, qlen(p->q[0]), eve.name, perm, &dir);
240                         break;
241                 case Qdata1:
242                         perm = tab[2].perm;
243                         perm |= qreadable(p->q[1]) ? DMREADABLE : 0;
244                         perm |= qwritable(p->q[0]) ? DMWRITABLE : 0;
245                         devdir(c, c->qid, tab[2].name, qlen(p->q[1]), eve.name, perm, &dir);
246                         break;
247                 default:
248                         panic("pipestat");
249         }
250         n = convD2M(&dir, db, n);
251         if (n < BIT16SZ)
252                 error(ENODATA, ERROR_FIXME);
253         return n;
254 }
255
256 /*
257  *  if the stream doesn't exist, create it
258  */
259 static struct chan *pipeopen(struct chan *c, int omode)
260 {
261         ERRSTACK(2);
262         Pipe *p;
263
264         if (c->qid.type & QTDIR) {
265                 if (omode & O_WRITE)
266                         error(EINVAL, "Can only open directories O_READ, mode is %o oct",
267                                   omode);
268                 c->mode = openmode(omode);
269                 c->flag |= COPEN;
270                 c->offset = 0;
271                 return c;
272         }
273
274         openmode(omode);        /* check it */
275
276         p = c->aux;
277         qlock(&p->qlock);
278         if (waserror()) {
279                 qunlock(&p->qlock);
280                 nexterror();
281         }
282         switch (NETTYPE(c->qid.path)) {
283                 case Qdata0:
284                         devpermcheck(p->user, p->pipedir[1].perm, omode);
285                         p->qref[0]++;
286                         break;
287                 case Qdata1:
288                         devpermcheck(p->user, p->pipedir[2].perm, omode);
289                         p->qref[1]++;
290                         break;
291         }
292         poperror();
293         qunlock(&p->qlock);
294
295         c->mode = openmode(omode);
296         c->flag |= COPEN;
297         c->offset = 0;
298         c->iounit = qiomaxatomic;
299         return c;
300 }
301
302 static void pipeclose(struct chan *c)
303 {
304         Pipe *p;
305
306         p = c->aux;
307         qlock(&p->qlock);
308
309         if (c->flag & COPEN) {
310                 /*
311                  *  closing either side hangs up the stream
312                  */
313                 switch (NETTYPE(c->qid.path)) {
314                         case Qdata0:
315                                 p->qref[0]--;
316                                 if (p->qref[0] == 0) {
317                                         qhangup(p->q[1], 0);
318                                         qclose(p->q[0]);
319                                 }
320                                 break;
321                         case Qdata1:
322                                 p->qref[1]--;
323                                 if (p->qref[1] == 0) {
324                                         qhangup(p->q[0], 0);
325                                         qclose(p->q[1]);
326                                 }
327                                 break;
328                 }
329         }
330
331         /*
332          *  if both sides are closed, they are reusable
333          */
334         if (p->qref[0] == 0 && p->qref[1] == 0) {
335                 qreopen(p->q[0]);
336                 qreopen(p->q[1]);
337         }
338
339         qunlock(&p->qlock);
340         /*
341          *  free the structure on last close
342          */
343         kref_put(&p->ref);
344 }
345
346 static long piperead(struct chan *c, void *va, long n, int64_t offset)
347 {
348         Pipe *p;
349
350         p = c->aux;
351
352         switch (NETTYPE(c->qid.path)) {
353                 case Qdir:
354                         return devdirread(c, va, n, p->pipedir, ARRAY_SIZE(pipedir),
355                                                           pipegen);
356                 case Qctl:
357                         return readnum(offset, va, n, p->path, NUMSIZE32);
358                 case Qdata0:
359                         if (c->flag & O_NONBLOCK)
360                                 return qread_nonblock(p->q[0], va, n);
361                         else
362                                 return qread(p->q[0], va, n);
363                 case Qdata1:
364                         if (c->flag & O_NONBLOCK)
365                                 return qread_nonblock(p->q[1], va, n);
366                         else
367                                 return qread(p->q[1], va, n);
368                 default:
369                         panic("piperead");
370         }
371         return -1;      /* not reached */
372 }
373
374 static struct block *pipebread(struct chan *c, long n, uint32_t offset)
375 {
376         Pipe *p;
377
378         p = c->aux;
379
380         switch (NETTYPE(c->qid.path)) {
381                 case Qdata0:
382                         if (c->flag & O_NONBLOCK)
383                                 return qbread_nonblock(p->q[0], n);
384                         else
385                                 return qbread(p->q[0], n);
386                 case Qdata1:
387                         if (c->flag & O_NONBLOCK)
388                                 return qbread_nonblock(p->q[1], n);
389                         else
390                                 return qbread(p->q[1], n);
391         }
392
393         return devbread(c, n, offset);
394 }
395
396 /*
397  *  A write to a closed pipe causes an EPIPE error to be thrown.
398  */
399 static long pipewrite(struct chan *c, void *va, long n, int64_t ignored)
400 {
401         ERRSTACK(1);
402         Pipe *p;
403         struct cmdbuf *cb;
404
405         p = c->aux;
406
407         switch (NETTYPE(c->qid.path)) {
408                 case Qctl:
409                         cb = parsecmd(va, n);
410                         if (waserror()) {
411                                 kfree(cb);
412                                 nexterror();
413                         }
414                         if (cb->nf < 1)
415                                 error(EFAIL, "short control request");
416                         if (strcmp(cb->f[0], "oneblock") == 0) {
417                                 q_toggle_qmsg(p->q[0], TRUE);
418                                 q_toggle_qcoalesce(p->q[0], TRUE);
419                                 q_toggle_qmsg(p->q[1], TRUE);
420                                 q_toggle_qcoalesce(p->q[1], TRUE);
421                         } else {
422                                 error(EFAIL, "unknown control request");
423                         }
424                         kfree(cb);
425                         poperror();
426                         break;
427
428                 case Qdata0:
429                         if (c->flag & O_NONBLOCK)
430                                 n = qwrite_nonblock(p->q[1], va, n);
431                         else
432                                 n = qwrite(p->q[1], va, n);
433                         break;
434
435                 case Qdata1:
436                         if (c->flag & O_NONBLOCK)
437                                 n = qwrite_nonblock(p->q[0], va, n);
438                         else
439                                 n = qwrite(p->q[0], va, n);
440                         break;
441
442                 default:
443                         panic("pipewrite");
444         }
445
446         return n;
447 }
448
449 static long pipebwrite(struct chan *c, struct block *bp, uint32_t offset)
450 {
451         long n;
452         Pipe *p;
453         //Prog *r;
454
455         p = c->aux;
456         switch (NETTYPE(c->qid.path)) {
457                 case Qctl:
458                         return devbwrite(c, bp, offset);
459                 case Qdata0:
460                         if (c->flag & O_NONBLOCK)
461                                 n = qbwrite_nonblock(p->q[1], bp);
462                         else
463                                 n = qbwrite(p->q[1], bp);
464                         break;
465
466                 case Qdata1:
467                         if (c->flag & O_NONBLOCK)
468                                 n = qbwrite_nonblock(p->q[0], bp);
469                         else
470                                 n = qbwrite(p->q[0], bp);
471                         break;
472
473                 default:
474                         n = 0;
475                         panic("pipebwrite");
476         }
477
478         return n;
479 }
480
481 static int pipewstat(struct chan *c, uint8_t *dp, int n)
482 {
483         ERRSTACK(2);
484         struct dir *d;
485         Pipe *p;
486         int d1;
487
488         if (c->qid.type & QTDIR)
489                 error(EPERM, ERROR_FIXME);
490         p = c->aux;
491         if (strcmp(current->user.name, p->user) != 0)
492                 error(EPERM, ERROR_FIXME);
493         d = kzmalloc(sizeof(*d) + n, 0);
494         if (waserror()) {
495                 kfree(d);
496                 nexterror();
497         }
498         n = convM2D(dp, n, d, (char *)&d[1]);
499         if (n == 0)
500                 error(ENODATA, ERROR_FIXME);
501         d1 = NETTYPE(c->qid.path) == Qdata1;
502         if (!emptystr(d->name)) {
503                 validwstatname(d->name);
504                 if (strlen(d->name) >= KNAMELEN)
505                         error(ENAMETOOLONG, ERROR_FIXME);
506                 if (strncmp(p->pipedir[1 + !d1].name, d->name, KNAMELEN) == 0)
507                         error(EEXIST, ERROR_FIXME);
508                 strlcpy(p->pipedir[1 + d1].name, d->name, KNAMELEN);
509         }
510         if (d->mode != -1)
511                 p->pipedir[d1 + 1].perm = d->mode & 0777;
512         poperror();
513         kfree(d);
514         return n;
515 }
516
517 static char *pipechaninfo(struct chan *chan, char *ret, size_t ret_l)
518 {
519         Pipe *p = chan->aux;
520
521         switch (NETTYPE(chan->qid.path)) {
522         case Qdir:
523                 snprintf(ret, ret_l, "Qdir, ID %d", p->path);
524                 break;
525         case Qctl:
526                 snprintf(ret, ret_l, "Qctl, ID %d", p->path);
527                 break;
528         case Qdata0:
529                 snprintf(ret, ret_l,
530                          "Qdata%d, ID %d, %s, rq len %d, wq len %d, total read %llu",
531                          0, p->path,
532                          SLIST_EMPTY(&p->data_taps) ? "untapped" : "tapped",
533                          qlen(p->q[0]),
534                          qlen(p->q[1]), q_bytes_read(p->q[0]));
535                 break;
536         case Qdata1:
537                 snprintf(ret, ret_l,
538                          "Qdata%d, ID %d, %s, rq len %d, wq len %d, total read %llu",
539                          1, p->path,
540                          SLIST_EMPTY(&p->data_taps) ? "untapped" : "tapped",
541                          qlen(p->q[1]),
542                          qlen(p->q[0]), q_bytes_read(p->q[1]));
543                 break;
544         default:
545                 ret = "Unknown type";
546                 break;
547         }
548         return ret;
549 }
550
551 /* We pass the pipe as data.  The pipe will outlive any potential qio callbacks.
552  * Meaning, we don't need to worry about the pipe disappearing if we're in here.
553  * If we're in here, then the q exists, which means the pipe exists.
554  *
555  * However, the chans do not necessarily exist.  The taps keep the chans around.
556  * So we only know which chan we're firing when we look at an individual tap. */
557 static void pipe_wake_cb(struct queue *q, void *data, int filter)
558 {
559         Pipe *p = (Pipe*)data;
560         struct fd_tap *tap_i;
561         struct chan *chan;
562
563         spin_lock(&p->tap_lock);
564         SLIST_FOREACH(tap_i, &p->data_taps, link) {
565                 chan = tap_i->chan;
566                 /* Depending which chan did the tapping, we'll care about different
567                  * filters on different qs.  For instance, if we tapped Qdata0, then we
568                  * only care about readables on q[0], writables on q[1], and hangups on
569                  * either.  More precisely, we don't care about writables on q[0] or
570                  * readables on q[1].
571                  *
572                  * Note the *tap's* filter might differ from the CB's filter.  The CB
573                  * could be for read|write|hangup on q[1], with a Qdata0 tap for just
574                  * read.  We don't want to just pass the CB filt directly to fire_tap,
575                  * since that would pass the CB's read on q[1] to the tap and fire.  The
576                  * user would think q[0] was readable.  This is why I mask out the CB
577                  * filter events that we know they don't want. */
578                 switch (NETTYPE(chan->qid.path)) {
579                 case Qdata0:
580                         if (q == p->q[0])
581                                 filter &= ~FDTAP_FILT_WRITABLE;
582                         else
583                                 filter &= ~FDTAP_FILT_READABLE;
584                         break;
585                 case Qdata1:
586                         if (q == p->q[1])
587                                 filter &= ~FDTAP_FILT_WRITABLE;
588                         else
589                                 filter &= ~FDTAP_FILT_READABLE;
590                         break;
591                 default:
592                         panic("Shouldn't be able to tap pipe qid %p", chan->qid.path);
593                 }
594                 fire_tap(tap_i, filter);
595         }
596         spin_unlock(&p->tap_lock);
597 }
598
599 static int pipetapfd(struct chan *chan, struct fd_tap *tap, int cmd)
600 {
601         int ret;
602         Pipe *p;
603
604         p = chan->aux;
605 #define DEVPIPE_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
606                                  FDTAP_FILT_HANGUP | FDTAP_FILT_ERROR)
607
608         switch (NETTYPE(chan->qid.path)) {
609         case Qdata0:
610         case Qdata1:
611                 if (tap->filter & ~DEVPIPE_LEGAL_DATA_TAPS) {
612                         set_errno(ENOSYS);
613                         set_errstr("Unsupported #%s data tap %p, must be %p", devname(),
614                                    tap->filter, DEVPIPE_LEGAL_DATA_TAPS);
615                         return -1;
616                 }
617                 spin_lock(&p->tap_lock);
618                 switch (cmd) {
619                 case (FDTAP_CMD_ADD):
620                         if (SLIST_EMPTY(&p->data_taps)) {
621                                 qio_set_wake_cb(p->q[0], pipe_wake_cb, p);
622                                 qio_set_wake_cb(p->q[1], pipe_wake_cb, p);
623                         }
624                         SLIST_INSERT_HEAD(&p->data_taps, tap, link);
625                         ret = 0;
626                         break;
627                 case (FDTAP_CMD_REM):
628                         SLIST_REMOVE(&p->data_taps, tap, fd_tap, link);
629                         if (SLIST_EMPTY(&p->data_taps)) {
630                                 qio_set_wake_cb(p->q[0], 0, p);
631                                 qio_set_wake_cb(p->q[1], 0, p);
632                         }
633                         ret = 0;
634                         break;
635                 default:
636                         set_errno(ENOSYS);
637                         set_errstr("Unsupported #%s data tap command %p", devname(), cmd);
638                         ret = -1;
639                 }
640                 spin_unlock(&p->tap_lock);
641                 return ret;
642         default:
643                 set_errno(ENOSYS);
644                 set_errstr("Can't tap #%s file type %d", devname(),
645                            NETTYPE(chan->qid.path));
646                 return -1;
647         }
648 }
649
650 struct dev pipedevtab __devtab = {
651         .name = "pipe",
652
653         .reset = devreset,
654         .init = pipeinit,
655         .shutdown = devshutdown,
656         .attach = pipeattach,
657         .walk = pipewalk,
658         .stat = pipestat,
659         .open = pipeopen,
660         .create = devcreate,
661         .close = pipeclose,
662         .read = piperead,
663         .bread = pipebread,
664         .write = pipewrite,
665         .bwrite = pipebwrite,
666         .remove = devremove,
667         .wstat = pipewstat,
668         .power = devpower,
669         .chaninfo = pipechaninfo,
670         .tapfd = pipetapfd,
671 };