Use a "one block at a time" policy for snoop queues
[akaros.git] / kern / src / net / devip.c
1 /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
2  * Portions Copyright © 1997-1999 Vita Nuova Limited
3  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
4  *                                (www.vitanuova.com)
5  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
6  *
7  * Modified for the Akaros operating system:
8  * Copyright (c) 2013-2014 The Regents of the University of California
9  * Copyright (c) 2013-2015 Google Inc.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included in
19  * all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  * SOFTWARE. */
28
29 #include <vfs.h>
30 #include <kfs.h>
31 #include <slab.h>
32 #include <kmalloc.h>
33 #include <kref.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <assert.h>
37 #include <error.h>
38 #include <cpio.h>
39 #include <pmap.h>
40 #include <smp.h>
41 #include <ip.h>
42
43 struct dev ipdevtab;
44
45 static char *devname(void)
46 {
47         return ipdevtab.name;
48 }
49
/*
 * File types and qid-path layout for this device.
 *
 * The Q* values identify which file or directory a chan refers to; they are
 * stored in the low Logtype bits of the qid path.  They come in three groups:
 * files in the top-level directory (Qtopbase onward), files in a protocol
 * directory (Qprotobase onward) and files in a conversation directory
 * (Qconvbase onward).  The generators add a directory-entry index to the
 * group's base value to obtain the type of the entry.
 *
 * The Log, Mask and Shift constants describe how the type, the conversation
 * index and the protocol index are packed into the path: Logtype bits of
 * type, then Logconv bits of conversation index, then Logproto bits of
 * protocol index.
 *
 * Nfs is the size of the ipfs[] table and therefore the upper bound on the
 * device instance number.
 */
50 enum {
51         Qtopdir = 1,                            /* top level directory */
52         Qtopbase,
53         Qarp = Qtopbase,
54         Qndb,
55         Qiproute,
56         Qiprouter,
57         Qipselftab,
58         Qlog,
59
60         Qprotodir,      /* directory for a protocol */
61         Qprotobase,
62         Qclone = Qprotobase,
63         Qstats,
64
65         Qconvdir,       /* directory for a conversation */
66         Qconvbase,
67         Qctl = Qconvbase,
68         Qdata,
69         Qerr,
70         Qlisten,
71         Qlocal,
72         Qremote,
73         Qstatus,
74         Qsnoop,
75
76         Logtype = 5,
77         Masktype = (1 << Logtype) - 1,
78         Logconv = 12,
79         Maskconv = (1 << Logconv) - 1,
80         Shiftconv = Logtype,
81         Logproto = 8,
82         Maskproto = (1 << Logproto) - 1,
83         Shiftproto = Logtype + Logconv,
84
85         Nfs = 32,
86 };
/*
 * Accessors for the fields packed into a qid path.  Each takes a struct qid
 * (not a pointer) and truncates the path to 32 bits before extracting:
 *   TYPE  - the Q* file type (low Logtype bits)
 *   CONV  - the conversation index
 *   PROTO - the protocol index
 * QID(p, c, y) builds a path from protocol index p, conversation index c and
 * file type y.
 */
87 #define TYPE(x)         ( ((uint32_t)(x).path) & Masktype )
88 #define CONV(x)         ( (((uint32_t)(x).path) >> Shiftconv) & Maskconv )
89 #define PROTO(x)        ( (((uint32_t)(x).path) >> Shiftproto) & Maskproto )
90 #define QID(p, c, y)    ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y))
/* Owner name reported for the shared directories and files, and the owner a
 * conversation is reset to when closeconv() tears it down. */
91 static char network[] = "network";
92
/* fslock serializes the lazy creation of ipfs[] entries in ipgetfs(). */
93 qlock_t fslock;
94 struct Fs *ipfs[Nfs];                   /* attached fs's */
95 struct queue *qlog;
96
97 extern void nullmediumlink(void);
98 extern void pktmediumlink(void);
/* eve is the owner name given to a conversation that has none (see ip3gen). */
99 extern char *eve;
100 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
101 static void closeconv(struct conv *);
102
103 static struct conv *chan2conv(struct chan *chan)
104 {
105         /* That's a lot of pointers to get to the conv! */
106         return ipfs[chan->dev]->p[PROTO(chan->qid)]->conv[CONV(chan->qid)];
107 }
108
109 static inline int founddevdir(struct chan *c, struct qid q, char *n,
110                                                           int64_t length, char *user, long perm,
111                                                           struct dir *db)
112 {
113         devdir(c, q, n, length, user, perm, db);
114         return 1;
115 }
116
117 static int topdirgen(struct chan *c, struct dir *dp)
118 {
119         struct qid q;
120         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
121         snprintf(get_cur_genbuf(), GENBUF_SZ, "#%s%lu", devname(), c->dev);
122         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
123 }
124
125
126 static int ip3gen(struct chan *c, int i, struct dir *dp)
127 {
128         struct qid q;
129         struct conv *cv;
130         char *p;
131         int perm;
132
133         cv = chan2conv(c);
134         if (cv->owner == NULL)
135                 kstrdup(&cv->owner, eve);
136         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
137
138         switch (i) {
139                 default:
140                         return -1;
141                 case Qctl:
142                         return founddevdir(c, q, "ctl", 0,
143                                                    cv->owner, cv->perm, dp);
144                 case Qdata:
145                         perm = cv->perm;
146                         perm |= qreadable(cv->rq) ? DMREADABLE : 0;
147                         perm |= qwritable(cv->wq) ? DMWRITABLE : 0;
148                         return founddevdir(c, q, "data", qlen(cv->rq),
149                                                            cv->owner, perm, dp);
150                 case Qerr:
151                         perm = cv->perm;
152                         perm |= qreadable(cv->eq) ? DMREADABLE : 0;
153                         return founddevdir(c, q, "err", qlen(cv->eq),
154                                                            cv->owner, perm, dp);
155                 case Qlisten:
156                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
157                 case Qlocal:
158                         p = "local";
159                         break;
160                 case Qremote:
161                         p = "remote";
162                         break;
163                 case Qsnoop:
164                         if (strcmp(cv->p->name, "ipifc") != 0)
165                                 return -1;
166                         perm = 0400;
167                         perm |= qreadable(cv->sq) ? DMREADABLE : 0;
168                         return founddevdir(c, q, "snoop", qlen(cv->sq),
169                                                            cv->owner, perm, dp);
170                 case Qstatus:
171                         p = "status";
172                         break;
173         }
174         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
175 }
176
177 static int ip2gen(struct chan *c, int i, struct dir *dp)
178 {
179         struct qid q;
180         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
181         switch (i) {
182                 case Qclone:
183                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
184                 case Qstats:
185                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
186         }
187         return -1;
188 }
189
190 static int ip1gen(struct chan *c, int i, struct dir *dp)
191 {
192         struct qid q;
193         char *p;
194         int prot;
195         int len = 0;
196         struct Fs *f;
197         extern uint32_t kerndate;
198
199         f = ipfs[c->dev];
200
201         prot = 0666;
202         mkqid(&q, QID(0, 0, i), 0, QTFILE);
203         switch (i) {
204                 default:
205                         return -1;
206                 case Qarp:
207                         p = "arp";
208                         break;
209                 case Qndb:
210                         p = "ndb";
211                         len = strlen(f->ndb);
212                         q.vers = f->ndbvers;
213                         break;
214                 case Qiproute:
215                         p = "iproute";
216                         break;
217                 case Qipselftab:
218                         p = "ipselftab";
219                         prot = 0444;
220                         break;
221                 case Qiprouter:
222                         p = "iprouter";
223                         break;
224                 case Qlog:
225                         p = "log";
226                         break;
227         }
228         devdir(c, q, p, len, network, prot, dp);
229         if (i == Qndb && f->ndbmtime > kerndate)
230                 dp->mtime = f->ndbmtime;
231         return 1;
232 }
233
234 static int
235 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
236           int s, struct dir *dp)
237 {
238         struct qid q;
239         struct conv *cv;
240         struct Fs *f;
241
242         f = ipfs[c->dev];
243
244         switch (TYPE(c->qid)) {
245                 case Qtopdir:
246                         if (s == DEVDOTDOT)
247                                 return topdirgen(c, dp);
248                         if (s < f->np) {
249                                 if (f->p[s]->connect == NULL)
250                                         return 0;       /* protocol with no user interface */
251                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
252                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
253                         }
254                         s -= f->np;
255                         return ip1gen(c, s + Qtopbase, dp);
256                 case Qarp:
257                 case Qndb:
258                 case Qlog:
259                 case Qiproute:
260                 case Qiprouter:
261                 case Qipselftab:
262                         return ip1gen(c, TYPE(c->qid), dp);
263                 case Qprotodir:
264                         if (s == DEVDOTDOT)
265                                 return topdirgen(c, dp);
266                         else if (s < f->p[PROTO(c->qid)]->ac) {
267                                 cv = f->p[PROTO(c->qid)]->conv[s];
268                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
269                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
270                                 return
271                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
272                         }
273                         s -= f->p[PROTO(c->qid)]->ac;
274                         return ip2gen(c, s + Qprotobase, dp);
275                 case Qclone:
276                 case Qstats:
277                         return ip2gen(c, TYPE(c->qid), dp);
278                 case Qconvdir:
279                         if (s == DEVDOTDOT) {
280                                 s = PROTO(c->qid);
281                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
282                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
283                                 return 1;
284                         }
285                         return ip3gen(c, s + Qconvbase, dp);
286                 case Qctl:
287                 case Qdata:
288                 case Qerr:
289                 case Qlisten:
290                 case Qlocal:
291                 case Qremote:
292                 case Qstatus:
293                 case Qsnoop:
294                         return ip3gen(c, TYPE(c->qid), dp);
295         }
296         return -1;
297 }
298
299 static void ipinit(void)
300 {
301         qlock_init(&fslock);
302         nullmediumlink();
303         pktmediumlink();
304 /* if only
305         fmtinstall('i', eipfmt);
306         fmtinstall('I', eipfmt);
307         fmtinstall('E', eipfmt);
308         fmtinstall('V', eipfmt);
309         fmtinstall('M', eipfmt);
310 */
311 }
312
/* Device reset hook; #ip has nothing to reset. */
static void ipreset(void)
{
	/* intentionally empty */
}
316
317 static struct Fs *ipgetfs(int dev)
318 {
319         extern void (*ipprotoinit[]) (struct Fs *);
320         struct Fs *f;
321         int i;
322
323         if (dev >= Nfs)
324                 return NULL;
325
326         qlock(&fslock);
327         if (ipfs[dev] == NULL) {
328                 f = kzmalloc(sizeof(struct Fs), MEM_WAIT);
329                 rwinit(&f->rwlock);
330                 qlock_init(&f->iprouter.qlock);
331                 ip_init(f);
332                 arpinit(f);
333                 netloginit(f);
334                 for (i = 0; ipprotoinit[i]; i++)
335                         ipprotoinit[i] (f);
336                 f->dev = dev;
337                 ipfs[dev] = f;
338         }
339         qunlock(&fslock);
340
341         return ipfs[dev];
342 }
343
344 struct IPaux *newipaux(char *owner, char *tag)
345 {
346         struct IPaux *a;
347         int n;
348
349         a = kzmalloc(sizeof(*a), 0);
350         kstrdup(&a->owner, owner);
351         memset(a->tag, ' ', sizeof(a->tag));
352         n = strlen(tag);
353         if (n > sizeof(a->tag))
354                 n = sizeof(a->tag);
355         memmove(a->tag, tag, n);
356         return a;
357 }
358
359 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
360
361 static struct chan *ipattach(char *spec)
362 {
363         struct chan *c;
364         int dev;
365
366         dev = atoi(spec);
367         if (dev >= Nfs)
368                 error(EFAIL, "bad specification");
369
370         ipgetfs(dev);
371         c = devattach(devname(), spec);
372         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
373         c->dev = dev;
374
375         c->aux = newipaux(commonuser(), "none");
376
377         return c;
378 }
379
380 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
381                                                           int nname)
382 {
383         struct IPaux *a = c->aux;
384         struct walkqid *w;
385
386         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
387         if (w != NULL && w->clone != NULL)
388                 w->clone->aux = newipaux(a->owner, a->tag);
389         return w;
390 }
391
392 static int ipstat(struct chan *c, uint8_t * db, int n)
393 {
394         return devstat(c, db, n, NULL, 0, ipgen);
395 }
396
397 static int should_wake(void *arg)
398 {
399         struct conv *cv = arg;
400         /* signal that the conv is closed */
401         if (qisclosed(cv->rq))
402                 return TRUE;
403         return cv->incall != NULL;
404 }
405
/*
 * Opens chan c with open mode omode.  What happens depends on the file type
 * encoded in c->qid:
 *
 *  - Qndb: writing or truncating requires eve; O_WRITE together with O_TRUNC
 *    empties the ndb text.
 *  - Qlog / Qiprouter: delegate to netlogopen() / iprouteropen().
 *  - directories and the read-only files: O_WRITE is refused with EPERM.
 *  - Qsnoop: read-only, restricted to the conversation's owner or eve; bumps
 *    the conversation's snoopers count (undone in ipclose()).
 *  - Qclone: clones a new conversation under the protocol's qlock and turns c
 *    into that conversation's ctl file.
 *  - Qdata / Qctl / Qerr: permission check, then take a reference on the
 *    conversation (inuse); the first opener becomes its owner.
 *  - Qlisten: with O_PATH just take a reference.  Otherwise wait for an
 *    incoming call on an Announced conversation and turn c into the ctl file
 *    of the new conversation.
 *
 * On success c is marked open (COPEN), its mode is set from omode, its offset
 * is reset to 0, and c is returned.  Failures are reported through error().
 */
406 static struct chan *ipopen(struct chan *c, int omode)
407 {
408         ERRSTACK(2);
409         struct conv *cv, *nc;
410         struct Proto *p;
411         int perm;
412         struct Fs *f;
413
414         /* perm is a lone rwx, not the rwx------ from the conversion */
415         perm = omode_to_rwx(omode) >> 6;
416
417         f = ipfs[c->dev];
418
419         switch (TYPE(c->qid)) {
420                 default:
421                         break;
422                 case Qndb:
423                         if (omode & (O_WRITE | O_TRUNC) && !iseve())
424                                 error(EPERM, ERROR_FIXME);
425                         if ((omode & (O_WRITE | O_TRUNC)) == (O_WRITE | O_TRUNC))
426                                 f->ndb[0] = 0;
427                         break;
428                 case Qlog:
429                         netlogopen(f);
430                         break;
431                 case Qiprouter:
432                         iprouteropen(f);
433                         break;
434                 case Qiproute:
435                         break;
436                 case Qtopdir:
437                 case Qprotodir:
438                 case Qconvdir:
439                 case Qstatus:
440                 case Qremote:
441                 case Qlocal:
442                 case Qstats:
443                 case Qipselftab:
444                         if (omode & O_WRITE)
445                                 error(EPERM, ERROR_FIXME);
446                         break;
447                 case Qsnoop:
448                         if (omode & O_WRITE)
449                                 error(EPERM, ERROR_FIXME);
450                         /* might be racy.  note the lack of a proto lock, unlike Qdata */
451                         p = f->p[PROTO(c->qid)];
452                         cv = p->conv[CONV(c->qid)];
453                         if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
454                                 error(EPERM, ERROR_FIXME);
455                         atomic_inc(&cv->snoopers);
456                         break;
457                 case Qclone:
458                         p = f->p[PROTO(c->qid)];
459                         qlock(&p->qlock);
460                         if (waserror()) {
461                                 qunlock(&p->qlock);
462                                 nexterror();
463                         }
464                         cv = Fsprotoclone(p, ATTACHER(c));
465                         qunlock(&p->qlock);
466                         poperror();
467                         if (cv == NULL) {
468                                 error(ENODEV, ERROR_FIXME);
469                                 break;
470                         }
                        /* the chan now names the new conversation's ctl file */
471                         mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
472                         break;
473                 case Qdata:
474                 case Qctl:
475                 case Qerr:
                        /* lock order: protocol qlock, then conversation qlock;
                         * the waserror() handler releases both */
476                         p = f->p[PROTO(c->qid)];
477                         qlock(&p->qlock);
478                         cv = p->conv[CONV(c->qid)];
479                         qlock(&cv->qlock);
480                         if (waserror()) {
481                                 qunlock(&cv->qlock);
482                                 qunlock(&p->qlock);
483                                 nexterror();
484                         }
                        /* if the owner bits (perm >> 6) do not grant the
                         * request, the opener must be the owner and the
                         * low-order bits of cv->perm must grant it */
485                         if ((perm & (cv->perm >> 6)) != perm) {
486                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
487                                         error(EPERM, ERROR_FIXME);
488                                 if ((perm & cv->perm) != perm)
489                                         error(EPERM, ERROR_FIXME);
490
491                         }
                        /* first reference: the opener becomes the owner and the
                         * permissions are reset to 0660 */
492                         cv->inuse++;
493                         if (cv->inuse == 1) {
494                                 kstrdup(&cv->owner, ATTACHER(c));
495                                 cv->perm = 0660;
496                         }
497                         qunlock(&cv->qlock);
498                         qunlock(&p->qlock);
499                         poperror();
500                         break;
501                 case Qlisten:
502                         cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
503                         /* No permissions or Announce checks required.  We'll see if that's
504                          * a good idea or not. (the perm check would do nothing, as is,
505                          * since an O_PATH perm is 0).
506                          *
507                          * But we probably want to incref to keep the conversation around
508                          * until this FD/chan is closed.  #ip is a little weird in that
509                          * objects never really go away (high water mark for convs, you can
510                          * always find them in the ns).  I think it is possible to
511                          * namec/ipgen a chan, then have that conv close, then have that
512                          * chan be opened.  You can probably do this with a data file. */
513                         if (omode & O_PATH) {
514                                 qlock(&cv->qlock);
515                                 cv->inuse++;
516                                 qunlock(&cv->qlock);
517                                 break;
518                         }
519                         if ((perm & (cv->perm >> 6)) != perm) {
520                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
521                                         error(EPERM, ERROR_FIXME);
522                                 if ((perm & cv->perm) != perm)
523                                         error(EPERM, ERROR_FIXME);
524
525                         }
526
527                         if (cv->state != Announced)
528                                 error(EFAIL, "not announced");
529
                        /* hold a reference on the listening conversation for
                         * the duration of the listen; closeconv() drops it on
                         * both the error path and the normal exit below */
530                         if (waserror()) {
531                                 closeconv(cv);
532                                 nexterror();
533                         }
534                         qlock(&cv->qlock);
535                         cv->inuse++;
536                         qunlock(&cv->qlock);
537
538                         nc = NULL;
539                         while (nc == NULL) {
540                                 /* give up if we got a hangup */
541                                 if (qisclosed(cv->rq))
542                                         error(EFAIL, "listen hungup");
543
544                                 qlock(&cv->listenq);
545                                 if (waserror()) {
546                                         qunlock(&cv->listenq);
547                                         nexterror();
548                                 }
549                                 /* we can peek at incall without grabbing the cv qlock.  if
550                                  * anything is there, it'll remain there until we dequeue it.
551                                  * no one else can, since we hold the listenq lock */
552                                 if ((c->flag & O_NONBLOCK) && !cv->incall)
553                                         error(EAGAIN, "listen queue empty");
554                                 /* wait for a connect */
555                                 rendez_sleep(&cv->listenr, should_wake, cv);
556
557                                 /* if there is a concurrent hangup, they will hold the qlock
558                                  * until the hangup is complete, including closing the cv->rq */
559                                 qlock(&cv->qlock);
560                                 nc = cv->incall;
561                                 if (nc != NULL) {
                                        /* dequeue the call and retarget the chan
                                         * at the new conversation's ctl file */
562                                         cv->incall = nc->next;
563                                         mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
564                                         kstrdup(&cv->owner, ATTACHER(c));
565                                 }
566                                 qunlock(&cv->qlock);
567
568                                 qunlock(&cv->listenq);
569                                 poperror();
570                         }
571                         closeconv(cv);
572                         poperror();
573                         break;
574         }
575         c->mode = openmode(omode);
576         c->flag |= COPEN;
577         c->offset = 0;
578         return c;
579 }
580
581 static int ipwstat(struct chan *c, uint8_t * dp, int n)
582 {
583         ERRSTACK(2);
584         struct dir *d;
585         struct conv *cv;
586         struct Fs *f;
587         struct Proto *p;
588
589         f = ipfs[c->dev];
590         switch (TYPE(c->qid)) {
591                 default:
592                         error(EPERM, ERROR_FIXME);
593                         break;
594                 case Qctl:
595                 case Qdata:
596                         break;
597         }
598
599         d = kzmalloc(sizeof(*d) + n, 0);
600         if (waserror()) {
601                 kfree(d);
602                 nexterror();
603         }
604         n = convM2D(dp, n, d, (char *)&d[1]);
605         if (n == 0)
606                 error(ENODATA, ERROR_FIXME);
607         p = f->p[PROTO(c->qid)];
608         cv = p->conv[CONV(c->qid)];
609         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
610                 error(EPERM, ERROR_FIXME);
611         if (!emptystr(d->uid))
612                 kstrdup(&cv->owner, d->uid);
613         if (d->mode != ~0UL)
614                 cv->perm = d->mode & 0777;
615         poperror();
616         kfree(d);
617         return n;
618 }
619
620 /* Should be able to handle any file type chan. Feel free to extend it. */
621 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
622 {
623         struct conv *conv;
624         struct Proto *proto;
625         char *p;
626         struct Fs *f;
627
628         f = ipfs[ch->dev];
629
630         switch (TYPE(ch->qid)) {
631                 default:
632                         ret = "Unknown type";
633                         break;
634                 case Qdata:
635                         proto = f->p[PROTO(ch->qid)];
636                         conv = proto->conv[CONV(ch->qid)];
637                         snprintf(ret, ret_l, "Qdata, %s, proto %s, conv idx %d, rq len %d, wq len %d",
638                                  SLIST_EMPTY(&conv->data_taps) ? "untapped" : "tapped",
639                                  proto->name, conv->x, qlen(conv->rq), qlen(conv->wq));
640                         break;
641                 case Qarp:
642                         ret = "Qarp";
643                         break;
644                 case Qiproute:
645                         ret = "Qiproute";
646                         break;
647                 case Qlisten:
648                         proto = f->p[PROTO(ch->qid)];
649                         conv = proto->conv[CONV(ch->qid)];
650                         snprintf(ret, ret_l, "Qlisten, %s proto %s, conv idx %d",
651                                  SLIST_EMPTY(&conv->listen_taps) ? "untapped" : "tapped",
652                                  proto->name, conv->x);
653                         break;
654                 case Qlog:
655                         ret = "Qlog";
656                         break;
657                 case Qndb:
658                         ret = "Qndb";
659                         break;
660                 case Qctl:
661                         proto = f->p[PROTO(ch->qid)];
662                         conv = proto->conv[CONV(ch->qid)];
663                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
664                                          conv->x);
665                         break;
666         }
667         return ret;
668 }
669
/*
 * Drops one reference (inuse) on conversation cv.
 *
 * While other references remain this only decrements the count.  When the
 * last reference goes away the conversation is torn down under cv->qlock:
 * pending incoming calls are closed, the owner is reset to "network" and the
 * permissions to 0660, multicast memberships are removed, the cached r/rgen
 * fields are cleared, the protocol's close method is called, and the state
 * becomes Idle.
 *
 * If an error is thrown during teardown, cv->qlock is released before the
 * error is propagated.
 */
670 static void closeconv(struct conv *cv)
671 {
672         ERRSTACK(1);
673         struct conv *nc;
674         struct Ipmulti *mp;
675
676         qlock(&cv->qlock);
677
            /* not the last reference: nothing to tear down */
678         if (--cv->inuse > 0) {
679                 qunlock(&cv->qlock);
680                 return;
681         }
682         if (waserror()) {
683                 qunlock(&cv->qlock);
684                 nexterror();
685         }
686         /* close all incoming calls since no listen will ever happen */
687         for (nc = cv->incall; nc; nc = cv->incall) {
688                 cv->incall = nc->next;
689                 closeconv(nc);
690         }
691         cv->incall = NULL;
692
693         kstrdup(&cv->owner, network);
694         cv->perm = 0660;
695
            /* the loop re-reads cv->multi each time, so it relies on
             * ipifcremmulti() unlinking the entry it is given */
696         while ((mp = cv->multi) != NULL)
697                 ipifcremmulti(cv, mp->ma, mp->ia);
698
699         cv->r = NULL;
700         cv->rgen = 0;
701         cv->p->close(cv);
702         cv->state = Idle;
703         qunlock(&cv->qlock);
704         poperror();
705 }
706
707 static void ipclose(struct chan *c)
708 {
709         struct Fs *f;
710
711         f = ipfs[c->dev];
712         switch (TYPE(c->qid)) {
713                 default:
714                         break;
715                 case Qlog:
716                         if (c->flag & COPEN)
717                                 netlogclose(f);
718                         break;
719                 case Qiprouter:
720                         if (c->flag & COPEN)
721                                 iprouterclose(f);
722                         break;
723                 case Qdata:
724                 case Qctl:
725                 case Qerr:
726                 case Qlisten:
727                         if (c->flag & COPEN)
728                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
729                         break;
730                 case Qsnoop:
731                         if (c->flag & COPEN)
732                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
733                         break;
734         }
735         kfree(((struct IPaux *)c->aux)->owner);
736         kfree(c->aux);
737 }
738
/* Size in bytes of the scratch buffer ipread() allocates for the text of the
 * local, remote, status and stats files. */
739 enum {
740         Statelen = 32 * 1024,
741 };
742
743 static long ipread(struct chan *ch, void *a, long n, int64_t off)
744 {
745         struct conv *c;
746         struct Proto *x;
747         char *buf, *p;
748         long rv;
749         struct Fs *f;
750         uint32_t offset = off;
751         size_t sofar;
752
753         f = ipfs[ch->dev];
754
755         p = a;
756         switch (TYPE(ch->qid)) {
757                 default:
758                         error(EPERM, ERROR_FIXME);
759                 case Qtopdir:
760                 case Qprotodir:
761                 case Qconvdir:
762                         return devdirread(ch, a, n, 0, 0, ipgen);
763                 case Qarp:
764                         return arpread(f->arp, a, offset, n);
765                 case Qndb:
766                         return readstr(offset, a, n, f->ndb);
767                 case Qiproute:
768                         return routeread(f, a, offset, n);
769                 case Qiprouter:
770                         return iprouterread(f, a, n);
771                 case Qipselftab:
772                         return ipselftabread(f, a, offset, n);
773                 case Qlog:
774                         return netlogread(f, a, offset, n);
775                 case Qctl:
776                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
777                         return readstr(offset, p, n, get_cur_genbuf());
778                 case Qremote:
779                         buf = kzmalloc(Statelen, 0);
780                         x = f->p[PROTO(ch->qid)];
781                         c = x->conv[CONV(ch->qid)];
782                         if (x->remote == NULL) {
783                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
784                         } else {
785                                 (*x->remote) (c, buf, Statelen - 2);
786                         }
787                         rv = readstr(offset, p, n, buf);
788                         kfree(buf);
789                         return rv;
790                 case Qlocal:
791                         buf = kzmalloc(Statelen, 0);
792                         x = f->p[PROTO(ch->qid)];
793                         c = x->conv[CONV(ch->qid)];
794                         if (x->local == NULL) {
795                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
796                         } else {
797                                 (*x->local) (c, buf, Statelen - 2);
798                         }
799                         rv = readstr(offset, p, n, buf);
800                         kfree(buf);
801                         return rv;
802                 case Qstatus:
803                         /* this all is a bit screwed up since the size of some state's
804                          * buffers will change from one invocation to another.  a reader
805                          * will come in and read the entire buffer.  then it will come again
806                          * and read from the next offset, expecting EOF.  if the buffer
807                          * changed sizes, it'll reprint the end of the buffer slightly. */
808                         buf = kzmalloc(Statelen, 0);
809                         x = f->p[PROTO(ch->qid)];
810                         c = x->conv[CONV(ch->qid)];
811                         sofar = (*x->state) (c, buf, Statelen - 2);
812                         rv = readstr(offset, p, n, buf);
813                         kfree(buf);
814                         return rv;
815                 case Qdata:
816                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
817                         if (ch->flag & O_NONBLOCK)
818                                 return qread_nonblock(c->rq, a, n);
819                         else
820                                 return qread(c->rq, a, n);
821                 case Qerr:
822                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
823                         return qread(c->eq, a, n);
824                 case Qsnoop:
825                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
826                         return qread(c->sq, a, n);
827                 case Qstats:
828                         x = f->p[PROTO(ch->qid)];
829                         if (x->stats == NULL)
830                                 error(EFAIL, "stats not implemented");
831                         buf = kzmalloc(Statelen, 0);
832                         (*x->stats) (x, buf, Statelen);
833                         rv = readstr(offset, p, n, buf);
834                         kfree(buf);
835                         return rv;
836         }
837 }
838
839 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
840 {
841         struct conv *c;
842
843         switch (TYPE(ch->qid)) {
844                 case Qdata:
845                         c = chan2conv(ch);
846                         if (ch->flag & O_NONBLOCK)
847                                 return qbread_nonblock(c->rq, n);
848                         else
849                                 return qbread(c->rq, n);
850                 default:
851                         return devbread(ch, n, offset);
852         }
853 }
854
855 /*
856  *  set local address to be that of the ifc closest to remote address
857  */
858 static void setladdr(struct conv *c)
859 {
860         findlocalip(c->p->f, c->laddr, c->raddr);
861 }
862
863 /*
864  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
865  */
866 static void setluniqueport(struct conv *c, int lport)
867 {
868         struct Proto *p;
869         struct conv *xp;
870         int x;
871
872         p = c->p;
873
874         qlock(&p->qlock);
875         for (x = 0; x < p->nc; x++) {
876                 xp = p->conv[x];
877                 if (xp == NULL)
878                         break;
879                 if (xp == c)
880                         continue;
881                 if ((xp->state == Connected || xp->state == Announced)
882                         && xp->lport == lport
883                         && xp->rport == c->rport
884                         && ipcmp(xp->raddr, c->raddr) == 0
885                         && ipcmp(xp->laddr, c->laddr) == 0) {
886                         qunlock(&p->qlock);
887                         error(EFAIL, "address in use");
888                 }
889         }
890         c->lport = lport;
891         qunlock(&p->qlock);
892 }
893
894 /*
895  *  pick a local port and set it
896  */
897 static void setlport(struct conv *c)
898 {
899         struct Proto *p;
900         uint16_t *pp;
901         int x, found;
902
903         p = c->p;
904         if (c->restricted)
905                 pp = &p->nextrport;
906         else
907                 pp = &p->nextport;
908         qlock(&p->qlock);
909         for (;; (*pp)++) {
910                 /*
911                  * Fsproto initialises p->nextport to 0 and the restricted
912                  * ports (p->nextrport) to 600.
913                  * Restricted ports must lie between 600 and 1024.
914                  * For the initial condition or if the unrestricted port number
915                  * has wrapped round, select a random port between 5000 and 1<<15
916                  * to start at.
917                  */
918                 if (c->restricted) {
919                         if (*pp >= 1024)
920                                 *pp = 600;
921                 } else
922                         while (*pp < 5000)
923                                 urandom_read(pp, sizeof(*pp));
924
925                 found = 0;
926                 for (x = 0; x < p->nc; x++) {
927                         if (p->conv[x] == NULL)
928                                 break;
929                         if (p->conv[x]->lport == *pp) {
930                                 found = 1;
931                                 break;
932                         }
933                 }
934                 if (!found)
935                         break;
936         }
937         c->lport = (*pp)++;
938         qunlock(&p->qlock);
939 }
940
941 /*
942  *  set a local address and port from a string of the form
943  *      [address!]port[!r]
944  */
945 static void setladdrport(struct conv *c, char *str, int announcing)
946 {
947         char *p;
948         uint16_t lport;
949         uint8_t addr[IPaddrlen];
950
951         /*
952          *  ignore restricted part if it exists.  it's
953          *  meaningless on local ports.
954          */
955         p = strchr(str, '!');
956         if (p != NULL) {
957                 *p++ = 0;
958                 if (strcmp(p, "r") == 0)
959                         p = NULL;
960         }
961
962         c->lport = 0;
963         if (p == NULL) {
964                 if (announcing)
965                         ipmove(c->laddr, IPnoaddr);
966                 else
967                         setladdr(c);
968                 p = str;
969         } else {
970                 if (strcmp(str, "*") == 0)
971                         ipmove(c->laddr, IPnoaddr);
972                 else {
973                         parseip(addr, str);
974                         if (ipforme(c->p->f, addr))
975                                 ipmove(c->laddr, addr);
976                         else
977                                 error(EFAIL, "not a local IP address");
978                 }
979         }
980
981         /* one process can get all connections */
982         if (announcing && strcmp(p, "*") == 0) {
983                 if (!iseve())
984                         error(EPERM, ERROR_FIXME);
985                 setluniqueport(c, 0);
986         }
987
988         lport = atoi(p);
989         if (lport <= 0)
990                 setlport(c);
991         else
992                 setluniqueport(c, lport);
993 }
994
995 static void setraddrport(struct conv *c, char *str)
996 {
997         char *p;
998
999         p = strchr(str, '!');
1000         if (p == NULL)
1001                 error(EFAIL, "malformed address");
1002         *p++ = 0;
1003         parseip(c->raddr, str);
1004         c->rport = atoi(p);
1005         p = strchr(p, '!');
1006         if (p) {
1007                 if (strstr(p, "!r") != NULL)
1008                         c->restricted = 1;
1009         }
1010 }
1011
1012 /*
1013  *  called by protocol connect routine to set addresses
1014  */
1015 void Fsstdconnect(struct conv *c, char *argv[], int argc)
1016 {
1017         switch (argc) {
1018                 default:
1019                         error(EINVAL, "bad args to %s", __func__);
1020                 case 2:
1021                         setraddrport(c, argv[1]);
1022                         setladdr(c);
1023                         setlport(c);
1024                         break;
1025                 case 3:
1026                         setraddrport(c, argv[1]);
1027                         setladdrport(c, argv[2], 0);
1028                         break;
1029         }
1030
1031         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
1032                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
1033                 || ipcmp(c->raddr, IPnoaddr) == 0)
1034                 c->ipversion = V4;
1035         else
1036                 c->ipversion = V6;
1037 }
1038
/*
 *  initiate connection and sleep till it is set up
 */
1042 static int connected(void *a)
1043 {
1044         return ((struct conv *)a)->state == Connected;
1045 }
1046
1047 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1048 {
1049         ERRSTACK(1);
1050         char *p;
1051
1052         if (c->state != 0)
1053                 error(EBUSY, ERROR_FIXME);
1054         c->state = Connecting;
1055         c->cerr[0] = '\0';
1056         if (x->connect == NULL)
1057                 error(EFAIL, "connect not supported");
1058         x->connect(c, cb->f, cb->nf);
1059
1060         qunlock(&c->qlock);
1061         if (waserror()) {
1062                 qlock(&c->qlock);
1063                 nexterror();
1064         }
1065         rendez_sleep(&c->cr, connected, c);
1066         qlock(&c->qlock);
1067         poperror();
1068
1069         if (c->cerr[0] != '\0')
1070                 error(EFAIL, c->cerr);
1071 }
1072
1073 /*
1074  *  called by protocol announce routine to set addresses
1075  */
1076 void Fsstdannounce(struct conv *c, char *argv[], int argc)
1077 {
1078         memset(c->raddr, 0, sizeof(c->raddr));
1079         c->rport = 0;
1080         switch (argc) {
1081                 default:
1082                         error(EINVAL, "bad args to announce");
1083                 case 2:
1084                         setladdrport(c, argv[1], 1);
1085                         break;
1086         }
1087 }
1088
/*
 *  initiate announcement and sleep till it is set up
 */
1092 static int announced(void *a)
1093 {
1094         return ((struct conv *)a)->state == Announced;
1095 }
1096
1097 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1098 {
1099         ERRSTACK(1);
1100         char *p;
1101
1102         if (c->state != 0)
1103                 error(EBUSY, ERROR_FIXME);
1104         c->state = Announcing;
1105         c->cerr[0] = '\0';
1106         if (x->announce == NULL)
1107                 error(EFAIL, "announce not supported");
1108         x->announce(c, cb->f, cb->nf);
1109
1110         qunlock(&c->qlock);
1111         if (waserror()) {
1112                 qlock(&c->qlock);
1113                 nexterror();
1114         }
1115         rendez_sleep(&c->cr, announced, c);
1116         qlock(&c->qlock);
1117         poperror();
1118
1119         if (c->cerr[0] != '\0')
1120                 error(EFAIL, c->cerr);
1121 }
1122
1123 /*
1124  *  called by protocol bind routine to set addresses
1125  */
1126 void Fsstdbind(struct conv *c, char *argv[], int argc)
1127 {
1128         switch (argc) {
1129                 default:
1130                         error(EINVAL, "bad args to bind");
1131                 case 2:
1132                         setladdrport(c, argv[1], 0);
1133                         break;
1134         }
1135 }
1136
1137 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1138 {
1139         if (x->bind == NULL)
1140                 Fsstdbind(c, cb->f, cb->nf);
1141         else
1142                 x->bind(c, cb->f, cb->nf);
1143 }
1144
1145 static void shutdownctlmsg(struct conv *cv, struct cmdbuf *cb)
1146 {
1147         if (cb->nf < 2)
1148                 goto err;
1149         if (!strcmp(cb->f[1], "rd")) {
1150                 qhangup(cv->rq, "shutdown");
1151                 if (cv->p->shutdown)
1152                         cv->p->shutdown(cv, SHUT_RD);
1153         } else if (!strcmp(cb->f[1], "wr")) {
1154                 qhangup(cv->wq, "shutdown");
1155                 if (cv->p->shutdown)
1156                         cv->p->shutdown(cv, SHUT_WR);
1157         } else if (!strcmp(cb->f[1], "rdwr")) {
1158                 qhangup(cv->rq, "shutdown");
1159                 qhangup(cv->wq, "shutdown");
1160                 if (cv->p->shutdown)
1161                         cv->p->shutdown(cv, SHUT_RDWR);
1162         } else {
1163                 goto err;
1164         }
1165         return;
1166 err:
1167         error(EINVAL, "shutdown [rx|tx|rxtx]");
1168 }
1169
1170 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1171 {
1172         if (cb->nf < 2)
1173                 c->tos = 0;
1174         else
1175                 c->tos = atoi(cb->f[1]);
1176 }
1177
1178 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1179 {
1180         if (cb->nf < 2)
1181                 c->ttl = MAXTTL;
1182         else
1183                 c->ttl = atoi(cb->f[1]);
1184 }
1185
1186 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1187 {
1188         ERRSTACK(1);
1189         struct conv *c;
1190         struct Proto *x;
1191         char *p;
1192         struct cmdbuf *cb;
1193         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1194         struct Fs *f;
1195         char *a;
1196
1197         a = v;
1198         f = ipfs[ch->dev];
1199
1200         switch (TYPE(ch->qid)) {
1201                 default:
1202                         error(EPERM, ERROR_FIXME);
1203                 case Qdata:
1204                         x = f->p[PROTO(ch->qid)];
1205                         c = x->conv[CONV(ch->qid)];
1206                         if (ch->flag & O_NONBLOCK)
1207                                 qwrite_nonblock(c->wq, a, n);
1208                         else
1209                                 qwrite(c->wq, a, n);
1210                         break;
1211                 case Qarp:
1212                         return arpwrite(f, a, n);
1213                 case Qiproute:
1214                         return routewrite(f, ch, a, n);
1215                 case Qlog:
1216                         netlogctl(f, a, n);
1217                         return n;
1218                 case Qndb:
1219                         return ndbwrite(f, a, off, n);
1220                 case Qctl:
1221                         x = f->p[PROTO(ch->qid)];
1222                         c = x->conv[CONV(ch->qid)];
1223                         cb = parsecmd(a, n);
1224
1225                         qlock(&c->qlock);
1226                         if (waserror()) {
1227                                 qunlock(&c->qlock);
1228                                 kfree(cb);
1229                                 nexterror();
1230                         }
1231                         if (cb->nf < 1)
1232                                 error(EFAIL, "short control request");
1233                         if (strcmp(cb->f[0], "connect") == 0)
1234                                 connectctlmsg(x, c, cb);
1235                         else if (strcmp(cb->f[0], "announce") == 0)
1236                                 announcectlmsg(x, c, cb);
1237                         else if (strcmp(cb->f[0], "bind") == 0)
1238                                 bindctlmsg(x, c, cb);
1239                         else if (strcmp(cb->f[0], "shutdown") == 0)
1240                                 shutdownctlmsg(c, cb);
1241                         else if (strcmp(cb->f[0], "ttl") == 0)
1242                                 ttlctlmsg(c, cb);
1243                         else if (strcmp(cb->f[0], "tos") == 0)
1244                                 tosctlmsg(c, cb);
1245                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1246                                 c->ignoreadvice = 1;
1247                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1248                                 if (cb->nf < 2)
1249                                         error(EFAIL, "addmulti needs interface address");
1250                                 if (cb->nf == 2) {
1251                                         if (!ipismulticast(c->raddr))
1252                                                 error(EFAIL, "addmulti for a non multicast address");
1253                                         parseip(ia, cb->f[1]);
1254                                         ipifcaddmulti(c, c->raddr, ia);
1255                                 } else {
1256                                         parseip(ma, cb->f[2]);
1257                                         if (!ipismulticast(ma))
1258                                                 error(EFAIL, "addmulti for a non multicast address");
1259                                         parseip(ia, cb->f[1]);
1260                                         ipifcaddmulti(c, ma, ia);
1261                                 }
1262                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1263                                 if (cb->nf < 2)
1264                                         error(EFAIL, "remmulti needs interface address");
1265                                 if (!ipismulticast(c->raddr))
1266                                         error(EFAIL, "remmulti for a non multicast address");
1267                                 parseip(ia, cb->f[1]);
1268                                 ipifcremmulti(c, c->raddr, ia);
1269                         } else if (x->ctl != NULL) {
1270                                 x->ctl(c, cb->f, cb->nf);
1271                         } else
1272                                 error(EFAIL, "unknown control request");
1273                         qunlock(&c->qlock);
1274                         kfree(cb);
1275                         poperror();
1276         }
1277         return n;
1278 }
1279
1280 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1281 {
1282         struct conv *c;
1283         int n;
1284
1285         switch (TYPE(ch->qid)) {
1286                 case Qdata:
1287                         c = chan2conv(ch);
1288                         if (bp->next)
1289                                 bp = concatblock(bp);
1290                         n = BLEN(bp);
1291                         if (ch->flag & O_NONBLOCK)
1292                                 qbwrite_nonblock(c->wq, bp);
1293                         else
1294                                 qbwrite(c->wq, bp);
1295                         return n;
1296                 default:
1297                         return devbwrite(ch, bp, offset);
1298         }
1299 }
1300
1301 static void ip_wake_cb(struct queue *q, void *data, int filter)
1302 {
1303         struct conv *conv = (struct conv*)data;
1304         struct fd_tap *tap_i;
1305         /* For these two, we want to ignore events on the opposite end of the
1306          * queues.  For instance, we want to know when the WQ is writable.  Our
1307          * writes will actually make it readable - we don't want to trigger a tap
1308          * for that.  However, qio doesn't know how/why we are using a queue, or
1309          * even who the ends are (hence the callbacks) */
1310         if ((filter & FDTAP_FILT_READABLE) && (q == conv->wq))
1311                 return;
1312         if ((filter & FDTAP_FILT_WRITABLE) && (q == conv->rq))
1313                 return;
1314         /* At this point, we have an event we want to send to our taps (if any).
1315          * The lock protects list integrity and the existence of the tap.
1316          *
1317          * Previously, I thought of using the conv qlock.  That actually breaks, due
1318          * to weird usages of the qlock (someone holds it for a long time, blocking
1319          * the inbound wakeup from etherread4).
1320          *
1321          * I opted for a spinlock for a couple reasons:
1322          * - fire_tap should not block.  ideally it'll be fast too (it's mostly a
1323          * send_event).
1324          * - our callers might not want to block.  A lot of network wakeups will
1325          * come network processes (etherread4) or otherwise unrelated to this
1326          * particular conversation.  I'd rather do something like fire off a KMSG
1327          * than block those.
1328          * - if fire_tap takes a while, holding the lock only slows down other
1329          * events on this *same* conversation, or other tap registration.  not a
1330          * huge deal. */
1331         spin_lock(&conv->tap_lock);
1332         SLIST_FOREACH(tap_i, &conv->data_taps, link)
1333                 fire_tap(tap_i, filter);
1334         spin_unlock(&conv->tap_lock);
1335 }
1336
1337 int iptapfd(struct chan *chan, struct fd_tap *tap, int cmd)
1338 {
1339         struct conv *conv = chan2conv(chan);
1340         int ret;
1341
1342         #define DEVIP_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
1343                                        FDTAP_FILT_HANGUP | FDTAP_FILT_PRIORITY |   \
1344                                        FDTAP_FILT_ERROR)
1345         #define DEVIP_LEGAL_LISTEN_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
1346
1347         switch (TYPE(chan->qid)) {
1348                 case Qdata:
1349                         if (tap->filter & ~DEVIP_LEGAL_DATA_TAPS) {
1350                                 set_errno(ENOSYS);
1351                                 set_errstr("Unsupported #%s data tap %p, must be %p", devname(),
1352                                            tap->filter, DEVIP_LEGAL_DATA_TAPS);
1353                                 return -1;
1354                         }
1355                         spin_lock(&conv->tap_lock);
1356                         switch (cmd) {
1357                                 case (FDTAP_CMD_ADD):
1358                                         if (SLIST_EMPTY(&conv->data_taps)) {
1359                                                 qio_set_wake_cb(conv->rq, ip_wake_cb, conv);
1360                                                 qio_set_wake_cb(conv->wq, ip_wake_cb, conv);
1361                                         }
1362                                         SLIST_INSERT_HEAD(&conv->data_taps, tap, link);
1363                                         ret = 0;
1364                                         break;
1365                                 case (FDTAP_CMD_REM):
1366                                         SLIST_REMOVE(&conv->data_taps, tap, fd_tap, link);
1367                                         if (SLIST_EMPTY(&conv->data_taps)) {
1368                                                 qio_set_wake_cb(conv->rq, 0, conv);
1369                                                 qio_set_wake_cb(conv->wq, 0, conv);
1370                                         }
1371                                         ret = 0;
1372                                         break;
1373                                 default:
1374                                         set_errno(ENOSYS);
1375                                         set_errstr("Unsupported #%s data tap command %p",
1376                                                    devname(), cmd);
1377                                         ret = -1;
1378                         }
1379                         spin_unlock(&conv->tap_lock);
1380                         return ret;
1381                 case Qlisten:
1382                         if (tap->filter & ~DEVIP_LEGAL_LISTEN_TAPS) {
1383                                 set_errno(ENOSYS);
1384                                 set_errstr("Unsupported #%s listen tap %p, must be %p",
1385                                            devname(), tap->filter, DEVIP_LEGAL_LISTEN_TAPS);
1386                                 return -1;
1387                         }
1388                         spin_lock(&conv->tap_lock);
1389                         switch (cmd) {
1390                                 case (FDTAP_CMD_ADD):
1391                                         SLIST_INSERT_HEAD(&conv->listen_taps, tap, link);
1392                                         ret = 0;
1393                                         break;
1394                                 case (FDTAP_CMD_REM):
1395                                         SLIST_REMOVE(&conv->listen_taps, tap, fd_tap, link);
1396                                         ret = 0;
1397                                         break;
1398                                 default:
1399                                         set_errno(ENOSYS);
1400                                         set_errstr("Unsupported #%s listen tap command %p",
1401                                                    devname(), cmd);
1402                                         ret = -1;
1403                         }
1404                         spin_unlock(&conv->tap_lock);
1405                         return ret;
1406                 default:
1407                         set_errno(ENOSYS);
1408                         set_errstr("Can't tap #%s file type %d", devname(),
1409                                    TYPE(chan->qid));
1410                         return -1;
1411         }
1412 }
1413
1414 struct dev ipdevtab __devtab = {
1415         .name = "ip",
1416
1417         .reset = ipreset,
1418         .init = ipinit,
1419         .shutdown = devshutdown,
1420         .attach = ipattach,
1421         .walk = ipwalk,
1422         .stat = ipstat,
1423         .open = ipopen,
1424         .create = devcreate,
1425         .close = ipclose,
1426         .read = ipread,
1427         .bread = ipbread,
1428         .write = ipwrite,
1429         .bwrite = ipbwrite,
1430         .remove = devremove,
1431         .wstat = ipwstat,
1432         .power = devpower,
1433         .chaninfo = ipchaninfo,
1434         .tapfd = iptapfd,
1435 };
1436
1437 int Fsproto(struct Fs *f, struct Proto *p)
1438 {
1439         if (f->np >= Maxproto)
1440                 return -1;
1441
1442         qlock_init(&p->qlock);
1443         p->f = f;
1444
1445         if (p->ipproto > 0) {
1446                 if (f->t2p[p->ipproto] != NULL)
1447                         return -1;
1448                 f->t2p[p->ipproto] = p;
1449         }
1450
1451         p->qid.type = QTDIR;
1452         p->qid.path = QID(f->np, 0, Qprotodir);
1453         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1454         if (p->conv == NULL)
1455                 panic("Fsproto");
1456
1457         p->x = f->np;
1458         p->nextport = 0;
1459         p->nextrport = 600;
1460         f->p[f->np++] = p;
1461
1462         return 0;
1463 }
1464
1465 /*
1466  *  return true if this protocol is
1467  *  built in
1468  */
1469 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1470 {
1471         return f->t2p[proto] != NULL;
1472 }
1473
1474 /*
1475  *  called with protocol locked
1476  */
1477 struct conv *Fsprotoclone(struct Proto *p, char *user)
1478 {
1479         struct conv *c, **pp, **ep;
1480
1481 retry:
1482         c = NULL;
1483         ep = &p->conv[p->nc];
1484         for (pp = p->conv; pp < ep; pp++) {
1485                 c = *pp;
1486                 if (c == NULL) {
1487                         c = kzmalloc(sizeof(struct conv), 0);
1488                         if (c == NULL)
1489                                 error(ENOMEM, ERROR_FIXME);
1490                         qlock_init(&c->qlock);
1491                         qlock_init(&c->listenq);
1492                         rendez_init(&c->cr);
1493                         rendez_init(&c->listenr);
1494                         SLIST_INIT(&c->data_taps);      /* already = 0; set to be futureproof */
1495                         SLIST_INIT(&c->listen_taps);
1496                         spinlock_init(&c->tap_lock);
1497                         qlock(&c->qlock);
1498                         c->p = p;
1499                         c->x = pp - p->conv;
1500                         if (p->ptclsize != 0) {
1501                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1502                                 if (c->ptcl == NULL) {
1503                                         kfree(c);
1504                                         error(ENOMEM, ERROR_FIXME);
1505                                 }
1506                         }
1507                         *pp = c;
1508                         p->ac++;
1509                         c->eq = qopen(1024, Qmsg, 0, 0);
1510                         (*p->create) (c);
1511                         assert(c->rq && c->wq);
1512                         break;
1513                 }
1514                 if (canqlock(&c->qlock)) {
1515                         /*
1516                          *  make sure both processes and protocol
1517                          *  are done with this Conv
1518                          */
1519                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1520                                 break;
1521
1522                         qunlock(&c->qlock);
1523                 }
1524         }
1525         if (pp >= ep) {
1526                 if (p->gc != NULL && (*p->gc) (p))
1527                         goto retry;
1528                 return NULL;
1529         }
1530
1531         c->inuse = 1;
1532         kstrdup(&c->owner, user);
1533         c->perm = 0660;
1534         c->state = Idle;
1535         ipmove(c->laddr, IPnoaddr);
1536         ipmove(c->raddr, IPnoaddr);
1537         c->r = NULL;
1538         c->rgen = 0;
1539         c->lport = 0;
1540         c->rport = 0;
1541         c->restricted = 0;
1542         c->ttl = MAXTTL;
1543         c->tos = DFLTTOS;
1544         qreopen(c->rq);
1545         qreopen(c->wq);
1546         qreopen(c->eq);
1547
1548         qunlock(&c->qlock);
1549         return c;
1550 }
1551
1552 int Fsconnected(struct conv *c, char *msg)
1553 {
1554         if (msg != NULL && *msg != '\0')
1555                 strlcpy(c->cerr, msg, sizeof(c->cerr));
1556
1557         switch (c->state) {
1558                 case Announcing:
1559                         c->state = Announced;
1560                         break;
1561
1562                 case Connecting:
1563                         c->state = Connected;
1564                         break;
1565         }
1566
1567         rendez_wakeup(&c->cr);
1568         return 0;
1569 }
1570
1571 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1572 {
1573         if (f->ipmux)
1574                 return f->ipmux;
1575         else
1576                 return f->t2p[proto];
1577 }
1578
1579 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1580 {
1581         return f->t2p[proto];
1582 }
1583
1584 static void fire_listener_taps(struct conv *conv)
1585 {
1586         struct fd_tap *tap_i;
1587         if (SLIST_EMPTY(&conv->listen_taps))
1588                 return;
1589         spin_lock(&conv->tap_lock);
1590         SLIST_FOREACH(tap_i, &conv->listen_taps, link)
1591                 fire_tap(tap_i, FDTAP_FILT_READABLE);
1592         spin_unlock(&conv->tap_lock);
1593 }
1594
1595 /*
1596  *  called with protocol locked
1597  */
1598 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1599                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1600 {
1601         struct conv *nc;
1602         struct conv **l;
1603         int i;
1604
1605         qlock(&c->qlock);
1606         i = 0;
1607         for (l = &c->incall; *l; l = &(*l)->next)
1608                 i++;
1609         if (i >= Maxincall) {
1610                 qunlock(&c->qlock);
1611                 return NULL;
1612         }
1613
1614         /* find a free conversation */
1615         nc = Fsprotoclone(c->p, network);
1616         if (nc == NULL) {
1617                 qunlock(&c->qlock);
1618                 return NULL;
1619         }
1620         ipmove(nc->raddr, raddr);
1621         nc->rport = rport;
1622         ipmove(nc->laddr, laddr);
1623         nc->lport = lport;
1624         nc->next = NULL;
1625         *l = nc;
1626         nc->state = Connected;
1627         nc->ipversion = version;
1628
1629         qunlock(&c->qlock);
1630
1631         rendez_wakeup(&c->listenr);
1632         fire_listener_taps(c);
1633
1634         return nc;
1635 }
1636
1637 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1638 {
1639         if (off > strlen(f->ndb))
1640                 error(EIO, ERROR_FIXME);
1641         if (off + n >= sizeof(f->ndb) - 1)
1642                 error(EIO, ERROR_FIXME);
1643         memmove(f->ndb + off, a, n);
1644         f->ndb[off + n] = 0;
1645         f->ndbvers++;
1646         f->ndbmtime = seconds();
1647         return n;
1648 }
1649
1650 uint32_t scalednconv(void)
1651 {
1652         //if(conf.npage*BY2PG >= 128*MB)
1653         return Nchans * 4;
1654         //  return Nchans;
1655 }