Protect closeconv() with waserror()
[akaros.git] / kern / src / net / devip.c
1 /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
2  * Portions Copyright © 1997-1999 Vita Nuova Limited
3  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
4  *                                (www.vitanuova.com)
5  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
6  *
7  * Modified for the Akaros operating system:
8  * Copyright (c) 2013-2014 The Regents of the University of California
9  * Copyright (c) 2013-2015 Google Inc.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included in
19  * all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  * SOFTWARE. */
28
29 #include <vfs.h>
30 #include <kfs.h>
31 #include <slab.h>
32 #include <kmalloc.h>
33 #include <kref.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <assert.h>
37 #include <error.h>
38 #include <cpio.h>
39 #include <pmap.h>
40 #include <smp.h>
41 #include <ip.h>
42
43 struct dev ipdevtab;
44
45 static char *devname(void)
46 {
47         return ipdevtab.name;
48 }
49
/* File types served by the device.  The type lives in the low Logtype bits of
 * a qid path (see QID() below), so every value must fit within Masktype. */
enum {
	/* top level directory and the files directly under it */
	Qtopdir = 1,
	Qtopbase,
	Qarp = Qtopbase,
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,

	/* directory for a protocol and the files directly under it */
	Qprotodir,
	Qprotobase,
	Qclone = Qprotobase,
	Qstats,

	/* directory for a conversation and the files directly under it */
	Qconvdir,
	Qconvbase,
	Qctl = Qconvbase,
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,
};

/* Layout of a qid path, from the low bits up: Logtype bits of file type,
 * Logconv bits of conversation index, Logproto bits of protocol index. */
enum {
	Logtype = 5,
	Masktype = (1 << Logtype) - 1,

	Logconv = 12,
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,

	Logproto = 8,
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,
};

/* Number of slots in the table of attached fs's */
enum {
	Nfs = 32,
};

/* Accessors: x is anything with a .path member (a qid); each one pulls a
 * single field back out of the path. */
#define TYPE(x)		(((uint32_t)(x).path) & Masktype)
#define CONV(x)		((((uint32_t)(x).path) >> Shiftconv) & Maskconv)
#define PROTO(x)	((((uint32_t)(x).path) >> Shiftproto) & Maskproto)
/* Builds a path from protocol index p, conversation index c, and type y */
#define QID(p, c, y)	(((p) << (Shiftproto)) | ((c) << Shiftconv) | (y))

/* Owner name reported for files that do not belong to a conversation */
static char network[] = "network";
92
93 qlock_t fslock;
94 struct Fs *ipfs[Nfs];                   /* attached fs's */
95 struct queue *qlog;
96
97 extern void nullmediumlink(void);
98 extern void pktmediumlink(void);
99 extern char *eve;
100 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
101 static void closeconv(struct conv *);
102
103 static struct conv *chan2conv(struct chan *chan)
104 {
105         /* That's a lot of pointers to get to the conv! */
106         return ipfs[chan->dev]->p[PROTO(chan->qid)]->conv[CONV(chan->qid)];
107 }
108
109 static inline int founddevdir(struct chan *c, struct qid q, char *n,
110                                                           int64_t length, char *user, long perm,
111                                                           struct dir *db)
112 {
113         devdir(c, q, n, length, user, perm, db);
114         return 1;
115 }
116
117 static int topdirgen(struct chan *c, struct dir *dp)
118 {
119         struct qid q;
120         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
121         snprintf(get_cur_genbuf(), GENBUF_SZ, "#%s%lu", devname(), c->dev);
122         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
123 }
124
125
126 static int ip3gen(struct chan *c, int i, struct dir *dp)
127 {
128         struct qid q;
129         struct conv *cv;
130         char *p;
131         int perm;
132
133         cv = chan2conv(c);
134         if (cv->owner == NULL)
135                 kstrdup(&cv->owner, eve);
136         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
137
138         switch (i) {
139                 default:
140                         return -1;
141                 case Qctl:
142                         return founddevdir(c, q, "ctl", 0,
143                                                    cv->owner, cv->perm, dp);
144                 case Qdata:
145                         perm = cv->perm;
146                         perm |= qreadable(cv->rq) ? DMREADABLE : 0;
147                         perm |= qwritable(cv->wq) ? DMWRITABLE : 0;
148                         return founddevdir(c, q, "data", qlen(cv->rq),
149                                                            cv->owner, perm, dp);
150                 case Qerr:
151                         perm = cv->perm;
152                         perm |= qreadable(cv->eq) ? DMREADABLE : 0;
153                         return founddevdir(c, q, "err", qlen(cv->eq),
154                                                            cv->owner, perm, dp);
155                 case Qlisten:
156                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
157                 case Qlocal:
158                         p = "local";
159                         break;
160                 case Qremote:
161                         p = "remote";
162                         break;
163                 case Qsnoop:
164                         if (strcmp(cv->p->name, "ipifc") != 0)
165                                 return -1;
166                         perm = 0400;
167                         perm |= qreadable(cv->sq) ? DMREADABLE : 0;
168                         return founddevdir(c, q, "snoop", qlen(cv->sq),
169                                                            cv->owner, perm, dp);
170                 case Qstatus:
171                         p = "status";
172                         break;
173         }
174         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
175 }
176
177 static int ip2gen(struct chan *c, int i, struct dir *dp)
178 {
179         struct qid q;
180         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
181         switch (i) {
182                 case Qclone:
183                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
184                 case Qstats:
185                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
186         }
187         return -1;
188 }
189
190 static int ip1gen(struct chan *c, int i, struct dir *dp)
191 {
192         struct qid q;
193         char *p;
194         int prot;
195         int len = 0;
196         struct Fs *f;
197         extern uint32_t kerndate;
198
199         f = ipfs[c->dev];
200
201         prot = 0666;
202         mkqid(&q, QID(0, 0, i), 0, QTFILE);
203         switch (i) {
204                 default:
205                         return -1;
206                 case Qarp:
207                         p = "arp";
208                         break;
209                 case Qndb:
210                         p = "ndb";
211                         len = strlen(f->ndb);
212                         q.vers = f->ndbvers;
213                         break;
214                 case Qiproute:
215                         p = "iproute";
216                         break;
217                 case Qipselftab:
218                         p = "ipselftab";
219                         prot = 0444;
220                         break;
221                 case Qiprouter:
222                         p = "iprouter";
223                         break;
224                 case Qlog:
225                         p = "log";
226                         break;
227         }
228         devdir(c, q, p, len, network, prot, dp);
229         if (i == Qndb && f->ndbmtime > kerndate)
230                 dp->mtime = f->ndbmtime;
231         return 1;
232 }
233
234 static int
235 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
236           int s, struct dir *dp)
237 {
238         struct qid q;
239         struct conv *cv;
240         struct Fs *f;
241
242         f = ipfs[c->dev];
243
244         switch (TYPE(c->qid)) {
245                 case Qtopdir:
246                         if (s == DEVDOTDOT)
247                                 return topdirgen(c, dp);
248                         if (s < f->np) {
249                                 if (f->p[s]->connect == NULL)
250                                         return 0;       /* protocol with no user interface */
251                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
252                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
253                         }
254                         s -= f->np;
255                         return ip1gen(c, s + Qtopbase, dp);
256                 case Qarp:
257                 case Qndb:
258                 case Qlog:
259                 case Qiproute:
260                 case Qiprouter:
261                 case Qipselftab:
262                         return ip1gen(c, TYPE(c->qid), dp);
263                 case Qprotodir:
264                         if (s == DEVDOTDOT)
265                                 return topdirgen(c, dp);
266                         else if (s < f->p[PROTO(c->qid)]->ac) {
267                                 cv = f->p[PROTO(c->qid)]->conv[s];
268                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
269                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
270                                 return
271                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
272                         }
273                         s -= f->p[PROTO(c->qid)]->ac;
274                         return ip2gen(c, s + Qprotobase, dp);
275                 case Qclone:
276                 case Qstats:
277                         return ip2gen(c, TYPE(c->qid), dp);
278                 case Qconvdir:
279                         if (s == DEVDOTDOT) {
280                                 s = PROTO(c->qid);
281                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
282                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
283                                 return 1;
284                         }
285                         return ip3gen(c, s + Qconvbase, dp);
286                 case Qctl:
287                 case Qdata:
288                 case Qerr:
289                 case Qlisten:
290                 case Qlocal:
291                 case Qremote:
292                 case Qstatus:
293                 case Qsnoop:
294                         return ip3gen(c, TYPE(c->qid), dp);
295         }
296         return -1;
297 }
298
299 static void ipinit(void)
300 {
301         qlock_init(&fslock);
302         nullmediumlink();
303         pktmediumlink();
304 /* if only
305         fmtinstall('i', eipfmt);
306         fmtinstall('I', eipfmt);
307         fmtinstall('E', eipfmt);
308         fmtinstall('V', eipfmt);
309         fmtinstall('M', eipfmt);
310 */
311 }
312
/* Device reset hook; this device has nothing to reset. */
static void ipreset(void)
{
	/* intentionally empty */
}
316
317 static struct Fs *ipgetfs(int dev)
318 {
319         extern void (*ipprotoinit[]) (struct Fs *);
320         struct Fs *f;
321         int i;
322
323         if (dev >= Nfs)
324                 return NULL;
325
326         qlock(&fslock);
327         if (ipfs[dev] == NULL) {
328                 f = kzmalloc(sizeof(struct Fs), MEM_WAIT);
329                 rwinit(&f->rwlock);
330                 qlock_init(&f->iprouter.qlock);
331                 ip_init(f);
332                 arpinit(f);
333                 netloginit(f);
334                 for (i = 0; ipprotoinit[i]; i++)
335                         ipprotoinit[i] (f);
336                 f->dev = dev;
337                 ipfs[dev] = f;
338         }
339         qunlock(&fslock);
340
341         return ipfs[dev];
342 }
343
344 struct IPaux *newipaux(char *owner, char *tag)
345 {
346         struct IPaux *a;
347         int n;
348
349         a = kzmalloc(sizeof(*a), 0);
350         kstrdup(&a->owner, owner);
351         memset(a->tag, ' ', sizeof(a->tag));
352         n = strlen(tag);
353         if (n > sizeof(a->tag))
354                 n = sizeof(a->tag);
355         memmove(a->tag, tag, n);
356         return a;
357 }
358
359 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
360
361 static struct chan *ipattach(char *spec)
362 {
363         struct chan *c;
364         int dev;
365
366         dev = atoi(spec);
367         if (dev >= Nfs)
368                 error(EFAIL, "bad specification");
369
370         ipgetfs(dev);
371         c = devattach(devname(), spec);
372         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
373         c->dev = dev;
374
375         c->aux = newipaux(commonuser(), "none");
376
377         return c;
378 }
379
380 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
381                                                           int nname)
382 {
383         struct IPaux *a = c->aux;
384         struct walkqid *w;
385
386         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
387         if (w != NULL && w->clone != NULL)
388                 w->clone->aux = newipaux(a->owner, a->tag);
389         return w;
390 }
391
392 static int ipstat(struct chan *c, uint8_t * db, int n)
393 {
394         return devstat(c, db, n, NULL, 0, ipgen);
395 }
396
397 static int should_wake(void *arg)
398 {
399         struct conv *cv = arg;
400         /* signal that the conv is closed */
401         if (qisclosed(cv->rq))
402                 return TRUE;
403         return cv->incall != NULL;
404 }
405
406 static struct chan *ipopen(struct chan *c, int omode)
407 {
408         ERRSTACK(2);
409         struct conv *cv, *nc;
410         struct Proto *p;
411         int perm;
412         struct Fs *f;
413
414         /* perm is a lone rwx, not the rwx------ from the conversion */
415         perm = omode_to_rwx(omode) >> 6;
416
417         f = ipfs[c->dev];
418
419         switch (TYPE(c->qid)) {
420                 default:
421                         break;
422                 case Qndb:
423                         if (omode & (O_WRITE | O_TRUNC) && !iseve())
424                                 error(EPERM, ERROR_FIXME);
425                         if ((omode & (O_WRITE | O_TRUNC)) == (O_WRITE | O_TRUNC))
426                                 f->ndb[0] = 0;
427                         break;
428                 case Qlog:
429                         netlogopen(f);
430                         break;
431                 case Qiprouter:
432                         iprouteropen(f);
433                         break;
434                 case Qiproute:
435                         break;
436                 case Qtopdir:
437                 case Qprotodir:
438                 case Qconvdir:
439                 case Qstatus:
440                 case Qremote:
441                 case Qlocal:
442                 case Qstats:
443                 case Qipselftab:
444                         if (omode & O_WRITE)
445                                 error(EPERM, ERROR_FIXME);
446                         break;
447                 case Qsnoop:
448                         if (omode & O_WRITE)
449                                 error(EPERM, ERROR_FIXME);
450                         p = f->p[PROTO(c->qid)];
451                         cv = p->conv[CONV(c->qid)];
452                         if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
453                                 error(EPERM, ERROR_FIXME);
454                         atomic_inc(&cv->snoopers);
455                         break;
456                 case Qclone:
457                         p = f->p[PROTO(c->qid)];
458                         qlock(&p->qlock);
459                         if (waserror()) {
460                                 qunlock(&p->qlock);
461                                 nexterror();
462                         }
463                         cv = Fsprotoclone(p, ATTACHER(c));
464                         qunlock(&p->qlock);
465                         poperror();
466                         if (cv == NULL) {
467                                 error(ENODEV, ERROR_FIXME);
468                                 break;
469                         }
470                         mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
471                         break;
472                 case Qdata:
473                 case Qctl:
474                 case Qerr:
475                         p = f->p[PROTO(c->qid)];
476                         qlock(&p->qlock);
477                         cv = p->conv[CONV(c->qid)];
478                         qlock(&cv->qlock);
479                         if (waserror()) {
480                                 qunlock(&cv->qlock);
481                                 qunlock(&p->qlock);
482                                 nexterror();
483                         }
484                         if ((perm & (cv->perm >> 6)) != perm) {
485                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
486                                         error(EPERM, ERROR_FIXME);
487                                 if ((perm & cv->perm) != perm)
488                                         error(EPERM, ERROR_FIXME);
489
490                         }
491                         cv->inuse++;
492                         if (cv->inuse == 1) {
493                                 kstrdup(&cv->owner, ATTACHER(c));
494                                 cv->perm = 0660;
495                         }
496                         qunlock(&cv->qlock);
497                         qunlock(&p->qlock);
498                         poperror();
499                         break;
500                 case Qlisten:
501                         cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
502                         /* No permissions or Announce checks required.  We'll see if that's
503                          * a good idea or not. (the perm check would do nothing, as is,
504                          * since an O_PATH perm is 0).
505                          *
506                          * But we probably want to incref to keep the conversation around
507                          * until this FD/chan is closed.  #ip is a little weird in that
508                          * objects never really go away (high water mark for convs, you can
509                          * always find them in the ns).  I think it is possible to
510                          * namec/ipgen a chan, then have that conv close, then have that
511                          * chan be opened.  You can probably do this with a data file. */
512                         if (omode & O_PATH) {
513                                 qlock(&cv->qlock);
514                                 cv->inuse++;
515                                 qunlock(&cv->qlock);
516                                 break;
517                         }
518                         if ((perm & (cv->perm >> 6)) != perm) {
519                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
520                                         error(EPERM, ERROR_FIXME);
521                                 if ((perm & cv->perm) != perm)
522                                         error(EPERM, ERROR_FIXME);
523
524                         }
525
526                         if (cv->state != Announced)
527                                 error(EFAIL, "not announced");
528
529                         if (waserror()) {
530                                 closeconv(cv);
531                                 nexterror();
532                         }
533                         qlock(&cv->qlock);
534                         cv->inuse++;
535                         qunlock(&cv->qlock);
536
537                         nc = NULL;
538                         while (nc == NULL) {
539                                 /* give up if we got a hangup */
540                                 if (qisclosed(cv->rq))
541                                         error(EFAIL, "listen hungup");
542
543                                 qlock(&cv->listenq);
544                                 if (waserror()) {
545                                         qunlock(&cv->listenq);
546                                         nexterror();
547                                 }
548                                 /* we can peek at incall without grabbing the cv qlock.  if
549                                  * anything is there, it'll remain there until we dequeue it.
550                                  * no one else can, since we hold the listenq lock */
551                                 if ((c->flag & O_NONBLOCK) && !cv->incall)
552                                         error(EAGAIN, "listen queue empty");
553                                 /* wait for a connect */
554                                 rendez_sleep(&cv->listenr, should_wake, cv);
555
556                                 /* if there is a concurrent hangup, they will hold the qlock
557                                  * until the hangup is complete, including closing the cv->rq */
558                                 qlock(&cv->qlock);
559                                 nc = cv->incall;
560                                 if (nc != NULL) {
561                                         cv->incall = nc->next;
562                                         mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
563                                         kstrdup(&cv->owner, ATTACHER(c));
564                                 }
565                                 qunlock(&cv->qlock);
566
567                                 qunlock(&cv->listenq);
568                                 poperror();
569                         }
570                         closeconv(cv);
571                         poperror();
572                         break;
573         }
574         c->mode = openmode(omode);
575         c->flag |= COPEN;
576         c->offset = 0;
577         return c;
578 }
579
580 static int ipwstat(struct chan *c, uint8_t * dp, int n)
581 {
582         ERRSTACK(2);
583         struct dir *d;
584         struct conv *cv;
585         struct Fs *f;
586         struct Proto *p;
587
588         f = ipfs[c->dev];
589         switch (TYPE(c->qid)) {
590                 default:
591                         error(EPERM, ERROR_FIXME);
592                         break;
593                 case Qctl:
594                 case Qdata:
595                         break;
596         }
597
598         d = kzmalloc(sizeof(*d) + n, 0);
599         if (waserror()) {
600                 kfree(d);
601                 nexterror();
602         }
603         n = convM2D(dp, n, d, (char *)&d[1]);
604         if (n == 0)
605                 error(ENODATA, ERROR_FIXME);
606         p = f->p[PROTO(c->qid)];
607         cv = p->conv[CONV(c->qid)];
608         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
609                 error(EPERM, ERROR_FIXME);
610         if (!emptystr(d->uid))
611                 kstrdup(&cv->owner, d->uid);
612         if (d->mode != ~0UL)
613                 cv->perm = d->mode & 0777;
614         poperror();
615         kfree(d);
616         return n;
617 }
618
619 /* Should be able to handle any file type chan. Feel free to extend it. */
620 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
621 {
622         struct conv *conv;
623         struct Proto *proto;
624         char *p;
625         struct Fs *f;
626
627         f = ipfs[ch->dev];
628
629         switch (TYPE(ch->qid)) {
630                 default:
631                         ret = "Unknown type";
632                         break;
633                 case Qdata:
634                         proto = f->p[PROTO(ch->qid)];
635                         conv = proto->conv[CONV(ch->qid)];
636                         snprintf(ret, ret_l, "Qdata, %s, proto %s, conv idx %d, rq len %d, wq len %d",
637                                  SLIST_EMPTY(&conv->data_taps) ? "untapped" : "tapped",
638                                  proto->name, conv->x, qlen(conv->rq), qlen(conv->wq));
639                         break;
640                 case Qarp:
641                         ret = "Qarp";
642                         break;
643                 case Qiproute:
644                         ret = "Qiproute";
645                         break;
646                 case Qlisten:
647                         proto = f->p[PROTO(ch->qid)];
648                         conv = proto->conv[CONV(ch->qid)];
649                         snprintf(ret, ret_l, "Qlisten, %s proto %s, conv idx %d",
650                                  SLIST_EMPTY(&conv->listen_taps) ? "untapped" : "tapped",
651                                  proto->name, conv->x);
652                         break;
653                 case Qlog:
654                         ret = "Qlog";
655                         break;
656                 case Qndb:
657                         ret = "Qndb";
658                         break;
659                 case Qctl:
660                         proto = f->p[PROTO(ch->qid)];
661                         conv = proto->conv[CONV(ch->qid)];
662                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
663                                          conv->x);
664                         break;
665         }
666         return ret;
667 }
668
669 static void closeconv(struct conv *cv)
670 {
671         ERRSTACK(1);
672         struct conv *nc;
673         struct Ipmulti *mp;
674
675         qlock(&cv->qlock);
676
677         if (--cv->inuse > 0) {
678                 qunlock(&cv->qlock);
679                 return;
680         }
681         if (waserror()) {
682                 qunlock(&cv->qlock);
683                 nexterror();
684         }
685         /* close all incoming calls since no listen will ever happen */
686         for (nc = cv->incall; nc; nc = cv->incall) {
687                 cv->incall = nc->next;
688                 closeconv(nc);
689         }
690         cv->incall = NULL;
691
692         kstrdup(&cv->owner, network);
693         cv->perm = 0660;
694
695         while ((mp = cv->multi) != NULL)
696                 ipifcremmulti(cv, mp->ma, mp->ia);
697
698         cv->r = NULL;
699         cv->rgen = 0;
700         cv->p->close(cv);
701         cv->state = Idle;
702         qunlock(&cv->qlock);
703         poperror();
704 }
705
706 static void ipclose(struct chan *c)
707 {
708         struct Fs *f;
709
710         f = ipfs[c->dev];
711         switch (TYPE(c->qid)) {
712                 default:
713                         break;
714                 case Qlog:
715                         if (c->flag & COPEN)
716                                 netlogclose(f);
717                         break;
718                 case Qiprouter:
719                         if (c->flag & COPEN)
720                                 iprouterclose(f);
721                         break;
722                 case Qdata:
723                 case Qctl:
724                 case Qerr:
725                 case Qlisten:
726                         if (c->flag & COPEN)
727                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
728                         break;
729                 case Qsnoop:
730                         if (c->flag & COPEN)
731                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
732                         break;
733         }
734         kfree(((struct IPaux *)c->aux)->owner);
735         kfree(c->aux);
736 }
737
738 enum {
739         Statelen = 32 * 1024,
740 };
741
742 static long ipread(struct chan *ch, void *a, long n, int64_t off)
743 {
744         struct conv *c;
745         struct Proto *x;
746         char *buf, *p;
747         long rv;
748         struct Fs *f;
749         uint32_t offset = off;
750         size_t sofar;
751
752         f = ipfs[ch->dev];
753
754         p = a;
755         switch (TYPE(ch->qid)) {
756                 default:
757                         error(EPERM, ERROR_FIXME);
758                 case Qtopdir:
759                 case Qprotodir:
760                 case Qconvdir:
761                         return devdirread(ch, a, n, 0, 0, ipgen);
762                 case Qarp:
763                         return arpread(f->arp, a, offset, n);
764                 case Qndb:
765                         return readstr(offset, a, n, f->ndb);
766                 case Qiproute:
767                         return routeread(f, a, offset, n);
768                 case Qiprouter:
769                         return iprouterread(f, a, n);
770                 case Qipselftab:
771                         return ipselftabread(f, a, offset, n);
772                 case Qlog:
773                         return netlogread(f, a, offset, n);
774                 case Qctl:
775                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
776                         return readstr(offset, p, n, get_cur_genbuf());
777                 case Qremote:
778                         buf = kzmalloc(Statelen, 0);
779                         x = f->p[PROTO(ch->qid)];
780                         c = x->conv[CONV(ch->qid)];
781                         if (x->remote == NULL) {
782                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
783                         } else {
784                                 (*x->remote) (c, buf, Statelen - 2);
785                         }
786                         rv = readstr(offset, p, n, buf);
787                         kfree(buf);
788                         return rv;
789                 case Qlocal:
790                         buf = kzmalloc(Statelen, 0);
791                         x = f->p[PROTO(ch->qid)];
792                         c = x->conv[CONV(ch->qid)];
793                         if (x->local == NULL) {
794                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
795                         } else {
796                                 (*x->local) (c, buf, Statelen - 2);
797                         }
798                         rv = readstr(offset, p, n, buf);
799                         kfree(buf);
800                         return rv;
801                 case Qstatus:
802                         /* this all is a bit screwed up since the size of some state's
803                          * buffers will change from one invocation to another.  a reader
804                          * will come in and read the entire buffer.  then it will come again
805                          * and read from the next offset, expecting EOF.  if the buffer
806                          * changed sizes, it'll reprint the end of the buffer slightly. */
807                         buf = kzmalloc(Statelen, 0);
808                         x = f->p[PROTO(ch->qid)];
809                         c = x->conv[CONV(ch->qid)];
810                         sofar = (*x->state) (c, buf, Statelen - 2);
811                         rv = readstr(offset, p, n, buf);
812                         kfree(buf);
813                         return rv;
814                 case Qdata:
815                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
816                         if (ch->flag & O_NONBLOCK)
817                                 return qread_nonblock(c->rq, a, n);
818                         else
819                                 return qread(c->rq, a, n);
820                 case Qerr:
821                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
822                         return qread(c->eq, a, n);
823                 case Qsnoop:
824                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
825                         return qread(c->sq, a, n);
826                 case Qstats:
827                         x = f->p[PROTO(ch->qid)];
828                         if (x->stats == NULL)
829                                 error(EFAIL, "stats not implemented");
830                         buf = kzmalloc(Statelen, 0);
831                         (*x->stats) (x, buf, Statelen);
832                         rv = readstr(offset, p, n, buf);
833                         kfree(buf);
834                         return rv;
835         }
836 }
837
838 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
839 {
840         struct conv *c;
841
842         switch (TYPE(ch->qid)) {
843                 case Qdata:
844                         c = chan2conv(ch);
845                         if (ch->flag & O_NONBLOCK)
846                                 return qbread_nonblock(c->rq, n);
847                         else
848                                 return qbread(c->rq, n);
849                 default:
850                         return devbread(ch, n, offset);
851         }
852 }
853
854 /*
855  *  set local address to be that of the ifc closest to remote address
856  */
857 static void setladdr(struct conv *c)
858 {
859         findlocalip(c->p->f, c->laddr, c->raddr);
860 }
861
862 /*
863  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
864  */
865 static void setluniqueport(struct conv *c, int lport)
866 {
867         struct Proto *p;
868         struct conv *xp;
869         int x;
870
871         p = c->p;
872
873         qlock(&p->qlock);
874         for (x = 0; x < p->nc; x++) {
875                 xp = p->conv[x];
876                 if (xp == NULL)
877                         break;
878                 if (xp == c)
879                         continue;
880                 if ((xp->state == Connected || xp->state == Announced)
881                         && xp->lport == lport
882                         && xp->rport == c->rport
883                         && ipcmp(xp->raddr, c->raddr) == 0
884                         && ipcmp(xp->laddr, c->laddr) == 0) {
885                         qunlock(&p->qlock);
886                         error(EFAIL, "address in use");
887                 }
888         }
889         c->lport = lport;
890         qunlock(&p->qlock);
891 }
892
893 /*
894  *  pick a local port and set it
895  */
896 static void setlport(struct conv *c)
897 {
898         struct Proto *p;
899         uint16_t *pp;
900         int x, found;
901
902         p = c->p;
903         if (c->restricted)
904                 pp = &p->nextrport;
905         else
906                 pp = &p->nextport;
907         qlock(&p->qlock);
908         for (;; (*pp)++) {
909                 /*
910                  * Fsproto initialises p->nextport to 0 and the restricted
911                  * ports (p->nextrport) to 600.
912                  * Restricted ports must lie between 600 and 1024.
913                  * For the initial condition or if the unrestricted port number
914                  * has wrapped round, select a random port between 5000 and 1<<15
915                  * to start at.
916                  */
917                 if (c->restricted) {
918                         if (*pp >= 1024)
919                                 *pp = 600;
920                 } else
921                         while (*pp < 5000)
922                                 urandom_read(pp, sizeof(*pp));
923
924                 found = 0;
925                 for (x = 0; x < p->nc; x++) {
926                         if (p->conv[x] == NULL)
927                                 break;
928                         if (p->conv[x]->lport == *pp) {
929                                 found = 1;
930                                 break;
931                         }
932                 }
933                 if (!found)
934                         break;
935         }
936         c->lport = (*pp)++;
937         qunlock(&p->qlock);
938 }
939
940 /*
941  *  set a local address and port from a string of the form
942  *      [address!]port[!r]
943  */
944 static void setladdrport(struct conv *c, char *str, int announcing)
945 {
946         char *p;
947         uint16_t lport;
948         uint8_t addr[IPaddrlen];
949
950         /*
951          *  ignore restricted part if it exists.  it's
952          *  meaningless on local ports.
953          */
954         p = strchr(str, '!');
955         if (p != NULL) {
956                 *p++ = 0;
957                 if (strcmp(p, "r") == 0)
958                         p = NULL;
959         }
960
961         c->lport = 0;
962         if (p == NULL) {
963                 if (announcing)
964                         ipmove(c->laddr, IPnoaddr);
965                 else
966                         setladdr(c);
967                 p = str;
968         } else {
969                 if (strcmp(str, "*") == 0)
970                         ipmove(c->laddr, IPnoaddr);
971                 else {
972                         parseip(addr, str);
973                         if (ipforme(c->p->f, addr))
974                                 ipmove(c->laddr, addr);
975                         else
976                                 error(EFAIL, "not a local IP address");
977                 }
978         }
979
980         /* one process can get all connections */
981         if (announcing && strcmp(p, "*") == 0) {
982                 if (!iseve())
983                         error(EPERM, ERROR_FIXME);
984                 setluniqueport(c, 0);
985         }
986
987         lport = atoi(p);
988         if (lport <= 0)
989                 setlport(c);
990         else
991                 setluniqueport(c, lport);
992 }
993
994 static void setraddrport(struct conv *c, char *str)
995 {
996         char *p;
997
998         p = strchr(str, '!');
999         if (p == NULL)
1000                 error(EFAIL, "malformed address");
1001         *p++ = 0;
1002         parseip(c->raddr, str);
1003         c->rport = atoi(p);
1004         p = strchr(p, '!');
1005         if (p) {
1006                 if (strstr(p, "!r") != NULL)
1007                         c->restricted = 1;
1008         }
1009 }
1010
1011 /*
1012  *  called by protocol connect routine to set addresses
1013  */
1014 void Fsstdconnect(struct conv *c, char *argv[], int argc)
1015 {
1016         switch (argc) {
1017                 default:
1018                         error(EINVAL, "bad args to %s", __func__);
1019                 case 2:
1020                         setraddrport(c, argv[1]);
1021                         setladdr(c);
1022                         setlport(c);
1023                         break;
1024                 case 3:
1025                         setraddrport(c, argv[1]);
1026                         setladdrport(c, argv[2], 0);
1027                         break;
1028         }
1029
1030         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
1031                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
1032                 || ipcmp(c->raddr, IPnoaddr) == 0)
1033                 c->ipversion = V4;
1034         else
1035                 c->ipversion = V6;
1036 }
1037
1038 /*
1039  *  initiate connection and sleep till its set up
1040  */
1041 static int connected(void *a)
1042 {
1043         return ((struct conv *)a)->state == Connected;
1044 }
1045
1046 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1047 {
1048         ERRSTACK(1);
1049         char *p;
1050
1051         if (c->state != 0)
1052                 error(EBUSY, ERROR_FIXME);
1053         c->state = Connecting;
1054         c->cerr[0] = '\0';
1055         if (x->connect == NULL)
1056                 error(EFAIL, "connect not supported");
1057         x->connect(c, cb->f, cb->nf);
1058
1059         qunlock(&c->qlock);
1060         if (waserror()) {
1061                 qlock(&c->qlock);
1062                 nexterror();
1063         }
1064         rendez_sleep(&c->cr, connected, c);
1065         qlock(&c->qlock);
1066         poperror();
1067
1068         if (c->cerr[0] != '\0')
1069                 error(EFAIL, c->cerr);
1070 }
1071
1072 /*
1073  *  called by protocol announce routine to set addresses
1074  */
1075 void Fsstdannounce(struct conv *c, char *argv[], int argc)
1076 {
1077         memset(c->raddr, 0, sizeof(c->raddr));
1078         c->rport = 0;
1079         switch (argc) {
1080                 default:
1081                         error(EINVAL, "bad args to announce");
1082                 case 2:
1083                         setladdrport(c, argv[1], 1);
1084                         break;
1085         }
1086 }
1087
1088 /*
1089  *  initiate announcement and sleep till its set up
1090  */
1091 static int announced(void *a)
1092 {
1093         return ((struct conv *)a)->state == Announced;
1094 }
1095
1096 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1097 {
1098         ERRSTACK(1);
1099         char *p;
1100
1101         if (c->state != 0)
1102                 error(EBUSY, ERROR_FIXME);
1103         c->state = Announcing;
1104         c->cerr[0] = '\0';
1105         if (x->announce == NULL)
1106                 error(EFAIL, "announce not supported");
1107         x->announce(c, cb->f, cb->nf);
1108
1109         qunlock(&c->qlock);
1110         if (waserror()) {
1111                 qlock(&c->qlock);
1112                 nexterror();
1113         }
1114         rendez_sleep(&c->cr, announced, c);
1115         qlock(&c->qlock);
1116         poperror();
1117
1118         if (c->cerr[0] != '\0')
1119                 error(EFAIL, c->cerr);
1120 }
1121
1122 /*
1123  *  called by protocol bind routine to set addresses
1124  */
1125 void Fsstdbind(struct conv *c, char *argv[], int argc)
1126 {
1127         switch (argc) {
1128                 default:
1129                         error(EINVAL, "bad args to bind");
1130                 case 2:
1131                         setladdrport(c, argv[1], 0);
1132                         break;
1133         }
1134 }
1135
1136 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1137 {
1138         if (x->bind == NULL)
1139                 Fsstdbind(c, cb->f, cb->nf);
1140         else
1141                 x->bind(c, cb->f, cb->nf);
1142 }
1143
1144 static void shutdownctlmsg(struct conv *cv, struct cmdbuf *cb)
1145 {
1146         if (cb->nf < 2)
1147                 goto err;
1148         if (!strcmp(cb->f[1], "rd")) {
1149                 qhangup(cv->rq, "shutdown");
1150                 if (cv->p->shutdown)
1151                         cv->p->shutdown(cv, SHUT_RD);
1152         } else if (!strcmp(cb->f[1], "wr")) {
1153                 qhangup(cv->wq, "shutdown");
1154                 if (cv->p->shutdown)
1155                         cv->p->shutdown(cv, SHUT_WR);
1156         } else if (!strcmp(cb->f[1], "rdwr")) {
1157                 qhangup(cv->rq, "shutdown");
1158                 qhangup(cv->wq, "shutdown");
1159                 if (cv->p->shutdown)
1160                         cv->p->shutdown(cv, SHUT_RDWR);
1161         } else {
1162                 goto err;
1163         }
1164         return;
1165 err:
1166         error(EINVAL, "shutdown [rx|tx|rxtx]");
1167 }
1168
1169 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1170 {
1171         if (cb->nf < 2)
1172                 c->tos = 0;
1173         else
1174                 c->tos = atoi(cb->f[1]);
1175 }
1176
1177 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1178 {
1179         if (cb->nf < 2)
1180                 c->ttl = MAXTTL;
1181         else
1182                 c->ttl = atoi(cb->f[1]);
1183 }
1184
1185 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1186 {
1187         ERRSTACK(1);
1188         struct conv *c;
1189         struct Proto *x;
1190         char *p;
1191         struct cmdbuf *cb;
1192         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1193         struct Fs *f;
1194         char *a;
1195
1196         a = v;
1197         f = ipfs[ch->dev];
1198
1199         switch (TYPE(ch->qid)) {
1200                 default:
1201                         error(EPERM, ERROR_FIXME);
1202                 case Qdata:
1203                         x = f->p[PROTO(ch->qid)];
1204                         c = x->conv[CONV(ch->qid)];
1205                         if (ch->flag & O_NONBLOCK)
1206                                 qwrite_nonblock(c->wq, a, n);
1207                         else
1208                                 qwrite(c->wq, a, n);
1209                         break;
1210                 case Qarp:
1211                         return arpwrite(f, a, n);
1212                 case Qiproute:
1213                         return routewrite(f, ch, a, n);
1214                 case Qlog:
1215                         netlogctl(f, a, n);
1216                         return n;
1217                 case Qndb:
1218                         return ndbwrite(f, a, off, n);
1219                 case Qctl:
1220                         x = f->p[PROTO(ch->qid)];
1221                         c = x->conv[CONV(ch->qid)];
1222                         cb = parsecmd(a, n);
1223
1224                         qlock(&c->qlock);
1225                         if (waserror()) {
1226                                 qunlock(&c->qlock);
1227                                 kfree(cb);
1228                                 nexterror();
1229                         }
1230                         if (cb->nf < 1)
1231                                 error(EFAIL, "short control request");
1232                         if (strcmp(cb->f[0], "connect") == 0)
1233                                 connectctlmsg(x, c, cb);
1234                         else if (strcmp(cb->f[0], "announce") == 0)
1235                                 announcectlmsg(x, c, cb);
1236                         else if (strcmp(cb->f[0], "bind") == 0)
1237                                 bindctlmsg(x, c, cb);
1238                         else if (strcmp(cb->f[0], "shutdown") == 0)
1239                                 shutdownctlmsg(c, cb);
1240                         else if (strcmp(cb->f[0], "ttl") == 0)
1241                                 ttlctlmsg(c, cb);
1242                         else if (strcmp(cb->f[0], "tos") == 0)
1243                                 tosctlmsg(c, cb);
1244                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1245                                 c->ignoreadvice = 1;
1246                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1247                                 if (cb->nf < 2)
1248                                         error(EFAIL, "addmulti needs interface address");
1249                                 if (cb->nf == 2) {
1250                                         if (!ipismulticast(c->raddr))
1251                                                 error(EFAIL, "addmulti for a non multicast address");
1252                                         parseip(ia, cb->f[1]);
1253                                         ipifcaddmulti(c, c->raddr, ia);
1254                                 } else {
1255                                         parseip(ma, cb->f[2]);
1256                                         if (!ipismulticast(ma))
1257                                                 error(EFAIL, "addmulti for a non multicast address");
1258                                         parseip(ia, cb->f[1]);
1259                                         ipifcaddmulti(c, ma, ia);
1260                                 }
1261                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1262                                 if (cb->nf < 2)
1263                                         error(EFAIL, "remmulti needs interface address");
1264                                 if (!ipismulticast(c->raddr))
1265                                         error(EFAIL, "remmulti for a non multicast address");
1266                                 parseip(ia, cb->f[1]);
1267                                 ipifcremmulti(c, c->raddr, ia);
1268                         } else if (x->ctl != NULL) {
1269                                 x->ctl(c, cb->f, cb->nf);
1270                         } else
1271                                 error(EFAIL, "unknown control request");
1272                         qunlock(&c->qlock);
1273                         kfree(cb);
1274                         poperror();
1275         }
1276         return n;
1277 }
1278
1279 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1280 {
1281         struct conv *c;
1282         int n;
1283
1284         switch (TYPE(ch->qid)) {
1285                 case Qdata:
1286                         c = chan2conv(ch);
1287                         if (bp->next)
1288                                 bp = concatblock(bp);
1289                         n = BLEN(bp);
1290                         if (ch->flag & O_NONBLOCK)
1291                                 qbwrite_nonblock(c->wq, bp);
1292                         else
1293                                 qbwrite(c->wq, bp);
1294                         return n;
1295                 default:
1296                         return devbwrite(ch, bp, offset);
1297         }
1298 }
1299
1300 static void ip_wake_cb(struct queue *q, void *data, int filter)
1301 {
1302         struct conv *conv = (struct conv*)data;
1303         struct fd_tap *tap_i;
1304         /* For these two, we want to ignore events on the opposite end of the
1305          * queues.  For instance, we want to know when the WQ is writable.  Our
1306          * writes will actually make it readable - we don't want to trigger a tap
1307          * for that.  However, qio doesn't know how/why we are using a queue, or
1308          * even who the ends are (hence the callbacks) */
1309         if ((filter & FDTAP_FILT_READABLE) && (q == conv->wq))
1310                 return;
1311         if ((filter & FDTAP_FILT_WRITABLE) && (q == conv->rq))
1312                 return;
1313         /* At this point, we have an event we want to send to our taps (if any).
1314          * The lock protects list integrity and the existence of the tap.
1315          *
1316          * Previously, I thought of using the conv qlock.  That actually breaks, due
1317          * to weird usages of the qlock (someone holds it for a long time, blocking
1318          * the inbound wakeup from etherread4).
1319          *
1320          * I opted for a spinlock for a couple reasons:
1321          * - fire_tap should not block.  ideally it'll be fast too (it's mostly a
1322          * send_event).
1323          * - our callers might not want to block.  A lot of network wakeups will
1324          * come network processes (etherread4) or otherwise unrelated to this
1325          * particular conversation.  I'd rather do something like fire off a KMSG
1326          * than block those.
1327          * - if fire_tap takes a while, holding the lock only slows down other
1328          * events on this *same* conversation, or other tap registration.  not a
1329          * huge deal. */
1330         spin_lock(&conv->tap_lock);
1331         SLIST_FOREACH(tap_i, &conv->data_taps, link)
1332                 fire_tap(tap_i, filter);
1333         spin_unlock(&conv->tap_lock);
1334 }
1335
1336 int iptapfd(struct chan *chan, struct fd_tap *tap, int cmd)
1337 {
1338         struct conv *conv = chan2conv(chan);
1339         int ret;
1340
1341         #define DEVIP_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
1342                                        FDTAP_FILT_HANGUP | FDTAP_FILT_PRIORITY |   \
1343                                        FDTAP_FILT_ERROR)
1344         #define DEVIP_LEGAL_LISTEN_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
1345
1346         switch (TYPE(chan->qid)) {
1347                 case Qdata:
1348                         if (tap->filter & ~DEVIP_LEGAL_DATA_TAPS) {
1349                                 set_errno(ENOSYS);
1350                                 set_errstr("Unsupported #%s data tap %p, must be %p", devname(),
1351                                            tap->filter, DEVIP_LEGAL_DATA_TAPS);
1352                                 return -1;
1353                         }
1354                         spin_lock(&conv->tap_lock);
1355                         switch (cmd) {
1356                                 case (FDTAP_CMD_ADD):
1357                                         if (SLIST_EMPTY(&conv->data_taps)) {
1358                                                 qio_set_wake_cb(conv->rq, ip_wake_cb, conv);
1359                                                 qio_set_wake_cb(conv->wq, ip_wake_cb, conv);
1360                                         }
1361                                         SLIST_INSERT_HEAD(&conv->data_taps, tap, link);
1362                                         ret = 0;
1363                                         break;
1364                                 case (FDTAP_CMD_REM):
1365                                         SLIST_REMOVE(&conv->data_taps, tap, fd_tap, link);
1366                                         if (SLIST_EMPTY(&conv->data_taps)) {
1367                                                 qio_set_wake_cb(conv->rq, 0, conv);
1368                                                 qio_set_wake_cb(conv->wq, 0, conv);
1369                                         }
1370                                         ret = 0;
1371                                         break;
1372                                 default:
1373                                         set_errno(ENOSYS);
1374                                         set_errstr("Unsupported #%s data tap command %p",
1375                                                    devname(), cmd);
1376                                         ret = -1;
1377                         }
1378                         spin_unlock(&conv->tap_lock);
1379                         return ret;
1380                 case Qlisten:
1381                         if (tap->filter & ~DEVIP_LEGAL_LISTEN_TAPS) {
1382                                 set_errno(ENOSYS);
1383                                 set_errstr("Unsupported #%s listen tap %p, must be %p",
1384                                            devname(), tap->filter, DEVIP_LEGAL_LISTEN_TAPS);
1385                                 return -1;
1386                         }
1387                         spin_lock(&conv->tap_lock);
1388                         switch (cmd) {
1389                                 case (FDTAP_CMD_ADD):
1390                                         SLIST_INSERT_HEAD(&conv->listen_taps, tap, link);
1391                                         ret = 0;
1392                                         break;
1393                                 case (FDTAP_CMD_REM):
1394                                         SLIST_REMOVE(&conv->listen_taps, tap, fd_tap, link);
1395                                         ret = 0;
1396                                         break;
1397                                 default:
1398                                         set_errno(ENOSYS);
1399                                         set_errstr("Unsupported #%s listen tap command %p",
1400                                                    devname(), cmd);
1401                                         ret = -1;
1402                         }
1403                         spin_unlock(&conv->tap_lock);
1404                         return ret;
1405                 default:
1406                         set_errno(ENOSYS);
1407                         set_errstr("Can't tap #%s file type %d", devname(),
1408                                    TYPE(chan->qid));
1409                         return -1;
1410         }
1411 }
1412
1413 struct dev ipdevtab __devtab = {
1414         .name = "ip",
1415
1416         .reset = ipreset,
1417         .init = ipinit,
1418         .shutdown = devshutdown,
1419         .attach = ipattach,
1420         .walk = ipwalk,
1421         .stat = ipstat,
1422         .open = ipopen,
1423         .create = devcreate,
1424         .close = ipclose,
1425         .read = ipread,
1426         .bread = ipbread,
1427         .write = ipwrite,
1428         .bwrite = ipbwrite,
1429         .remove = devremove,
1430         .wstat = ipwstat,
1431         .power = devpower,
1432         .chaninfo = ipchaninfo,
1433         .tapfd = iptapfd,
1434 };
1435
1436 int Fsproto(struct Fs *f, struct Proto *p)
1437 {
1438         if (f->np >= Maxproto)
1439                 return -1;
1440
1441         qlock_init(&p->qlock);
1442         p->f = f;
1443
1444         if (p->ipproto > 0) {
1445                 if (f->t2p[p->ipproto] != NULL)
1446                         return -1;
1447                 f->t2p[p->ipproto] = p;
1448         }
1449
1450         p->qid.type = QTDIR;
1451         p->qid.path = QID(f->np, 0, Qprotodir);
1452         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1453         if (p->conv == NULL)
1454                 panic("Fsproto");
1455
1456         p->x = f->np;
1457         p->nextport = 0;
1458         p->nextrport = 600;
1459         f->p[f->np++] = p;
1460
1461         return 0;
1462 }
1463
1464 /*
1465  *  return true if this protocol is
1466  *  built in
1467  */
1468 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1469 {
1470         return f->t2p[proto] != NULL;
1471 }
1472
1473 /*
1474  *  called with protocol locked
1475  */
1476 struct conv *Fsprotoclone(struct Proto *p, char *user)
1477 {
1478         struct conv *c, **pp, **ep;
1479
1480 retry:
1481         c = NULL;
1482         ep = &p->conv[p->nc];
1483         for (pp = p->conv; pp < ep; pp++) {
1484                 c = *pp;
1485                 if (c == NULL) {
1486                         c = kzmalloc(sizeof(struct conv), 0);
1487                         if (c == NULL)
1488                                 error(ENOMEM, ERROR_FIXME);
1489                         qlock_init(&c->qlock);
1490                         qlock_init(&c->listenq);
1491                         rendez_init(&c->cr);
1492                         rendez_init(&c->listenr);
1493                         SLIST_INIT(&c->data_taps);      /* already = 0; set to be futureproof */
1494                         SLIST_INIT(&c->listen_taps);
1495                         spinlock_init(&c->tap_lock);
1496                         qlock(&c->qlock);
1497                         c->p = p;
1498                         c->x = pp - p->conv;
1499                         if (p->ptclsize != 0) {
1500                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1501                                 if (c->ptcl == NULL) {
1502                                         kfree(c);
1503                                         error(ENOMEM, ERROR_FIXME);
1504                                 }
1505                         }
1506                         *pp = c;
1507                         p->ac++;
1508                         c->eq = qopen(1024, Qmsg, 0, 0);
1509                         (*p->create) (c);
1510                         assert(c->rq && c->wq);
1511                         break;
1512                 }
1513                 if (canqlock(&c->qlock)) {
1514                         /*
1515                          *  make sure both processes and protocol
1516                          *  are done with this Conv
1517                          */
1518                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1519                                 break;
1520
1521                         qunlock(&c->qlock);
1522                 }
1523         }
1524         if (pp >= ep) {
1525                 if (p->gc != NULL && (*p->gc) (p))
1526                         goto retry;
1527                 return NULL;
1528         }
1529
1530         c->inuse = 1;
1531         kstrdup(&c->owner, user);
1532         c->perm = 0660;
1533         c->state = Idle;
1534         ipmove(c->laddr, IPnoaddr);
1535         ipmove(c->raddr, IPnoaddr);
1536         c->r = NULL;
1537         c->rgen = 0;
1538         c->lport = 0;
1539         c->rport = 0;
1540         c->restricted = 0;
1541         c->ttl = MAXTTL;
1542         c->tos = DFLTTOS;
1543         qreopen(c->rq);
1544         qreopen(c->wq);
1545         qreopen(c->eq);
1546
1547         qunlock(&c->qlock);
1548         return c;
1549 }
1550
1551 int Fsconnected(struct conv *c, char *msg)
1552 {
1553         if (msg != NULL && *msg != '\0')
1554                 strlcpy(c->cerr, msg, sizeof(c->cerr));
1555
1556         switch (c->state) {
1557                 case Announcing:
1558                         c->state = Announced;
1559                         break;
1560
1561                 case Connecting:
1562                         c->state = Connected;
1563                         break;
1564         }
1565
1566         rendez_wakeup(&c->cr);
1567         return 0;
1568 }
1569
1570 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1571 {
1572         if (f->ipmux)
1573                 return f->ipmux;
1574         else
1575                 return f->t2p[proto];
1576 }
1577
1578 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1579 {
1580         return f->t2p[proto];
1581 }
1582
1583 static void fire_listener_taps(struct conv *conv)
1584 {
1585         struct fd_tap *tap_i;
1586         if (SLIST_EMPTY(&conv->listen_taps))
1587                 return;
1588         spin_lock(&conv->tap_lock);
1589         SLIST_FOREACH(tap_i, &conv->listen_taps, link)
1590                 fire_tap(tap_i, FDTAP_FILT_READABLE);
1591         spin_unlock(&conv->tap_lock);
1592 }
1593
1594 /*
1595  *  called with protocol locked
1596  */
1597 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1598                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1599 {
1600         struct conv *nc;
1601         struct conv **l;
1602         int i;
1603
1604         qlock(&c->qlock);
1605         i = 0;
1606         for (l = &c->incall; *l; l = &(*l)->next)
1607                 i++;
1608         if (i >= Maxincall) {
1609                 qunlock(&c->qlock);
1610                 return NULL;
1611         }
1612
1613         /* find a free conversation */
1614         nc = Fsprotoclone(c->p, network);
1615         if (nc == NULL) {
1616                 qunlock(&c->qlock);
1617                 return NULL;
1618         }
1619         ipmove(nc->raddr, raddr);
1620         nc->rport = rport;
1621         ipmove(nc->laddr, laddr);
1622         nc->lport = lport;
1623         nc->next = NULL;
1624         *l = nc;
1625         nc->state = Connected;
1626         nc->ipversion = version;
1627
1628         qunlock(&c->qlock);
1629
1630         rendez_wakeup(&c->listenr);
1631         fire_listener_taps(c);
1632
1633         return nc;
1634 }
1635
1636 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1637 {
1638         if (off > strlen(f->ndb))
1639                 error(EIO, ERROR_FIXME);
1640         if (off + n >= sizeof(f->ndb) - 1)
1641                 error(EIO, ERROR_FIXME);
1642         memmove(f->ndb + off, a, n);
1643         f->ndb[off + n] = 0;
1644         f->ndbvers++;
1645         f->ndbmtime = seconds();
1646         return n;
1647 }
1648
1649 uint32_t scalednconv(void)
1650 {
1651         //if(conf.npage*BY2PG >= 128*MB)
1652         return Nchans * 4;
1653         //  return Nchans;
1654 }