Report readability/writability via 9p stat
[akaros.git] / kern / src / net / devip.c
1 /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
2  * Portions Copyright © 1997-1999 Vita Nuova Limited
3  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
4  *                                (www.vitanuova.com)
5  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
6  *
7  * Modified for the Akaros operating system:
8  * Copyright (c) 2013-2014 The Regents of the University of California
9  * Copyright (c) 2013-2015 Google Inc.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included in
19  * all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  * SOFTWARE. */
28
29 #include <vfs.h>
30 #include <kfs.h>
31 #include <slab.h>
32 #include <kmalloc.h>
33 #include <kref.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <assert.h>
37 #include <error.h>
38 #include <cpio.h>
39 #include <pmap.h>
40 #include <smp.h>
41 #include <ip.h>
42
43 struct dev ipdevtab;
44
45 static char *devname(void)
46 {
47         return ipdevtab.name;
48 }
49
/*
 * Qid path layout for this device.  A path packs three fields (see the
 * TYPE/CONV/PROTO/QID macros): the low Logtype bits hold one of the Q* file
 * types below, the next Logconv bits a conversation index, and the next
 * Logproto bits a protocol index.
 */
enum {
	/* top level directory, then the files that live directly in it */
	Qtopdir = 1,
	Qtopbase,
	Qarp = Qtopbase,
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,

	/* directory for a protocol, then the per-protocol files */
	Qprotodir,
	Qprotobase,
	Qclone = Qprotobase,
	Qstats,

	/* directory for a conversation, then the per-conversation files */
	Qconvdir,
	Qconvbase,
	Qctl = Qconvbase,
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,

	/* widths, masks and shifts of the qid path fields */
	Logtype = 5,
	Masktype = (1 << Logtype) - 1,
	Logconv = 12,
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,
	Logproto = 8,
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,

	/* number of slots in ipfs[] */
	Nfs = 32,
};
/*
 * Accessors for the fields packed into a qid path, and QID() to build a path
 * from a protocol index, a conversation index and a Q* file type.
 */
#define TYPE(x)		(((uint32_t)(x).path) & Masktype)
#define CONV(x)		((((uint32_t)(x).path) >> Shiftconv) & Maskconv)
#define PROTO(x)	((((uint32_t)(x).path) >> Shiftproto) & Maskproto)
#define QID(p, c, y)	(((p) << (Shiftproto)) | ((c) << Shiftconv) | (y))

/* Name passed as the owning user for files not owned by a conversation. */
static char network[] = "network";
92
93 qlock_t fslock;
94 struct Fs *ipfs[Nfs];                   /* attached fs's */
95 struct queue *qlog;
96
97 extern void nullmediumlink(void);
98 extern void pktmediumlink(void);
99 extern char *eve;
100 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
101 static void closeconv(struct conv *);
102
103 static struct conv *chan2conv(struct chan *chan)
104 {
105         /* That's a lot of pointers to get to the conv! */
106         return ipfs[chan->dev]->p[PROTO(chan->qid)]->conv[CONV(chan->qid)];
107 }
108
109 static inline int founddevdir(struct chan *c, struct qid q, char *n,
110                                                           int64_t length, char *user, long perm,
111                                                           struct dir *db)
112 {
113         devdir(c, q, n, length, user, perm, db);
114         return 1;
115 }
116
117 static int topdirgen(struct chan *c, struct dir *dp)
118 {
119         struct qid q;
120         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
121         snprintf(get_cur_genbuf(), GENBUF_SZ, "#%s%lu", devname(), c->dev);
122         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
123 }
124
125
126 static int ip3gen(struct chan *c, int i, struct dir *dp)
127 {
128         struct qid q;
129         struct conv *cv;
130         char *p;
131         int perm;
132
133         cv = chan2conv(c);
134         if (cv->owner == NULL)
135                 kstrdup(&cv->owner, eve);
136         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
137
138         switch (i) {
139                 default:
140                         return -1;
141                 case Qctl:
142                         return founddevdir(c, q, "ctl", 0,
143                                                    cv->owner, cv->perm, dp);
144                 case Qdata:
145                         perm = cv->perm;
146                         perm |= qreadable(cv->rq) ? DMREADABLE : 0;
147                         perm |= qwritable(cv->wq) ? DMWRITABLE : 0;
148                         return founddevdir(c, q, "data", qlen(cv->rq),
149                                                            cv->owner, perm, dp);
150                 case Qerr:
151                         perm = cv->perm;
152                         perm |= qreadable(cv->eq) ? DMREADABLE : 0;
153                         return founddevdir(c, q, "err", qlen(cv->eq),
154                                                            cv->owner, perm, dp);
155                 case Qlisten:
156                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
157                 case Qlocal:
158                         p = "local";
159                         break;
160                 case Qremote:
161                         p = "remote";
162                         break;
163                 case Qsnoop:
164                         if (strcmp(cv->p->name, "ipifc") != 0)
165                                 return -1;
166                         perm = 0400;
167                         perm |= qreadable(cv->sq) ? DMREADABLE : 0;
168                         return founddevdir(c, q, "snoop", qlen(cv->sq),
169                                                            cv->owner, perm, dp);
170                 case Qstatus:
171                         p = "status";
172                         break;
173         }
174         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
175 }
176
177 static int ip2gen(struct chan *c, int i, struct dir *dp)
178 {
179         struct qid q;
180         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
181         switch (i) {
182                 case Qclone:
183                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
184                 case Qstats:
185                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
186         }
187         return -1;
188 }
189
190 static int ip1gen(struct chan *c, int i, struct dir *dp)
191 {
192         struct qid q;
193         char *p;
194         int prot;
195         int len = 0;
196         struct Fs *f;
197         extern uint32_t kerndate;
198
199         f = ipfs[c->dev];
200
201         prot = 0666;
202         mkqid(&q, QID(0, 0, i), 0, QTFILE);
203         switch (i) {
204                 default:
205                         return -1;
206                 case Qarp:
207                         p = "arp";
208                         break;
209                 case Qndb:
210                         p = "ndb";
211                         len = strlen(f->ndb);
212                         q.vers = f->ndbvers;
213                         break;
214                 case Qiproute:
215                         p = "iproute";
216                         break;
217                 case Qipselftab:
218                         p = "ipselftab";
219                         prot = 0444;
220                         break;
221                 case Qiprouter:
222                         p = "iprouter";
223                         break;
224                 case Qlog:
225                         p = "log";
226                         break;
227         }
228         devdir(c, q, p, len, network, prot, dp);
229         if (i == Qndb && f->ndbmtime > kerndate)
230                 dp->mtime = f->ndbmtime;
231         return 1;
232 }
233
234 static int
235 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
236           int s, struct dir *dp)
237 {
238         struct qid q;
239         struct conv *cv;
240         struct Fs *f;
241
242         f = ipfs[c->dev];
243
244         switch (TYPE(c->qid)) {
245                 case Qtopdir:
246                         if (s == DEVDOTDOT)
247                                 return topdirgen(c, dp);
248                         if (s < f->np) {
249                                 if (f->p[s]->connect == NULL)
250                                         return 0;       /* protocol with no user interface */
251                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
252                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
253                         }
254                         s -= f->np;
255                         return ip1gen(c, s + Qtopbase, dp);
256                 case Qarp:
257                 case Qndb:
258                 case Qlog:
259                 case Qiproute:
260                 case Qiprouter:
261                 case Qipselftab:
262                         return ip1gen(c, TYPE(c->qid), dp);
263                 case Qprotodir:
264                         if (s == DEVDOTDOT)
265                                 return topdirgen(c, dp);
266                         else if (s < f->p[PROTO(c->qid)]->ac) {
267                                 cv = f->p[PROTO(c->qid)]->conv[s];
268                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
269                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
270                                 return
271                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
272                         }
273                         s -= f->p[PROTO(c->qid)]->ac;
274                         return ip2gen(c, s + Qprotobase, dp);
275                 case Qclone:
276                 case Qstats:
277                         return ip2gen(c, TYPE(c->qid), dp);
278                 case Qconvdir:
279                         if (s == DEVDOTDOT) {
280                                 s = PROTO(c->qid);
281                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
282                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
283                                 return 1;
284                         }
285                         return ip3gen(c, s + Qconvbase, dp);
286                 case Qctl:
287                 case Qdata:
288                 case Qerr:
289                 case Qlisten:
290                 case Qlocal:
291                 case Qremote:
292                 case Qstatus:
293                 case Qsnoop:
294                         return ip3gen(c, TYPE(c->qid), dp);
295         }
296         return -1;
297 }
298
299 static void ipinit(void)
300 {
301         qlock_init(&fslock);
302         nullmediumlink();
303         pktmediumlink();
304 /* if only
305         fmtinstall('i', eipfmt);
306         fmtinstall('I', eipfmt);
307         fmtinstall('E', eipfmt);
308         fmtinstall('V', eipfmt);
309         fmtinstall('M', eipfmt);
310 */
311 }
312
/* Device reset hook; this device has nothing to do here. */
static void ipreset(void)
{
	/* intentionally empty */
}
316
317 static struct Fs *ipgetfs(int dev)
318 {
319         extern void (*ipprotoinit[]) (struct Fs *);
320         struct Fs *f;
321         int i;
322
323         if (dev >= Nfs)
324                 return NULL;
325
326         qlock(&fslock);
327         if (ipfs[dev] == NULL) {
328                 f = kzmalloc(sizeof(struct Fs), MEM_WAIT);
329                 rwinit(&f->rwlock);
330                 qlock_init(&f->iprouter.qlock);
331                 ip_init(f);
332                 arpinit(f);
333                 netloginit(f);
334                 for (i = 0; ipprotoinit[i]; i++)
335                         ipprotoinit[i] (f);
336                 f->dev = dev;
337                 ipfs[dev] = f;
338         }
339         qunlock(&fslock);
340
341         return ipfs[dev];
342 }
343
344 struct IPaux *newipaux(char *owner, char *tag)
345 {
346         struct IPaux *a;
347         int n;
348
349         a = kzmalloc(sizeof(*a), 0);
350         kstrdup(&a->owner, owner);
351         memset(a->tag, ' ', sizeof(a->tag));
352         n = strlen(tag);
353         if (n > sizeof(a->tag))
354                 n = sizeof(a->tag);
355         memmove(a->tag, tag, n);
356         return a;
357 }
358
359 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
360
361 static struct chan *ipattach(char *spec)
362 {
363         struct chan *c;
364         int dev;
365
366         dev = atoi(spec);
367         if (dev >= Nfs)
368                 error(EFAIL, "bad specification");
369
370         ipgetfs(dev);
371         c = devattach(devname(), spec);
372         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
373         c->dev = dev;
374
375         c->aux = newipaux(commonuser(), "none");
376
377         return c;
378 }
379
380 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
381                                                           int nname)
382 {
383         struct IPaux *a = c->aux;
384         struct walkqid *w;
385
386         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
387         if (w != NULL && w->clone != NULL)
388                 w->clone->aux = newipaux(a->owner, a->tag);
389         return w;
390 }
391
392 static int ipstat(struct chan *c, uint8_t * db, int n)
393 {
394         return devstat(c, db, n, NULL, 0, ipgen);
395 }
396
397 static int should_wake(void *arg)
398 {
399         struct conv *cv = arg;
400         /* signal that the conv is closed */
401         if (qisclosed(cv->rq))
402                 return TRUE;
403         return cv->incall != NULL;
404 }
405
406 static struct chan *ipopen(struct chan *c, int omode)
407 {
408         ERRSTACK(2);
409         struct conv *cv, *nc;
410         struct Proto *p;
411         int perm;
412         struct Fs *f;
413
414         /* perm is a lone rwx, not the rwx------ from the conversion */
415         perm = omode_to_rwx(omode) >> 6;
416
417         f = ipfs[c->dev];
418
419         switch (TYPE(c->qid)) {
420                 default:
421                         break;
422                 case Qndb:
423                         if (omode & (O_WRITE | O_TRUNC) && !iseve())
424                                 error(EPERM, ERROR_FIXME);
425                         if ((omode & (O_WRITE | O_TRUNC)) == (O_WRITE | O_TRUNC))
426                                 f->ndb[0] = 0;
427                         break;
428                 case Qlog:
429                         netlogopen(f);
430                         break;
431                 case Qiprouter:
432                         iprouteropen(f);
433                         break;
434                 case Qiproute:
435                         break;
436                 case Qtopdir:
437                 case Qprotodir:
438                 case Qconvdir:
439                 case Qstatus:
440                 case Qremote:
441                 case Qlocal:
442                 case Qstats:
443                 case Qipselftab:
444                         if (omode & O_WRITE)
445                                 error(EPERM, ERROR_FIXME);
446                         break;
447                 case Qsnoop:
448                         if (omode & O_WRITE)
449                                 error(EPERM, ERROR_FIXME);
450                         p = f->p[PROTO(c->qid)];
451                         cv = p->conv[CONV(c->qid)];
452                         if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
453                                 error(EPERM, ERROR_FIXME);
454                         atomic_inc(&cv->snoopers);
455                         break;
456                 case Qclone:
457                         p = f->p[PROTO(c->qid)];
458                         qlock(&p->qlock);
459                         if (waserror()) {
460                                 qunlock(&p->qlock);
461                                 nexterror();
462                         }
463                         cv = Fsprotoclone(p, ATTACHER(c));
464                         qunlock(&p->qlock);
465                         poperror();
466                         if (cv == NULL) {
467                                 error(ENODEV, ERROR_FIXME);
468                                 break;
469                         }
470                         mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
471                         break;
472                 case Qdata:
473                 case Qctl:
474                 case Qerr:
475                         p = f->p[PROTO(c->qid)];
476                         qlock(&p->qlock);
477                         cv = p->conv[CONV(c->qid)];
478                         qlock(&cv->qlock);
479                         if (waserror()) {
480                                 qunlock(&cv->qlock);
481                                 qunlock(&p->qlock);
482                                 nexterror();
483                         }
484                         if ((perm & (cv->perm >> 6)) != perm) {
485                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
486                                         error(EPERM, ERROR_FIXME);
487                                 if ((perm & cv->perm) != perm)
488                                         error(EPERM, ERROR_FIXME);
489
490                         }
491                         cv->inuse++;
492                         if (cv->inuse == 1) {
493                                 kstrdup(&cv->owner, ATTACHER(c));
494                                 cv->perm = 0660;
495                         }
496                         qunlock(&cv->qlock);
497                         qunlock(&p->qlock);
498                         poperror();
499                         break;
500                 case Qlisten:
501                         cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
502                         /* No permissions or Announce checks required.  We'll see if that's
503                          * a good idea or not. (the perm check would do nothing, as is,
504                          * since an O_PATH perm is 0).
505                          *
506                          * But we probably want to incref to keep the conversation around
507                          * until this FD/chan is closed.  #ip is a little weird in that
508                          * objects never really go away (high water mark for convs, you can
509                          * always find them in the ns).  I think it is possible to
510                          * namec/ipgen a chan, then have that conv close, then have that
511                          * chan be opened.  You can probably do this with a data file. */
512                         if (omode & O_PATH) {
513                                 qlock(&cv->qlock);
514                                 cv->inuse++;
515                                 qunlock(&cv->qlock);
516                                 break;
517                         }
518                         if ((perm & (cv->perm >> 6)) != perm) {
519                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
520                                         error(EPERM, ERROR_FIXME);
521                                 if ((perm & cv->perm) != perm)
522                                         error(EPERM, ERROR_FIXME);
523
524                         }
525
526                         if (cv->state != Announced)
527                                 error(EFAIL, "not announced");
528
529                         if (waserror()) {
530                                 closeconv(cv);
531                                 nexterror();
532                         }
533                         qlock(&cv->qlock);
534                         cv->inuse++;
535                         qunlock(&cv->qlock);
536
537                         nc = NULL;
538                         while (nc == NULL) {
539                                 /* give up if we got a hangup */
540                                 if (qisclosed(cv->rq))
541                                         error(EFAIL, "listen hungup");
542
543                                 qlock(&cv->listenq);
544                                 if (waserror()) {
545                                         qunlock(&cv->listenq);
546                                         nexterror();
547                                 }
548                                 /* we can peek at incall without grabbing the cv qlock.  if
549                                  * anything is there, it'll remain there until we dequeue it.
550                                  * no one else can, since we hold the listenq lock */
551                                 if ((c->flag & O_NONBLOCK) && !cv->incall)
552                                         error(EAGAIN, "listen queue empty");
553                                 /* wait for a connect */
554                                 rendez_sleep(&cv->listenr, should_wake, cv);
555
556                                 /* if there is a concurrent hangup, they will hold the qlock
557                                  * until the hangup is complete, including closing the cv->rq */
558                                 qlock(&cv->qlock);
559                                 nc = cv->incall;
560                                 if (nc != NULL) {
561                                         cv->incall = nc->next;
562                                         mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
563                                         kstrdup(&cv->owner, ATTACHER(c));
564                                 }
565                                 qunlock(&cv->qlock);
566
567                                 qunlock(&cv->listenq);
568                                 poperror();
569                         }
570                         closeconv(cv);
571                         poperror();
572                         break;
573         }
574         c->mode = openmode(omode);
575         c->flag |= COPEN;
576         c->offset = 0;
577         return c;
578 }
579
580 static int ipwstat(struct chan *c, uint8_t * dp, int n)
581 {
582         ERRSTACK(2);
583         struct dir *d;
584         struct conv *cv;
585         struct Fs *f;
586         struct Proto *p;
587
588         f = ipfs[c->dev];
589         switch (TYPE(c->qid)) {
590                 default:
591                         error(EPERM, ERROR_FIXME);
592                         break;
593                 case Qctl:
594                 case Qdata:
595                         break;
596         }
597
598         d = kzmalloc(sizeof(*d) + n, 0);
599         if (waserror()) {
600                 kfree(d);
601                 nexterror();
602         }
603         n = convM2D(dp, n, d, (char *)&d[1]);
604         if (n == 0)
605                 error(ENODATA, ERROR_FIXME);
606         p = f->p[PROTO(c->qid)];
607         cv = p->conv[CONV(c->qid)];
608         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
609                 error(EPERM, ERROR_FIXME);
610         if (!emptystr(d->uid))
611                 kstrdup(&cv->owner, d->uid);
612         if (d->mode != ~0UL)
613                 cv->perm = d->mode & 0777;
614         poperror();
615         kfree(d);
616         return n;
617 }
618
619 /* Should be able to handle any file type chan. Feel free to extend it. */
620 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
621 {
622         struct conv *conv;
623         struct Proto *proto;
624         char *p;
625         struct Fs *f;
626
627         f = ipfs[ch->dev];
628
629         switch (TYPE(ch->qid)) {
630                 default:
631                         ret = "Unknown type";
632                         break;
633                 case Qdata:
634                         proto = f->p[PROTO(ch->qid)];
635                         conv = proto->conv[CONV(ch->qid)];
636                         snprintf(ret, ret_l, "Qdata, %s, proto %s, conv idx %d, rq len %d, wq len %d",
637                                  SLIST_EMPTY(&conv->data_taps) ? "untapped" : "tapped",
638                                  proto->name, conv->x, qlen(conv->rq), qlen(conv->wq));
639                         break;
640                 case Qarp:
641                         ret = "Qarp";
642                         break;
643                 case Qiproute:
644                         ret = "Qiproute";
645                         break;
646                 case Qlisten:
647                         proto = f->p[PROTO(ch->qid)];
648                         conv = proto->conv[CONV(ch->qid)];
649                         snprintf(ret, ret_l, "Qlisten, %s proto %s, conv idx %d",
650                                  SLIST_EMPTY(&conv->listen_taps) ? "untapped" : "tapped",
651                                  proto->name, conv->x);
652                         break;
653                 case Qlog:
654                         ret = "Qlog";
655                         break;
656                 case Qndb:
657                         ret = "Qndb";
658                         break;
659                 case Qctl:
660                         proto = f->p[PROTO(ch->qid)];
661                         conv = proto->conv[CONV(ch->qid)];
662                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
663                                          conv->x);
664                         break;
665         }
666         return ret;
667 }
668
669 static void closeconv(struct conv *cv)
670 {
671         struct conv *nc;
672         struct Ipmulti *mp;
673
674         qlock(&cv->qlock);
675
676         if (--cv->inuse > 0) {
677                 qunlock(&cv->qlock);
678                 return;
679         }
680
681         /* close all incoming calls since no listen will ever happen */
682         for (nc = cv->incall; nc; nc = cv->incall) {
683                 cv->incall = nc->next;
684                 closeconv(nc);
685         }
686         cv->incall = NULL;
687
688         kstrdup(&cv->owner, network);
689         cv->perm = 0660;
690
691         while ((mp = cv->multi) != NULL)
692                 ipifcremmulti(cv, mp->ma, mp->ia);
693
694         cv->r = NULL;
695         cv->rgen = 0;
696         cv->p->close(cv);
697         cv->state = Idle;
698         qunlock(&cv->qlock);
699 }
700
701 static void ipclose(struct chan *c)
702 {
703         struct Fs *f;
704
705         f = ipfs[c->dev];
706         switch (TYPE(c->qid)) {
707                 default:
708                         break;
709                 case Qlog:
710                         if (c->flag & COPEN)
711                                 netlogclose(f);
712                         break;
713                 case Qiprouter:
714                         if (c->flag & COPEN)
715                                 iprouterclose(f);
716                         break;
717                 case Qdata:
718                 case Qctl:
719                 case Qerr:
720                 case Qlisten:
721                         if (c->flag & COPEN)
722                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
723                         break;
724                 case Qsnoop:
725                         if (c->flag & COPEN)
726                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
727                         break;
728         }
729         kfree(((struct IPaux *)c->aux)->owner);
730         kfree(c->aux);
731 }
732
733 enum {
734         Statelen = 32 * 1024,
735 };
736
737 static long ipread(struct chan *ch, void *a, long n, int64_t off)
738 {
739         struct conv *c;
740         struct Proto *x;
741         char *buf, *p;
742         long rv;
743         struct Fs *f;
744         uint32_t offset = off;
745         size_t sofar;
746
747         f = ipfs[ch->dev];
748
749         p = a;
750         switch (TYPE(ch->qid)) {
751                 default:
752                         error(EPERM, ERROR_FIXME);
753                 case Qtopdir:
754                 case Qprotodir:
755                 case Qconvdir:
756                         return devdirread(ch, a, n, 0, 0, ipgen);
757                 case Qarp:
758                         return arpread(f->arp, a, offset, n);
759                 case Qndb:
760                         return readstr(offset, a, n, f->ndb);
761                 case Qiproute:
762                         return routeread(f, a, offset, n);
763                 case Qiprouter:
764                         return iprouterread(f, a, n);
765                 case Qipselftab:
766                         return ipselftabread(f, a, offset, n);
767                 case Qlog:
768                         return netlogread(f, a, offset, n);
769                 case Qctl:
770                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
771                         return readstr(offset, p, n, get_cur_genbuf());
772                 case Qremote:
773                         buf = kzmalloc(Statelen, 0);
774                         x = f->p[PROTO(ch->qid)];
775                         c = x->conv[CONV(ch->qid)];
776                         if (x->remote == NULL) {
777                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
778                         } else {
779                                 (*x->remote) (c, buf, Statelen - 2);
780                         }
781                         rv = readstr(offset, p, n, buf);
782                         kfree(buf);
783                         return rv;
784                 case Qlocal:
785                         buf = kzmalloc(Statelen, 0);
786                         x = f->p[PROTO(ch->qid)];
787                         c = x->conv[CONV(ch->qid)];
788                         if (x->local == NULL) {
789                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
790                         } else {
791                                 (*x->local) (c, buf, Statelen - 2);
792                         }
793                         rv = readstr(offset, p, n, buf);
794                         kfree(buf);
795                         return rv;
796                 case Qstatus:
797                         /* this all is a bit screwed up since the size of some state's
798                          * buffers will change from one invocation to another.  a reader
799                          * will come in and read the entire buffer.  then it will come again
800                          * and read from the next offset, expecting EOF.  if the buffer
801                          * changed sizes, it'll reprint the end of the buffer slightly. */
802                         buf = kzmalloc(Statelen, 0);
803                         x = f->p[PROTO(ch->qid)];
804                         c = x->conv[CONV(ch->qid)];
805                         sofar = (*x->state) (c, buf, Statelen - 2);
806                         rv = readstr(offset, p, n, buf);
807                         kfree(buf);
808                         return rv;
809                 case Qdata:
810                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
811                         if (ch->flag & O_NONBLOCK)
812                                 return qread_nonblock(c->rq, a, n);
813                         else
814                                 return qread(c->rq, a, n);
815                 case Qerr:
816                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
817                         return qread(c->eq, a, n);
818                 case Qsnoop:
819                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
820                         return qread(c->sq, a, n);
821                 case Qstats:
822                         x = f->p[PROTO(ch->qid)];
823                         if (x->stats == NULL)
824                                 error(EFAIL, "stats not implemented");
825                         buf = kzmalloc(Statelen, 0);
826                         (*x->stats) (x, buf, Statelen);
827                         rv = readstr(offset, p, n, buf);
828                         kfree(buf);
829                         return rv;
830         }
831 }
832
833 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
834 {
835         struct conv *c;
836
837         switch (TYPE(ch->qid)) {
838                 case Qdata:
839                         c = chan2conv(ch);
840                         if (ch->flag & O_NONBLOCK)
841                                 return qbread_nonblock(c->rq, n);
842                         else
843                                 return qbread(c->rq, n);
844                 default:
845                         return devbread(ch, n, offset);
846         }
847 }
848
849 /*
850  *  set local address to be that of the ifc closest to remote address
851  */
852 static void setladdr(struct conv *c)
853 {
854         findlocalip(c->p->f, c->laddr, c->raddr);
855 }
856
857 /*
858  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
859  */
860 static void setluniqueport(struct conv *c, int lport)
861 {
862         struct Proto *p;
863         struct conv *xp;
864         int x;
865
866         p = c->p;
867
868         qlock(&p->qlock);
869         for (x = 0; x < p->nc; x++) {
870                 xp = p->conv[x];
871                 if (xp == NULL)
872                         break;
873                 if (xp == c)
874                         continue;
875                 if ((xp->state == Connected || xp->state == Announced)
876                         && xp->lport == lport
877                         && xp->rport == c->rport
878                         && ipcmp(xp->raddr, c->raddr) == 0
879                         && ipcmp(xp->laddr, c->laddr) == 0) {
880                         qunlock(&p->qlock);
881                         error(EFAIL, "address in use");
882                 }
883         }
884         c->lport = lport;
885         qunlock(&p->qlock);
886 }
887
888 /*
889  *  pick a local port and set it
890  */
891 static void setlport(struct conv *c)
892 {
893         struct Proto *p;
894         uint16_t *pp;
895         int x, found;
896
897         p = c->p;
898         if (c->restricted)
899                 pp = &p->nextrport;
900         else
901                 pp = &p->nextport;
902         qlock(&p->qlock);
903         for (;; (*pp)++) {
904                 /*
905                  * Fsproto initialises p->nextport to 0 and the restricted
906                  * ports (p->nextrport) to 600.
907                  * Restricted ports must lie between 600 and 1024.
908                  * For the initial condition or if the unrestricted port number
909                  * has wrapped round, select a random port between 5000 and 1<<15
910                  * to start at.
911                  */
912                 if (c->restricted) {
913                         if (*pp >= 1024)
914                                 *pp = 600;
915                 } else
916                         while (*pp < 5000)
917                                 urandom_read(pp, sizeof(*pp));
918
919                 found = 0;
920                 for (x = 0; x < p->nc; x++) {
921                         if (p->conv[x] == NULL)
922                                 break;
923                         if (p->conv[x]->lport == *pp) {
924                                 found = 1;
925                                 break;
926                         }
927                 }
928                 if (!found)
929                         break;
930         }
931         c->lport = (*pp)++;
932         qunlock(&p->qlock);
933 }
934
935 /*
936  *  set a local address and port from a string of the form
937  *      [address!]port[!r]
938  */
939 static void setladdrport(struct conv *c, char *str, int announcing)
940 {
941         char *p;
942         uint16_t lport;
943         uint8_t addr[IPaddrlen];
944
945         /*
946          *  ignore restricted part if it exists.  it's
947          *  meaningless on local ports.
948          */
949         p = strchr(str, '!');
950         if (p != NULL) {
951                 *p++ = 0;
952                 if (strcmp(p, "r") == 0)
953                         p = NULL;
954         }
955
956         c->lport = 0;
957         if (p == NULL) {
958                 if (announcing)
959                         ipmove(c->laddr, IPnoaddr);
960                 else
961                         setladdr(c);
962                 p = str;
963         } else {
964                 if (strcmp(str, "*") == 0)
965                         ipmove(c->laddr, IPnoaddr);
966                 else {
967                         parseip(addr, str);
968                         if (ipforme(c->p->f, addr))
969                                 ipmove(c->laddr, addr);
970                         else
971                                 error(EFAIL, "not a local IP address");
972                 }
973         }
974
975         /* one process can get all connections */
976         if (announcing && strcmp(p, "*") == 0) {
977                 if (!iseve())
978                         error(EPERM, ERROR_FIXME);
979                 setluniqueport(c, 0);
980         }
981
982         lport = atoi(p);
983         if (lport <= 0)
984                 setlport(c);
985         else
986                 setluniqueport(c, lport);
987 }
988
989 static void setraddrport(struct conv *c, char *str)
990 {
991         char *p;
992
993         p = strchr(str, '!');
994         if (p == NULL)
995                 error(EFAIL, "malformed address");
996         *p++ = 0;
997         parseip(c->raddr, str);
998         c->rport = atoi(p);
999         p = strchr(p, '!');
1000         if (p) {
1001                 if (strstr(p, "!r") != NULL)
1002                         c->restricted = 1;
1003         }
1004 }
1005
1006 /*
1007  *  called by protocol connect routine to set addresses
1008  */
1009 void Fsstdconnect(struct conv *c, char *argv[], int argc)
1010 {
1011         switch (argc) {
1012                 default:
1013                         error(EINVAL, "bad args to %s", __func__);
1014                 case 2:
1015                         setraddrport(c, argv[1]);
1016                         setladdr(c);
1017                         setlport(c);
1018                         break;
1019                 case 3:
1020                         setraddrport(c, argv[1]);
1021                         setladdrport(c, argv[2], 0);
1022                         break;
1023         }
1024
1025         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
1026                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
1027                 || ipcmp(c->raddr, IPnoaddr) == 0)
1028                 c->ipversion = V4;
1029         else
1030                 c->ipversion = V6;
1031 }
1032
1033 /*
1034  *  initiate connection and sleep till its set up
1035  */
1036 static int connected(void *a)
1037 {
1038         return ((struct conv *)a)->state == Connected;
1039 }
1040
1041 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1042 {
1043         ERRSTACK(1);
1044         char *p;
1045
1046         if (c->state != 0)
1047                 error(EBUSY, ERROR_FIXME);
1048         c->state = Connecting;
1049         c->cerr[0] = '\0';
1050         if (x->connect == NULL)
1051                 error(EFAIL, "connect not supported");
1052         x->connect(c, cb->f, cb->nf);
1053
1054         qunlock(&c->qlock);
1055         if (waserror()) {
1056                 qlock(&c->qlock);
1057                 nexterror();
1058         }
1059         rendez_sleep(&c->cr, connected, c);
1060         qlock(&c->qlock);
1061         poperror();
1062
1063         if (c->cerr[0] != '\0')
1064                 error(EFAIL, c->cerr);
1065 }
1066
1067 /*
1068  *  called by protocol announce routine to set addresses
1069  */
1070 void Fsstdannounce(struct conv *c, char *argv[], int argc)
1071 {
1072         memset(c->raddr, 0, sizeof(c->raddr));
1073         c->rport = 0;
1074         switch (argc) {
1075                 default:
1076                         error(EINVAL, "bad args to announce");
1077                 case 2:
1078                         setladdrport(c, argv[1], 1);
1079                         break;
1080         }
1081 }
1082
1083 /*
1084  *  initiate announcement and sleep till its set up
1085  */
1086 static int announced(void *a)
1087 {
1088         return ((struct conv *)a)->state == Announced;
1089 }
1090
1091 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1092 {
1093         ERRSTACK(1);
1094         char *p;
1095
1096         if (c->state != 0)
1097                 error(EBUSY, ERROR_FIXME);
1098         c->state = Announcing;
1099         c->cerr[0] = '\0';
1100         if (x->announce == NULL)
1101                 error(EFAIL, "announce not supported");
1102         x->announce(c, cb->f, cb->nf);
1103
1104         qunlock(&c->qlock);
1105         if (waserror()) {
1106                 qlock(&c->qlock);
1107                 nexterror();
1108         }
1109         rendez_sleep(&c->cr, announced, c);
1110         qlock(&c->qlock);
1111         poperror();
1112
1113         if (c->cerr[0] != '\0')
1114                 error(EFAIL, c->cerr);
1115 }
1116
1117 /*
1118  *  called by protocol bind routine to set addresses
1119  */
1120 void Fsstdbind(struct conv *c, char *argv[], int argc)
1121 {
1122         switch (argc) {
1123                 default:
1124                         error(EINVAL, "bad args to bind");
1125                 case 2:
1126                         setladdrport(c, argv[1], 0);
1127                         break;
1128         }
1129 }
1130
1131 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1132 {
1133         if (x->bind == NULL)
1134                 Fsstdbind(c, cb->f, cb->nf);
1135         else
1136                 x->bind(c, cb->f, cb->nf);
1137 }
1138
1139 static void shutdownctlmsg(struct conv *cv, struct cmdbuf *cb)
1140 {
1141         if (cb->nf < 2)
1142                 goto err;
1143         if (!strcmp(cb->f[1], "rd")) {
1144                 qhangup(cv->rq, "shutdown");
1145                 if (cv->p->shutdown)
1146                         cv->p->shutdown(cv, SHUT_RD);
1147         } else if (!strcmp(cb->f[1], "wr")) {
1148                 qhangup(cv->wq, "shutdown");
1149                 if (cv->p->shutdown)
1150                         cv->p->shutdown(cv, SHUT_WR);
1151         } else if (!strcmp(cb->f[1], "rdwr")) {
1152                 qhangup(cv->rq, "shutdown");
1153                 qhangup(cv->wq, "shutdown");
1154                 if (cv->p->shutdown)
1155                         cv->p->shutdown(cv, SHUT_RDWR);
1156         } else {
1157                 goto err;
1158         }
1159         return;
1160 err:
1161         error(EINVAL, "shutdown [rx|tx|rxtx]");
1162 }
1163
1164 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1165 {
1166         if (cb->nf < 2)
1167                 c->tos = 0;
1168         else
1169                 c->tos = atoi(cb->f[1]);
1170 }
1171
1172 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1173 {
1174         if (cb->nf < 2)
1175                 c->ttl = MAXTTL;
1176         else
1177                 c->ttl = atoi(cb->f[1]);
1178 }
1179
1180 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1181 {
1182         ERRSTACK(1);
1183         struct conv *c;
1184         struct Proto *x;
1185         char *p;
1186         struct cmdbuf *cb;
1187         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1188         struct Fs *f;
1189         char *a;
1190
1191         a = v;
1192         f = ipfs[ch->dev];
1193
1194         switch (TYPE(ch->qid)) {
1195                 default:
1196                         error(EPERM, ERROR_FIXME);
1197                 case Qdata:
1198                         x = f->p[PROTO(ch->qid)];
1199                         c = x->conv[CONV(ch->qid)];
1200                         if (ch->flag & O_NONBLOCK)
1201                                 qwrite_nonblock(c->wq, a, n);
1202                         else
1203                                 qwrite(c->wq, a, n);
1204                         break;
1205                 case Qarp:
1206                         return arpwrite(f, a, n);
1207                 case Qiproute:
1208                         return routewrite(f, ch, a, n);
1209                 case Qlog:
1210                         netlogctl(f, a, n);
1211                         return n;
1212                 case Qndb:
1213                         return ndbwrite(f, a, off, n);
1214                 case Qctl:
1215                         x = f->p[PROTO(ch->qid)];
1216                         c = x->conv[CONV(ch->qid)];
1217                         cb = parsecmd(a, n);
1218
1219                         qlock(&c->qlock);
1220                         if (waserror()) {
1221                                 qunlock(&c->qlock);
1222                                 kfree(cb);
1223                                 nexterror();
1224                         }
1225                         if (cb->nf < 1)
1226                                 error(EFAIL, "short control request");
1227                         if (strcmp(cb->f[0], "connect") == 0)
1228                                 connectctlmsg(x, c, cb);
1229                         else if (strcmp(cb->f[0], "announce") == 0)
1230                                 announcectlmsg(x, c, cb);
1231                         else if (strcmp(cb->f[0], "bind") == 0)
1232                                 bindctlmsg(x, c, cb);
1233                         else if (strcmp(cb->f[0], "shutdown") == 0)
1234                                 shutdownctlmsg(c, cb);
1235                         else if (strcmp(cb->f[0], "ttl") == 0)
1236                                 ttlctlmsg(c, cb);
1237                         else if (strcmp(cb->f[0], "tos") == 0)
1238                                 tosctlmsg(c, cb);
1239                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1240                                 c->ignoreadvice = 1;
1241                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1242                                 if (cb->nf < 2)
1243                                         error(EFAIL, "addmulti needs interface address");
1244                                 if (cb->nf == 2) {
1245                                         if (!ipismulticast(c->raddr))
1246                                                 error(EFAIL, "addmulti for a non multicast address");
1247                                         parseip(ia, cb->f[1]);
1248                                         ipifcaddmulti(c, c->raddr, ia);
1249                                 } else {
1250                                         parseip(ma, cb->f[2]);
1251                                         if (!ipismulticast(ma))
1252                                                 error(EFAIL, "addmulti for a non multicast address");
1253                                         parseip(ia, cb->f[1]);
1254                                         ipifcaddmulti(c, ma, ia);
1255                                 }
1256                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1257                                 if (cb->nf < 2)
1258                                         error(EFAIL, "remmulti needs interface address");
1259                                 if (!ipismulticast(c->raddr))
1260                                         error(EFAIL, "remmulti for a non multicast address");
1261                                 parseip(ia, cb->f[1]);
1262                                 ipifcremmulti(c, c->raddr, ia);
1263                         } else if (x->ctl != NULL) {
1264                                 x->ctl(c, cb->f, cb->nf);
1265                         } else
1266                                 error(EFAIL, "unknown control request");
1267                         qunlock(&c->qlock);
1268                         kfree(cb);
1269                         poperror();
1270         }
1271         return n;
1272 }
1273
1274 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1275 {
1276         struct conv *c;
1277         int n;
1278
1279         switch (TYPE(ch->qid)) {
1280                 case Qdata:
1281                         c = chan2conv(ch);
1282                         if (bp->next)
1283                                 bp = concatblock(bp);
1284                         n = BLEN(bp);
1285                         if (ch->flag & O_NONBLOCK)
1286                                 qbwrite_nonblock(c->wq, bp);
1287                         else
1288                                 qbwrite(c->wq, bp);
1289                         return n;
1290                 default:
1291                         return devbwrite(ch, bp, offset);
1292         }
1293 }
1294
1295 static void ip_wake_cb(struct queue *q, void *data, int filter)
1296 {
1297         struct conv *conv = (struct conv*)data;
1298         struct fd_tap *tap_i;
1299         /* For these two, we want to ignore events on the opposite end of the
1300          * queues.  For instance, we want to know when the WQ is writable.  Our
1301          * writes will actually make it readable - we don't want to trigger a tap
1302          * for that.  However, qio doesn't know how/why we are using a queue, or
1303          * even who the ends are (hence the callbacks) */
1304         if ((filter & FDTAP_FILT_READABLE) && (q == conv->wq))
1305                 return;
1306         if ((filter & FDTAP_FILT_WRITABLE) && (q == conv->rq))
1307                 return;
1308         /* At this point, we have an event we want to send to our taps (if any).
1309          * The lock protects list integrity and the existence of the tap.
1310          *
1311          * Previously, I thought of using the conv qlock.  That actually breaks, due
1312          * to weird usages of the qlock (someone holds it for a long time, blocking
1313          * the inbound wakeup from etherread4).
1314          *
1315          * I opted for a spinlock for a couple reasons:
1316          * - fire_tap should not block.  ideally it'll be fast too (it's mostly a
1317          * send_event).
1318          * - our callers might not want to block.  A lot of network wakeups will
1319          * come network processes (etherread4) or otherwise unrelated to this
1320          * particular conversation.  I'd rather do something like fire off a KMSG
1321          * than block those.
1322          * - if fire_tap takes a while, holding the lock only slows down other
1323          * events on this *same* conversation, or other tap registration.  not a
1324          * huge deal. */
1325         spin_lock(&conv->tap_lock);
1326         SLIST_FOREACH(tap_i, &conv->data_taps, link)
1327                 fire_tap(tap_i, filter);
1328         spin_unlock(&conv->tap_lock);
1329 }
1330
1331 int iptapfd(struct chan *chan, struct fd_tap *tap, int cmd)
1332 {
1333         struct conv *conv = chan2conv(chan);
1334         int ret;
1335
1336         #define DEVIP_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
1337                                        FDTAP_FILT_HANGUP | FDTAP_FILT_PRIORITY |   \
1338                                        FDTAP_FILT_ERROR)
1339         #define DEVIP_LEGAL_LISTEN_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
1340
1341         switch (TYPE(chan->qid)) {
1342                 case Qdata:
1343                         if (tap->filter & ~DEVIP_LEGAL_DATA_TAPS) {
1344                                 set_errno(ENOSYS);
1345                                 set_errstr("Unsupported #%s data tap %p, must be %p", devname(),
1346                                            tap->filter, DEVIP_LEGAL_DATA_TAPS);
1347                                 return -1;
1348                         }
1349                         spin_lock(&conv->tap_lock);
1350                         switch (cmd) {
1351                                 case (FDTAP_CMD_ADD):
1352                                         if (SLIST_EMPTY(&conv->data_taps)) {
1353                                                 qio_set_wake_cb(conv->rq, ip_wake_cb, conv);
1354                                                 qio_set_wake_cb(conv->wq, ip_wake_cb, conv);
1355                                         }
1356                                         SLIST_INSERT_HEAD(&conv->data_taps, tap, link);
1357                                         ret = 0;
1358                                         break;
1359                                 case (FDTAP_CMD_REM):
1360                                         SLIST_REMOVE(&conv->data_taps, tap, fd_tap, link);
1361                                         if (SLIST_EMPTY(&conv->data_taps)) {
1362                                                 qio_set_wake_cb(conv->rq, 0, conv);
1363                                                 qio_set_wake_cb(conv->wq, 0, conv);
1364                                         }
1365                                         ret = 0;
1366                                         break;
1367                                 default:
1368                                         set_errno(ENOSYS);
1369                                         set_errstr("Unsupported #%s data tap command %p",
1370                                                    devname(), cmd);
1371                                         ret = -1;
1372                         }
1373                         spin_unlock(&conv->tap_lock);
1374                         return ret;
1375                 case Qlisten:
1376                         if (tap->filter & ~DEVIP_LEGAL_LISTEN_TAPS) {
1377                                 set_errno(ENOSYS);
1378                                 set_errstr("Unsupported #%s listen tap %p, must be %p",
1379                                            devname(), tap->filter, DEVIP_LEGAL_LISTEN_TAPS);
1380                                 return -1;
1381                         }
1382                         spin_lock(&conv->tap_lock);
1383                         switch (cmd) {
1384                                 case (FDTAP_CMD_ADD):
1385                                         SLIST_INSERT_HEAD(&conv->listen_taps, tap, link);
1386                                         ret = 0;
1387                                         break;
1388                                 case (FDTAP_CMD_REM):
1389                                         SLIST_REMOVE(&conv->listen_taps, tap, fd_tap, link);
1390                                         ret = 0;
1391                                         break;
1392                                 default:
1393                                         set_errno(ENOSYS);
1394                                         set_errstr("Unsupported #%s listen tap command %p",
1395                                                    devname(), cmd);
1396                                         ret = -1;
1397                         }
1398                         spin_unlock(&conv->tap_lock);
1399                         return ret;
1400                 default:
1401                         set_errno(ENOSYS);
1402                         set_errstr("Can't tap #%s file type %d", devname(),
1403                                    TYPE(chan->qid));
1404                         return -1;
1405         }
1406 }
1407
1408 struct dev ipdevtab __devtab = {
1409         .name = "ip",
1410
1411         .reset = ipreset,
1412         .init = ipinit,
1413         .shutdown = devshutdown,
1414         .attach = ipattach,
1415         .walk = ipwalk,
1416         .stat = ipstat,
1417         .open = ipopen,
1418         .create = devcreate,
1419         .close = ipclose,
1420         .read = ipread,
1421         .bread = ipbread,
1422         .write = ipwrite,
1423         .bwrite = ipbwrite,
1424         .remove = devremove,
1425         .wstat = ipwstat,
1426         .power = devpower,
1427         .chaninfo = ipchaninfo,
1428         .tapfd = iptapfd,
1429 };
1430
1431 int Fsproto(struct Fs *f, struct Proto *p)
1432 {
1433         if (f->np >= Maxproto)
1434                 return -1;
1435
1436         qlock_init(&p->qlock);
1437         p->f = f;
1438
1439         if (p->ipproto > 0) {
1440                 if (f->t2p[p->ipproto] != NULL)
1441                         return -1;
1442                 f->t2p[p->ipproto] = p;
1443         }
1444
1445         p->qid.type = QTDIR;
1446         p->qid.path = QID(f->np, 0, Qprotodir);
1447         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1448         if (p->conv == NULL)
1449                 panic("Fsproto");
1450
1451         p->x = f->np;
1452         p->nextport = 0;
1453         p->nextrport = 600;
1454         f->p[f->np++] = p;
1455
1456         return 0;
1457 }
1458
1459 /*
1460  *  return true if this protocol is
1461  *  built in
1462  */
1463 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1464 {
1465         return f->t2p[proto] != NULL;
1466 }
1467
1468 /*
1469  *  called with protocol locked
1470  */
1471 struct conv *Fsprotoclone(struct Proto *p, char *user)
1472 {
1473         struct conv *c, **pp, **ep;
1474
1475 retry:
1476         c = NULL;
1477         ep = &p->conv[p->nc];
1478         for (pp = p->conv; pp < ep; pp++) {
1479                 c = *pp;
1480                 if (c == NULL) {
1481                         c = kzmalloc(sizeof(struct conv), 0);
1482                         if (c == NULL)
1483                                 error(ENOMEM, ERROR_FIXME);
1484                         qlock_init(&c->qlock);
1485                         qlock_init(&c->listenq);
1486                         rendez_init(&c->cr);
1487                         rendez_init(&c->listenr);
1488                         SLIST_INIT(&c->data_taps);      /* already = 0; set to be futureproof */
1489                         SLIST_INIT(&c->listen_taps);
1490                         spinlock_init(&c->tap_lock);
1491                         qlock(&c->qlock);
1492                         c->p = p;
1493                         c->x = pp - p->conv;
1494                         if (p->ptclsize != 0) {
1495                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1496                                 if (c->ptcl == NULL) {
1497                                         kfree(c);
1498                                         error(ENOMEM, ERROR_FIXME);
1499                                 }
1500                         }
1501                         *pp = c;
1502                         p->ac++;
1503                         c->eq = qopen(1024, Qmsg, 0, 0);
1504                         (*p->create) (c);
1505                         assert(c->rq && c->wq);
1506                         break;
1507                 }
1508                 if (canqlock(&c->qlock)) {
1509                         /*
1510                          *  make sure both processes and protocol
1511                          *  are done with this Conv
1512                          */
1513                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1514                                 break;
1515
1516                         qunlock(&c->qlock);
1517                 }
1518         }
1519         if (pp >= ep) {
1520                 if (p->gc != NULL && (*p->gc) (p))
1521                         goto retry;
1522                 return NULL;
1523         }
1524
1525         c->inuse = 1;
1526         kstrdup(&c->owner, user);
1527         c->perm = 0660;
1528         c->state = Idle;
1529         ipmove(c->laddr, IPnoaddr);
1530         ipmove(c->raddr, IPnoaddr);
1531         c->r = NULL;
1532         c->rgen = 0;
1533         c->lport = 0;
1534         c->rport = 0;
1535         c->restricted = 0;
1536         c->ttl = MAXTTL;
1537         c->tos = DFLTTOS;
1538         qreopen(c->rq);
1539         qreopen(c->wq);
1540         qreopen(c->eq);
1541
1542         qunlock(&c->qlock);
1543         return c;
1544 }
1545
1546 int Fsconnected(struct conv *c, char *msg)
1547 {
1548         if (msg != NULL && *msg != '\0')
1549                 strlcpy(c->cerr, msg, sizeof(c->cerr));
1550
1551         switch (c->state) {
1552                 case Announcing:
1553                         c->state = Announced;
1554                         break;
1555
1556                 case Connecting:
1557                         c->state = Connected;
1558                         break;
1559         }
1560
1561         rendez_wakeup(&c->cr);
1562         return 0;
1563 }
1564
1565 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1566 {
1567         if (f->ipmux)
1568                 return f->ipmux;
1569         else
1570                 return f->t2p[proto];
1571 }
1572
1573 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1574 {
1575         return f->t2p[proto];
1576 }
1577
1578 static void fire_listener_taps(struct conv *conv)
1579 {
1580         struct fd_tap *tap_i;
1581         if (SLIST_EMPTY(&conv->listen_taps))
1582                 return;
1583         spin_lock(&conv->tap_lock);
1584         SLIST_FOREACH(tap_i, &conv->listen_taps, link)
1585                 fire_tap(tap_i, FDTAP_FILT_READABLE);
1586         spin_unlock(&conv->tap_lock);
1587 }
1588
1589 /*
1590  *  called with protocol locked
1591  */
1592 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1593                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1594 {
1595         struct conv *nc;
1596         struct conv **l;
1597         int i;
1598
1599         qlock(&c->qlock);
1600         i = 0;
1601         for (l = &c->incall; *l; l = &(*l)->next)
1602                 i++;
1603         if (i >= Maxincall) {
1604                 qunlock(&c->qlock);
1605                 return NULL;
1606         }
1607
1608         /* find a free conversation */
1609         nc = Fsprotoclone(c->p, network);
1610         if (nc == NULL) {
1611                 qunlock(&c->qlock);
1612                 return NULL;
1613         }
1614         ipmove(nc->raddr, raddr);
1615         nc->rport = rport;
1616         ipmove(nc->laddr, laddr);
1617         nc->lport = lport;
1618         nc->next = NULL;
1619         *l = nc;
1620         nc->state = Connected;
1621         nc->ipversion = version;
1622
1623         qunlock(&c->qlock);
1624
1625         rendez_wakeup(&c->listenr);
1626         fire_listener_taps(c);
1627
1628         return nc;
1629 }
1630
1631 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1632 {
1633         if (off > strlen(f->ndb))
1634                 error(EIO, ERROR_FIXME);
1635         if (off + n >= sizeof(f->ndb) - 1)
1636                 error(EIO, ERROR_FIXME);
1637         memmove(f->ndb + off, a, n);
1638         f->ndb[off + n] = 0;
1639         f->ndbvers++;
1640         f->ndbmtime = seconds();
1641         return n;
1642 }
1643
1644 uint32_t scalednconv(void)
1645 {
1646         //if(conf.npage*BY2PG >= 128*MB)
1647         return Nchans * 4;
1648         //  return Nchans;
1649 }