0865941c1f4df1e373b08a45b186106e333240cd
[akaros.git] / kern / src / net / devip.c
1 /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
2  * Portions Copyright © 1997-1999 Vita Nuova Limited
3  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
4  *                                (www.vitanuova.com)
5  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
6  *
7  * Modified for the Akaros operating system:
8  * Copyright (c) 2013-2014 The Regents of the University of California
9  * Copyright (c) 2013-2015 Google Inc.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included in
19  * all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  * SOFTWARE. */
28
29 #include <vfs.h>
30 #include <kfs.h>
31 #include <slab.h>
32 #include <kmalloc.h>
33 #include <kref.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <assert.h>
37 #include <error.h>
38 #include <cpio.h>
39 #include <pmap.h>
40 #include <smp.h>
41 #include <ip.h>
42
43 struct dev ipdevtab;
44
45 static char *devname(void)
46 {
47         return ipdevtab.name;
48 }
49
/* File types and qid path layout.
 *
 * A qid path packs three fields: the low Logtype bits are the file type (one
 * of the Q* values), the next Logconv bits are the conversation index, and the
 * Logproto bits above those are the protocol index. */
enum {
	/* top level directory, followed by the files that live in it */
	Qtopdir = 1,
	Qtopbase,
	Qarp = Qtopbase,
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,

	/* directory for a protocol, followed by its files */
	Qprotodir,
	Qprotobase,
	Qclone = Qprotobase,
	Qstats,

	/* directory for a conversation, followed by its files */
	Qconvdir,
	Qconvbase,
	Qctl = Qconvbase,
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,

	/* field widths, masks and shifts of the qid path */
	Logtype = 5,
	Masktype = (1 << Logtype) - 1,
	Logconv = 12,
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,
	Logproto = 8,
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,

	/* number of slots in ipfs[] */
	Nfs = 32,
};

/* Field extractors; x is anything with a .path member (a qid). */
#define TYPE(x)		(((uint32_t)(x).path) & Masktype)
#define CONV(x)		((((uint32_t)(x).path) >> Shiftconv) & Maskconv)
#define PROTO(x)	((((uint32_t)(x).path) >> Shiftproto) & Maskproto)
/* Builds a path from protocol index p, conversation index c and type y. */
#define QID(p, c, y)	(((p) << (Shiftproto)) | ((c) << Shiftconv) | (y))
91 static char network[] = "network";
92
93 qlock_t fslock;
94 struct Fs *ipfs[Nfs];                   /* attached fs's */
95 struct queue *qlog;
96
97 extern void nullmediumlink(void);
98 extern void pktmediumlink(void);
99 extern char *eve;
100 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
101 static void closeconv(struct conv *);
102
103 static inline int founddevdir(struct chan *c, struct qid q, char *n,
104                                                           int64_t length, char *user, long perm,
105                                                           struct dir *db)
106 {
107         devdir(c, q, n, length, user, perm, db);
108         return 1;
109 }
110
111 static int topdirgen(struct chan *c, struct dir *dp)
112 {
113         struct qid q;
114         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
115         snprintf(get_cur_genbuf(), GENBUF_SZ, "#%s%lu", devname(), c->dev);
116         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
117 }
118
119
120 static int ip3gen(struct chan *c, int i, struct dir *dp)
121 {
122         struct qid q;
123         struct conv *cv;
124         char *p;
125
126         cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
127         if (cv->owner == NULL)
128                 kstrdup(&cv->owner, eve);
129         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
130
131         switch (i) {
132                 default:
133                         return -1;
134                 case Qctl:
135                         return founddevdir(c, q, "ctl", 0,
136                                                    cv->owner, cv->perm, dp);
137                 case Qdata:
138                         return founddevdir(c, q, "data", qlen(cv->rq),
139                                                            cv->owner, cv->perm, dp);
140                 case Qerr:
141                         return founddevdir(c, q, "err", qlen(cv->eq),
142                                                            cv->owner, cv->perm, dp);
143                 case Qlisten:
144                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
145                 case Qlocal:
146                         p = "local";
147                         break;
148                 case Qremote:
149                         p = "remote";
150                         break;
151                 case Qsnoop:
152                         if (strcmp(cv->p->name, "ipifc") != 0)
153                                 return -1;
154                         return founddevdir(c, q, "snoop", qlen(cv->sq),
155                                                            cv->owner, 0400, dp);
156                 case Qstatus:
157                         p = "status";
158                         break;
159         }
160         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
161 }
162
163 static int ip2gen(struct chan *c, int i, struct dir *dp)
164 {
165         struct qid q;
166         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
167         switch (i) {
168                 case Qclone:
169                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
170                 case Qstats:
171                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
172         }
173         return -1;
174 }
175
176 static int ip1gen(struct chan *c, int i, struct dir *dp)
177 {
178         struct qid q;
179         char *p;
180         int prot;
181         int len = 0;
182         struct Fs *f;
183         extern uint32_t kerndate;
184
185         f = ipfs[c->dev];
186
187         prot = 0666;
188         mkqid(&q, QID(0, 0, i), 0, QTFILE);
189         switch (i) {
190                 default:
191                         return -1;
192                 case Qarp:
193                         p = "arp";
194                         break;
195                 case Qndb:
196                         p = "ndb";
197                         len = strlen(f->ndb);
198                         q.vers = f->ndbvers;
199                         break;
200                 case Qiproute:
201                         p = "iproute";
202                         break;
203                 case Qipselftab:
204                         p = "ipselftab";
205                         prot = 0444;
206                         break;
207                 case Qiprouter:
208                         p = "iprouter";
209                         break;
210                 case Qlog:
211                         p = "log";
212                         break;
213         }
214         devdir(c, q, p, len, network, prot, dp);
215         if (i == Qndb && f->ndbmtime > kerndate)
216                 dp->mtime = f->ndbmtime;
217         return 1;
218 }
219
220 static int
221 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
222           int s, struct dir *dp)
223 {
224         struct qid q;
225         struct conv *cv;
226         struct Fs *f;
227
228         f = ipfs[c->dev];
229
230         switch (TYPE(c->qid)) {
231                 case Qtopdir:
232                         if (s == DEVDOTDOT)
233                                 return topdirgen(c, dp);
234                         if (s < f->np) {
235                                 if (f->p[s]->connect == NULL)
236                                         return 0;       /* protocol with no user interface */
237                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
238                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
239                         }
240                         s -= f->np;
241                         return ip1gen(c, s + Qtopbase, dp);
242                 case Qarp:
243                 case Qndb:
244                 case Qlog:
245                 case Qiproute:
246                 case Qiprouter:
247                 case Qipselftab:
248                         return ip1gen(c, TYPE(c->qid), dp);
249                 case Qprotodir:
250                         if (s == DEVDOTDOT)
251                                 return topdirgen(c, dp);
252                         else if (s < f->p[PROTO(c->qid)]->ac) {
253                                 cv = f->p[PROTO(c->qid)]->conv[s];
254                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
255                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
256                                 return
257                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
258                         }
259                         s -= f->p[PROTO(c->qid)]->ac;
260                         return ip2gen(c, s + Qprotobase, dp);
261                 case Qclone:
262                 case Qstats:
263                         return ip2gen(c, TYPE(c->qid), dp);
264                 case Qconvdir:
265                         if (s == DEVDOTDOT) {
266                                 s = PROTO(c->qid);
267                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
268                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
269                                 return 1;
270                         }
271                         return ip3gen(c, s + Qconvbase, dp);
272                 case Qctl:
273                 case Qdata:
274                 case Qerr:
275                 case Qlisten:
276                 case Qlocal:
277                 case Qremote:
278                 case Qstatus:
279                 case Qsnoop:
280                         return ip3gen(c, TYPE(c->qid), dp);
281         }
282         return -1;
283 }
284
285 static void ipinit(void)
286 {
287         qlock_init(&fslock);
288         nullmediumlink();
289         pktmediumlink();
290 /* if only
291         fmtinstall('i', eipfmt);
292         fmtinstall('I', eipfmt);
293         fmtinstall('E', eipfmt);
294         fmtinstall('V', eipfmt);
295         fmtinstall('M', eipfmt);
296 */
297 }
298
/* Device reset hook; there is nothing to do for this device. */
static void ipreset(void)
{
	/* intentionally empty */
}
302
303 static struct Fs *ipgetfs(int dev)
304 {
305         extern void (*ipprotoinit[]) (struct Fs *);
306         struct Fs *f;
307         int i;
308
309         if (dev >= Nfs)
310                 return NULL;
311
312         qlock(&fslock);
313         if (ipfs[dev] == NULL) {
314                 f = kzmalloc(sizeof(struct Fs), KMALLOC_WAIT);
315                 rwinit(&f->rwlock);
316                 qlock_init(&f->iprouter.qlock);
317                 ip_init(f);
318                 arpinit(f);
319                 netloginit(f);
320                 for (i = 0; ipprotoinit[i]; i++)
321                         ipprotoinit[i] (f);
322                 f->dev = dev;
323                 ipfs[dev] = f;
324         }
325         qunlock(&fslock);
326
327         return ipfs[dev];
328 }
329
330 struct IPaux *newipaux(char *owner, char *tag)
331 {
332         struct IPaux *a;
333         int n;
334
335         a = kzmalloc(sizeof(*a), 0);
336         kstrdup(&a->owner, owner);
337         memset(a->tag, ' ', sizeof(a->tag));
338         n = strlen(tag);
339         if (n > sizeof(a->tag))
340                 n = sizeof(a->tag);
341         memmove(a->tag, tag, n);
342         return a;
343 }
344
345 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
346
347 static struct chan *ipattach(char *spec)
348 {
349         struct chan *c;
350         int dev;
351
352         dev = atoi(spec);
353         if (dev >= Nfs)
354                 error(EFAIL, "bad specification");
355
356         ipgetfs(dev);
357         c = devattach(devname(), spec);
358         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
359         c->dev = dev;
360
361         c->aux = newipaux(commonuser(), "none");
362
363         return c;
364 }
365
366 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
367                                                           int nname)
368 {
369         struct IPaux *a = c->aux;
370         struct walkqid *w;
371
372         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
373         if (w != NULL && w->clone != NULL)
374                 w->clone->aux = newipaux(a->owner, a->tag);
375         return w;
376 }
377
378 static int ipstat(struct chan *c, uint8_t * db, int n)
379 {
380         return devstat(c, db, n, NULL, 0, ipgen);
381 }
382
383 static int should_wake(void *arg)
384 {
385         struct conv *cv = arg;
386         /* signal that the conv is closed */
387         if (qisclosed(cv->rq))
388                 return TRUE;
389         return cv->incall != NULL;
390 }
391
392 static struct chan *ipopen(struct chan *c, int omode)
393 {
394         ERRSTACK(2);
395         struct conv *cv, *nc;
396         struct Proto *p;
397         int perm;
398         struct Fs *f;
399
400         /* perm is a lone rwx, not the rwx------ from the conversion */
401         perm = omode_to_rwx(omode) >> 6;
402
403         f = ipfs[c->dev];
404
405         switch (TYPE(c->qid)) {
406                 default:
407                         break;
408                 case Qndb:
409                         if (omode & (O_WRITE | O_TRUNC) && !iseve())
410                                 error(EPERM, ERROR_FIXME);
411                         if ((omode & (O_WRITE | O_TRUNC)) == (O_WRITE | O_TRUNC))
412                                 f->ndb[0] = 0;
413                         break;
414                 case Qlog:
415                         netlogopen(f);
416                         break;
417                 case Qiprouter:
418                         iprouteropen(f);
419                         break;
420                 case Qiproute:
421                         break;
422                 case Qtopdir:
423                 case Qprotodir:
424                 case Qconvdir:
425                 case Qstatus:
426                 case Qremote:
427                 case Qlocal:
428                 case Qstats:
429                 case Qipselftab:
430                         if (omode & O_WRITE)
431                                 error(EPERM, ERROR_FIXME);
432                         break;
433                 case Qsnoop:
434                         if (omode & O_WRITE)
435                                 error(EPERM, ERROR_FIXME);
436                         p = f->p[PROTO(c->qid)];
437                         cv = p->conv[CONV(c->qid)];
438                         if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
439                                 error(EPERM, ERROR_FIXME);
440                         atomic_inc(&cv->snoopers);
441                         break;
442                 case Qclone:
443                         p = f->p[PROTO(c->qid)];
444                         qlock(&p->qlock);
445                         if (waserror()) {
446                                 qunlock(&p->qlock);
447                                 nexterror();
448                         }
449                         cv = Fsprotoclone(p, ATTACHER(c));
450                         qunlock(&p->qlock);
451                         poperror();
452                         if (cv == NULL) {
453                                 error(ENODEV, ERROR_FIXME);
454                                 break;
455                         }
456                         /* we only honor nonblock on a clone */
457                         if (c->flag & O_NONBLOCK)
458                                 Fsconvnonblock(cv, TRUE);
459                         mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
460                         break;
461                 case Qdata:
462                 case Qctl:
463                 case Qerr:
464                         p = f->p[PROTO(c->qid)];
465                         qlock(&p->qlock);
466                         cv = p->conv[CONV(c->qid)];
467                         qlock(&cv->qlock);
468                         if (waserror()) {
469                                 qunlock(&cv->qlock);
470                                 qunlock(&p->qlock);
471                                 nexterror();
472                         }
473                         if ((perm & (cv->perm >> 6)) != perm) {
474                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
475                                         error(EPERM, ERROR_FIXME);
476                                 if ((perm & cv->perm) != perm)
477                                         error(EPERM, ERROR_FIXME);
478
479                         }
480                         cv->inuse++;
481                         if (cv->inuse == 1) {
482                                 kstrdup(&cv->owner, ATTACHER(c));
483                                 cv->perm = 0660;
484                         }
485                         qunlock(&cv->qlock);
486                         qunlock(&p->qlock);
487                         poperror();
488                         break;
489                 case Qlisten:
490                         cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
491                         /* No permissions or Announce checks required.  We'll see if that's
492                          * a good idea or not. (the perm check would do nothing, as is,
493                          * since an O_PATH perm is 0).
494                          *
495                          * But we probably want to incref to keep the conversation around
496                          * until this FD/chan is closed.  #ip is a little weird in that
497                          * objects never really go away (high water mark for convs, you can
498                          * always find them in the ns).  I think it is possible to
499                          * namec/ipgen a chan, then have that conv close, then have that
500                          * chan be opened.  You can probably do this with a data file. */
501                         if (omode & O_PATH) {
502                                 qlock(&cv->qlock);
503                                 cv->inuse++;
504                                 qunlock(&cv->qlock);
505                                 break;
506                         }
507                         if ((perm & (cv->perm >> 6)) != perm) {
508                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
509                                         error(EPERM, ERROR_FIXME);
510                                 if ((perm & cv->perm) != perm)
511                                         error(EPERM, ERROR_FIXME);
512
513                         }
514
515                         if (cv->state != Announced)
516                                 error(EFAIL, "not announced");
517
518                         if (waserror()) {
519                                 closeconv(cv);
520                                 nexterror();
521                         }
522                         qlock(&cv->qlock);
523                         cv->inuse++;
524                         qunlock(&cv->qlock);
525
526                         nc = NULL;
527                         while (nc == NULL) {
528                                 /* give up if we got a hangup */
529                                 if (qisclosed(cv->rq))
530                                         error(EFAIL, "listen hungup");
531
532                                 qlock(&cv->listenq);
533                                 if (waserror()) {
534                                         qunlock(&cv->listenq);
535                                         nexterror();
536                                 }
537                                 /* we can peek at incall without grabbing the cv qlock.  if
538                                  * anything is there, it'll remain there until we dequeue it.
539                                  * no one else can, since we hold the listenq lock */
540                                 if (cv->nonblock && !cv->incall)
541                                         error(EAGAIN, "listen queue empty");
542                                 /* wait for a connect */
543                                 rendez_sleep(&cv->listenr, should_wake, cv);
544
545                                 /* if there is a concurrent hangup, they will hold the qlock
546                                  * until the hangup is complete, including closing the cv->rq */
547                                 qlock(&cv->qlock);
548                                 nc = cv->incall;
549                                 if (nc != NULL) {
550                                         cv->incall = nc->next;
551                                         mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
552                                         kstrdup(&cv->owner, ATTACHER(c));
553                                         /* O_NONBLOCK/CNONBLOCK when opening listen means the *new*
554                                          * conv is already non-blocking, like accept4() in Linux */
555                                         if (c->flag & O_NONBLOCK)
556                                                 Fsconvnonblock(nc, TRUE);
557                                 }
558                                 qunlock(&cv->qlock);
559
560                                 qunlock(&cv->listenq);
561                                 poperror();
562                         }
563                         closeconv(cv);
564                         poperror();
565                         break;
566         }
567         c->mode = openmode(omode);
568         c->flag |= COPEN;
569         c->offset = 0;
570         return c;
571 }
572
573 static int ipwstat(struct chan *c, uint8_t * dp, int n)
574 {
575         ERRSTACK(2);
576         struct dir *d;
577         struct conv *cv;
578         struct Fs *f;
579         struct Proto *p;
580
581         f = ipfs[c->dev];
582         switch (TYPE(c->qid)) {
583                 default:
584                         error(EPERM, ERROR_FIXME);
585                         break;
586                 case Qctl:
587                 case Qdata:
588                         break;
589         }
590
591         d = kzmalloc(sizeof(*d) + n, 0);
592         if (waserror()) {
593                 kfree(d);
594                 nexterror();
595         }
596         n = convM2D(dp, n, d, (char *)&d[1]);
597         if (n == 0)
598                 error(ENODATA, ERROR_FIXME);
599         p = f->p[PROTO(c->qid)];
600         cv = p->conv[CONV(c->qid)];
601         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
602                 error(EPERM, ERROR_FIXME);
603         if (!emptystr(d->uid))
604                 kstrdup(&cv->owner, d->uid);
605         if (d->mode != ~0UL)
606                 cv->perm = d->mode & 0777;
607         poperror();
608         kfree(d);
609         return n;
610 }
611
612 /* Should be able to handle any file type chan. Feel free to extend it. */
613 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
614 {
615         struct conv *conv;
616         struct Proto *proto;
617         char *p;
618         struct Fs *f;
619
620         f = ipfs[ch->dev];
621
622         switch (TYPE(ch->qid)) {
623                 default:
624                         ret = "Unknown type";
625                         break;
626                 case Qdata:
627                         proto = f->p[PROTO(ch->qid)];
628                         conv = proto->conv[CONV(ch->qid)];
629                         snprintf(ret, ret_l, "Qdata, %s %s proto %s, conv idx %d",
630                                  conv->nonblock ? "nonblock" : "block",
631                                  SLIST_EMPTY(&conv->data_taps) ? "untapped" : "tapped",
632                                  proto->name, conv->x);
633                         break;
634                 case Qarp:
635                         ret = "Qarp";
636                         break;
637                 case Qiproute:
638                         ret = "Qiproute";
639                         break;
640                 case Qlisten:
641                         proto = f->p[PROTO(ch->qid)];
642                         conv = proto->conv[CONV(ch->qid)];
643                         snprintf(ret, ret_l, "Qlisten, %s %s proto %s, conv idx %d",
644                                  conv->nonblock ? "nonblock" : "block",
645                                  SLIST_EMPTY(&conv->data_taps) ? "untapped" : "tapped",
646                                  proto->name, conv->x);
647                         break;
648                 case Qlog:
649                         ret = "Qlog";
650                         break;
651                 case Qndb:
652                         ret = "Qndb";
653                         break;
654                 case Qctl:
655                         proto = f->p[PROTO(ch->qid)];
656                         conv = proto->conv[CONV(ch->qid)];
657                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
658                                          conv->x);
659                         break;
660         }
661         return ret;
662 }
663
664 static void closeconv(struct conv *cv)
665 {
666         struct conv *nc;
667         struct Ipmulti *mp;
668
669         qlock(&cv->qlock);
670
671         if (--cv->inuse > 0) {
672                 qunlock(&cv->qlock);
673                 return;
674         }
675
676         /* close all incoming calls since no listen will ever happen */
677         for (nc = cv->incall; nc; nc = cv->incall) {
678                 cv->incall = nc->next;
679                 closeconv(nc);
680         }
681         cv->incall = NULL;
682
683         kstrdup(&cv->owner, network);
684         cv->perm = 0660;
685
686         while ((mp = cv->multi) != NULL)
687                 ipifcremmulti(cv, mp->ma, mp->ia);
688
689         cv->r = NULL;
690         cv->rgen = 0;
691         cv->p->close(cv);
692         cv->state = Idle;
693         qunlock(&cv->qlock);
694 }
695
696 static void ipclose(struct chan *c)
697 {
698         struct Fs *f;
699
700         f = ipfs[c->dev];
701         switch (TYPE(c->qid)) {
702                 default:
703                         break;
704                 case Qlog:
705                         if (c->flag & COPEN)
706                                 netlogclose(f);
707                         break;
708                 case Qiprouter:
709                         if (c->flag & COPEN)
710                                 iprouterclose(f);
711                         break;
712                 case Qdata:
713                 case Qctl:
714                 case Qerr:
715                 case Qlisten:
716                         if (c->flag & COPEN)
717                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
718                         break;
719                 case Qsnoop:
720                         if (c->flag & COPEN)
721                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
722                         break;
723         }
724         kfree(((struct IPaux *)c->aux)->owner);
725         kfree(c->aux);
726 }
727
728 enum {
729         Statelen = 32 * 1024,
730 };
731
732 static long ipread(struct chan *ch, void *a, long n, int64_t off)
733 {
734         struct conv *c;
735         struct Proto *x;
736         char *buf, *p;
737         long rv;
738         struct Fs *f;
739         uint32_t offset = off;
740         size_t sofar;
741
742         f = ipfs[ch->dev];
743
744         p = a;
745         switch (TYPE(ch->qid)) {
746                 default:
747                         error(EPERM, ERROR_FIXME);
748                 case Qtopdir:
749                 case Qprotodir:
750                 case Qconvdir:
751                         return devdirread(ch, a, n, 0, 0, ipgen);
752                 case Qarp:
753                         return arpread(f->arp, a, offset, n);
754                 case Qndb:
755                         return readstr(offset, a, n, f->ndb);
756                 case Qiproute:
757                         return routeread(f, a, offset, n);
758                 case Qiprouter:
759                         return iprouterread(f, a, n);
760                 case Qipselftab:
761                         return ipselftabread(f, a, offset, n);
762                 case Qlog:
763                         return netlogread(f, a, offset, n);
764                 case Qctl:
765                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
766                         return readstr(offset, p, n, get_cur_genbuf());
767                 case Qremote:
768                         buf = kzmalloc(Statelen, 0);
769                         x = f->p[PROTO(ch->qid)];
770                         c = x->conv[CONV(ch->qid)];
771                         if (x->remote == NULL) {
772                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
773                         } else {
774                                 (*x->remote) (c, buf, Statelen - 2);
775                         }
776                         rv = readstr(offset, p, n, buf);
777                         kfree(buf);
778                         return rv;
779                 case Qlocal:
780                         buf = kzmalloc(Statelen, 0);
781                         x = f->p[PROTO(ch->qid)];
782                         c = x->conv[CONV(ch->qid)];
783                         if (x->local == NULL) {
784                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
785                         } else {
786                                 (*x->local) (c, buf, Statelen - 2);
787                         }
788                         rv = readstr(offset, p, n, buf);
789                         kfree(buf);
790                         return rv;
791                 case Qstatus:
792                         /* this all is a bit screwed up since the size of some state's
793                          * buffers will change from one invocation to another.  a reader
794                          * will come in and read the entire buffer.  then it will come again
795                          * and read from the next offset, expecting EOF.  if the buffer
796                          * changed sizes, it'll reprint the end of the buffer slightly. */
797                         buf = kzmalloc(Statelen, 0);
798                         x = f->p[PROTO(ch->qid)];
799                         c = x->conv[CONV(ch->qid)];
800                         sofar = (*x->state) (c, buf, Statelen - 2);
801                         sofar += snprintf(buf + sofar, Statelen - 2 - sofar, "nonblock %s\n",
802                                           c->nonblock ? "on" : "off");
803                         rv = readstr(offset, p, n, buf);
804                         kfree(buf);
805                         return rv;
806                 case Qdata:
807                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
808                         return qread(c->rq, a, n);
809                 case Qerr:
810                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
811                         return qread(c->eq, a, n);
812                 case Qsnoop:
813                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
814                         return qread(c->sq, a, n);
815                 case Qstats:
816                         x = f->p[PROTO(ch->qid)];
817                         if (x->stats == NULL)
818                                 error(EFAIL, "stats not implemented");
819                         buf = kzmalloc(Statelen, 0);
820                         (*x->stats) (x, buf, Statelen);
821                         rv = readstr(offset, p, n, buf);
822                         kfree(buf);
823                         return rv;
824         }
825 }
826
827 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
828 {
829         struct conv *c;
830         struct Proto *x;
831         struct Fs *f;
832
833         switch (TYPE(ch->qid)) {
834                 case Qdata:
835                         f = ipfs[ch->dev];
836                         x = f->p[PROTO(ch->qid)];
837                         c = x->conv[CONV(ch->qid)];
838                         return qbread(c->rq, n);
839                 default:
840                         return devbread(ch, n, offset);
841         }
842 }
843
844 /*
845  *  set local address to be that of the ifc closest to remote address
846  */
847 static void setladdr(struct conv *c)
848 {
849         findlocalip(c->p->f, c->laddr, c->raddr);
850 }
851
852 /*
853  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
854  */
855 static void setluniqueport(struct conv *c, int lport)
856 {
857         struct Proto *p;
858         struct conv *xp;
859         int x;
860
861         p = c->p;
862
863         qlock(&p->qlock);
864         for (x = 0; x < p->nc; x++) {
865                 xp = p->conv[x];
866                 if (xp == NULL)
867                         break;
868                 if (xp == c)
869                         continue;
870                 if ((xp->state == Connected || xp->state == Announced)
871                         && xp->lport == lport
872                         && xp->rport == c->rport
873                         && ipcmp(xp->raddr, c->raddr) == 0
874                         && ipcmp(xp->laddr, c->laddr) == 0) {
875                         qunlock(&p->qlock);
876                         error(EFAIL, "address in use");
877                 }
878         }
879         c->lport = lport;
880         qunlock(&p->qlock);
881 }
882
883 /*
884  *  pick a local port and set it
885  */
886 static void setlport(struct conv *c)
887 {
888         struct Proto *p;
889         uint16_t *pp;
890         int x, found;
891
892         p = c->p;
893         if (c->restricted)
894                 pp = &p->nextrport;
895         else
896                 pp = &p->nextport;
897         qlock(&p->qlock);
898         for (;; (*pp)++) {
899                 /*
900                  * Fsproto initialises p->nextport to 0 and the restricted
901                  * ports (p->nextrport) to 600.
902                  * Restricted ports must lie between 600 and 1024.
903                  * For the initial condition or if the unrestricted port number
904                  * has wrapped round, select a random port between 5000 and 1<<15
905                  * to start at.
906                  */
907                 if (c->restricted) {
908                         if (*pp >= 1024)
909                                 *pp = 600;
910                 } else
911                         while (*pp < 5000)
912                                 urandom_read(pp, sizeof(*pp));
913
914                 found = 0;
915                 for (x = 0; x < p->nc; x++) {
916                         if (p->conv[x] == NULL)
917                                 break;
918                         if (p->conv[x]->lport == *pp) {
919                                 found = 1;
920                                 break;
921                         }
922                 }
923                 if (!found)
924                         break;
925         }
926         c->lport = (*pp)++;
927         qunlock(&p->qlock);
928 }
929
930 /*
931  *  set a local address and port from a string of the form
932  *      [address!]port[!r]
933  */
934 static void setladdrport(struct conv *c, char *str, int announcing)
935 {
936         char *p;
937         uint16_t lport;
938         uint8_t addr[IPaddrlen];
939
940         /*
941          *  ignore restricted part if it exists.  it's
942          *  meaningless on local ports.
943          */
944         p = strchr(str, '!');
945         if (p != NULL) {
946                 *p++ = 0;
947                 if (strcmp(p, "r") == 0)
948                         p = NULL;
949         }
950
951         c->lport = 0;
952         if (p == NULL) {
953                 if (announcing)
954                         ipmove(c->laddr, IPnoaddr);
955                 else
956                         setladdr(c);
957                 p = str;
958         } else {
959                 if (strcmp(str, "*") == 0)
960                         ipmove(c->laddr, IPnoaddr);
961                 else {
962                         parseip(addr, str);
963                         if (ipforme(c->p->f, addr))
964                                 ipmove(c->laddr, addr);
965                         else
966                                 error(EFAIL, "not a local IP address");
967                 }
968         }
969
970         /* one process can get all connections */
971         if (announcing && strcmp(p, "*") == 0) {
972                 if (!iseve())
973                         error(EPERM, ERROR_FIXME);
974                 setluniqueport(c, 0);
975         }
976
977         lport = atoi(p);
978         if (lport <= 0)
979                 setlport(c);
980         else
981                 setluniqueport(c, lport);
982 }
983
984 static void setraddrport(struct conv *c, char *str)
985 {
986         char *p;
987
988         p = strchr(str, '!');
989         if (p == NULL)
990                 error(EFAIL, "malformed address");
991         *p++ = 0;
992         parseip(c->raddr, str);
993         c->rport = atoi(p);
994         p = strchr(p, '!');
995         if (p) {
996                 if (strstr(p, "!r") != NULL)
997                         c->restricted = 1;
998         }
999 }
1000
1001 /*
1002  *  called by protocol connect routine to set addresses
1003  */
1004 void Fsstdconnect(struct conv *c, char *argv[], int argc)
1005 {
1006         switch (argc) {
1007                 default:
1008                         error(EINVAL, "bad args to %s", __func__);
1009                 case 2:
1010                         setraddrport(c, argv[1]);
1011                         setladdr(c);
1012                         setlport(c);
1013                         break;
1014                 case 3:
1015                         setraddrport(c, argv[1]);
1016                         setladdrport(c, argv[2], 0);
1017                         break;
1018         }
1019
1020         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
1021                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
1022                 || ipcmp(c->raddr, IPnoaddr) == 0)
1023                 c->ipversion = V4;
1024         else
1025                 c->ipversion = V6;
1026 }
1027
1028 /*
1029  *  initiate connection and sleep till its set up
1030  */
1031 static int connected(void *a)
1032 {
1033         return ((struct conv *)a)->state == Connected;
1034 }
1035
1036 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1037 {
1038         ERRSTACK(1);
1039         char *p;
1040
1041         if (c->state != 0)
1042                 error(EBUSY, ERROR_FIXME);
1043         c->state = Connecting;
1044         c->cerr[0] = '\0';
1045         if (x->connect == NULL)
1046                 error(EFAIL, "connect not supported");
1047         x->connect(c, cb->f, cb->nf);
1048
1049         qunlock(&c->qlock);
1050         if (waserror()) {
1051                 qlock(&c->qlock);
1052                 nexterror();
1053         }
1054         rendez_sleep(&c->cr, connected, c);
1055         qlock(&c->qlock);
1056         poperror();
1057
1058         if (c->cerr[0] != '\0')
1059                 error(EFAIL, c->cerr);
1060 }
1061
1062 /*
1063  *  called by protocol announce routine to set addresses
1064  */
1065 void Fsstdannounce(struct conv *c, char *argv[], int argc)
1066 {
1067         memset(c->raddr, 0, sizeof(c->raddr));
1068         c->rport = 0;
1069         switch (argc) {
1070                 default:
1071                         error(EINVAL, "bad args to announce");
1072                 case 2:
1073                         setladdrport(c, argv[1], 1);
1074                         break;
1075         }
1076 }
1077
1078 /*
1079  *  initiate announcement and sleep till its set up
1080  */
1081 static int announced(void *a)
1082 {
1083         return ((struct conv *)a)->state == Announced;
1084 }
1085
1086 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1087 {
1088         ERRSTACK(1);
1089         char *p;
1090
1091         if (c->state != 0)
1092                 error(EBUSY, ERROR_FIXME);
1093         c->state = Announcing;
1094         c->cerr[0] = '\0';
1095         if (x->announce == NULL)
1096                 error(EFAIL, "announce not supported");
1097         x->announce(c, cb->f, cb->nf);
1098
1099         qunlock(&c->qlock);
1100         if (waserror()) {
1101                 qlock(&c->qlock);
1102                 nexterror();
1103         }
1104         rendez_sleep(&c->cr, announced, c);
1105         qlock(&c->qlock);
1106         poperror();
1107
1108         if (c->cerr[0] != '\0')
1109                 error(EFAIL, c->cerr);
1110 }
1111
1112 /*
1113  *  called by protocol bind routine to set addresses
1114  */
1115 void Fsstdbind(struct conv *c, char *argv[], int argc)
1116 {
1117         switch (argc) {
1118                 default:
1119                         error(EINVAL, "bad args to bind");
1120                 case 2:
1121                         setladdrport(c, argv[1], 0);
1122                         break;
1123         }
1124 }
1125
1126 void Fsconvnonblock(struct conv *cv, bool onoff)
1127 {
1128         qnonblock(cv->wq, onoff);
1129         qnonblock(cv->rq, onoff);
1130         cv->nonblock = onoff;
1131 }
1132
1133 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1134 {
1135         if (x->bind == NULL)
1136                 Fsstdbind(c, cb->f, cb->nf);
1137         else
1138                 x->bind(c, cb->f, cb->nf);
1139 }
1140
1141 static void nonblockctlmsg(struct conv *c, struct cmdbuf *cb)
1142 {
1143         if (cb->nf < 2)
1144                 goto err;
1145         if (!strcmp(cb->f[1], "on"))
1146                 Fsconvnonblock(c, TRUE);
1147         else if (!strcmp(cb->f[1], "off"))
1148                 Fsconvnonblock(c, FALSE);
1149         else
1150                 goto err;
1151         return;
1152 err:
1153         error(EINVAL, "nonblock [on|off]");
1154 }
1155
1156 static void shutdownctlmsg(struct conv *cv, struct cmdbuf *cb)
1157 {
1158         if (cb->nf < 2)
1159                 goto err;
1160         if (!strcmp(cb->f[1], "rd")) {
1161                 qhangup(cv->rq, "shutdown");
1162                 if (cv->p->shutdown)
1163                         cv->p->shutdown(cv, SHUT_RD);
1164         } else if (!strcmp(cb->f[1], "wr")) {
1165                 qhangup(cv->wq, "shutdown");
1166                 if (cv->p->shutdown)
1167                         cv->p->shutdown(cv, SHUT_WR);
1168         } else if (!strcmp(cb->f[1], "rdwr")) {
1169                 qhangup(cv->rq, "shutdown");
1170                 qhangup(cv->wq, "shutdown");
1171                 if (cv->p->shutdown)
1172                         cv->p->shutdown(cv, SHUT_RDWR);
1173         } else {
1174                 goto err;
1175         }
1176         return;
1177 err:
1178         error(EINVAL, "shutdown [rx|tx|rxtx]");
1179 }
1180
1181 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1182 {
1183         if (cb->nf < 2)
1184                 c->tos = 0;
1185         else
1186                 c->tos = atoi(cb->f[1]);
1187 }
1188
1189 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1190 {
1191         if (cb->nf < 2)
1192                 c->ttl = MAXTTL;
1193         else
1194                 c->ttl = atoi(cb->f[1]);
1195 }
1196
1197 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1198 {
1199         ERRSTACK(1);
1200         struct conv *c;
1201         struct Proto *x;
1202         char *p;
1203         struct cmdbuf *cb;
1204         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1205         struct Fs *f;
1206         char *a;
1207
1208         a = v;
1209         f = ipfs[ch->dev];
1210
1211         switch (TYPE(ch->qid)) {
1212                 default:
1213                         error(EPERM, ERROR_FIXME);
1214                 case Qdata:
1215                         x = f->p[PROTO(ch->qid)];
1216                         c = x->conv[CONV(ch->qid)];
1217                         qwrite(c->wq, a, n);
1218                         break;
1219                 case Qarp:
1220                         return arpwrite(f, a, n);
1221                 case Qiproute:
1222                         return routewrite(f, ch, a, n);
1223                 case Qlog:
1224                         netlogctl(f, a, n);
1225                         return n;
1226                 case Qndb:
1227                         return ndbwrite(f, a, off, n);
1228                 case Qctl:
1229                         x = f->p[PROTO(ch->qid)];
1230                         c = x->conv[CONV(ch->qid)];
1231                         cb = parsecmd(a, n);
1232
1233                         qlock(&c->qlock);
1234                         if (waserror()) {
1235                                 qunlock(&c->qlock);
1236                                 kfree(cb);
1237                                 nexterror();
1238                         }
1239                         if (cb->nf < 1)
1240                                 error(EFAIL, "short control request");
1241                         if (strcmp(cb->f[0], "connect") == 0)
1242                                 connectctlmsg(x, c, cb);
1243                         else if (strcmp(cb->f[0], "announce") == 0)
1244                                 announcectlmsg(x, c, cb);
1245                         else if (strcmp(cb->f[0], "bind") == 0)
1246                                 bindctlmsg(x, c, cb);
1247                         else if (strcmp(cb->f[0], "nonblock") == 0)
1248                                 nonblockctlmsg(c, cb);
1249                         else if (strcmp(cb->f[0], "shutdown") == 0)
1250                                 shutdownctlmsg(c, cb);
1251                         else if (strcmp(cb->f[0], "ttl") == 0)
1252                                 ttlctlmsg(c, cb);
1253                         else if (strcmp(cb->f[0], "tos") == 0)
1254                                 tosctlmsg(c, cb);
1255                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1256                                 c->ignoreadvice = 1;
1257                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1258                                 if (cb->nf < 2)
1259                                         error(EFAIL, "addmulti needs interface address");
1260                                 if (cb->nf == 2) {
1261                                         if (!ipismulticast(c->raddr))
1262                                                 error(EFAIL, "addmulti for a non multicast address");
1263                                         parseip(ia, cb->f[1]);
1264                                         ipifcaddmulti(c, c->raddr, ia);
1265                                 } else {
1266                                         parseip(ma, cb->f[2]);
1267                                         if (!ipismulticast(ma))
1268                                                 error(EFAIL, "addmulti for a non multicast address");
1269                                         parseip(ia, cb->f[1]);
1270                                         ipifcaddmulti(c, ma, ia);
1271                                 }
1272                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1273                                 if (cb->nf < 2)
1274                                         error(EFAIL, "remmulti needs interface address");
1275                                 if (!ipismulticast(c->raddr))
1276                                         error(EFAIL, "remmulti for a non multicast address");
1277                                 parseip(ia, cb->f[1]);
1278                                 ipifcremmulti(c, c->raddr, ia);
1279                         } else if (x->ctl != NULL) {
1280                                 x->ctl(c, cb->f, cb->nf);
1281                         } else
1282                                 error(EFAIL, "unknown control request");
1283                         qunlock(&c->qlock);
1284                         kfree(cb);
1285                         poperror();
1286         }
1287         return n;
1288 }
1289
1290 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1291 {
1292         struct conv *c;
1293         struct Proto *x;
1294         struct Fs *f;
1295         int n;
1296
1297         switch (TYPE(ch->qid)) {
1298                 case Qdata:
1299                         f = ipfs[ch->dev];
1300                         x = f->p[PROTO(ch->qid)];
1301                         c = x->conv[CONV(ch->qid)];
1302                         if (bp->next)
1303                                 bp = concatblock(bp);
1304                         n = BLEN(bp);
1305                         qbwrite(c->wq, bp);
1306                         return n;
1307                 default:
1308                         return devbwrite(ch, bp, offset);
1309         }
1310 }
1311
1312 static void ip_wake_cb(struct queue *q, void *data, int filter)
1313 {
1314         struct conv *conv = (struct conv*)data;
1315         struct fd_tap *tap_i;
1316         /* For these two, we want to ignore events on the opposite end of the
1317          * queues.  For instance, we want to know when the WQ is writable.  Our
1318          * writes will actually make it readable - we don't want to trigger a tap
1319          * for that.  However, qio doesn't know how/why we are using a queue, or
1320          * even who the ends are (hence the callbacks) */
1321         if ((filter & FDTAP_FILT_READABLE) && (q == conv->wq))
1322                 return;
1323         if ((filter & FDTAP_FILT_WRITABLE) && (q == conv->rq))
1324                 return;
1325         /* At this point, we have an event we want to send to our taps (if any).
1326          * The lock protects list integrity and the existence of the tap.
1327          *
1328          * Previously, I thought of using the conv qlock.  That actually breaks, due
1329          * to weird usages of the qlock (someone holds it for a long time, blocking
1330          * the inbound wakeup from etherread4).
1331          *
1332          * I opted for a spinlock for a couple reasons:
1333          * - fire_tap should not block.  ideally it'll be fast too (it's mostly a
1334          * send_event).
1335          * - our callers might not want to block.  A lot of network wakeups will
1336          * come network processes (etherread4) or otherwise unrelated to this
1337          * particular conversation.  I'd rather do something like fire off a KMSG
1338          * than block those.
1339          * - if fire_tap takes a while, holding the lock only slows down other
1340          * events on this *same* conversation, or other tap registration.  not a
1341          * huge deal. */
1342         spin_lock(&conv->tap_lock);
1343         SLIST_FOREACH(tap_i, &conv->data_taps, link)
1344                 fire_tap(tap_i, filter);
1345         spin_unlock(&conv->tap_lock);
1346 }
1347
1348 int iptapfd(struct chan *chan, struct fd_tap *tap, int cmd)
1349 {
1350         struct conv *conv;
1351         struct Proto *x;
1352         struct Fs *f;
1353         int ret;
1354
1355         #define DEVIP_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
1356                                        FDTAP_FILT_HANGUP | FDTAP_FILT_PRIORITY |   \
1357                                        FDTAP_FILT_ERROR)
1358         #define DEVIP_LEGAL_LISTEN_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
1359
1360         /* That's a lot of pointers to get to the conv! */
1361         f = ipfs[chan->dev];
1362         x = f->p[PROTO(chan->qid)];
1363         conv = x->conv[CONV(chan->qid)];
1364
1365         switch (TYPE(chan->qid)) {
1366                 case Qdata:
1367                         if (tap->filter & ~DEVIP_LEGAL_DATA_TAPS) {
1368                                 set_errno(ENOSYS);
1369                                 set_errstr("Unsupported #%s data tap %p, must be %p", devname(),
1370                                            tap->filter, DEVIP_LEGAL_DATA_TAPS);
1371                                 return -1;
1372                         }
1373                         spin_lock(&conv->tap_lock);
1374                         switch (cmd) {
1375                                 case (FDTAP_CMD_ADD):
1376                                         if (SLIST_EMPTY(&conv->data_taps)) {
1377                                                 qio_set_wake_cb(conv->rq, ip_wake_cb, conv);
1378                                                 qio_set_wake_cb(conv->wq, ip_wake_cb, conv);
1379                                         }
1380                                         SLIST_INSERT_HEAD(&conv->data_taps, tap, link);
1381                                         ret = 0;
1382                                         break;
1383                                 case (FDTAP_CMD_REM):
1384                                         SLIST_REMOVE(&conv->data_taps, tap, fd_tap, link);
1385                                         if (SLIST_EMPTY(&conv->data_taps)) {
1386                                                 qio_set_wake_cb(conv->rq, 0, conv);
1387                                                 qio_set_wake_cb(conv->wq, 0, conv);
1388                                         }
1389                                         ret = 0;
1390                                         break;
1391                                 default:
1392                                         set_errno(ENOSYS);
1393                                         set_errstr("Unsupported #%s data tap command %p",
1394                                                    devname(), cmd);
1395                                         ret = -1;
1396                         }
1397                         spin_unlock(&conv->tap_lock);
1398                         return ret;
1399                 case Qlisten:
1400                         if (tap->filter & ~DEVIP_LEGAL_LISTEN_TAPS) {
1401                                 set_errno(ENOSYS);
1402                                 set_errstr("Unsupported #%s listen tap %p, must be %p",
1403                                            devname(), tap->filter, DEVIP_LEGAL_LISTEN_TAPS);
1404                                 return -1;
1405                         }
1406                         spin_lock(&conv->tap_lock);
1407                         switch (cmd) {
1408                                 case (FDTAP_CMD_ADD):
1409                                         SLIST_INSERT_HEAD(&conv->listen_taps, tap, link);
1410                                         ret = 0;
1411                                         break;
1412                                 case (FDTAP_CMD_REM):
1413                                         SLIST_REMOVE(&conv->listen_taps, tap, fd_tap, link);
1414                                         ret = 0;
1415                                         break;
1416                                 default:
1417                                         set_errno(ENOSYS);
1418                                         set_errstr("Unsupported #%s listen tap command %p",
1419                                                    devname(), cmd);
1420                                         ret = -1;
1421                         }
1422                         spin_unlock(&conv->tap_lock);
1423                         return ret;
1424                 default:
1425                         set_errno(ENOSYS);
1426                         set_errstr("Can't tap #%s file type %d", devname(),
1427                                    TYPE(chan->qid));
1428                         return -1;
1429         }
1430 }
1431
1432 struct dev ipdevtab __devtab = {
1433         .name = "ip",
1434
1435         .reset = ipreset,
1436         .init = ipinit,
1437         .shutdown = devshutdown,
1438         .attach = ipattach,
1439         .walk = ipwalk,
1440         .stat = ipstat,
1441         .open = ipopen,
1442         .create = devcreate,
1443         .close = ipclose,
1444         .read = ipread,
1445         .bread = ipbread,
1446         .write = ipwrite,
1447         .bwrite = ipbwrite,
1448         .remove = devremove,
1449         .wstat = ipwstat,
1450         .power = devpower,
1451         .chaninfo = ipchaninfo,
1452         .tapfd = iptapfd,
1453 };
1454
1455 int Fsproto(struct Fs *f, struct Proto *p)
1456 {
1457         if (f->np >= Maxproto)
1458                 return -1;
1459
1460         qlock_init(&p->qlock);
1461         p->f = f;
1462
1463         if (p->ipproto > 0) {
1464                 if (f->t2p[p->ipproto] != NULL)
1465                         return -1;
1466                 f->t2p[p->ipproto] = p;
1467         }
1468
1469         p->qid.type = QTDIR;
1470         p->qid.path = QID(f->np, 0, Qprotodir);
1471         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1472         if (p->conv == NULL)
1473                 panic("Fsproto");
1474
1475         p->x = f->np;
1476         p->nextport = 0;
1477         p->nextrport = 600;
1478         f->p[f->np++] = p;
1479
1480         return 0;
1481 }
1482
1483 /*
1484  *  return true if this protocol is
1485  *  built in
1486  */
1487 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1488 {
1489         return f->t2p[proto] != NULL;
1490 }
1491
1492 /*
1493  *  called with protocol locked
1494  */
1495 struct conv *Fsprotoclone(struct Proto *p, char *user)
1496 {
1497         struct conv *c, **pp, **ep;
1498
1499 retry:
1500         c = NULL;
1501         ep = &p->conv[p->nc];
1502         for (pp = p->conv; pp < ep; pp++) {
1503                 c = *pp;
1504                 if (c == NULL) {
1505                         c = kzmalloc(sizeof(struct conv), 0);
1506                         if (c == NULL)
1507                                 error(ENOMEM, ERROR_FIXME);
1508                         qlock_init(&c->qlock);
1509                         qlock_init(&c->listenq);
1510                         rendez_init(&c->cr);
1511                         rendez_init(&c->listenr);
1512                         SLIST_INIT(&c->data_taps);      /* already = 0; set to be futureproof */
1513                         SLIST_INIT(&c->listen_taps);
1514                         spinlock_init(&c->tap_lock);
1515                         qlock(&c->qlock);
1516                         c->p = p;
1517                         c->x = pp - p->conv;
1518                         if (p->ptclsize != 0) {
1519                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1520                                 if (c->ptcl == NULL) {
1521                                         kfree(c);
1522                                         error(ENOMEM, ERROR_FIXME);
1523                                 }
1524                         }
1525                         *pp = c;
1526                         p->ac++;
1527                         c->eq = qopen(1024, Qmsg, 0, 0);
1528                         (*p->create) (c);
1529                         assert(c->rq && c->wq);
1530                         break;
1531                 }
1532                 if (canqlock(&c->qlock)) {
1533                         /*
1534                          *  make sure both processes and protocol
1535                          *  are done with this Conv
1536                          */
1537                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1538                                 break;
1539
1540                         qunlock(&c->qlock);
1541                 }
1542         }
1543         if (pp >= ep) {
1544                 if (p->gc != NULL && (*p->gc) (p))
1545                         goto retry;
1546                 return NULL;
1547         }
1548
1549         c->inuse = 1;
1550         kstrdup(&c->owner, user);
1551         c->perm = 0660;
1552         c->state = Idle;
1553         ipmove(c->laddr, IPnoaddr);
1554         ipmove(c->raddr, IPnoaddr);
1555         c->r = NULL;
1556         c->rgen = 0;
1557         c->lport = 0;
1558         c->rport = 0;
1559         c->restricted = 0;
1560         c->ttl = MAXTTL;
1561         c->tos = DFLTTOS;
1562         c->nonblock = FALSE;
1563         qreopen(c->rq);
1564         qreopen(c->wq);
1565         qreopen(c->eq);
1566
1567         qunlock(&c->qlock);
1568         return c;
1569 }
1570
1571 int Fsconnected(struct conv *c, char *msg)
1572 {
1573         if (msg != NULL && *msg != '\0')
1574                 strlcpy(c->cerr, msg, sizeof(c->cerr));
1575
1576         switch (c->state) {
1577                 case Announcing:
1578                         c->state = Announced;
1579                         break;
1580
1581                 case Connecting:
1582                         c->state = Connected;
1583                         break;
1584         }
1585
1586         rendez_wakeup(&c->cr);
1587         return 0;
1588 }
1589
1590 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1591 {
1592         if (f->ipmux)
1593                 return f->ipmux;
1594         else
1595                 return f->t2p[proto];
1596 }
1597
1598 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1599 {
1600         return f->t2p[proto];
1601 }
1602
1603 static void fire_listener_taps(struct conv *conv)
1604 {
1605         struct fd_tap *tap_i;
1606         if (SLIST_EMPTY(&conv->listen_taps))
1607                 return;
1608         spin_lock(&conv->tap_lock);
1609         SLIST_FOREACH(tap_i, &conv->listen_taps, link)
1610                 fire_tap(tap_i, FDTAP_FILT_READABLE);
1611         spin_unlock(&conv->tap_lock);
1612 }
1613
1614 /*
1615  *  called with protocol locked
1616  */
1617 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1618                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1619 {
1620         struct conv *nc;
1621         struct conv **l;
1622         int i;
1623
1624         qlock(&c->qlock);
1625         i = 0;
1626         for (l = &c->incall; *l; l = &(*l)->next)
1627                 i++;
1628         if (i >= Maxincall) {
1629                 qunlock(&c->qlock);
1630                 return NULL;
1631         }
1632
1633         /* find a free conversation */
1634         nc = Fsprotoclone(c->p, network);
1635         if (nc == NULL) {
1636                 qunlock(&c->qlock);
1637                 return NULL;
1638         }
1639         ipmove(nc->raddr, raddr);
1640         nc->rport = rport;
1641         ipmove(nc->laddr, laddr);
1642         nc->lport = lport;
1643         nc->next = NULL;
1644         *l = nc;
1645         nc->state = Connected;
1646         nc->ipversion = version;
1647
1648         qunlock(&c->qlock);
1649
1650         rendez_wakeup(&c->listenr);
1651         fire_listener_taps(c);
1652
1653         return nc;
1654 }
1655
1656 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1657 {
1658         if (off > strlen(f->ndb))
1659                 error(EIO, ERROR_FIXME);
1660         if (off + n >= sizeof(f->ndb) - 1)
1661                 error(EIO, ERROR_FIXME);
1662         memmove(f->ndb + off, a, n);
1663         f->ndb[off + n] = 0;
1664         f->ndbvers++;
1665         f->ndbmtime = seconds();
1666         return n;
1667 }
1668
1669 uint32_t scalednconv(void)
1670 {
1671         //if(conf.npage*BY2PG >= 128*MB)
1672         return Nchans * 4;
1673         //  return Nchans;
1674 }