net: Use chan flag O_NONBLOCK for nonblocking
[akaros.git] / kern / src / net / devip.c
1 /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
2  * Portions Copyright © 1997-1999 Vita Nuova Limited
3  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
4  *                                (www.vitanuova.com)
5  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
6  *
7  * Modified for the Akaros operating system:
8  * Copyright (c) 2013-2014 The Regents of the University of California
9  * Copyright (c) 2013-2015 Google Inc.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included in
19  * all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  * SOFTWARE. */
28
29 #include <vfs.h>
30 #include <kfs.h>
31 #include <slab.h>
32 #include <kmalloc.h>
33 #include <kref.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <assert.h>
37 #include <error.h>
38 #include <cpio.h>
39 #include <pmap.h>
40 #include <smp.h>
41 #include <ip.h>
42
43 struct dev ipdevtab;
44
45 static char *devname(void)
46 {
47         return ipdevtab.name;
48 }
49
/*
 * Qid type codes for every file served by this device, followed by the
 * constants describing how (protocol, conversation, type) are packed into a
 * qid path by QID() and unpacked by TYPE()/CONV()/PROTO().
 */
enum {
	/* top level directory, then the files generated directly inside it */
	Qtopdir = 1,
	Qtopbase,
	Qarp = Qtopbase,
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,

	/* directory for a protocol, then its files */
	Qprotodir,
	Qprotobase,
	Qclone = Qprotobase,
	Qstats,

	/* directory for a conversation, then its files */
	Qconvdir,
	Qconvbase,
	Qctl = Qconvbase,
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,

	/* file type lives in the low Logtype bits of the path */
	Logtype = 5,
	Masktype = (1 << Logtype) - 1,

	/* conversation index sits just above the type field */
	Logconv = 12,
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,

	/* protocol index sits above the conversation field */
	Logproto = 8,
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,

	/* number of slots in the ipfs[] table */
	Nfs = 32,
};
/*
 * Qid path packing.  QID() combines a protocol index p, a conversation index
 * c and a file type y into one path value; TYPE(), CONV() and PROTO() pull
 * the individual fields back out of a qid.  Only the low 32 bits of the path
 * are examined when unpacking.
 */
#define QID(p, c, y)	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y))
#define TYPE(x)		( ((uint32_t)(x).path) & Masktype )
#define CONV(x)		( (((uint32_t)(x).path) >> Shiftconv) & Maskconv )
#define PROTO(x)	( (((uint32_t)(x).path) >> Shiftproto) & Maskproto )
91 static char network[] = "network";
92
93 qlock_t fslock;
94 struct Fs *ipfs[Nfs];                   /* attached fs's */
95 struct queue *qlog;
96
97 extern void nullmediumlink(void);
98 extern void pktmediumlink(void);
99 extern char *eve;
100 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
101 static void closeconv(struct conv *);
102
103 static struct conv *chan2conv(struct chan *chan)
104 {
105         /* That's a lot of pointers to get to the conv! */
106         return ipfs[chan->dev]->p[PROTO(chan->qid)]->conv[CONV(chan->qid)];
107 }
108
109 static inline int founddevdir(struct chan *c, struct qid q, char *n,
110                                                           int64_t length, char *user, long perm,
111                                                           struct dir *db)
112 {
113         devdir(c, q, n, length, user, perm, db);
114         return 1;
115 }
116
117 static int topdirgen(struct chan *c, struct dir *dp)
118 {
119         struct qid q;
120         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
121         snprintf(get_cur_genbuf(), GENBUF_SZ, "#%s%lu", devname(), c->dev);
122         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
123 }
124
125
126 static int ip3gen(struct chan *c, int i, struct dir *dp)
127 {
128         struct qid q;
129         struct conv *cv;
130         char *p;
131
132         cv = chan2conv(c);
133         if (cv->owner == NULL)
134                 kstrdup(&cv->owner, eve);
135         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
136
137         switch (i) {
138                 default:
139                         return -1;
140                 case Qctl:
141                         return founddevdir(c, q, "ctl", 0,
142                                                    cv->owner, cv->perm, dp);
143                 case Qdata:
144                         return founddevdir(c, q, "data", qlen(cv->rq),
145                                                            cv->owner, cv->perm, dp);
146                 case Qerr:
147                         return founddevdir(c, q, "err", qlen(cv->eq),
148                                                            cv->owner, cv->perm, dp);
149                 case Qlisten:
150                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
151                 case Qlocal:
152                         p = "local";
153                         break;
154                 case Qremote:
155                         p = "remote";
156                         break;
157                 case Qsnoop:
158                         if (strcmp(cv->p->name, "ipifc") != 0)
159                                 return -1;
160                         return founddevdir(c, q, "snoop", qlen(cv->sq),
161                                                            cv->owner, 0400, dp);
162                 case Qstatus:
163                         p = "status";
164                         break;
165         }
166         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
167 }
168
169 static int ip2gen(struct chan *c, int i, struct dir *dp)
170 {
171         struct qid q;
172         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
173         switch (i) {
174                 case Qclone:
175                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
176                 case Qstats:
177                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
178         }
179         return -1;
180 }
181
182 static int ip1gen(struct chan *c, int i, struct dir *dp)
183 {
184         struct qid q;
185         char *p;
186         int prot;
187         int len = 0;
188         struct Fs *f;
189         extern uint32_t kerndate;
190
191         f = ipfs[c->dev];
192
193         prot = 0666;
194         mkqid(&q, QID(0, 0, i), 0, QTFILE);
195         switch (i) {
196                 default:
197                         return -1;
198                 case Qarp:
199                         p = "arp";
200                         break;
201                 case Qndb:
202                         p = "ndb";
203                         len = strlen(f->ndb);
204                         q.vers = f->ndbvers;
205                         break;
206                 case Qiproute:
207                         p = "iproute";
208                         break;
209                 case Qipselftab:
210                         p = "ipselftab";
211                         prot = 0444;
212                         break;
213                 case Qiprouter:
214                         p = "iprouter";
215                         break;
216                 case Qlog:
217                         p = "log";
218                         break;
219         }
220         devdir(c, q, p, len, network, prot, dp);
221         if (i == Qndb && f->ndbmtime > kerndate)
222                 dp->mtime = f->ndbmtime;
223         return 1;
224 }
225
226 static int
227 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
228           int s, struct dir *dp)
229 {
230         struct qid q;
231         struct conv *cv;
232         struct Fs *f;
233
234         f = ipfs[c->dev];
235
236         switch (TYPE(c->qid)) {
237                 case Qtopdir:
238                         if (s == DEVDOTDOT)
239                                 return topdirgen(c, dp);
240                         if (s < f->np) {
241                                 if (f->p[s]->connect == NULL)
242                                         return 0;       /* protocol with no user interface */
243                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
244                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
245                         }
246                         s -= f->np;
247                         return ip1gen(c, s + Qtopbase, dp);
248                 case Qarp:
249                 case Qndb:
250                 case Qlog:
251                 case Qiproute:
252                 case Qiprouter:
253                 case Qipselftab:
254                         return ip1gen(c, TYPE(c->qid), dp);
255                 case Qprotodir:
256                         if (s == DEVDOTDOT)
257                                 return topdirgen(c, dp);
258                         else if (s < f->p[PROTO(c->qid)]->ac) {
259                                 cv = f->p[PROTO(c->qid)]->conv[s];
260                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
261                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
262                                 return
263                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
264                         }
265                         s -= f->p[PROTO(c->qid)]->ac;
266                         return ip2gen(c, s + Qprotobase, dp);
267                 case Qclone:
268                 case Qstats:
269                         return ip2gen(c, TYPE(c->qid), dp);
270                 case Qconvdir:
271                         if (s == DEVDOTDOT) {
272                                 s = PROTO(c->qid);
273                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
274                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
275                                 return 1;
276                         }
277                         return ip3gen(c, s + Qconvbase, dp);
278                 case Qctl:
279                 case Qdata:
280                 case Qerr:
281                 case Qlisten:
282                 case Qlocal:
283                 case Qremote:
284                 case Qstatus:
285                 case Qsnoop:
286                         return ip3gen(c, TYPE(c->qid), dp);
287         }
288         return -1;
289 }
290
291 static void ipinit(void)
292 {
293         qlock_init(&fslock);
294         nullmediumlink();
295         pktmediumlink();
296 /* if only
297         fmtinstall('i', eipfmt);
298         fmtinstall('I', eipfmt);
299         fmtinstall('E', eipfmt);
300         fmtinstall('V', eipfmt);
301         fmtinstall('M', eipfmt);
302 */
303 }
304
/* Device reset hook; this device has nothing to do here. */
static void ipreset(void)
{
	/* intentionally empty */
}
308
309 static struct Fs *ipgetfs(int dev)
310 {
311         extern void (*ipprotoinit[]) (struct Fs *);
312         struct Fs *f;
313         int i;
314
315         if (dev >= Nfs)
316                 return NULL;
317
318         qlock(&fslock);
319         if (ipfs[dev] == NULL) {
320                 f = kzmalloc(sizeof(struct Fs), MEM_WAIT);
321                 rwinit(&f->rwlock);
322                 qlock_init(&f->iprouter.qlock);
323                 ip_init(f);
324                 arpinit(f);
325                 netloginit(f);
326                 for (i = 0; ipprotoinit[i]; i++)
327                         ipprotoinit[i] (f);
328                 f->dev = dev;
329                 ipfs[dev] = f;
330         }
331         qunlock(&fslock);
332
333         return ipfs[dev];
334 }
335
336 struct IPaux *newipaux(char *owner, char *tag)
337 {
338         struct IPaux *a;
339         int n;
340
341         a = kzmalloc(sizeof(*a), 0);
342         kstrdup(&a->owner, owner);
343         memset(a->tag, ' ', sizeof(a->tag));
344         n = strlen(tag);
345         if (n > sizeof(a->tag))
346                 n = sizeof(a->tag);
347         memmove(a->tag, tag, n);
348         return a;
349 }
350
/* Owner string stored in the IPaux record hanging off chan c's aux pointer. */
#define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
352
353 static struct chan *ipattach(char *spec)
354 {
355         struct chan *c;
356         int dev;
357
358         dev = atoi(spec);
359         if (dev >= Nfs)
360                 error(EFAIL, "bad specification");
361
362         ipgetfs(dev);
363         c = devattach(devname(), spec);
364         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
365         c->dev = dev;
366
367         c->aux = newipaux(commonuser(), "none");
368
369         return c;
370 }
371
372 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
373                                                           int nname)
374 {
375         struct IPaux *a = c->aux;
376         struct walkqid *w;
377
378         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
379         if (w != NULL && w->clone != NULL)
380                 w->clone->aux = newipaux(a->owner, a->tag);
381         return w;
382 }
383
384 static int ipstat(struct chan *c, uint8_t * db, int n)
385 {
386         return devstat(c, db, n, NULL, 0, ipgen);
387 }
388
389 static int should_wake(void *arg)
390 {
391         struct conv *cv = arg;
392         /* signal that the conv is closed */
393         if (qisclosed(cv->rq))
394                 return TRUE;
395         return cv->incall != NULL;
396 }
397
398 static struct chan *ipopen(struct chan *c, int omode)
399 {
400         ERRSTACK(2);
401         struct conv *cv, *nc;
402         struct Proto *p;
403         int perm;
404         struct Fs *f;
405
406         /* perm is a lone rwx, not the rwx------ from the conversion */
407         perm = omode_to_rwx(omode) >> 6;
408
409         f = ipfs[c->dev];
410
411         switch (TYPE(c->qid)) {
412                 default:
413                         break;
414                 case Qndb:
415                         if (omode & (O_WRITE | O_TRUNC) && !iseve())
416                                 error(EPERM, ERROR_FIXME);
417                         if ((omode & (O_WRITE | O_TRUNC)) == (O_WRITE | O_TRUNC))
418                                 f->ndb[0] = 0;
419                         break;
420                 case Qlog:
421                         netlogopen(f);
422                         break;
423                 case Qiprouter:
424                         iprouteropen(f);
425                         break;
426                 case Qiproute:
427                         break;
428                 case Qtopdir:
429                 case Qprotodir:
430                 case Qconvdir:
431                 case Qstatus:
432                 case Qremote:
433                 case Qlocal:
434                 case Qstats:
435                 case Qipselftab:
436                         if (omode & O_WRITE)
437                                 error(EPERM, ERROR_FIXME);
438                         break;
439                 case Qsnoop:
440                         if (omode & O_WRITE)
441                                 error(EPERM, ERROR_FIXME);
442                         p = f->p[PROTO(c->qid)];
443                         cv = p->conv[CONV(c->qid)];
444                         if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
445                                 error(EPERM, ERROR_FIXME);
446                         atomic_inc(&cv->snoopers);
447                         break;
448                 case Qclone:
449                         p = f->p[PROTO(c->qid)];
450                         qlock(&p->qlock);
451                         if (waserror()) {
452                                 qunlock(&p->qlock);
453                                 nexterror();
454                         }
455                         cv = Fsprotoclone(p, ATTACHER(c));
456                         qunlock(&p->qlock);
457                         poperror();
458                         if (cv == NULL) {
459                                 error(ENODEV, ERROR_FIXME);
460                                 break;
461                         }
462                         mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
463                         break;
464                 case Qdata:
465                 case Qctl:
466                 case Qerr:
467                         p = f->p[PROTO(c->qid)];
468                         qlock(&p->qlock);
469                         cv = p->conv[CONV(c->qid)];
470                         qlock(&cv->qlock);
471                         if (waserror()) {
472                                 qunlock(&cv->qlock);
473                                 qunlock(&p->qlock);
474                                 nexterror();
475                         }
476                         if ((perm & (cv->perm >> 6)) != perm) {
477                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
478                                         error(EPERM, ERROR_FIXME);
479                                 if ((perm & cv->perm) != perm)
480                                         error(EPERM, ERROR_FIXME);
481
482                         }
483                         cv->inuse++;
484                         if (cv->inuse == 1) {
485                                 kstrdup(&cv->owner, ATTACHER(c));
486                                 cv->perm = 0660;
487                         }
488                         qunlock(&cv->qlock);
489                         qunlock(&p->qlock);
490                         poperror();
491                         break;
492                 case Qlisten:
493                         cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
494                         /* No permissions or Announce checks required.  We'll see if that's
495                          * a good idea or not. (the perm check would do nothing, as is,
496                          * since an O_PATH perm is 0).
497                          *
498                          * But we probably want to incref to keep the conversation around
499                          * until this FD/chan is closed.  #ip is a little weird in that
500                          * objects never really go away (high water mark for convs, you can
501                          * always find them in the ns).  I think it is possible to
502                          * namec/ipgen a chan, then have that conv close, then have that
503                          * chan be opened.  You can probably do this with a data file. */
504                         if (omode & O_PATH) {
505                                 qlock(&cv->qlock);
506                                 cv->inuse++;
507                                 qunlock(&cv->qlock);
508                                 break;
509                         }
510                         if ((perm & (cv->perm >> 6)) != perm) {
511                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
512                                         error(EPERM, ERROR_FIXME);
513                                 if ((perm & cv->perm) != perm)
514                                         error(EPERM, ERROR_FIXME);
515
516                         }
517
518                         if (cv->state != Announced)
519                                 error(EFAIL, "not announced");
520
521                         if (waserror()) {
522                                 closeconv(cv);
523                                 nexterror();
524                         }
525                         qlock(&cv->qlock);
526                         cv->inuse++;
527                         qunlock(&cv->qlock);
528
529                         nc = NULL;
530                         while (nc == NULL) {
531                                 /* give up if we got a hangup */
532                                 if (qisclosed(cv->rq))
533                                         error(EFAIL, "listen hungup");
534
535                                 qlock(&cv->listenq);
536                                 if (waserror()) {
537                                         qunlock(&cv->listenq);
538                                         nexterror();
539                                 }
540                                 /* we can peek at incall without grabbing the cv qlock.  if
541                                  * anything is there, it'll remain there until we dequeue it.
542                                  * no one else can, since we hold the listenq lock */
543                                 if ((c->flag & O_NONBLOCK) && !cv->incall)
544                                         error(EAGAIN, "listen queue empty");
545                                 /* wait for a connect */
546                                 rendez_sleep(&cv->listenr, should_wake, cv);
547
548                                 /* if there is a concurrent hangup, they will hold the qlock
549                                  * until the hangup is complete, including closing the cv->rq */
550                                 qlock(&cv->qlock);
551                                 nc = cv->incall;
552                                 if (nc != NULL) {
553                                         cv->incall = nc->next;
554                                         mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
555                                         kstrdup(&cv->owner, ATTACHER(c));
556                                 }
557                                 qunlock(&cv->qlock);
558
559                                 qunlock(&cv->listenq);
560                                 poperror();
561                         }
562                         closeconv(cv);
563                         poperror();
564                         break;
565         }
566         c->mode = openmode(omode);
567         c->flag |= COPEN;
568         c->offset = 0;
569         return c;
570 }
571
572 static int ipwstat(struct chan *c, uint8_t * dp, int n)
573 {
574         ERRSTACK(2);
575         struct dir *d;
576         struct conv *cv;
577         struct Fs *f;
578         struct Proto *p;
579
580         f = ipfs[c->dev];
581         switch (TYPE(c->qid)) {
582                 default:
583                         error(EPERM, ERROR_FIXME);
584                         break;
585                 case Qctl:
586                 case Qdata:
587                         break;
588         }
589
590         d = kzmalloc(sizeof(*d) + n, 0);
591         if (waserror()) {
592                 kfree(d);
593                 nexterror();
594         }
595         n = convM2D(dp, n, d, (char *)&d[1]);
596         if (n == 0)
597                 error(ENODATA, ERROR_FIXME);
598         p = f->p[PROTO(c->qid)];
599         cv = p->conv[CONV(c->qid)];
600         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
601                 error(EPERM, ERROR_FIXME);
602         if (!emptystr(d->uid))
603                 kstrdup(&cv->owner, d->uid);
604         if (d->mode != ~0UL)
605                 cv->perm = d->mode & 0777;
606         poperror();
607         kfree(d);
608         return n;
609 }
610
611 /* Should be able to handle any file type chan. Feel free to extend it. */
612 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
613 {
614         struct conv *conv;
615         struct Proto *proto;
616         char *p;
617         struct Fs *f;
618
619         f = ipfs[ch->dev];
620
621         switch (TYPE(ch->qid)) {
622                 default:
623                         ret = "Unknown type";
624                         break;
625                 case Qdata:
626                         proto = f->p[PROTO(ch->qid)];
627                         conv = proto->conv[CONV(ch->qid)];
628                         snprintf(ret, ret_l, "Qdata, %s proto %s, conv idx %d",
629                                  SLIST_EMPTY(&conv->data_taps) ? "untapped" : "tapped",
630                                  proto->name, conv->x);
631                         break;
632                 case Qarp:
633                         ret = "Qarp";
634                         break;
635                 case Qiproute:
636                         ret = "Qiproute";
637                         break;
638                 case Qlisten:
639                         proto = f->p[PROTO(ch->qid)];
640                         conv = proto->conv[CONV(ch->qid)];
641                         snprintf(ret, ret_l, "Qlisten, %s proto %s, conv idx %d",
642                                  SLIST_EMPTY(&conv->listen_taps) ? "untapped" : "tapped",
643                                  proto->name, conv->x);
644                         break;
645                 case Qlog:
646                         ret = "Qlog";
647                         break;
648                 case Qndb:
649                         ret = "Qndb";
650                         break;
651                 case Qctl:
652                         proto = f->p[PROTO(ch->qid)];
653                         conv = proto->conv[CONV(ch->qid)];
654                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
655                                          conv->x);
656                         break;
657         }
658         return ret;
659 }
660
661 static void closeconv(struct conv *cv)
662 {
663         struct conv *nc;
664         struct Ipmulti *mp;
665
666         qlock(&cv->qlock);
667
668         if (--cv->inuse > 0) {
669                 qunlock(&cv->qlock);
670                 return;
671         }
672
673         /* close all incoming calls since no listen will ever happen */
674         for (nc = cv->incall; nc; nc = cv->incall) {
675                 cv->incall = nc->next;
676                 closeconv(nc);
677         }
678         cv->incall = NULL;
679
680         kstrdup(&cv->owner, network);
681         cv->perm = 0660;
682
683         while ((mp = cv->multi) != NULL)
684                 ipifcremmulti(cv, mp->ma, mp->ia);
685
686         cv->r = NULL;
687         cv->rgen = 0;
688         cv->p->close(cv);
689         cv->state = Idle;
690         qunlock(&cv->qlock);
691 }
692
693 static void ipclose(struct chan *c)
694 {
695         struct Fs *f;
696
697         f = ipfs[c->dev];
698         switch (TYPE(c->qid)) {
699                 default:
700                         break;
701                 case Qlog:
702                         if (c->flag & COPEN)
703                                 netlogclose(f);
704                         break;
705                 case Qiprouter:
706                         if (c->flag & COPEN)
707                                 iprouterclose(f);
708                         break;
709                 case Qdata:
710                 case Qctl:
711                 case Qerr:
712                 case Qlisten:
713                         if (c->flag & COPEN)
714                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
715                         break;
716                 case Qsnoop:
717                         if (c->flag & COPEN)
718                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
719                         break;
720         }
721         kfree(((struct IPaux *)c->aux)->owner);
722         kfree(c->aux);
723 }
724
/* Size in bytes of the scratch buffers ipread() allocates for text files. */
enum {
	Statelen = 32 * 1024,
};
728
729 static long ipread(struct chan *ch, void *a, long n, int64_t off)
730 {
731         struct conv *c;
732         struct Proto *x;
733         char *buf, *p;
734         long rv;
735         struct Fs *f;
736         uint32_t offset = off;
737         size_t sofar;
738
739         f = ipfs[ch->dev];
740
741         p = a;
742         switch (TYPE(ch->qid)) {
743                 default:
744                         error(EPERM, ERROR_FIXME);
745                 case Qtopdir:
746                 case Qprotodir:
747                 case Qconvdir:
748                         return devdirread(ch, a, n, 0, 0, ipgen);
749                 case Qarp:
750                         return arpread(f->arp, a, offset, n);
751                 case Qndb:
752                         return readstr(offset, a, n, f->ndb);
753                 case Qiproute:
754                         return routeread(f, a, offset, n);
755                 case Qiprouter:
756                         return iprouterread(f, a, n);
757                 case Qipselftab:
758                         return ipselftabread(f, a, offset, n);
759                 case Qlog:
760                         return netlogread(f, a, offset, n);
761                 case Qctl:
762                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
763                         return readstr(offset, p, n, get_cur_genbuf());
764                 case Qremote:
765                         buf = kzmalloc(Statelen, 0);
766                         x = f->p[PROTO(ch->qid)];
767                         c = x->conv[CONV(ch->qid)];
768                         if (x->remote == NULL) {
769                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
770                         } else {
771                                 (*x->remote) (c, buf, Statelen - 2);
772                         }
773                         rv = readstr(offset, p, n, buf);
774                         kfree(buf);
775                         return rv;
776                 case Qlocal:
777                         buf = kzmalloc(Statelen, 0);
778                         x = f->p[PROTO(ch->qid)];
779                         c = x->conv[CONV(ch->qid)];
780                         if (x->local == NULL) {
781                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
782                         } else {
783                                 (*x->local) (c, buf, Statelen - 2);
784                         }
785                         rv = readstr(offset, p, n, buf);
786                         kfree(buf);
787                         return rv;
788                 case Qstatus:
789                         /* this all is a bit screwed up since the size of some state's
790                          * buffers will change from one invocation to another.  a reader
791                          * will come in and read the entire buffer.  then it will come again
792                          * and read from the next offset, expecting EOF.  if the buffer
793                          * changed sizes, it'll reprint the end of the buffer slightly. */
794                         buf = kzmalloc(Statelen, 0);
795                         x = f->p[PROTO(ch->qid)];
796                         c = x->conv[CONV(ch->qid)];
797                         sofar = (*x->state) (c, buf, Statelen - 2);
798                         rv = readstr(offset, p, n, buf);
799                         kfree(buf);
800                         return rv;
801                 case Qdata:
802                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
803                         if (ch->flag & O_NONBLOCK)
804                                 return qread_nonblock(c->rq, a, n);
805                         else
806                                 return qread(c->rq, a, n);
807                 case Qerr:
808                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
809                         return qread(c->eq, a, n);
810                 case Qsnoop:
811                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
812                         return qread(c->sq, a, n);
813                 case Qstats:
814                         x = f->p[PROTO(ch->qid)];
815                         if (x->stats == NULL)
816                                 error(EFAIL, "stats not implemented");
817                         buf = kzmalloc(Statelen, 0);
818                         (*x->stats) (x, buf, Statelen);
819                         rv = readstr(offset, p, n, buf);
820                         kfree(buf);
821                         return rv;
822         }
823 }
824
825 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
826 {
827         struct conv *c;
828
829         switch (TYPE(ch->qid)) {
830                 case Qdata:
831                         c = chan2conv(ch);
832                         if (ch->flag & O_NONBLOCK)
833                                 return qbread_nonblock(c->rq, n);
834                         else
835                                 return qbread(c->rq, n);
836                 default:
837                         return devbread(ch, n, offset);
838         }
839 }
840
841 /*
842  *  set local address to be that of the ifc closest to remote address
843  */
844 static void setladdr(struct conv *c)
845 {
846         findlocalip(c->p->f, c->laddr, c->raddr);
847 }
848
849 /*
850  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
851  */
852 static void setluniqueport(struct conv *c, int lport)
853 {
854         struct Proto *p;
855         struct conv *xp;
856         int x;
857
858         p = c->p;
859
860         qlock(&p->qlock);
861         for (x = 0; x < p->nc; x++) {
862                 xp = p->conv[x];
863                 if (xp == NULL)
864                         break;
865                 if (xp == c)
866                         continue;
867                 if ((xp->state == Connected || xp->state == Announced)
868                         && xp->lport == lport
869                         && xp->rport == c->rport
870                         && ipcmp(xp->raddr, c->raddr) == 0
871                         && ipcmp(xp->laddr, c->laddr) == 0) {
872                         qunlock(&p->qlock);
873                         error(EFAIL, "address in use");
874                 }
875         }
876         c->lport = lport;
877         qunlock(&p->qlock);
878 }
879
880 /*
881  *  pick a local port and set it
882  */
883 static void setlport(struct conv *c)
884 {
885         struct Proto *p;
886         uint16_t *pp;
887         int x, found;
888
889         p = c->p;
890         if (c->restricted)
891                 pp = &p->nextrport;
892         else
893                 pp = &p->nextport;
894         qlock(&p->qlock);
895         for (;; (*pp)++) {
896                 /*
897                  * Fsproto initialises p->nextport to 0 and the restricted
898                  * ports (p->nextrport) to 600.
899                  * Restricted ports must lie between 600 and 1024.
900                  * For the initial condition or if the unrestricted port number
901                  * has wrapped round, select a random port between 5000 and 1<<15
902                  * to start at.
903                  */
904                 if (c->restricted) {
905                         if (*pp >= 1024)
906                                 *pp = 600;
907                 } else
908                         while (*pp < 5000)
909                                 urandom_read(pp, sizeof(*pp));
910
911                 found = 0;
912                 for (x = 0; x < p->nc; x++) {
913                         if (p->conv[x] == NULL)
914                                 break;
915                         if (p->conv[x]->lport == *pp) {
916                                 found = 1;
917                                 break;
918                         }
919                 }
920                 if (!found)
921                         break;
922         }
923         c->lport = (*pp)++;
924         qunlock(&p->qlock);
925 }
926
927 /*
928  *  set a local address and port from a string of the form
929  *      [address!]port[!r]
930  */
931 static void setladdrport(struct conv *c, char *str, int announcing)
932 {
933         char *p;
934         uint16_t lport;
935         uint8_t addr[IPaddrlen];
936
937         /*
938          *  ignore restricted part if it exists.  it's
939          *  meaningless on local ports.
940          */
941         p = strchr(str, '!');
942         if (p != NULL) {
943                 *p++ = 0;
944                 if (strcmp(p, "r") == 0)
945                         p = NULL;
946         }
947
948         c->lport = 0;
949         if (p == NULL) {
950                 if (announcing)
951                         ipmove(c->laddr, IPnoaddr);
952                 else
953                         setladdr(c);
954                 p = str;
955         } else {
956                 if (strcmp(str, "*") == 0)
957                         ipmove(c->laddr, IPnoaddr);
958                 else {
959                         parseip(addr, str);
960                         if (ipforme(c->p->f, addr))
961                                 ipmove(c->laddr, addr);
962                         else
963                                 error(EFAIL, "not a local IP address");
964                 }
965         }
966
967         /* one process can get all connections */
968         if (announcing && strcmp(p, "*") == 0) {
969                 if (!iseve())
970                         error(EPERM, ERROR_FIXME);
971                 setluniqueport(c, 0);
972         }
973
974         lport = atoi(p);
975         if (lport <= 0)
976                 setlport(c);
977         else
978                 setluniqueport(c, lport);
979 }
980
981 static void setraddrport(struct conv *c, char *str)
982 {
983         char *p;
984
985         p = strchr(str, '!');
986         if (p == NULL)
987                 error(EFAIL, "malformed address");
988         *p++ = 0;
989         parseip(c->raddr, str);
990         c->rport = atoi(p);
991         p = strchr(p, '!');
992         if (p) {
993                 if (strstr(p, "!r") != NULL)
994                         c->restricted = 1;
995         }
996 }
997
998 /*
999  *  called by protocol connect routine to set addresses
1000  */
1001 void Fsstdconnect(struct conv *c, char *argv[], int argc)
1002 {
1003         switch (argc) {
1004                 default:
1005                         error(EINVAL, "bad args to %s", __func__);
1006                 case 2:
1007                         setraddrport(c, argv[1]);
1008                         setladdr(c);
1009                         setlport(c);
1010                         break;
1011                 case 3:
1012                         setraddrport(c, argv[1]);
1013                         setladdrport(c, argv[2], 0);
1014                         break;
1015         }
1016
1017         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
1018                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
1019                 || ipcmp(c->raddr, IPnoaddr) == 0)
1020                 c->ipversion = V4;
1021         else
1022                 c->ipversion = V6;
1023 }
1024
1025 /*
1026  *  initiate connection and sleep till its set up
1027  */
1028 static int connected(void *a)
1029 {
1030         return ((struct conv *)a)->state == Connected;
1031 }
1032
1033 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1034 {
1035         ERRSTACK(1);
1036         char *p;
1037
1038         if (c->state != 0)
1039                 error(EBUSY, ERROR_FIXME);
1040         c->state = Connecting;
1041         c->cerr[0] = '\0';
1042         if (x->connect == NULL)
1043                 error(EFAIL, "connect not supported");
1044         x->connect(c, cb->f, cb->nf);
1045
1046         qunlock(&c->qlock);
1047         if (waserror()) {
1048                 qlock(&c->qlock);
1049                 nexterror();
1050         }
1051         rendez_sleep(&c->cr, connected, c);
1052         qlock(&c->qlock);
1053         poperror();
1054
1055         if (c->cerr[0] != '\0')
1056                 error(EFAIL, c->cerr);
1057 }
1058
1059 /*
1060  *  called by protocol announce routine to set addresses
1061  */
1062 void Fsstdannounce(struct conv *c, char *argv[], int argc)
1063 {
1064         memset(c->raddr, 0, sizeof(c->raddr));
1065         c->rport = 0;
1066         switch (argc) {
1067                 default:
1068                         error(EINVAL, "bad args to announce");
1069                 case 2:
1070                         setladdrport(c, argv[1], 1);
1071                         break;
1072         }
1073 }
1074
1075 /*
1076  *  initiate announcement and sleep till its set up
1077  */
1078 static int announced(void *a)
1079 {
1080         return ((struct conv *)a)->state == Announced;
1081 }
1082
1083 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1084 {
1085         ERRSTACK(1);
1086         char *p;
1087
1088         if (c->state != 0)
1089                 error(EBUSY, ERROR_FIXME);
1090         c->state = Announcing;
1091         c->cerr[0] = '\0';
1092         if (x->announce == NULL)
1093                 error(EFAIL, "announce not supported");
1094         x->announce(c, cb->f, cb->nf);
1095
1096         qunlock(&c->qlock);
1097         if (waserror()) {
1098                 qlock(&c->qlock);
1099                 nexterror();
1100         }
1101         rendez_sleep(&c->cr, announced, c);
1102         qlock(&c->qlock);
1103         poperror();
1104
1105         if (c->cerr[0] != '\0')
1106                 error(EFAIL, c->cerr);
1107 }
1108
1109 /*
1110  *  called by protocol bind routine to set addresses
1111  */
1112 void Fsstdbind(struct conv *c, char *argv[], int argc)
1113 {
1114         switch (argc) {
1115                 default:
1116                         error(EINVAL, "bad args to bind");
1117                 case 2:
1118                         setladdrport(c, argv[1], 0);
1119                         break;
1120         }
1121 }
1122
1123 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1124 {
1125         if (x->bind == NULL)
1126                 Fsstdbind(c, cb->f, cb->nf);
1127         else
1128                 x->bind(c, cb->f, cb->nf);
1129 }
1130
1131 static void shutdownctlmsg(struct conv *cv, struct cmdbuf *cb)
1132 {
1133         if (cb->nf < 2)
1134                 goto err;
1135         if (!strcmp(cb->f[1], "rd")) {
1136                 qhangup(cv->rq, "shutdown");
1137                 if (cv->p->shutdown)
1138                         cv->p->shutdown(cv, SHUT_RD);
1139         } else if (!strcmp(cb->f[1], "wr")) {
1140                 qhangup(cv->wq, "shutdown");
1141                 if (cv->p->shutdown)
1142                         cv->p->shutdown(cv, SHUT_WR);
1143         } else if (!strcmp(cb->f[1], "rdwr")) {
1144                 qhangup(cv->rq, "shutdown");
1145                 qhangup(cv->wq, "shutdown");
1146                 if (cv->p->shutdown)
1147                         cv->p->shutdown(cv, SHUT_RDWR);
1148         } else {
1149                 goto err;
1150         }
1151         return;
1152 err:
1153         error(EINVAL, "shutdown [rx|tx|rxtx]");
1154 }
1155
1156 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1157 {
1158         if (cb->nf < 2)
1159                 c->tos = 0;
1160         else
1161                 c->tos = atoi(cb->f[1]);
1162 }
1163
1164 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1165 {
1166         if (cb->nf < 2)
1167                 c->ttl = MAXTTL;
1168         else
1169                 c->ttl = atoi(cb->f[1]);
1170 }
1171
1172 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1173 {
1174         ERRSTACK(1);
1175         struct conv *c;
1176         struct Proto *x;
1177         char *p;
1178         struct cmdbuf *cb;
1179         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1180         struct Fs *f;
1181         char *a;
1182
1183         a = v;
1184         f = ipfs[ch->dev];
1185
1186         switch (TYPE(ch->qid)) {
1187                 default:
1188                         error(EPERM, ERROR_FIXME);
1189                 case Qdata:
1190                         x = f->p[PROTO(ch->qid)];
1191                         c = x->conv[CONV(ch->qid)];
1192                         if (ch->flag & O_NONBLOCK)
1193                                 qwrite_nonblock(c->wq, a, n);
1194                         else
1195                                 qwrite(c->wq, a, n);
1196                         break;
1197                 case Qarp:
1198                         return arpwrite(f, a, n);
1199                 case Qiproute:
1200                         return routewrite(f, ch, a, n);
1201                 case Qlog:
1202                         netlogctl(f, a, n);
1203                         return n;
1204                 case Qndb:
1205                         return ndbwrite(f, a, off, n);
1206                 case Qctl:
1207                         x = f->p[PROTO(ch->qid)];
1208                         c = x->conv[CONV(ch->qid)];
1209                         cb = parsecmd(a, n);
1210
1211                         qlock(&c->qlock);
1212                         if (waserror()) {
1213                                 qunlock(&c->qlock);
1214                                 kfree(cb);
1215                                 nexterror();
1216                         }
1217                         if (cb->nf < 1)
1218                                 error(EFAIL, "short control request");
1219                         if (strcmp(cb->f[0], "connect") == 0)
1220                                 connectctlmsg(x, c, cb);
1221                         else if (strcmp(cb->f[0], "announce") == 0)
1222                                 announcectlmsg(x, c, cb);
1223                         else if (strcmp(cb->f[0], "bind") == 0)
1224                                 bindctlmsg(x, c, cb);
1225                         else if (strcmp(cb->f[0], "shutdown") == 0)
1226                                 shutdownctlmsg(c, cb);
1227                         else if (strcmp(cb->f[0], "ttl") == 0)
1228                                 ttlctlmsg(c, cb);
1229                         else if (strcmp(cb->f[0], "tos") == 0)
1230                                 tosctlmsg(c, cb);
1231                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1232                                 c->ignoreadvice = 1;
1233                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1234                                 if (cb->nf < 2)
1235                                         error(EFAIL, "addmulti needs interface address");
1236                                 if (cb->nf == 2) {
1237                                         if (!ipismulticast(c->raddr))
1238                                                 error(EFAIL, "addmulti for a non multicast address");
1239                                         parseip(ia, cb->f[1]);
1240                                         ipifcaddmulti(c, c->raddr, ia);
1241                                 } else {
1242                                         parseip(ma, cb->f[2]);
1243                                         if (!ipismulticast(ma))
1244                                                 error(EFAIL, "addmulti for a non multicast address");
1245                                         parseip(ia, cb->f[1]);
1246                                         ipifcaddmulti(c, ma, ia);
1247                                 }
1248                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1249                                 if (cb->nf < 2)
1250                                         error(EFAIL, "remmulti needs interface address");
1251                                 if (!ipismulticast(c->raddr))
1252                                         error(EFAIL, "remmulti for a non multicast address");
1253                                 parseip(ia, cb->f[1]);
1254                                 ipifcremmulti(c, c->raddr, ia);
1255                         } else if (x->ctl != NULL) {
1256                                 x->ctl(c, cb->f, cb->nf);
1257                         } else
1258                                 error(EFAIL, "unknown control request");
1259                         qunlock(&c->qlock);
1260                         kfree(cb);
1261                         poperror();
1262         }
1263         return n;
1264 }
1265
1266 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1267 {
1268         struct conv *c;
1269         int n;
1270
1271         switch (TYPE(ch->qid)) {
1272                 case Qdata:
1273                         c = chan2conv(ch);
1274                         if (bp->next)
1275                                 bp = concatblock(bp);
1276                         n = BLEN(bp);
1277                         if (ch->flag & O_NONBLOCK)
1278                                 qbwrite_nonblock(c->wq, bp);
1279                         else
1280                                 qbwrite(c->wq, bp);
1281                         return n;
1282                 default:
1283                         return devbwrite(ch, bp, offset);
1284         }
1285 }
1286
1287 static void ip_wake_cb(struct queue *q, void *data, int filter)
1288 {
1289         struct conv *conv = (struct conv*)data;
1290         struct fd_tap *tap_i;
1291         /* For these two, we want to ignore events on the opposite end of the
1292          * queues.  For instance, we want to know when the WQ is writable.  Our
1293          * writes will actually make it readable - we don't want to trigger a tap
1294          * for that.  However, qio doesn't know how/why we are using a queue, or
1295          * even who the ends are (hence the callbacks) */
1296         if ((filter & FDTAP_FILT_READABLE) && (q == conv->wq))
1297                 return;
1298         if ((filter & FDTAP_FILT_WRITABLE) && (q == conv->rq))
1299                 return;
1300         /* At this point, we have an event we want to send to our taps (if any).
1301          * The lock protects list integrity and the existence of the tap.
1302          *
1303          * Previously, I thought of using the conv qlock.  That actually breaks, due
1304          * to weird usages of the qlock (someone holds it for a long time, blocking
1305          * the inbound wakeup from etherread4).
1306          *
1307          * I opted for a spinlock for a couple reasons:
1308          * - fire_tap should not block.  ideally it'll be fast too (it's mostly a
1309          * send_event).
1310          * - our callers might not want to block.  A lot of network wakeups will
1311          * come network processes (etherread4) or otherwise unrelated to this
1312          * particular conversation.  I'd rather do something like fire off a KMSG
1313          * than block those.
1314          * - if fire_tap takes a while, holding the lock only slows down other
1315          * events on this *same* conversation, or other tap registration.  not a
1316          * huge deal. */
1317         spin_lock(&conv->tap_lock);
1318         SLIST_FOREACH(tap_i, &conv->data_taps, link)
1319                 fire_tap(tap_i, filter);
1320         spin_unlock(&conv->tap_lock);
1321 }
1322
1323 int iptapfd(struct chan *chan, struct fd_tap *tap, int cmd)
1324 {
1325         struct conv *conv = chan2conv(chan);
1326         int ret;
1327
1328         #define DEVIP_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
1329                                        FDTAP_FILT_HANGUP | FDTAP_FILT_PRIORITY |   \
1330                                        FDTAP_FILT_ERROR)
1331         #define DEVIP_LEGAL_LISTEN_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
1332
1333         switch (TYPE(chan->qid)) {
1334                 case Qdata:
1335                         if (tap->filter & ~DEVIP_LEGAL_DATA_TAPS) {
1336                                 set_errno(ENOSYS);
1337                                 set_errstr("Unsupported #%s data tap %p, must be %p", devname(),
1338                                            tap->filter, DEVIP_LEGAL_DATA_TAPS);
1339                                 return -1;
1340                         }
1341                         spin_lock(&conv->tap_lock);
1342                         switch (cmd) {
1343                                 case (FDTAP_CMD_ADD):
1344                                         if (SLIST_EMPTY(&conv->data_taps)) {
1345                                                 qio_set_wake_cb(conv->rq, ip_wake_cb, conv);
1346                                                 qio_set_wake_cb(conv->wq, ip_wake_cb, conv);
1347                                         }
1348                                         SLIST_INSERT_HEAD(&conv->data_taps, tap, link);
1349                                         ret = 0;
1350                                         break;
1351                                 case (FDTAP_CMD_REM):
1352                                         SLIST_REMOVE(&conv->data_taps, tap, fd_tap, link);
1353                                         if (SLIST_EMPTY(&conv->data_taps)) {
1354                                                 qio_set_wake_cb(conv->rq, 0, conv);
1355                                                 qio_set_wake_cb(conv->wq, 0, conv);
1356                                         }
1357                                         ret = 0;
1358                                         break;
1359                                 default:
1360                                         set_errno(ENOSYS);
1361                                         set_errstr("Unsupported #%s data tap command %p",
1362                                                    devname(), cmd);
1363                                         ret = -1;
1364                         }
1365                         spin_unlock(&conv->tap_lock);
1366                         return ret;
1367                 case Qlisten:
1368                         if (tap->filter & ~DEVIP_LEGAL_LISTEN_TAPS) {
1369                                 set_errno(ENOSYS);
1370                                 set_errstr("Unsupported #%s listen tap %p, must be %p",
1371                                            devname(), tap->filter, DEVIP_LEGAL_LISTEN_TAPS);
1372                                 return -1;
1373                         }
1374                         spin_lock(&conv->tap_lock);
1375                         switch (cmd) {
1376                                 case (FDTAP_CMD_ADD):
1377                                         SLIST_INSERT_HEAD(&conv->listen_taps, tap, link);
1378                                         ret = 0;
1379                                         break;
1380                                 case (FDTAP_CMD_REM):
1381                                         SLIST_REMOVE(&conv->listen_taps, tap, fd_tap, link);
1382                                         ret = 0;
1383                                         break;
1384                                 default:
1385                                         set_errno(ENOSYS);
1386                                         set_errstr("Unsupported #%s listen tap command %p",
1387                                                    devname(), cmd);
1388                                         ret = -1;
1389                         }
1390                         spin_unlock(&conv->tap_lock);
1391                         return ret;
1392                 default:
1393                         set_errno(ENOSYS);
1394                         set_errstr("Can't tap #%s file type %d", devname(),
1395                                    TYPE(chan->qid));
1396                         return -1;
1397         }
1398 }
1399
1400 struct dev ipdevtab __devtab = {
1401         .name = "ip",
1402
1403         .reset = ipreset,
1404         .init = ipinit,
1405         .shutdown = devshutdown,
1406         .attach = ipattach,
1407         .walk = ipwalk,
1408         .stat = ipstat,
1409         .open = ipopen,
1410         .create = devcreate,
1411         .close = ipclose,
1412         .read = ipread,
1413         .bread = ipbread,
1414         .write = ipwrite,
1415         .bwrite = ipbwrite,
1416         .remove = devremove,
1417         .wstat = ipwstat,
1418         .power = devpower,
1419         .chaninfo = ipchaninfo,
1420         .tapfd = iptapfd,
1421 };
1422
1423 int Fsproto(struct Fs *f, struct Proto *p)
1424 {
1425         if (f->np >= Maxproto)
1426                 return -1;
1427
1428         qlock_init(&p->qlock);
1429         p->f = f;
1430
1431         if (p->ipproto > 0) {
1432                 if (f->t2p[p->ipproto] != NULL)
1433                         return -1;
1434                 f->t2p[p->ipproto] = p;
1435         }
1436
1437         p->qid.type = QTDIR;
1438         p->qid.path = QID(f->np, 0, Qprotodir);
1439         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1440         if (p->conv == NULL)
1441                 panic("Fsproto");
1442
1443         p->x = f->np;
1444         p->nextport = 0;
1445         p->nextrport = 600;
1446         f->p[f->np++] = p;
1447
1448         return 0;
1449 }
1450
1451 /*
1452  *  return true if this protocol is
1453  *  built in
1454  */
1455 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1456 {
1457         return f->t2p[proto] != NULL;
1458 }
1459
1460 /*
1461  *  called with protocol locked
1462  */
1463 struct conv *Fsprotoclone(struct Proto *p, char *user)
1464 {
1465         struct conv *c, **pp, **ep;
1466
1467 retry:
1468         c = NULL;
1469         ep = &p->conv[p->nc];
1470         for (pp = p->conv; pp < ep; pp++) {
1471                 c = *pp;
1472                 if (c == NULL) {
1473                         c = kzmalloc(sizeof(struct conv), 0);
1474                         if (c == NULL)
1475                                 error(ENOMEM, ERROR_FIXME);
1476                         qlock_init(&c->qlock);
1477                         qlock_init(&c->listenq);
1478                         rendez_init(&c->cr);
1479                         rendez_init(&c->listenr);
1480                         SLIST_INIT(&c->data_taps);      /* already = 0; set to be futureproof */
1481                         SLIST_INIT(&c->listen_taps);
1482                         spinlock_init(&c->tap_lock);
1483                         qlock(&c->qlock);
1484                         c->p = p;
1485                         c->x = pp - p->conv;
1486                         if (p->ptclsize != 0) {
1487                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1488                                 if (c->ptcl == NULL) {
1489                                         kfree(c);
1490                                         error(ENOMEM, ERROR_FIXME);
1491                                 }
1492                         }
1493                         *pp = c;
1494                         p->ac++;
1495                         c->eq = qopen(1024, Qmsg, 0, 0);
1496                         (*p->create) (c);
1497                         assert(c->rq && c->wq);
1498                         break;
1499                 }
1500                 if (canqlock(&c->qlock)) {
1501                         /*
1502                          *  make sure both processes and protocol
1503                          *  are done with this Conv
1504                          */
1505                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1506                                 break;
1507
1508                         qunlock(&c->qlock);
1509                 }
1510         }
1511         if (pp >= ep) {
1512                 if (p->gc != NULL && (*p->gc) (p))
1513                         goto retry;
1514                 return NULL;
1515         }
1516
1517         c->inuse = 1;
1518         kstrdup(&c->owner, user);
1519         c->perm = 0660;
1520         c->state = Idle;
1521         ipmove(c->laddr, IPnoaddr);
1522         ipmove(c->raddr, IPnoaddr);
1523         c->r = NULL;
1524         c->rgen = 0;
1525         c->lport = 0;
1526         c->rport = 0;
1527         c->restricted = 0;
1528         c->ttl = MAXTTL;
1529         c->tos = DFLTTOS;
1530         qreopen(c->rq);
1531         qreopen(c->wq);
1532         qreopen(c->eq);
1533
1534         qunlock(&c->qlock);
1535         return c;
1536 }
1537
1538 int Fsconnected(struct conv *c, char *msg)
1539 {
1540         if (msg != NULL && *msg != '\0')
1541                 strlcpy(c->cerr, msg, sizeof(c->cerr));
1542
1543         switch (c->state) {
1544                 case Announcing:
1545                         c->state = Announced;
1546                         break;
1547
1548                 case Connecting:
1549                         c->state = Connected;
1550                         break;
1551         }
1552
1553         rendez_wakeup(&c->cr);
1554         return 0;
1555 }
1556
1557 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1558 {
1559         if (f->ipmux)
1560                 return f->ipmux;
1561         else
1562                 return f->t2p[proto];
1563 }
1564
1565 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1566 {
1567         return f->t2p[proto];
1568 }
1569
1570 static void fire_listener_taps(struct conv *conv)
1571 {
1572         struct fd_tap *tap_i;
1573         if (SLIST_EMPTY(&conv->listen_taps))
1574                 return;
1575         spin_lock(&conv->tap_lock);
1576         SLIST_FOREACH(tap_i, &conv->listen_taps, link)
1577                 fire_tap(tap_i, FDTAP_FILT_READABLE);
1578         spin_unlock(&conv->tap_lock);
1579 }
1580
1581 /*
1582  *  called with protocol locked
1583  */
1584 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1585                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1586 {
1587         struct conv *nc;
1588         struct conv **l;
1589         int i;
1590
1591         qlock(&c->qlock);
1592         i = 0;
1593         for (l = &c->incall; *l; l = &(*l)->next)
1594                 i++;
1595         if (i >= Maxincall) {
1596                 qunlock(&c->qlock);
1597                 return NULL;
1598         }
1599
1600         /* find a free conversation */
1601         nc = Fsprotoclone(c->p, network);
1602         if (nc == NULL) {
1603                 qunlock(&c->qlock);
1604                 return NULL;
1605         }
1606         ipmove(nc->raddr, raddr);
1607         nc->rport = rport;
1608         ipmove(nc->laddr, laddr);
1609         nc->lport = lport;
1610         nc->next = NULL;
1611         *l = nc;
1612         nc->state = Connected;
1613         nc->ipversion = version;
1614
1615         qunlock(&c->qlock);
1616
1617         rendez_wakeup(&c->listenr);
1618         fire_listener_taps(c);
1619
1620         return nc;
1621 }
1622
1623 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1624 {
1625         if (off > strlen(f->ndb))
1626                 error(EIO, ERROR_FIXME);
1627         if (off + n >= sizeof(f->ndb) - 1)
1628                 error(EIO, ERROR_FIXME);
1629         memmove(f->ndb + off, a, n);
1630         f->ndb[off + n] = 0;
1631         f->ndbvers++;
1632         f->ndbmtime = seconds();
1633         return n;
1634 }
1635
1636 uint32_t scalednconv(void)
1637 {
1638         //if(conf.npage*BY2PG >= 128*MB)
1639         return Nchans * 4;
1640         //  return Nchans;
1641 }