error: return usable error strings, not nothing.
[akaros.git] / kern / src / net / devip.c
1 /* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
2  * Portions Copyright © 1997-1999 Vita Nuova Limited
3  * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
4  *                                (www.vitanuova.com)
5  * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
6  *
7  * Modified for the Akaros operating system:
8  * Copyright (c) 2013-2014 The Regents of the University of California
9  * Copyright (c) 2013-2015 Google Inc.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a copy
12  * of this software and associated documentation files (the "Software"), to deal
13  * in the Software without restriction, including without limitation the rights
14  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  * copies of the Software, and to permit persons to whom the Software is
16  * furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice shall be included in
19  * all copies or substantial portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
24  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27  * SOFTWARE. */
28
29 #include <vfs.h>
30 #include <kfs.h>
31 #include <slab.h>
32 #include <kmalloc.h>
33 #include <kref.h>
34 #include <string.h>
35 #include <stdio.h>
36 #include <assert.h>
37 #include <error.h>
38 #include <cpio.h>
39 #include <pmap.h>
40 #include <smp.h>
41 #include <ip.h>
42
43 struct dev ipdevtab;
44
45 static char *devname(void)
46 {
47         return ipdevtab.name;
48 }
49
/*
 * File types within the device, plus the constants describing how a qid path
 * is packed.  The three groups (top level, per protocol, per conversation)
 * are contiguous so the generators can turn a directory slot into a type by
 * adding the group's base value.
 */
enum {
	/* top level directory and the files directly inside it */
	Qtopdir = 1,
	Qtopbase = Qtopdir + 1,
	Qarp = Qtopbase,
	Qbootp,
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,

	/* directory for a protocol and the files directly inside it */
	Qprotodir,
	Qprotobase,
	Qclone = Qprotobase,
	Qstats,

	/* directory for a conversation and the files directly inside it */
	Qconvdir,
	Qconvbase,
	Qctl = Qconvbase,
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,

	/* qid path packing: type in the low bits, then conv, then proto */
	Logtype = 5,
	Masktype = (1 << Logtype) - 1,
	Logconv = 12,
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,
	Logproto = 8,
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,

	/* number of slots in ipfs[]; valid device numbers are 0..Nfs-1 */
	Nfs = 32,
};
/*
 * Qid path accessors.  A path carries, from the low bits upward, the file
 * type (Logtype bits), the conversation index (Logconv bits) and the protocol
 * index (Logproto bits).  QID() builds a path from those three fields; the
 * other macros pull one field back out of a struct qid.
 */
#define QID_FIELD(x, shift, mask) \
	( (((uint32_t)(x).path) >> (shift)) & (mask) )
#define TYPE(x)		QID_FIELD(x, 0, Masktype)
#define CONV(x)		QID_FIELD(x, Shiftconv, Maskconv)
#define PROTO(x)	QID_FIELD(x, Shiftproto, Maskproto)
#define QID(p, c, y)	( ((p) << (Shiftproto)) | ((c) << Shiftconv) | (y) )

/* Owner name reported for files that do not belong to a conversation. */
static char network[] = "network";
93
94 qlock_t fslock;
95 struct Fs *ipfs[Nfs];                   /* attached fs's */
96 struct queue *qlog;
97
98 extern void nullmediumlink(void);
99 extern void pktmediumlink(void);
100 extern char *eve;
101 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
102 static void closeconv(struct conv *);
103
104 static inline int founddevdir(struct chan *c, struct qid q, char *n,
105                                                           int64_t length, char *user, long perm,
106                                                           struct dir *db)
107 {
108         devdir(c, q, n, length, user, perm, db);
109         return 1;
110 }
111
112 static int topdirgen(struct chan *c, struct dir *dp)
113 {
114         struct qid q;
115         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
116         snprintf(get_cur_genbuf(), GENBUF_SZ, "#%s%lu", devname(), c->dev);
117         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
118 }
119
120
121 static int ip3gen(struct chan *c, int i, struct dir *dp)
122 {
123         struct qid q;
124         struct conv *cv;
125         char *p;
126
127         cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
128         if (cv->owner == NULL)
129                 kstrdup(&cv->owner, eve);
130         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
131
132         switch (i) {
133                 default:
134                         return -1;
135                 case Qctl:
136                         return founddevdir(c, q, "ctl", 0,
137                                                    cv->owner, cv->perm, dp);
138                 case Qdata:
139                         return founddevdir(c, q, "data", qlen(cv->rq),
140                                                            cv->owner, cv->perm, dp);
141                 case Qerr:
142                         return founddevdir(c, q, "err", qlen(cv->eq),
143                                                            cv->owner, cv->perm, dp);
144                 case Qlisten:
145                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
146                 case Qlocal:
147                         p = "local";
148                         break;
149                 case Qremote:
150                         p = "remote";
151                         break;
152                 case Qsnoop:
153                         if (strcmp(cv->p->name, "ipifc") != 0)
154                                 return -1;
155                         return founddevdir(c, q, "snoop", qlen(cv->sq),
156                                                            cv->owner, 0400, dp);
157                 case Qstatus:
158                         p = "status";
159                         break;
160         }
161         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
162 }
163
164 static int ip2gen(struct chan *c, int i, struct dir *dp)
165 {
166         struct qid q;
167         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
168         switch (i) {
169                 case Qclone:
170                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
171                 case Qstats:
172                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
173         }
174         return -1;
175 }
176
177 static int ip1gen(struct chan *c, int i, struct dir *dp)
178 {
179         struct qid q;
180         char *p;
181         int prot;
182         int len = 0;
183         struct Fs *f;
184         extern uint32_t kerndate;
185
186         f = ipfs[c->dev];
187
188         prot = 0666;
189         mkqid(&q, QID(0, 0, i), 0, QTFILE);
190         switch (i) {
191                 default:
192                         return -1;
193                 case Qarp:
194                         p = "arp";
195                         break;
196                 case Qbootp:
197                         if (bootp == NULL)
198                                 return 0;
199                         p = "bootp";
200                         break;
201                 case Qndb:
202                         p = "ndb";
203                         len = strlen(f->ndb);
204                         q.vers = f->ndbvers;
205                         break;
206                 case Qiproute:
207                         p = "iproute";
208                         break;
209                 case Qipselftab:
210                         p = "ipselftab";
211                         prot = 0444;
212                         break;
213                 case Qiprouter:
214                         p = "iprouter";
215                         break;
216                 case Qlog:
217                         p = "log";
218                         break;
219         }
220         devdir(c, q, p, len, network, prot, dp);
221         if (i == Qndb && f->ndbmtime > kerndate)
222                 dp->mtime = f->ndbmtime;
223         return 1;
224 }
225
226 static int
227 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
228           int s, struct dir *dp)
229 {
230         struct qid q;
231         struct conv *cv;
232         struct Fs *f;
233
234         f = ipfs[c->dev];
235
236         switch (TYPE(c->qid)) {
237                 case Qtopdir:
238                         if (s == DEVDOTDOT)
239                                 return topdirgen(c, dp);
240                         if (s < f->np) {
241                                 if (f->p[s]->connect == NULL)
242                                         return 0;       /* protocol with no user interface */
243                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
244                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
245                         }
246                         s -= f->np;
247                         return ip1gen(c, s + Qtopbase, dp);
248                 case Qarp:
249                 case Qbootp:
250                 case Qndb:
251                 case Qlog:
252                 case Qiproute:
253                 case Qiprouter:
254                 case Qipselftab:
255                         return ip1gen(c, TYPE(c->qid), dp);
256                 case Qprotodir:
257                         if (s == DEVDOTDOT)
258                                 return topdirgen(c, dp);
259                         else if (s < f->p[PROTO(c->qid)]->ac) {
260                                 cv = f->p[PROTO(c->qid)]->conv[s];
261                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
262                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
263                                 return
264                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
265                         }
266                         s -= f->p[PROTO(c->qid)]->ac;
267                         return ip2gen(c, s + Qprotobase, dp);
268                 case Qclone:
269                 case Qstats:
270                         return ip2gen(c, TYPE(c->qid), dp);
271                 case Qconvdir:
272                         if (s == DEVDOTDOT) {
273                                 s = PROTO(c->qid);
274                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
275                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
276                                 return 1;
277                         }
278                         return ip3gen(c, s + Qconvbase, dp);
279                 case Qctl:
280                 case Qdata:
281                 case Qerr:
282                 case Qlisten:
283                 case Qlocal:
284                 case Qremote:
285                 case Qstatus:
286                 case Qsnoop:
287                         return ip3gen(c, TYPE(c->qid), dp);
288         }
289         return -1;
290 }
291
292 static void ipinit(void)
293 {
294         qlock_init(&fslock);
295         nullmediumlink();
296         pktmediumlink();
297 /* if only
298         fmtinstall('i', eipfmt);
299         fmtinstall('I', eipfmt);
300         fmtinstall('E', eipfmt);
301         fmtinstall('V', eipfmt);
302         fmtinstall('M', eipfmt);
303 */
304 }
305
/* Device reset hook; this device has nothing to reset. */
static void ipreset(void)
{
	return;
}
309
310 static struct Fs *ipgetfs(int dev)
311 {
312         extern void (*ipprotoinit[]) (struct Fs *);
313         struct Fs *f;
314         int i;
315
316         if (dev >= Nfs)
317                 return NULL;
318
319         qlock(&fslock);
320         if (ipfs[dev] == NULL) {
321                 f = kzmalloc(sizeof(struct Fs), KMALLOC_WAIT);
322                 rwinit(&f->rwlock);
323                 qlock_init(&f->iprouter.qlock);
324                 ip_init(f);
325                 arpinit(f);
326                 netloginit(f);
327                 for (i = 0; ipprotoinit[i]; i++)
328                         ipprotoinit[i] (f);
329                 f->dev = dev;
330                 ipfs[dev] = f;
331         }
332         qunlock(&fslock);
333
334         return ipfs[dev];
335 }
336
337 struct IPaux *newipaux(char *owner, char *tag)
338 {
339         struct IPaux *a;
340         int n;
341
342         a = kzmalloc(sizeof(*a), 0);
343         kstrdup(&a->owner, owner);
344         memset(a->tag, ' ', sizeof(a->tag));
345         n = strlen(tag);
346         if (n > sizeof(a->tag))
347                 n = sizeof(a->tag);
348         memmove(a->tag, tag, n);
349         return a;
350 }
351
352 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
353
354 static struct chan *ipattach(char *spec)
355 {
356         struct chan *c;
357         int dev;
358
359         dev = atoi(spec);
360         if (dev >= Nfs)
361                 error(EFAIL, "bad specification");
362
363         ipgetfs(dev);
364         c = devattach(devname(), spec);
365         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
366         c->dev = dev;
367
368         c->aux = newipaux(commonuser(), "none");
369
370         return c;
371 }
372
373 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
374                                                           int nname)
375 {
376         struct IPaux *a = c->aux;
377         struct walkqid *w;
378
379         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
380         if (w != NULL && w->clone != NULL)
381                 w->clone->aux = newipaux(a->owner, a->tag);
382         return w;
383 }
384
385 static int ipstat(struct chan *c, uint8_t * db, int n)
386 {
387         return devstat(c, db, n, NULL, 0, ipgen);
388 }
389
390 static int should_wake(void *arg)
391 {
392         struct conv *cv = arg;
393         /* signal that the conv is closed */
394         if (qisclosed(cv->rq))
395                 return TRUE;
396         return cv->incall != NULL;
397 }
398
399 static struct chan *ipopen(struct chan *c, int omode)
400 {
401         ERRSTACK(2);
402         struct conv *cv, *nc;
403         struct Proto *p;
404         int perm;
405         struct Fs *f;
406
407         /* perm is a lone rwx, not the rwx------ from the conversion */
408         perm = omode_to_rwx(omode) >> 6;
409
410         f = ipfs[c->dev];
411
412         switch (TYPE(c->qid)) {
413                 default:
414                         break;
415                 case Qndb:
416                         if (omode & (O_WRITE | O_TRUNC) && !iseve())
417                                 error(EPERM, ERROR_FIXME);
418                         if ((omode & (O_WRITE | O_TRUNC)) == (O_WRITE | O_TRUNC))
419                                 f->ndb[0] = 0;
420                         break;
421                 case Qlog:
422                         netlogopen(f);
423                         break;
424                 case Qiprouter:
425                         iprouteropen(f);
426                         break;
427                 case Qiproute:
428                         break;
429                 case Qtopdir:
430                 case Qprotodir:
431                 case Qconvdir:
432                 case Qstatus:
433                 case Qremote:
434                 case Qlocal:
435                 case Qstats:
436                 case Qbootp:
437                 case Qipselftab:
438                         if (omode & O_WRITE)
439                                 error(EPERM, ERROR_FIXME);
440                         break;
441                 case Qsnoop:
442                         if (omode & O_WRITE)
443                                 error(EPERM, ERROR_FIXME);
444                         p = f->p[PROTO(c->qid)];
445                         cv = p->conv[CONV(c->qid)];
446                         if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
447                                 error(EPERM, ERROR_FIXME);
448                         atomic_inc(&cv->snoopers);
449                         break;
450                 case Qclone:
451                         p = f->p[PROTO(c->qid)];
452                         qlock(&p->qlock);
453                         if (waserror()) {
454                                 qunlock(&p->qlock);
455                                 nexterror();
456                         }
457                         cv = Fsprotoclone(p, ATTACHER(c));
458                         qunlock(&p->qlock);
459                         poperror();
460                         if (cv == NULL) {
461                                 error(ENODEV, ERROR_FIXME);
462                                 break;
463                         }
464                         /* we only honor nonblock on a clone */
465                         if (c->flag & O_NONBLOCK)
466                                 Fsconvnonblock(cv, TRUE);
467                         mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
468                         break;
469                 case Qdata:
470                 case Qctl:
471                 case Qerr:
472                         p = f->p[PROTO(c->qid)];
473                         qlock(&p->qlock);
474                         cv = p->conv[CONV(c->qid)];
475                         qlock(&cv->qlock);
476                         if (waserror()) {
477                                 qunlock(&cv->qlock);
478                                 qunlock(&p->qlock);
479                                 nexterror();
480                         }
481                         if ((perm & (cv->perm >> 6)) != perm) {
482                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
483                                         error(EPERM, ERROR_FIXME);
484                                 if ((perm & cv->perm) != perm)
485                                         error(EPERM, ERROR_FIXME);
486
487                         }
488                         cv->inuse++;
489                         if (cv->inuse == 1) {
490                                 kstrdup(&cv->owner, ATTACHER(c));
491                                 cv->perm = 0660;
492                         }
493                         qunlock(&cv->qlock);
494                         qunlock(&p->qlock);
495                         poperror();
496                         break;
497                 case Qlisten:
498                         cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
499                         /* No permissions or Announce checks required.  We'll see if that's
500                          * a good idea or not. (the perm check would do nothing, as is,
501                          * since an O_PATH perm is 0).
502                          *
503                          * But we probably want to incref to keep the conversation around
504                          * until this FD/chan is closed.  #ip is a little weird in that
505                          * objects never really go away (high water mark for convs, you can
506                          * always find them in the ns).  I think it is possible to
507                          * namec/ipgen a chan, then have that conv close, then have that
508                          * chan be opened.  You can probably do this with a data file. */
509                         if (omode & O_PATH) {
510                                 qlock(&cv->qlock);
511                                 cv->inuse++;
512                                 qunlock(&cv->qlock);
513                                 break;
514                         }
515                         if ((perm & (cv->perm >> 6)) != perm) {
516                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
517                                         error(EPERM, ERROR_FIXME);
518                                 if ((perm & cv->perm) != perm)
519                                         error(EPERM, ERROR_FIXME);
520
521                         }
522
523                         if (cv->state != Announced)
524                                 error(EFAIL, "not announced");
525
526                         if (waserror()) {
527                                 closeconv(cv);
528                                 nexterror();
529                         }
530                         qlock(&cv->qlock);
531                         cv->inuse++;
532                         qunlock(&cv->qlock);
533
534                         nc = NULL;
535                         while (nc == NULL) {
536                                 /* give up if we got a hangup */
537                                 if (qisclosed(cv->rq))
538                                         error(EFAIL, "listen hungup");
539
540                                 qlock(&cv->listenq);
541                                 if (waserror()) {
542                                         qunlock(&cv->listenq);
543                                         nexterror();
544                                 }
545                                 /* we can peek at incall without grabbing the cv qlock.  if
546                                  * anything is there, it'll remain there until we dequeue it.
547                                  * no one else can, since we hold the listenq lock */
548                                 if (cv->nonblock && !cv->incall)
549                                         error(EAGAIN, "listen queue empty");
550                                 /* wait for a connect */
551                                 rendez_sleep(&cv->listenr, should_wake, cv);
552
553                                 /* if there is a concurrent hangup, they will hold the qlock
554                                  * until the hangup is complete, including closing the cv->rq */
555                                 qlock(&cv->qlock);
556                                 nc = cv->incall;
557                                 if (nc != NULL) {
558                                         cv->incall = nc->next;
559                                         mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
560                                         kstrdup(&cv->owner, ATTACHER(c));
561                                         /* O_NONBLOCK/CNONBLOCK when opening listen means the *new*
562                                          * conv is already non-blocking, like accept4() in Linux */
563                                         if (c->flag & O_NONBLOCK)
564                                                 Fsconvnonblock(nc, TRUE);
565                                 }
566                                 qunlock(&cv->qlock);
567
568                                 qunlock(&cv->listenq);
569                                 poperror();
570                         }
571                         closeconv(cv);
572                         poperror();
573                         break;
574         }
575         c->mode = openmode(omode);
576         c->flag |= COPEN;
577         c->offset = 0;
578         return c;
579 }
580
581 static int ipwstat(struct chan *c, uint8_t * dp, int n)
582 {
583         ERRSTACK(2);
584         struct dir *d;
585         struct conv *cv;
586         struct Fs *f;
587         struct Proto *p;
588
589         f = ipfs[c->dev];
590         switch (TYPE(c->qid)) {
591                 default:
592                         error(EPERM, ERROR_FIXME);
593                         break;
594                 case Qctl:
595                 case Qdata:
596                         break;
597         }
598
599         d = kzmalloc(sizeof(*d) + n, 0);
600         if (waserror()) {
601                 kfree(d);
602                 nexterror();
603         }
604         n = convM2D(dp, n, d, (char *)&d[1]);
605         if (n == 0)
606                 error(ENODATA, ERROR_FIXME);
607         p = f->p[PROTO(c->qid)];
608         cv = p->conv[CONV(c->qid)];
609         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
610                 error(EPERM, ERROR_FIXME);
611         if (!emptystr(d->uid))
612                 kstrdup(&cv->owner, d->uid);
613         if (d->mode != ~0UL)
614                 cv->perm = d->mode & 0777;
615         poperror();
616         kfree(d);
617         return n;
618 }
619
620 /* Should be able to handle any file type chan. Feel free to extend it. */
621 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
622 {
623         struct conv *conv;
624         struct Proto *proto;
625         char *p;
626         struct Fs *f;
627
628         f = ipfs[ch->dev];
629
630         switch (TYPE(ch->qid)) {
631                 default:
632                         ret = "Unknown type";
633                         break;
634                 case Qdata:
635                         proto = f->p[PROTO(ch->qid)];
636                         conv = proto->conv[CONV(ch->qid)];
637                         snprintf(ret, ret_l, "Qdata, proto %s, conv idx %d", proto->name,
638                                          conv->x);
639                         break;
640                 case Qarp:
641                         ret = "Qarp";
642                         break;
643                 case Qiproute:
644                         ret = "Qiproute";
645                         break;
646                 case Qlisten:
647                         proto = f->p[PROTO(ch->qid)];
648                         conv = proto->conv[CONV(ch->qid)];
649                         snprintf(ret, ret_l, "Qlisten, proto %s, conv idx %d", proto->name,
650                                          conv->x);
651                         break;
652                 case Qlog:
653                         ret = "Qlog";
654                         break;
655                 case Qndb:
656                         ret = "Qndb";
657                         break;
658                 case Qctl:
659                         proto = f->p[PROTO(ch->qid)];
660                         conv = proto->conv[CONV(ch->qid)];
661                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
662                                          conv->x);
663                         break;
664         }
665         return ret;
666 }
667
668 static void closeconv(struct conv *cv)
669 {
670         struct conv *nc;
671         struct Ipmulti *mp;
672
673         qlock(&cv->qlock);
674
675         if (--cv->inuse > 0) {
676                 qunlock(&cv->qlock);
677                 return;
678         }
679
680         /* close all incoming calls since no listen will ever happen */
681         for (nc = cv->incall; nc; nc = cv->incall) {
682                 cv->incall = nc->next;
683                 closeconv(nc);
684         }
685         cv->incall = NULL;
686
687         kstrdup(&cv->owner, network);
688         cv->perm = 0660;
689
690         while ((mp = cv->multi) != NULL)
691                 ipifcremmulti(cv, mp->ma, mp->ia);
692
693         cv->r = NULL;
694         cv->rgen = 0;
695         cv->p->close(cv);
696         cv->state = Idle;
697         qunlock(&cv->qlock);
698 }
699
700 static void ipclose(struct chan *c)
701 {
702         struct Fs *f;
703
704         f = ipfs[c->dev];
705         switch (TYPE(c->qid)) {
706                 default:
707                         break;
708                 case Qlog:
709                         if (c->flag & COPEN)
710                                 netlogclose(f);
711                         break;
712                 case Qiprouter:
713                         if (c->flag & COPEN)
714                                 iprouterclose(f);
715                         break;
716                 case Qdata:
717                 case Qctl:
718                 case Qerr:
719                 case Qlisten:
720                         if (c->flag & COPEN)
721                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
722                         break;
723                 case Qsnoop:
724                         if (c->flag & COPEN)
725                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
726                         break;
727         }
728         kfree(((struct IPaux *)c->aux)->owner);
729         kfree(c->aux);
730 }
731
732 enum {
733         Statelen = 32 * 1024,
734 };
735
736 static long ipread(struct chan *ch, void *a, long n, int64_t off)
737 {
738         struct conv *c;
739         struct Proto *x;
740         char *buf, *p;
741         long rv;
742         struct Fs *f;
743         uint32_t offset = off;
744         size_t sofar;
745
746         f = ipfs[ch->dev];
747
748         p = a;
749         switch (TYPE(ch->qid)) {
750                 default:
751                         error(EPERM, ERROR_FIXME);
752                 case Qtopdir:
753                 case Qprotodir:
754                 case Qconvdir:
755                         return devdirread(ch, a, n, 0, 0, ipgen);
756                 case Qarp:
757                         return arpread(f->arp, a, offset, n);
758                 case Qbootp:
759                         return bootpread(a, offset, n);
760                 case Qndb:
761                         return readstr(offset, a, n, f->ndb);
762                 case Qiproute:
763                         return routeread(f, a, offset, n);
764                 case Qiprouter:
765                         return iprouterread(f, a, n);
766                 case Qipselftab:
767                         return ipselftabread(f, a, offset, n);
768                 case Qlog:
769                         return netlogread(f, a, offset, n);
770                 case Qctl:
771                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
772                         return readstr(offset, p, n, get_cur_genbuf());
773                 case Qremote:
774                         buf = kzmalloc(Statelen, 0);
775                         x = f->p[PROTO(ch->qid)];
776                         c = x->conv[CONV(ch->qid)];
777                         if (x->remote == NULL) {
778                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
779                         } else {
780                                 (*x->remote) (c, buf, Statelen - 2);
781                         }
782                         rv = readstr(offset, p, n, buf);
783                         kfree(buf);
784                         return rv;
785                 case Qlocal:
786                         buf = kzmalloc(Statelen, 0);
787                         x = f->p[PROTO(ch->qid)];
788                         c = x->conv[CONV(ch->qid)];
789                         if (x->local == NULL) {
790                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
791                         } else {
792                                 (*x->local) (c, buf, Statelen - 2);
793                         }
794                         rv = readstr(offset, p, n, buf);
795                         kfree(buf);
796                         return rv;
797                 case Qstatus:
798                         /* this all is a bit screwed up since the size of some state's
799                          * buffers will change from one invocation to another.  a reader
800                          * will come in and read the entire buffer.  then it will come again
801                          * and read from the next offset, expecting EOF.  if the buffer
802                          * changed sizes, it'll reprint the end of the buffer slightly. */
803                         buf = kzmalloc(Statelen, 0);
804                         x = f->p[PROTO(ch->qid)];
805                         c = x->conv[CONV(ch->qid)];
806                         sofar = (*x->state) (c, buf, Statelen - 2);
807                         sofar += snprintf(buf + sofar, Statelen - 2 - sofar, "nonblock %s\n",
808                                           c->nonblock ? "on" : "off");
809                         rv = readstr(offset, p, n, buf);
810                         kfree(buf);
811                         return rv;
812                 case Qdata:
813                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
814                         return qread(c->rq, a, n);
815                 case Qerr:
816                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
817                         return qread(c->eq, a, n);
818                 case Qsnoop:
819                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
820                         return qread(c->sq, a, n);
821                 case Qstats:
822                         x = f->p[PROTO(ch->qid)];
823                         if (x->stats == NULL)
824                                 error(EFAIL, "stats not implemented");
825                         buf = kzmalloc(Statelen, 0);
826                         (*x->stats) (x, buf, Statelen);
827                         rv = readstr(offset, p, n, buf);
828                         kfree(buf);
829                         return rv;
830         }
831 }
832
833 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
834 {
835         struct conv *c;
836         struct Proto *x;
837         struct Fs *f;
838
839         switch (TYPE(ch->qid)) {
840                 case Qdata:
841                         f = ipfs[ch->dev];
842                         x = f->p[PROTO(ch->qid)];
843                         c = x->conv[CONV(ch->qid)];
844                         return qbread(c->rq, n);
845                 default:
846                         return devbread(ch, n, offset);
847         }
848 }
849
850 /*
851  *  set local address to be that of the ifc closest to remote address
852  */
853 static void setladdr(struct conv *c)
854 {
855         findlocalip(c->p->f, c->laddr, c->raddr);
856 }
857
858 /*
859  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
860  */
861 static char *setluniqueport(struct conv *c, int lport)
862 {
863         struct Proto *p;
864         struct conv *xp;
865         int x;
866
867         p = c->p;
868
869         qlock(&p->qlock);
870         for (x = 0; x < p->nc; x++) {
871                 xp = p->conv[x];
872                 if (xp == NULL)
873                         break;
874                 if (xp == c)
875                         continue;
876                 if ((xp->state == Connected || xp->state == Announced)
877                         && xp->lport == lport
878                         && xp->rport == c->rport
879                         && ipcmp(xp->raddr, c->raddr) == 0
880                         && ipcmp(xp->laddr, c->laddr) == 0) {
881                         qunlock(&p->qlock);
882                         return "address in use";
883                 }
884         }
885         c->lport = lport;
886         qunlock(&p->qlock);
887         return NULL;
888 }
889
890 /*
891  *  pick a local port and set it
892  */
893 static void setlport(struct conv *c)
894 {
895         struct Proto *p;
896         uint16_t *pp;
897         int x, found;
898
899         p = c->p;
900         if (c->restricted)
901                 pp = &p->nextrport;
902         else
903                 pp = &p->nextport;
904         qlock(&p->qlock);
905         for (;; (*pp)++) {
906                 /*
907                  * Fsproto initialises p->nextport to 0 and the restricted
908                  * ports (p->nextrport) to 600.
909                  * Restricted ports must lie between 600 and 1024.
910                  * For the initial condition or if the unrestricted port number
911                  * has wrapped round, select a random port between 5000 and 1<<15
912                  * to start at.
913                  */
914                 if (c->restricted) {
915                         if (*pp >= 1024)
916                                 *pp = 600;
917                 } else
918                         while (*pp < 5000)
919                                 *pp = nrand(1 << 15);
920
921                 found = 0;
922                 for (x = 0; x < p->nc; x++) {
923                         if (p->conv[x] == NULL)
924                                 break;
925                         if (p->conv[x]->lport == *pp) {
926                                 found = 1;
927                                 break;
928                         }
929                 }
930                 if (!found)
931                         break;
932         }
933         c->lport = (*pp)++;
934         qunlock(&p->qlock);
935 }
936
937 /*
938  *  set a local address and port from a string of the form
939  *      [address!]port[!r]
940  */
941 static char *setladdrport(struct conv *c, char *str, int announcing)
942 {
943         char *p;
944         char *rv;
945         uint16_t lport;
946         uint8_t addr[IPaddrlen];
947
948         rv = NULL;
949
950         /*
951          *  ignore restricted part if it exists.  it's
952          *  meaningless on local ports.
953          */
954         p = strchr(str, '!');
955         if (p != NULL) {
956                 *p++ = 0;
957                 if (strcmp(p, "r") == 0)
958                         p = NULL;
959         }
960
961         c->lport = 0;
962         if (p == NULL) {
963                 if (announcing)
964                         ipmove(c->laddr, IPnoaddr);
965                 else
966                         setladdr(c);
967                 p = str;
968         } else {
969                 if (strcmp(str, "*") == 0)
970                         ipmove(c->laddr, IPnoaddr);
971                 else {
972                         parseip(addr, str);
973                         if (ipforme(c->p->f, addr))
974                                 ipmove(c->laddr, addr);
975                         else
976                                 return "not a local IP address";
977                 }
978         }
979
980         /* one process can get all connections */
981         if (announcing && strcmp(p, "*") == 0) {
982                 if (!iseve())
983                         error(EPERM, ERROR_FIXME);
984                 return setluniqueport(c, 0);
985         }
986
987         lport = atoi(p);
988         if (lport <= 0)
989                 setlport(c);
990         else
991                 rv = setluniqueport(c, lport);
992         return rv;
993 }
994
995 static char *setraddrport(struct conv *c, char *str)
996 {
997         char *p;
998
999         p = strchr(str, '!');
1000         if (p == NULL)
1001                 return "malformed address";
1002         *p++ = 0;
1003         parseip(c->raddr, str);
1004         c->rport = atoi(p);
1005         p = strchr(p, '!');
1006         if (p) {
1007                 if (strstr(p, "!r") != NULL)
1008                         c->restricted = 1;
1009         }
1010         return NULL;
1011 }
1012
1013 /*
1014  *  called by protocol connect routine to set addresses
1015  */
1016 char *Fsstdconnect(struct conv *c, char *argv[], int argc)
1017 {
1018         char *p;
1019
1020         switch (argc) {
1021                 default:
1022                         return "bad args to connect";
1023                 case 2:
1024                         p = setraddrport(c, argv[1]);
1025                         if (p != NULL)
1026                                 return p;
1027                         setladdr(c);
1028                         setlport(c);
1029                         break;
1030                 case 3:
1031                         p = setraddrport(c, argv[1]);
1032                         if (p != NULL)
1033                                 return p;
1034                         p = setladdrport(c, argv[2], 0);
1035                         if (p != NULL)
1036                                 return p;
1037         }
1038
1039         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
1040                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
1041                 || ipcmp(c->raddr, IPnoaddr) == 0)
1042                 c->ipversion = V4;
1043         else
1044                 c->ipversion = V6;
1045
1046         return NULL;
1047 }
1048
/*
 *  initiate connection and sleep till it's set up
 */
1052 static int connected(void *a)
1053 {
1054         return ((struct conv *)a)->state == Connected;
1055 }
1056
1057 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1058 {
1059         ERRSTACK(1);
1060         char *p;
1061
1062         if (c->state != 0)
1063                 error(EBUSY, ERROR_FIXME);
1064         c->state = Connecting;
1065         c->cerr[0] = '\0';
1066         if (x->connect == NULL)
1067                 error(EFAIL, "connect not supported");
1068         p = x->connect(c, cb->f, cb->nf);
1069         if (p != NULL)
1070                 error(EFAIL, p);
1071
1072         qunlock(&c->qlock);
1073         if (waserror()) {
1074                 qlock(&c->qlock);
1075                 nexterror();
1076         }
1077         rendez_sleep(&c->cr, connected, c);
1078         qlock(&c->qlock);
1079         poperror();
1080
1081         if (c->cerr[0] != '\0')
1082                 error(EFAIL, c->cerr);
1083 }
1084
1085 /*
1086  *  called by protocol announce routine to set addresses
1087  */
1088 char *Fsstdannounce(struct conv *c, char *argv[], int argc)
1089 {
1090         memset(c->raddr, 0, sizeof(c->raddr));
1091         c->rport = 0;
1092         switch (argc) {
1093                 default:
1094                         return "bad args to announce";
1095                 case 2:
1096                         return setladdrport(c, argv[1], 1);
1097         }
1098 }
1099
/*
 *  initiate announcement and sleep till it's set up
 */
1103 static int announced(void *a)
1104 {
1105         return ((struct conv *)a)->state == Announced;
1106 }
1107
1108 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1109 {
1110         ERRSTACK(1);
1111         char *p;
1112
1113         if (c->state != 0)
1114                 error(EBUSY, ERROR_FIXME);
1115         c->state = Announcing;
1116         c->cerr[0] = '\0';
1117         if (x->announce == NULL)
1118                 error(EFAIL, "announce not supported");
1119         p = x->announce(c, cb->f, cb->nf);
1120         if (p != NULL)
1121                 error(EFAIL, p);
1122
1123         qunlock(&c->qlock);
1124         if (waserror()) {
1125                 qlock(&c->qlock);
1126                 nexterror();
1127         }
1128         rendez_sleep(&c->cr, announced, c);
1129         qlock(&c->qlock);
1130         poperror();
1131
1132         if (c->cerr[0] != '\0')
1133                 error(EFAIL, c->cerr);
1134 }
1135
/*
 *  called by protocol bind routine to set addresses
 *
 *  Sets the local address and port from argv[1] via setladdrport() in
 *  non-announcing mode.  Returns NULL on success or a static error string.
 */
char *Fsstdbind(struct conv *c, char *argv[], int argc)
{
	if (argc != 2)
		return "bad args to bind";
	return setladdrport(c, argv[1], 0);
}
1148
1149 void Fsconvnonblock(struct conv *cv, bool onoff)
1150 {
1151         qnonblock(cv->wq, onoff);
1152         qnonblock(cv->rq, onoff);
1153         cv->nonblock = onoff;
1154 }
1155
1156 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1157 {
1158         char *p;
1159
1160         if (x->bind == NULL)
1161                 p = Fsstdbind(c, cb->f, cb->nf);
1162         else
1163                 p = x->bind(c, cb->f, cb->nf);
1164         if (p != NULL)
1165                 error(EFAIL, p);
1166 }
1167
1168 static void nonblockctlmsg(struct conv *c, struct cmdbuf *cb)
1169 {
1170         if (cb->nf < 2)
1171                 goto err;
1172         if (!strcmp(cb->f[1], "on"))
1173                 Fsconvnonblock(c, TRUE);
1174         else if (!strcmp(cb->f[1], "off"))
1175                 Fsconvnonblock(c, FALSE);
1176         else
1177                 goto err;
1178         return;
1179 err:
1180         error(EINVAL, "nonblock [on|off]");
1181 }
1182
1183 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1184 {
1185         if (cb->nf < 2)
1186                 c->tos = 0;
1187         else
1188                 c->tos = atoi(cb->f[1]);
1189 }
1190
1191 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1192 {
1193         if (cb->nf < 2)
1194                 c->ttl = MAXTTL;
1195         else
1196                 c->ttl = atoi(cb->f[1]);
1197 }
1198
1199 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1200 {
1201         ERRSTACK(1);
1202         struct conv *c;
1203         struct Proto *x;
1204         char *p;
1205         struct cmdbuf *cb;
1206         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1207         struct Fs *f;
1208         char *a;
1209
1210         a = v;
1211         f = ipfs[ch->dev];
1212
1213         switch (TYPE(ch->qid)) {
1214                 default:
1215                         error(EPERM, ERROR_FIXME);
1216                 case Qdata:
1217                         x = f->p[PROTO(ch->qid)];
1218                         c = x->conv[CONV(ch->qid)];
1219                         qwrite(c->wq, a, n);
1220                         break;
1221                 case Qarp:
1222                         return arpwrite(f, a, n);
1223                 case Qiproute:
1224                         return routewrite(f, ch, a, n);
1225                 case Qlog:
1226                         netlogctl(f, a, n);
1227                         return n;
1228                 case Qndb:
1229                         return ndbwrite(f, a, off, n);
1230                 case Qctl:
1231                         x = f->p[PROTO(ch->qid)];
1232                         c = x->conv[CONV(ch->qid)];
1233                         cb = parsecmd(a, n);
1234
1235                         qlock(&c->qlock);
1236                         if (waserror()) {
1237                                 qunlock(&c->qlock);
1238                                 kfree(cb);
1239                                 nexterror();
1240                         }
1241                         if (cb->nf < 1)
1242                                 error(EFAIL, "short control request");
1243                         if (strcmp(cb->f[0], "connect") == 0)
1244                                 connectctlmsg(x, c, cb);
1245                         else if (strcmp(cb->f[0], "announce") == 0)
1246                                 announcectlmsg(x, c, cb);
1247                         else if (strcmp(cb->f[0], "bind") == 0)
1248                                 bindctlmsg(x, c, cb);
1249                         else if (strcmp(cb->f[0], "nonblock") == 0)
1250                                 nonblockctlmsg(c, cb);
1251                         else if (strcmp(cb->f[0], "ttl") == 0)
1252                                 ttlctlmsg(c, cb);
1253                         else if (strcmp(cb->f[0], "tos") == 0)
1254                                 tosctlmsg(c, cb);
1255                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1256                                 c->ignoreadvice = 1;
1257                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1258                                 if (cb->nf < 2)
1259                                         error(EFAIL, "addmulti needs interface address");
1260                                 if (cb->nf == 2) {
1261                                         if (!ipismulticast(c->raddr))
1262                                                 error(EFAIL, "addmulti for a non multicast address");
1263                                         parseip(ia, cb->f[1]);
1264                                         ipifcaddmulti(c, c->raddr, ia);
1265                                 } else {
1266                                         parseip(ma, cb->f[2]);
1267                                         if (!ipismulticast(ma))
1268                                                 error(EFAIL, "addmulti for a non multicast address");
1269                                         parseip(ia, cb->f[1]);
1270                                         ipifcaddmulti(c, ma, ia);
1271                                 }
1272                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1273                                 if (cb->nf < 2)
1274                                         error(EFAIL, "remmulti needs interface address");
1275                                 if (!ipismulticast(c->raddr))
1276                                         error(EFAIL, "remmulti for a non multicast address");
1277                                 parseip(ia, cb->f[1]);
1278                                 ipifcremmulti(c, c->raddr, ia);
1279                         } else if (x->ctl != NULL) {
1280                                 p = x->ctl(c, cb->f, cb->nf);
1281                                 if (p != NULL)
1282                                         error(EFAIL, p);
1283                         } else
1284                                 error(EFAIL, "unknown control request");
1285                         qunlock(&c->qlock);
1286                         kfree(cb);
1287                         poperror();
1288         }
1289         return n;
1290 }
1291
1292 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1293 {
1294         struct conv *c;
1295         struct Proto *x;
1296         struct Fs *f;
1297         int n;
1298
1299         switch (TYPE(ch->qid)) {
1300                 case Qdata:
1301                         f = ipfs[ch->dev];
1302                         x = f->p[PROTO(ch->qid)];
1303                         c = x->conv[CONV(ch->qid)];
1304                         if (bp->next)
1305                                 bp = concatblock(bp);
1306                         n = BLEN(bp);
1307                         qbwrite(c->wq, bp);
1308                         return n;
1309                 default:
1310                         return devbwrite(ch, bp, offset);
1311         }
1312 }
1313
1314 static void ip_wake_cb(struct queue *q, void *data, int filter)
1315 {
1316         struct conv *conv = (struct conv*)data;
1317         struct fd_tap *tap_i;
1318         /* For these two, we want to ignore events on the opposite end of the
1319          * queues.  For instance, we want to know when the WQ is writable.  Our
1320          * writes will actually make it readable - we don't want to trigger a tap
1321          * for that.  However, qio doesn't know how/why we are using a queue, or
1322          * even who the ends are (hence the callbacks) */
1323         if ((filter & FDTAP_FILT_READABLE) && (q == conv->wq))
1324                 return;
1325         if ((filter & FDTAP_FILT_WRITABLE) && (q == conv->rq))
1326                 return;
1327         /* At this point, we have an event we want to send to our taps (if any).
1328          * The lock protects list integrity and the existence of the tap.
1329          *
1330          * Previously, I thought of using the conv qlock.  That actually breaks, due
1331          * to weird usages of the qlock (someone holds it for a long time, blocking
1332          * the inbound wakeup from etherread4).
1333          *
1334          * I opted for a spinlock for a couple reasons:
1335          * - fire_tap should not block.  ideally it'll be fast too (it's mostly a
1336          * send_event).
1337          * - our callers might not want to block.  A lot of network wakeups will
1338          * come network processes (etherread4) or otherwise unrelated to this
1339          * particular conversation.  I'd rather do something like fire off a KMSG
1340          * than block those.
1341          * - if fire_tap takes a while, holding the lock only slows down other
1342          * events on this *same* conversation, or other tap registration.  not a
1343          * huge deal. */
1344         spin_lock(&conv->tap_lock);
1345         SLIST_FOREACH(tap_i, &conv->data_taps, link)
1346                 fire_tap(tap_i, filter);
1347         spin_unlock(&conv->tap_lock);
1348 }
1349
1350 int iptapfd(struct chan *chan, struct fd_tap *tap, int cmd)
1351 {
1352         struct conv *conv;
1353         struct Proto *x;
1354         struct Fs *f;
1355         int ret;
1356
1357         #define DEVIP_LEGAL_DATA_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_WRITABLE | \
1358                                        FDTAP_FILT_HANGUP | FDTAP_FILT_PRIORITY |   \
1359                                        FDTAP_FILT_ERROR)
1360         #define DEVIP_LEGAL_LISTEN_TAPS (FDTAP_FILT_READABLE | FDTAP_FILT_HANGUP)
1361
1362         /* That's a lot of pointers to get to the conv! */
1363         f = ipfs[chan->dev];
1364         x = f->p[PROTO(chan->qid)];
1365         conv = x->conv[CONV(chan->qid)];
1366
1367         switch (TYPE(chan->qid)) {
1368                 case Qdata:
1369                         if (tap->filter & ~DEVIP_LEGAL_DATA_TAPS) {
1370                                 set_errno(ENOSYS);
1371                                 set_errstr("Unsupported #%s data tap %p, must be %p", devname(),
1372                                            tap->filter, DEVIP_LEGAL_DATA_TAPS);
1373                                 return -1;
1374                         }
1375                         spin_lock(&conv->tap_lock);
1376                         switch (cmd) {
1377                                 case (FDTAP_CMD_ADD):
1378                                         if (SLIST_EMPTY(&conv->data_taps)) {
1379                                                 qio_set_wake_cb(conv->rq, ip_wake_cb, conv);
1380                                                 qio_set_wake_cb(conv->wq, ip_wake_cb, conv);
1381                                         }
1382                                         SLIST_INSERT_HEAD(&conv->data_taps, tap, link);
1383                                         ret = 0;
1384                                         break;
1385                                 case (FDTAP_CMD_REM):
1386                                         SLIST_REMOVE(&conv->data_taps, tap, fd_tap, link);
1387                                         if (SLIST_EMPTY(&conv->data_taps)) {
1388                                                 qio_set_wake_cb(conv->rq, 0, conv);
1389                                                 qio_set_wake_cb(conv->wq, 0, conv);
1390                                         }
1391                                         ret = 0;
1392                                         break;
1393                                 default:
1394                                         set_errno(ENOSYS);
1395                                         set_errstr("Unsupported #%s data tap command %p",
1396                                                    devname(), cmd);
1397                                         ret = -1;
1398                         }
1399                         spin_unlock(&conv->tap_lock);
1400                         return ret;
1401                 case Qlisten:
1402                         if (tap->filter & ~DEVIP_LEGAL_LISTEN_TAPS) {
1403                                 set_errno(ENOSYS);
1404                                 set_errstr("Unsupported #%s listen tap %p, must be %p",
1405                                            devname(), tap->filter, DEVIP_LEGAL_LISTEN_TAPS);
1406                                 return -1;
1407                         }
1408                         spin_lock(&conv->tap_lock);
1409                         switch (cmd) {
1410                                 case (FDTAP_CMD_ADD):
1411                                         SLIST_INSERT_HEAD(&conv->listen_taps, tap, link);
1412                                         ret = 0;
1413                                         break;
1414                                 case (FDTAP_CMD_REM):
1415                                         SLIST_REMOVE(&conv->listen_taps, tap, fd_tap, link);
1416                                         ret = 0;
1417                                         break;
1418                                 default:
1419                                         set_errno(ENOSYS);
1420                                         set_errstr("Unsupported #%s listen tap command %p",
1421                                                    devname(), cmd);
1422                                         ret = -1;
1423                         }
1424                         spin_unlock(&conv->tap_lock);
1425                         return ret;
1426                 default:
1427                         set_errno(ENOSYS);
1428                         set_errstr("Can't tap #%s file type %d", devname(),
1429                                    TYPE(chan->qid));
1430                         return -1;
1431         }
1432 }
1433
1434 struct dev ipdevtab __devtab = {
1435         .name = "ip",
1436
1437         .reset = ipreset,
1438         .init = ipinit,
1439         .shutdown = devshutdown,
1440         .attach = ipattach,
1441         .walk = ipwalk,
1442         .stat = ipstat,
1443         .open = ipopen,
1444         .create = devcreate,
1445         .close = ipclose,
1446         .read = ipread,
1447         .bread = ipbread,
1448         .write = ipwrite,
1449         .bwrite = ipbwrite,
1450         .remove = devremove,
1451         .wstat = ipwstat,
1452         .power = devpower,
1453         .chaninfo = ipchaninfo,
1454         .tapfd = iptapfd,
1455 };
1456
1457 int Fsproto(struct Fs *f, struct Proto *p)
1458 {
1459         if (f->np >= Maxproto)
1460                 return -1;
1461
1462         qlock_init(&p->qlock);
1463         p->f = f;
1464
1465         if (p->ipproto > 0) {
1466                 if (f->t2p[p->ipproto] != NULL)
1467                         return -1;
1468                 f->t2p[p->ipproto] = p;
1469         }
1470
1471         p->qid.type = QTDIR;
1472         p->qid.path = QID(f->np, 0, Qprotodir);
1473         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1474         if (p->conv == NULL)
1475                 panic("Fsproto");
1476
1477         p->x = f->np;
1478         p->nextport = 0;
1479         p->nextrport = 600;
1480         f->p[f->np++] = p;
1481
1482         return 0;
1483 }
1484
1485 /*
1486  *  return true if this protocol is
1487  *  built in
1488  */
1489 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1490 {
1491         return f->t2p[proto] != NULL;
1492 }
1493
1494 /*
1495  *  called with protocol locked
1496  */
1497 struct conv *Fsprotoclone(struct Proto *p, char *user)
1498 {
1499         struct conv *c, **pp, **ep;
1500
1501 retry:
1502         c = NULL;
1503         ep = &p->conv[p->nc];
1504         for (pp = p->conv; pp < ep; pp++) {
1505                 c = *pp;
1506                 if (c == NULL) {
1507                         c = kzmalloc(sizeof(struct conv), 0);
1508                         if (c == NULL)
1509                                 error(ENOMEM, ERROR_FIXME);
1510                         qlock_init(&c->qlock);
1511                         qlock_init(&c->listenq);
1512                         rendez_init(&c->cr);
1513                         rendez_init(&c->listenr);
1514                         SLIST_INIT(&c->data_taps);      /* already = 0; set to be futureproof */
1515                         SLIST_INIT(&c->listen_taps);
1516                         spinlock_init(&c->tap_lock);
1517                         qlock(&c->qlock);
1518                         c->p = p;
1519                         c->x = pp - p->conv;
1520                         if (p->ptclsize != 0) {
1521                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1522                                 if (c->ptcl == NULL) {
1523                                         kfree(c);
1524                                         error(ENOMEM, ERROR_FIXME);
1525                                 }
1526                         }
1527                         *pp = c;
1528                         p->ac++;
1529                         c->eq = qopen(1024, Qmsg, 0, 0);
1530                         (*p->create) (c);
1531                         assert(c->rq && c->wq);
1532                         break;
1533                 }
1534                 if (canqlock(&c->qlock)) {
1535                         /*
1536                          *  make sure both processes and protocol
1537                          *  are done with this Conv
1538                          */
1539                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1540                                 break;
1541
1542                         qunlock(&c->qlock);
1543                 }
1544         }
1545         if (pp >= ep) {
1546                 if (p->gc != NULL && (*p->gc) (p))
1547                         goto retry;
1548                 return NULL;
1549         }
1550
1551         c->inuse = 1;
1552         kstrdup(&c->owner, user);
1553         c->perm = 0660;
1554         c->state = Idle;
1555         ipmove(c->laddr, IPnoaddr);
1556         ipmove(c->raddr, IPnoaddr);
1557         c->r = NULL;
1558         c->rgen = 0;
1559         c->lport = 0;
1560         c->rport = 0;
1561         c->restricted = 0;
1562         c->ttl = MAXTTL;
1563         c->tos = DFLTTOS;
1564         c->nonblock = FALSE;
1565         qreopen(c->rq);
1566         qreopen(c->wq);
1567         qreopen(c->eq);
1568
1569         qunlock(&c->qlock);
1570         return c;
1571 }
1572
1573 int Fsconnected(struct conv *c, char *msg)
1574 {
1575         if (msg != NULL && *msg != '\0')
1576                 strlcpy(c->cerr, msg, sizeof(c->cerr));
1577
1578         switch (c->state) {
1579                 case Announcing:
1580                         c->state = Announced;
1581                         break;
1582
1583                 case Connecting:
1584                         c->state = Connected;
1585                         break;
1586         }
1587
1588         rendez_wakeup(&c->cr);
1589         return 0;
1590 }
1591
1592 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1593 {
1594         if (f->ipmux)
1595                 return f->ipmux;
1596         else
1597                 return f->t2p[proto];
1598 }
1599
1600 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1601 {
1602         return f->t2p[proto];
1603 }
1604
1605 static void fire_listener_taps(struct conv *conv)
1606 {
1607         struct fd_tap *tap_i;
1608         if (SLIST_EMPTY(&conv->listen_taps))
1609                 return;
1610         spin_lock(&conv->tap_lock);
1611         SLIST_FOREACH(tap_i, &conv->listen_taps, link)
1612                 fire_tap(tap_i, FDTAP_FILT_READABLE);
1613         spin_unlock(&conv->tap_lock);
1614 }
1615
1616 /*
1617  *  called with protocol locked
1618  */
1619 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1620                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1621 {
1622         struct conv *nc;
1623         struct conv **l;
1624         int i;
1625
1626         qlock(&c->qlock);
1627         i = 0;
1628         for (l = &c->incall; *l; l = &(*l)->next)
1629                 i++;
1630         if (i >= Maxincall) {
1631                 qunlock(&c->qlock);
1632                 return NULL;
1633         }
1634
1635         /* find a free conversation */
1636         nc = Fsprotoclone(c->p, network);
1637         if (nc == NULL) {
1638                 qunlock(&c->qlock);
1639                 return NULL;
1640         }
1641         ipmove(nc->raddr, raddr);
1642         nc->rport = rport;
1643         ipmove(nc->laddr, laddr);
1644         nc->lport = lport;
1645         nc->next = NULL;
1646         *l = nc;
1647         nc->state = Connected;
1648         nc->ipversion = version;
1649
1650         qunlock(&c->qlock);
1651
1652         rendez_wakeup(&c->listenr);
1653         fire_listener_taps(c);
1654
1655         return nc;
1656 }
1657
1658 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1659 {
1660         if (off > strlen(f->ndb))
1661                 error(EIO, ERROR_FIXME);
1662         if (off + n >= sizeof(f->ndb) - 1)
1663                 error(EIO, ERROR_FIXME);
1664         memmove(f->ndb + off, a, n);
1665         f->ndb[off + n] = 0;
1666         f->ndbvers++;
1667         f->ndbmtime = seconds();
1668         return n;
1669 }
1670
1671 uint32_t scalednconv(void)
1672 {
1673         //if(conf.npage*BY2PG >= 128*MB)
1674         return Nchans * 4;
1675         //  return Nchans;
1676 }