4ac2c6da1c736efd692ea231d8f54bf51fca3c4a
[akaros.git] / kern / src / net / devip.c
1 // INFERNO
2 #include <vfs.h>
3 #include <kfs.h>
4 #include <slab.h>
5 #include <kmalloc.h>
6 #include <kref.h>
7 #include <string.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <error.h>
11 #include <cpio.h>
12 #include <pmap.h>
13 #include <smp.h>
14 #include <ip.h>
15
/*
 * File identifiers and qid-path layout for the IP device.
 *
 * A qid path packs three fields (see the TYPE/CONV/PROTO/QID macros):
 *   bits [0, Logtype)                  file type (one of the Q* values)
 *   bits [Shiftconv, Shiftproto)       conversation index
 *   bits [Shiftproto, +Logproto)       protocol index
 */
enum {
	/* the top-level directory and the files directly inside it */
	Qtopdir = 1,
	Qtopbase,
	Qarp = Qtopbase,
	Qbootp,
	Qndb,
	Qiproute,
	Qiprouter,
	Qipselftab,
	Qlog,

	/* a protocol directory and the files directly inside it */
	Qprotodir,
	Qprotobase,
	Qclone = Qprotobase,
	Qstats,

	/* a conversation directory and the files directly inside it */
	Qconvdir,
	Qconvbase,
	Qctl = Qconvbase,
	Qdata,
	Qerr,
	Qlisten,
	Qlocal,
	Qremote,
	Qstatus,
	Qsnoop,

	/* field widths, masks and shifts of the packed qid path */
	Logtype = 5,
	Masktype = (1 << Logtype) - 1,
	Logconv = 12,
	Maskconv = (1 << Logconv) - 1,
	Shiftconv = Logtype,
	Logproto = 8,
	Maskproto = (1 << Logproto) - 1,
	Shiftproto = Logtype + Logconv,

	/* number of slots in ipfs[] */
	Nfs = 32,
};
/*
 * Accessors for the three fields packed into a qid path; each takes a
 * struct with a .path member.  QID() builds a path from a protocol index,
 * a conversation index and a file type.
 */
#define TYPE(x)		(((uint32_t)(x).path) & Masktype)
#define CONV(x)		((((uint32_t)(x).path) >> Shiftconv) & Maskconv)
#define PROTO(x)	((((uint32_t)(x).path) >> Shiftproto) & Maskproto)
#define QID(p, c, y)	(((p) << (Shiftproto)) | ((c) << Shiftconv) | (y))

/* user name reported for the device's files when no other owner applies */
static char network[] = "network";
59
60 qlock_t fslock;
61 struct Fs *ipfs[Nfs];                   /* attached fs's */
62 struct queue *qlog;
63
64 extern void nullmediumlink(void);
65 extern void pktmediumlink(void);
66 extern char *eve;
67 static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
68 static void closeconv(struct conv *);
69
70 static inline int founddevdir(struct chan *c, struct qid q, char *n,
71                                                           int64_t length, char *user, long perm,
72                                                           struct dir *db)
73 {
74         devdir(c, q, n, length, user, perm, db);
75         return 1;
76 }
77
78 static int topdirgen(struct chan *c, struct dir *dp)
79 {
80         struct qid q;
81         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
82         snprintf(get_cur_genbuf(), GENBUF_SZ, "#I%lu", c->dev);
83         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
84 }
85
86
87 static int ip3gen(struct chan *c, int i, struct dir *dp)
88 {
89         struct qid q;
90         struct conv *cv;
91         char *p;
92
93         cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
94         if (cv->owner == NULL)
95                 kstrdup(&cv->owner, eve);
96         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
97
98         switch (i) {
99                 default:
100                         return -1;
101                 case Qctl:
102                         return founddevdir(c, q, "ctl", 0,
103                                                    cv->owner, cv->perm, dp);
104                 case Qdata:
105                         return founddevdir(c, q, "data", qlen(cv->rq),
106                                                            cv->owner, cv->perm, dp);
107                 case Qerr:
108                         return founddevdir(c, q, "err", qlen(cv->eq),
109                                                            cv->owner, cv->perm, dp);
110                 case Qlisten:
111                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
112                 case Qlocal:
113                         p = "local";
114                         break;
115                 case Qremote:
116                         p = "remote";
117                         break;
118                 case Qsnoop:
119                         if (strcmp(cv->p->name, "ipifc") != 0)
120                                 return -1;
121                         return founddevdir(c, q, "snoop", qlen(cv->sq),
122                                                            cv->owner, 0400, dp);
123                 case Qstatus:
124                         p = "status";
125                         break;
126         }
127         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
128 }
129
130 static int ip2gen(struct chan *c, int i, struct dir *dp)
131 {
132         struct qid q;
133         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
134         switch (i) {
135                 case Qclone:
136                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
137                 case Qstats:
138                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
139         }
140         return -1;
141 }
142
143 static int ip1gen(struct chan *c, int i, struct dir *dp)
144 {
145         struct qid q;
146         char *p;
147         int prot;
148         int len = 0;
149         struct Fs *f;
150         extern uint32_t kerndate;
151
152         f = ipfs[c->dev];
153
154         prot = 0666;
155         mkqid(&q, QID(0, 0, i), 0, QTFILE);
156         switch (i) {
157                 default:
158                         return -1;
159                 case Qarp:
160                         p = "arp";
161                         break;
162                 case Qbootp:
163                         if (bootp == NULL)
164                                 return 0;
165                         p = "bootp";
166                         break;
167                 case Qndb:
168                         p = "ndb";
169                         len = strlen(f->ndb);
170                         q.vers = f->ndbvers;
171                         break;
172                 case Qiproute:
173                         p = "iproute";
174                         break;
175                 case Qipselftab:
176                         p = "ipselftab";
177                         prot = 0444;
178                         break;
179                 case Qiprouter:
180                         p = "iprouter";
181                         break;
182                 case Qlog:
183                         p = "log";
184                         break;
185         }
186         devdir(c, q, p, len, network, prot, dp);
187         if (i == Qndb && f->ndbmtime > kerndate)
188                 dp->mtime = f->ndbmtime;
189         return 1;
190 }
191
192 static int
193 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
194           int s, struct dir *dp)
195 {
196         struct qid q;
197         struct conv *cv;
198         struct Fs *f;
199
200         f = ipfs[c->dev];
201
202         switch (TYPE(c->qid)) {
203                 case Qtopdir:
204                         if (s == DEVDOTDOT)
205                                 return topdirgen(c, dp);
206                         if (s < f->np) {
207                                 if (f->p[s]->connect == NULL)
208                                         return 0;       /* protocol with no user interface */
209                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
210                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
211                         }
212                         s -= f->np;
213                         return ip1gen(c, s + Qtopbase, dp);
214                 case Qarp:
215                 case Qbootp:
216                 case Qndb:
217                 case Qlog:
218                 case Qiproute:
219                 case Qiprouter:
220                 case Qipselftab:
221                         return ip1gen(c, TYPE(c->qid), dp);
222                 case Qprotodir:
223                         if (s == DEVDOTDOT)
224                                 return topdirgen(c, dp);
225                         else if (s < f->p[PROTO(c->qid)]->ac) {
226                                 cv = f->p[PROTO(c->qid)]->conv[s];
227                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
228                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
229                                 return
230                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
231                         }
232                         s -= f->p[PROTO(c->qid)]->ac;
233                         return ip2gen(c, s + Qprotobase, dp);
234                 case Qclone:
235                 case Qstats:
236                         return ip2gen(c, TYPE(c->qid), dp);
237                 case Qconvdir:
238                         if (s == DEVDOTDOT) {
239                                 s = PROTO(c->qid);
240                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
241                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
242                                 return 1;
243                         }
244                         return ip3gen(c, s + Qconvbase, dp);
245                 case Qctl:
246                 case Qdata:
247                 case Qerr:
248                 case Qlisten:
249                 case Qlocal:
250                 case Qremote:
251                 case Qstatus:
252                 case Qsnoop:
253                         return ip3gen(c, TYPE(c->qid), dp);
254         }
255         return -1;
256 }
257
258 static void ipinit(void)
259 {
260         qlock_init(&fslock);
261         nullmediumlink();
262         pktmediumlink();
263 /* if only
264         fmtinstall('i', eipfmt);
265         fmtinstall('I', eipfmt);
266         fmtinstall('E', eipfmt);
267         fmtinstall('V', eipfmt);
268         fmtinstall('M', eipfmt);
269 */
270 }
271
/* Device reset hook; the IP device has nothing to reset. */
static void ipreset(void)
{
	/* intentionally empty */
}
275
276 static struct Fs *ipgetfs(int dev)
277 {
278         extern void (*ipprotoinit[]) (struct Fs *);
279         struct Fs *f;
280         int i;
281
282         if (dev >= Nfs)
283                 return NULL;
284
285         qlock(&fslock);
286         if (ipfs[dev] == NULL) {
287                 f = kzmalloc(sizeof(struct Fs), KMALLOC_WAIT);
288                 rwinit(&f->rwlock);
289                 qlock_init(&f->iprouter.qlock);
290                 ip_init(f);
291                 arpinit(f);
292                 netloginit(f);
293                 for (i = 0; ipprotoinit[i]; i++)
294                         ipprotoinit[i] (f);
295                 f->dev = dev;
296                 ipfs[dev] = f;
297         }
298         qunlock(&fslock);
299
300         return ipfs[dev];
301 }
302
303 struct IPaux *newipaux(char *owner, char *tag)
304 {
305         struct IPaux *a;
306         int n;
307
308         a = kzmalloc(sizeof(*a), 0);
309         kstrdup(&a->owner, owner);
310         memset(a->tag, ' ', sizeof(a->tag));
311         n = strlen(tag);
312         if (n > sizeof(a->tag))
313                 n = sizeof(a->tag);
314         memmove(a->tag, tag, n);
315         return a;
316 }
317
318 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
319
320 static struct chan *ipattach(char *spec)
321 {
322         struct chan *c;
323         int dev;
324
325         dev = atoi(spec);
326         if (dev >= Nfs)
327                 error("bad specification");
328
329         ipgetfs(dev);
330         c = devattach('I', spec);
331         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
332         c->dev = dev;
333
334         c->aux = newipaux(commonuser(), "none");
335
336         return c;
337 }
338
339 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
340                                                           int nname)
341 {
342         struct IPaux *a = c->aux;
343         struct walkqid *w;
344
345         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
346         if (w != NULL && w->clone != NULL)
347                 w->clone->aux = newipaux(a->owner, a->tag);
348         return w;
349 }
350
351 static int ipstat(struct chan *c, uint8_t * db, int n)
352 {
353         return devstat(c, db, n, NULL, 0, ipgen);
354 }
355
356 static int should_wake(void *arg)
357 {
358         struct conv *cv = arg;
359         /* signal that the conv is closed */
360         if (qisclosed(cv->rq))
361                 return TRUE;
362         return cv->incall != NULL;
363 }
364
365 static int m2p[] = {
366         [OREAD] 4,
367         [OWRITE] 2,
368         [ORDWR] 6
369 };
370
371 static struct chan *ipopen(struct chan *c, int omode)
372 {
373         ERRSTACK(2);
374         struct conv *cv, *nc;
375         struct Proto *p;
376         int perm;
377         struct Fs *f;
378
379         perm = m2p[omode & 3];
380
381         f = ipfs[c->dev];
382
383         switch (TYPE(c->qid)) {
384                 default:
385                         break;
386                 case Qndb:
387                         if (omode & (OWRITE | OTRUNC) && !iseve())
388                                 error(Eperm);
389                         if ((omode & (OWRITE | OTRUNC)) == (OWRITE | OTRUNC))
390                                 f->ndb[0] = 0;
391                         break;
392                 case Qlog:
393                         netlogopen(f);
394                         break;
395                 case Qiprouter:
396                         iprouteropen(f);
397                         break;
398                 case Qiproute:
399                         break;
400                 case Qtopdir:
401                 case Qprotodir:
402                 case Qconvdir:
403                 case Qstatus:
404                 case Qremote:
405                 case Qlocal:
406                 case Qstats:
407                 case Qbootp:
408                 case Qipselftab:
409                         if (!IS_RDONLY(omode))
410                                 error(Eperm);
411                         break;
412                 case Qsnoop:
413                         if (!IS_RDONLY(omode))
414                                 error(Eperm);
415                         p = f->p[PROTO(c->qid)];
416                         cv = p->conv[CONV(c->qid)];
417                         if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
418                                 error(Eperm);
419                         atomic_inc(&cv->snoopers);
420                         break;
421                 case Qclone:
422                         p = f->p[PROTO(c->qid)];
423                         qlock(&p->qlock);
424                         if (waserror()) {
425                                 qunlock(&p->qlock);
426                                 nexterror();
427                         }
428                         cv = Fsprotoclone(p, ATTACHER(c));
429                         qunlock(&p->qlock);
430                         poperror();
431                         if (cv == NULL) {
432                                 error(Enodev);
433                                 break;
434                         }
435                         /* we only honor nonblock on a clone */
436                         if (c->flag & CNONBLOCK)
437                                 Fsconvnonblock(cv, TRUE);
438                         mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
439                         break;
440                 case Qdata:
441                 case Qctl:
442                 case Qerr:
443                         p = f->p[PROTO(c->qid)];
444                         qlock(&p->qlock);
445                         cv = p->conv[CONV(c->qid)];
446                         qlock(&cv->qlock);
447                         if (waserror()) {
448                                 qunlock(&cv->qlock);
449                                 qunlock(&p->qlock);
450                                 nexterror();
451                         }
452                         if ((perm & (cv->perm >> 6)) != perm) {
453                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
454                                         error(Eperm);
455                                 if ((perm & cv->perm) != perm)
456                                         error(Eperm);
457
458                         }
459                         cv->inuse++;
460                         if (cv->inuse == 1) {
461                                 kstrdup(&cv->owner, ATTACHER(c));
462                                 cv->perm = 0660;
463                         }
464                         qunlock(&cv->qlock);
465                         qunlock(&p->qlock);
466                         poperror();
467                         break;
468                 case Qlisten:
469                         cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
470                         if ((perm & (cv->perm >> 6)) != perm) {
471                                 if (strcmp(ATTACHER(c), cv->owner) != 0)
472                                         error(Eperm);
473                                 if ((perm & cv->perm) != perm)
474                                         error(Eperm);
475
476                         }
477
478                         if (cv->state != Announced)
479                                 error("not announced");
480
481                         if (waserror()) {
482                                 closeconv(cv);
483                                 nexterror();
484                         }
485                         qlock(&cv->qlock);
486                         cv->inuse++;
487                         qunlock(&cv->qlock);
488
489                         nc = NULL;
490                         while (nc == NULL) {
491                                 /* give up if we got a hangup */
492                                 if (qisclosed(cv->rq))
493                                         error("listen hungup");
494
495                                 qlock(&cv->listenq);
496                                 if (waserror()) {
497                                         qunlock(&cv->listenq);
498                                         nexterror();
499                                 }
500
501                                 /* wait for a connect */
502                                 rendez_sleep(&cv->listenr, should_wake, cv);
503
504                                 /* if there is a concurrent hangup, they will hold the qlock
505                                  * until the hangup is complete, including closing the cv->rq */
506                                 qlock(&cv->qlock);
507                                 nc = cv->incall;
508                                 if (nc != NULL) {
509                                         cv->incall = nc->next;
510                                         mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
511                                         kstrdup(&cv->owner, ATTACHER(c));
512                                 }
513                                 qunlock(&cv->qlock);
514
515                                 qunlock(&cv->listenq);
516                                 poperror();
517                         }
518                         closeconv(cv);
519                         poperror();
520                         break;
521         }
522         c->mode = openmode(omode);
523         c->flag |= COPEN;
524         c->offset = 0;
525         return c;
526 }
527
528 static int ipwstat(struct chan *c, uint8_t * dp, int n)
529 {
530         ERRSTACK(2);
531         struct dir *d;
532         struct conv *cv;
533         struct Fs *f;
534         struct Proto *p;
535
536         f = ipfs[c->dev];
537         switch (TYPE(c->qid)) {
538                 default:
539                         error(Eperm);
540                         break;
541                 case Qctl:
542                 case Qdata:
543                         break;
544         }
545
546         d = kzmalloc(sizeof(*d) + n, 0);
547         if (waserror()) {
548                 kfree(d);
549                 nexterror();
550         }
551         n = convM2D(dp, n, d, (char *)&d[1]);
552         if (n == 0)
553                 error(Eshortstat);
554         p = f->p[PROTO(c->qid)];
555         cv = p->conv[CONV(c->qid)];
556         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
557                 error(Eperm);
558         if (!emptystr(d->uid))
559                 kstrdup(&cv->owner, d->uid);
560         if (d->mode != ~0UL)
561                 cv->perm = d->mode & 0777;
562         poperror();
563         kfree(d);
564         return n;
565 }
566
567 /* Should be able to handle any file type chan. Feel free to extend it. */
568 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
569 {
570         struct conv *conv;
571         struct Proto *proto;
572         char *p;
573         struct Fs *f;
574
575         f = ipfs[ch->dev];
576
577         switch (TYPE(ch->qid)) {
578                 default:
579                         ret = "Unknown type";
580                         break;
581                 case Qdata:
582                         proto = f->p[PROTO(ch->qid)];
583                         conv = proto->conv[CONV(ch->qid)];
584                         snprintf(ret, ret_l, "Qdata, proto %s, conv idx %d", proto->name,
585                                          conv->x);
586                         break;
587                 case Qarp:
588                         ret = "Qarp";
589                         break;
590                 case Qiproute:
591                         ret = "Qiproute";
592                         break;
593                 case Qlog:
594                         ret = "Qlog";
595                         break;
596                 case Qndb:
597                         ret = "Qndb";
598                         break;
599                 case Qctl:
600                         proto = f->p[PROTO(ch->qid)];
601                         conv = proto->conv[CONV(ch->qid)];
602                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
603                                          conv->x);
604                         break;
605         }
606         return ret;
607 }
608
609 static void closeconv(struct conv *cv)
610 {
611         struct conv *nc;
612         struct Ipmulti *mp;
613
614         qlock(&cv->qlock);
615
616         if (--cv->inuse > 0) {
617                 qunlock(&cv->qlock);
618                 return;
619         }
620
621         /* close all incoming calls since no listen will ever happen */
622         for (nc = cv->incall; nc; nc = cv->incall) {
623                 cv->incall = nc->next;
624                 closeconv(nc);
625         }
626         cv->incall = NULL;
627
628         kstrdup(&cv->owner, network);
629         cv->perm = 0660;
630
631         while ((mp = cv->multi) != NULL)
632                 ipifcremmulti(cv, mp->ma, mp->ia);
633
634         cv->r = NULL;
635         cv->rgen = 0;
636         cv->p->close(cv);
637         cv->state = Idle;
638         qunlock(&cv->qlock);
639 }
640
641 static void ipclose(struct chan *c)
642 {
643         struct Fs *f;
644
645         f = ipfs[c->dev];
646         switch (TYPE(c->qid)) {
647                 default:
648                         break;
649                 case Qlog:
650                         if (c->flag & COPEN)
651                                 netlogclose(f);
652                         break;
653                 case Qiprouter:
654                         if (c->flag & COPEN)
655                                 iprouterclose(f);
656                         break;
657                 case Qdata:
658                 case Qctl:
659                 case Qerr:
660                         if (c->flag & COPEN)
661                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
662                         break;
663                 case Qsnoop:
664                         if (c->flag & COPEN)
665                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
666                         break;
667         }
668         kfree(((struct IPaux *)c->aux)->owner);
669         kfree(c->aux);
670 }
671
672 enum {
673         Statelen = 32 * 1024,
674 };
675
676 static long ipread(struct chan *ch, void *a, long n, int64_t off)
677 {
678         struct conv *c;
679         struct Proto *x;
680         char *buf, *p;
681         long rv;
682         struct Fs *f;
683         uint32_t offset = off;
684         size_t sofar;
685
686         f = ipfs[ch->dev];
687
688         p = a;
689         switch (TYPE(ch->qid)) {
690                 default:
691                         error(Eperm);
692                 case Qtopdir:
693                 case Qprotodir:
694                 case Qconvdir:
695                         return devdirread(ch, a, n, 0, 0, ipgen);
696                 case Qarp:
697                         return arpread(f->arp, a, offset, n);
698                 case Qbootp:
699                         return bootpread(a, offset, n);
700                 case Qndb:
701                         return readstr(offset, a, n, f->ndb);
702                 case Qiproute:
703                         return routeread(f, a, offset, n);
704                 case Qiprouter:
705                         return iprouterread(f, a, n);
706                 case Qipselftab:
707                         return ipselftabread(f, a, offset, n);
708                 case Qlog:
709                         return netlogread(f, a, offset, n);
710                 case Qctl:
711                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
712                         return readstr(offset, p, n, get_cur_genbuf());
713                 case Qremote:
714                         buf = kzmalloc(Statelen, 0);
715                         x = f->p[PROTO(ch->qid)];
716                         c = x->conv[CONV(ch->qid)];
717                         if (x->remote == NULL) {
718                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
719                         } else {
720                                 (*x->remote) (c, buf, Statelen - 2);
721                         }
722                         rv = readstr(offset, p, n, buf);
723                         kfree(buf);
724                         return rv;
725                 case Qlocal:
726                         buf = kzmalloc(Statelen, 0);
727                         x = f->p[PROTO(ch->qid)];
728                         c = x->conv[CONV(ch->qid)];
729                         if (x->local == NULL) {
730                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
731                         } else {
732                                 (*x->local) (c, buf, Statelen - 2);
733                         }
734                         rv = readstr(offset, p, n, buf);
735                         kfree(buf);
736                         return rv;
737                 case Qstatus:
738                         /* this all is a bit screwed up since the size of some state's
739                          * buffers will change from one invocation to another.  a reader
740                          * will come in and read the entire buffer.  then it will come again
741                          * and read from the next offset, expecting EOF.  if the buffer
742                          * changed sizes, it'll reprint the end of the buffer slightly. */
743                         buf = kzmalloc(Statelen, 0);
744                         x = f->p[PROTO(ch->qid)];
745                         c = x->conv[CONV(ch->qid)];
746                         sofar = (*x->state) (c, buf, Statelen - 2);
747                         sofar += snprintf(buf + sofar, Statelen - 2 - sofar, "nonblock %s\n",
748                                           c->nonblock ? "on" : "off");
749                         rv = readstr(offset, p, n, buf);
750                         kfree(buf);
751                         return rv;
752                 case Qdata:
753                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
754                         return qread(c->rq, a, n);
755                 case Qerr:
756                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
757                         return qread(c->eq, a, n);
758                 case Qsnoop:
759                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
760                         return qread(c->sq, a, n);
761                 case Qstats:
762                         x = f->p[PROTO(ch->qid)];
763                         if (x->stats == NULL)
764                                 error("stats not implemented");
765                         buf = kzmalloc(Statelen, 0);
766                         (*x->stats) (x, buf, Statelen);
767                         rv = readstr(offset, p, n, buf);
768                         kfree(buf);
769                         return rv;
770         }
771 }
772
773 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
774 {
775         struct conv *c;
776         struct Proto *x;
777         struct Fs *f;
778
779         switch (TYPE(ch->qid)) {
780                 case Qdata:
781                         f = ipfs[ch->dev];
782                         x = f->p[PROTO(ch->qid)];
783                         c = x->conv[CONV(ch->qid)];
784                         return qbread(c->rq, n);
785                 default:
786                         return devbread(ch, n, offset);
787         }
788 }
789
790 /*
791  *  set local address to be that of the ifc closest to remote address
792  */
793 static void setladdr(struct conv *c)
794 {
795         findlocalip(c->p->f, c->laddr, c->raddr);
796 }
797
798 /*
799  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
800  */
801 static char *setluniqueport(struct conv *c, int lport)
802 {
803         struct Proto *p;
804         struct conv *xp;
805         int x;
806
807         p = c->p;
808
809         qlock(&p->qlock);
810         for (x = 0; x < p->nc; x++) {
811                 xp = p->conv[x];
812                 if (xp == NULL)
813                         break;
814                 if (xp == c)
815                         continue;
816                 if ((xp->state == Connected || xp->state == Announced)
817                         && xp->lport == lport
818                         && xp->rport == c->rport
819                         && ipcmp(xp->raddr, c->raddr) == 0
820                         && ipcmp(xp->laddr, c->laddr) == 0) {
821                         qunlock(&p->qlock);
822                         return "address in use";
823                 }
824         }
825         c->lport = lport;
826         qunlock(&p->qlock);
827         return NULL;
828 }
829
830 /*
831  *  pick a local port and set it
832  */
833 static void setlport(struct conv *c)
834 {
835         struct Proto *p;
836         uint16_t *pp;
837         int x, found;
838
839         p = c->p;
840         if (c->restricted)
841                 pp = &p->nextrport;
842         else
843                 pp = &p->nextport;
844         qlock(&p->qlock);
845         for (;; (*pp)++) {
846                 /*
847                  * Fsproto initialises p->nextport to 0 and the restricted
848                  * ports (p->nextrport) to 600.
849                  * Restricted ports must lie between 600 and 1024.
850                  * For the initial condition or if the unrestricted port number
851                  * has wrapped round, select a random port between 5000 and 1<<15
852                  * to start at.
853                  */
854                 if (c->restricted) {
855                         if (*pp >= 1024)
856                                 *pp = 600;
857                 } else
858                         while (*pp < 5000)
859                                 *pp = nrand(1 << 15);
860
861                 found = 0;
862                 for (x = 0; x < p->nc; x++) {
863                         if (p->conv[x] == NULL)
864                                 break;
865                         if (p->conv[x]->lport == *pp) {
866                                 found = 1;
867                                 break;
868                         }
869                 }
870                 if (!found)
871                         break;
872         }
873         c->lport = (*pp)++;
874         qunlock(&p->qlock);
875 }
876
877 /*
878  *  set a local address and port from a string of the form
879  *      [address!]port[!r]
880  */
881 static char *setladdrport(struct conv *c, char *str, int announcing)
882 {
883         char *p;
884         char *rv;
885         uint16_t lport;
886         uint8_t addr[IPaddrlen];
887
888         rv = NULL;
889
890         /*
891          *  ignore restricted part if it exists.  it's
892          *  meaningless on local ports.
893          */
894         p = strchr(str, '!');
895         if (p != NULL) {
896                 *p++ = 0;
897                 if (strcmp(p, "r") == 0)
898                         p = NULL;
899         }
900
901         c->lport = 0;
902         if (p == NULL) {
903                 if (announcing)
904                         ipmove(c->laddr, IPnoaddr);
905                 else
906                         setladdr(c);
907                 p = str;
908         } else {
909                 if (strcmp(str, "*") == 0)
910                         ipmove(c->laddr, IPnoaddr);
911                 else {
912                         parseip(addr, str);
913                         if (ipforme(c->p->f, addr))
914                                 ipmove(c->laddr, addr);
915                         else
916                                 return "not a local IP address";
917                 }
918         }
919
920         /* one process can get all connections */
921         if (announcing && strcmp(p, "*") == 0) {
922                 if (!iseve())
923                         error(Eperm);
924                 return setluniqueport(c, 0);
925         }
926
927         lport = atoi(p);
928         if (lport <= 0)
929                 setlport(c);
930         else
931                 rv = setluniqueport(c, lport);
932         return rv;
933 }
934
935 static char *setraddrport(struct conv *c, char *str)
936 {
937         char *p;
938
939         p = strchr(str, '!');
940         if (p == NULL)
941                 return "malformed address";
942         *p++ = 0;
943         parseip(c->raddr, str);
944         c->rport = atoi(p);
945         p = strchr(p, '!');
946         if (p) {
947                 if (strstr(p, "!r") != NULL)
948                         c->restricted = 1;
949         }
950         return NULL;
951 }
952
953 /*
954  *  called by protocol connect routine to set addresses
955  */
956 char *Fsstdconnect(struct conv *c, char *argv[], int argc)
957 {
958         char *p;
959
960         switch (argc) {
961                 default:
962                         return "bad args to connect";
963                 case 2:
964                         p = setraddrport(c, argv[1]);
965                         if (p != NULL)
966                                 return p;
967                         setladdr(c);
968                         setlport(c);
969                         break;
970                 case 3:
971                         p = setraddrport(c, argv[1]);
972                         if (p != NULL)
973                                 return p;
974                         p = setladdrport(c, argv[2], 0);
975                         if (p != NULL)
976                                 return p;
977         }
978
979         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
980                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
981                 || ipcmp(c->raddr, IPnoaddr) == 0)
982                 c->ipversion = V4;
983         else
984                 c->ipversion = V6;
985
986         return NULL;
987 }
988
989 /*
990  *  initiate connection and sleep till its set up
991  */
992 static int connected(void *a)
993 {
994         return ((struct conv *)a)->state == Connected;
995 }
996
997 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
998 {
999         ERRSTACK(1);
1000         char *p;
1001
1002         if (c->state != 0)
1003                 error(Econinuse);
1004         c->state = Connecting;
1005         c->cerr[0] = '\0';
1006         if (x->connect == NULL)
1007                 error("connect not supported");
1008         p = x->connect(c, cb->f, cb->nf);
1009         if (p != NULL)
1010                 error(p);
1011
1012         qunlock(&c->qlock);
1013         if (waserror()) {
1014                 qlock(&c->qlock);
1015                 nexterror();
1016         }
1017         rendez_sleep(&c->cr, connected, c);
1018         qlock(&c->qlock);
1019         poperror();
1020
1021         if (c->cerr[0] != '\0')
1022                 error(c->cerr);
1023 }
1024
1025 /*
1026  *  called by protocol announce routine to set addresses
1027  */
1028 char *Fsstdannounce(struct conv *c, char *argv[], int argc)
1029 {
1030         memset(c->raddr, 0, sizeof(c->raddr));
1031         c->rport = 0;
1032         switch (argc) {
1033                 default:
1034                         return "bad args to announce";
1035                 case 2:
1036                         return setladdrport(c, argv[1], 1);
1037         }
1038 }
1039
1040 /*
1041  *  initiate announcement and sleep till its set up
1042  */
1043 static int announced(void *a)
1044 {
1045         return ((struct conv *)a)->state == Announced;
1046 }
1047
1048 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1049 {
1050         ERRSTACK(1);
1051         char *p;
1052
1053         if (c->state != 0)
1054                 error(Econinuse);
1055         c->state = Announcing;
1056         c->cerr[0] = '\0';
1057         if (x->announce == NULL)
1058                 error("announce not supported");
1059         p = x->announce(c, cb->f, cb->nf);
1060         if (p != NULL)
1061                 error(p);
1062
1063         qunlock(&c->qlock);
1064         if (waserror()) {
1065                 qlock(&c->qlock);
1066                 nexterror();
1067         }
1068         rendez_sleep(&c->cr, announced, c);
1069         qlock(&c->qlock);
1070         poperror();
1071
1072         if (c->cerr[0] != '\0')
1073                 error(c->cerr);
1074 }
1075
/*
 *  Called by a protocol's bind routine to fill in the local address.
 *
 *  argv[1] is the local "[address!]port[!r]".  Returns NULL on success or
 *  an error string.
 */
char *Fsstdbind(struct conv *c, char *argv[], int argc)
{
        if (argc != 2)
                return "bad args to bind";

        return setladdrport(c, argv[1], 0);
}
1088
1089 void Fsconvnonblock(struct conv *cv, bool onoff)
1090 {
1091         qnonblock(cv->wq, onoff);
1092         qnonblock(cv->rq, onoff);
1093         cv->nonblock = onoff;
1094 }
1095
1096 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1097 {
1098         char *p;
1099
1100         if (x->bind == NULL)
1101                 p = Fsstdbind(c, cb->f, cb->nf);
1102         else
1103                 p = x->bind(c, cb->f, cb->nf);
1104         if (p != NULL)
1105                 error(p);
1106 }
1107
1108 static void nonblockctlmsg(struct conv *c, struct cmdbuf *cb)
1109 {
1110         if (cb->nf < 2)
1111                 goto err;
1112         if (!strcmp(cb->f[1], "on"))
1113                 Fsconvnonblock(c, TRUE);
1114         else if (!strcmp(cb->f[1], "off"))
1115                 Fsconvnonblock(c, FALSE);
1116         else
1117                 goto err;
1118         return;
1119 err:
1120         set_errno(EINVAL);
1121         error("nonblock [on|off]");
1122 }
1123
1124 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1125 {
1126         if (cb->nf < 2)
1127                 c->tos = 0;
1128         else
1129                 c->tos = atoi(cb->f[1]);
1130 }
1131
1132 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1133 {
1134         if (cb->nf < 2)
1135                 c->ttl = MAXTTL;
1136         else
1137                 c->ttl = atoi(cb->f[1]);
1138 }
1139
1140 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1141 {
1142         ERRSTACK(1);
1143         struct conv *c;
1144         struct Proto *x;
1145         char *p;
1146         struct cmdbuf *cb;
1147         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1148         struct Fs *f;
1149         char *a;
1150
1151         a = v;
1152         f = ipfs[ch->dev];
1153
1154         switch (TYPE(ch->qid)) {
1155                 default:
1156                         error(Eperm);
1157                 case Qdata:
1158                         x = f->p[PROTO(ch->qid)];
1159                         c = x->conv[CONV(ch->qid)];
1160
1161                         if (c->wq == NULL)
1162                                 error(Eperm);
1163
1164                         qwrite(c->wq, a, n);
1165                         break;
1166                 case Qarp:
1167                         return arpwrite(f, a, n);
1168                 case Qiproute:
1169                         return routewrite(f, ch, a, n);
1170                 case Qlog:
1171                         netlogctl(f, a, n);
1172                         return n;
1173                 case Qndb:
1174                         return ndbwrite(f, a, off, n);
1175                 case Qctl:
1176                         x = f->p[PROTO(ch->qid)];
1177                         c = x->conv[CONV(ch->qid)];
1178                         cb = parsecmd(a, n);
1179
1180                         qlock(&c->qlock);
1181                         if (waserror()) {
1182                                 qunlock(&c->qlock);
1183                                 kfree(cb);
1184                                 nexterror();
1185                         }
1186                         if (cb->nf < 1)
1187                                 error("short control request");
1188                         if (strcmp(cb->f[0], "connect") == 0)
1189                                 connectctlmsg(x, c, cb);
1190                         else if (strcmp(cb->f[0], "announce") == 0)
1191                                 announcectlmsg(x, c, cb);
1192                         else if (strcmp(cb->f[0], "bind") == 0)
1193                                 bindctlmsg(x, c, cb);
1194                         else if (strcmp(cb->f[0], "nonblock") == 0)
1195                                 nonblockctlmsg(c, cb);
1196                         else if (strcmp(cb->f[0], "ttl") == 0)
1197                                 ttlctlmsg(c, cb);
1198                         else if (strcmp(cb->f[0], "tos") == 0)
1199                                 tosctlmsg(c, cb);
1200                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1201                                 c->ignoreadvice = 1;
1202                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1203                                 if (cb->nf < 2)
1204                                         error("addmulti needs interface address");
1205                                 if (cb->nf == 2) {
1206                                         if (!ipismulticast(c->raddr))
1207                                                 error("addmulti for a non multicast address");
1208                                         parseip(ia, cb->f[1]);
1209                                         ipifcaddmulti(c, c->raddr, ia);
1210                                 } else {
1211                                         parseip(ma, cb->f[2]);
1212                                         if (!ipismulticast(ma))
1213                                                 error("addmulti for a non multicast address");
1214                                         parseip(ia, cb->f[1]);
1215                                         ipifcaddmulti(c, ma, ia);
1216                                 }
1217                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1218                                 if (cb->nf < 2)
1219                                         error("remmulti needs interface address");
1220                                 if (!ipismulticast(c->raddr))
1221                                         error("remmulti for a non multicast address");
1222                                 parseip(ia, cb->f[1]);
1223                                 ipifcremmulti(c, c->raddr, ia);
1224                         } else if (x->ctl != NULL) {
1225                                 p = x->ctl(c, cb->f, cb->nf);
1226                                 if (p != NULL)
1227                                         error(p);
1228                         } else
1229                                 error("unknown control request");
1230                         qunlock(&c->qlock);
1231                         kfree(cb);
1232                         poperror();
1233         }
1234         return n;
1235 }
1236
1237 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1238 {
1239         struct conv *c;
1240         struct Proto *x;
1241         struct Fs *f;
1242         int n;
1243
1244         switch (TYPE(ch->qid)) {
1245                 case Qdata:
1246                         f = ipfs[ch->dev];
1247                         x = f->p[PROTO(ch->qid)];
1248                         c = x->conv[CONV(ch->qid)];
1249
1250                         if (c->wq == NULL)
1251                                 error(Eperm);
1252
1253                         if (bp->next)
1254                                 bp = concatblock(bp);
1255                         n = BLEN(bp);
1256                         qbwrite(c->wq, bp);
1257                         return n;
1258                 default:
1259                         return devbwrite(ch, bp, offset);
1260         }
1261 }
1262
1263 struct dev ipdevtab __devtab = {
1264         'I',
1265         "ip",
1266
1267         ipreset,
1268         ipinit,
1269         devshutdown,
1270         ipattach,
1271         ipwalk,
1272         ipstat,
1273         ipopen,
1274         devcreate,
1275         ipclose,
1276         ipread,
1277         ipbread,
1278         ipwrite,
1279         ipbwrite,
1280         devremove,
1281         ipwstat,
1282         devpower,
1283         ipchaninfo,
1284 };
1285
1286 int Fsproto(struct Fs *f, struct Proto *p)
1287 {
1288         if (f->np >= Maxproto)
1289                 return -1;
1290
1291         qlock_init(&p->qlock);
1292         p->f = f;
1293
1294         if (p->ipproto > 0) {
1295                 if (f->t2p[p->ipproto] != NULL)
1296                         return -1;
1297                 f->t2p[p->ipproto] = p;
1298         }
1299
1300         p->qid.type = QTDIR;
1301         p->qid.path = QID(f->np, 0, Qprotodir);
1302         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1303         if (p->conv == NULL)
1304                 panic("Fsproto");
1305
1306         p->x = f->np;
1307         p->nextport = 0;
1308         p->nextrport = 600;
1309         f->p[f->np++] = p;
1310
1311         return 0;
1312 }
1313
1314 /*
1315  *  return true if this protocol is
1316  *  built in
1317  */
1318 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1319 {
1320         return f->t2p[proto] != NULL;
1321 }
1322
1323 /*
1324  *  called with protocol locked
1325  */
1326 struct conv *Fsprotoclone(struct Proto *p, char *user)
1327 {
1328         struct conv *c, **pp, **ep;
1329
1330 retry:
1331         c = NULL;
1332         ep = &p->conv[p->nc];
1333         for (pp = p->conv; pp < ep; pp++) {
1334                 c = *pp;
1335                 if (c == NULL) {
1336                         c = kzmalloc(sizeof(struct conv), 0);
1337                         if (c == NULL)
1338                                 error(Enomem);
1339                         qlock_init(&c->qlock);
1340                         qlock_init(&c->listenq);
1341                         rendez_init(&c->cr);
1342                         rendez_init(&c->listenr);
1343                         qlock(&c->qlock);
1344                         c->p = p;
1345                         c->x = pp - p->conv;
1346                         if (p->ptclsize != 0) {
1347                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1348                                 if (c->ptcl == NULL) {
1349                                         kfree(c);
1350                                         error(Enomem);
1351                                 }
1352                         }
1353                         *pp = c;
1354                         p->ac++;
1355                         c->eq = qopen(1024, Qmsg, 0, 0);
1356                         (*p->create) (c);
1357                         break;
1358                 }
1359                 if (canqlock(&c->qlock)) {
1360                         /*
1361                          *  make sure both processes and protocol
1362                          *  are done with this Conv
1363                          */
1364                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1365                                 break;
1366
1367                         qunlock(&c->qlock);
1368                 }
1369         }
1370         if (pp >= ep) {
1371                 if (p->gc != NULL && (*p->gc) (p))
1372                         goto retry;
1373                 return NULL;
1374         }
1375
1376         c->inuse = 1;
1377         kstrdup(&c->owner, user);
1378         c->perm = 0660;
1379         c->state = Idle;
1380         ipmove(c->laddr, IPnoaddr);
1381         ipmove(c->raddr, IPnoaddr);
1382         c->r = NULL;
1383         c->rgen = 0;
1384         c->lport = 0;
1385         c->rport = 0;
1386         c->restricted = 0;
1387         c->ttl = MAXTTL;
1388         c->tos = DFLTTOS;
1389         c->nonblock = FALSE;
1390         qreopen(c->rq);
1391         qreopen(c->wq);
1392         qreopen(c->eq);
1393
1394         qunlock(&c->qlock);
1395         return c;
1396 }
1397
1398 int Fsconnected(struct conv *c, char *msg)
1399 {
1400         if (msg != NULL && *msg != '\0')
1401                 strncpy(c->cerr, msg, sizeof(c->cerr));
1402
1403         switch (c->state) {
1404
1405                 case Announcing:
1406                         c->state = Announced;
1407                         break;
1408
1409                 case Connecting:
1410                         c->state = Connected;
1411                         break;
1412         }
1413
1414         rendez_wakeup(&c->cr);
1415         return 0;
1416 }
1417
1418 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1419 {
1420         if (f->ipmux)
1421                 return f->ipmux;
1422         else
1423                 return f->t2p[proto];
1424 }
1425
1426 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1427 {
1428         return f->t2p[proto];
1429 }
1430
1431 /*
1432  *  called with protocol locked
1433  */
1434 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1435                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1436 {
1437         struct conv *nc;
1438         struct conv **l;
1439         int i;
1440
1441         qlock(&c->qlock);
1442         i = 0;
1443         for (l = &c->incall; *l; l = &(*l)->next)
1444                 i++;
1445         if (i >= Maxincall) {
1446                 qunlock(&c->qlock);
1447                 return NULL;
1448         }
1449
1450         /* find a free conversation */
1451         nc = Fsprotoclone(c->p, network);
1452         if (nc == NULL) {
1453                 qunlock(&c->qlock);
1454                 return NULL;
1455         }
1456         ipmove(nc->raddr, raddr);
1457         nc->rport = rport;
1458         ipmove(nc->laddr, laddr);
1459         nc->lport = lport;
1460         nc->next = NULL;
1461         *l = nc;
1462         nc->state = Connected;
1463         nc->ipversion = version;
1464
1465         qunlock(&c->qlock);
1466
1467         rendez_wakeup(&c->listenr);
1468
1469         return nc;
1470 }
1471
1472 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1473 {
1474         if (off > strlen(f->ndb))
1475                 error(Eio);
1476         if (off + n >= sizeof(f->ndb) - 1)
1477                 error(Eio);
1478         memmove(f->ndb + off, a, n);
1479         f->ndb[off + n] = 0;
1480         f->ndbvers++;
1481         f->ndbmtime = seconds();
1482         return n;
1483 }
1484
1485 uint32_t scalednconv(void)
1486 {
1487         //if(conf.npage*BY2PG >= 128*MB)
1488         return Nchans * 4;
1489         //  return Nchans;
1490 }