Allow non-blocking listens
[akaros.git] / kern / src / net / devip.c
1 // INFERNO
2 #include <vfs.h>
3 #include <kfs.h>
4 #include <slab.h>
5 #include <kmalloc.h>
6 #include <kref.h>
7 #include <string.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include <error.h>
11 #include <cpio.h>
12 #include <pmap.h>
13 #include <smp.h>
14 #include <ip.h>
15
/*
 * File types served by this device, plus the constants that describe how a
 * qid path is packed.  A path is built by QID(proto, conv, type):
 *   bits [0, Logtype)                      file type (one of the Q* values)
 *   bits [Shiftconv, Shiftconv + Logconv)  conversation index
 *   bits [Shiftproto, Shiftproto+Logproto) protocol index
 * The Q*base aliases mark the first file of each directory level; ipgen adds
 * the directory-read index to them to pick the file to generate.
 */
enum {
        Qtopdir = 1,                            /* top level directory */
        Qtopbase,                               /* first file in the top level directory */
        Qarp = Qtopbase,
        Qbootp,
        Qndb,
        Qiproute,
        Qiprouter,
        Qipselftab,
        Qlog,

        Qprotodir,      /* directory for a protocol */
        Qprotobase,     /* first file in a protocol directory */
        Qclone = Qprotobase,
        Qstats,

        Qconvdir,       /* directory for a conversation */
        Qconvbase,      /* first file in a conversation directory */
        Qctl = Qconvbase,
        Qdata,
        Qerr,
        Qlisten,
        Qlocal,
        Qremote,
        Qstatus,
        Qsnoop,

        /* widths, masks and shifts of the fields packed into a qid path */
        Logtype = 5,
        Masktype = (1 << Logtype) - 1,
        Logconv = 12,
        Maskconv = (1 << Logconv) - 1,
        Shiftconv = Logtype,
        Logproto = 8,
        Maskproto = (1 << Logproto) - 1,
        Shiftproto = Logtype + Logconv,

        Nfs = 32,       /* number of slots in ipfs[], i.e. valid device numbers */
};
/*
 * Accessors for the fields packed into a qid path.  TYPE, CONV and PROTO take
 * a struct qid (they read its .path member); QID builds a path from a
 * protocol index, a conversation index and a Q* file type.
 */
#define TYPE(x)         ( ((uint32_t)(x).path) & Masktype )
#define CONV(x)         ( (((uint32_t)(x).path) >> Shiftconv) & Maskconv )
#define PROTO(x)        ( (((uint32_t)(x).path) >> Shiftproto) & Maskproto )
#define QID(p, c, y)    ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y))
/* owner name reported for files that do not belong to a conversation's owner */
static char network[] = "network";

/* taken by ipgetfs() while it looks up or creates an ipfs[] entry */
qlock_t fslock;
struct Fs *ipfs[Nfs];                   /* attached fs's */
struct queue *qlog;

/* defined elsewhere in the kernel */
extern void nullmediumlink(void);
extern void pktmediumlink(void);
extern char *eve;
/* forward declarations for functions defined later in this file */
static long ndbwrite(struct Fs *, char *unused_char_p_t, uint32_t, int);
static void closeconv(struct conv *);
69
70 static inline int founddevdir(struct chan *c, struct qid q, char *n,
71                                                           int64_t length, char *user, long perm,
72                                                           struct dir *db)
73 {
74         devdir(c, q, n, length, user, perm, db);
75         return 1;
76 }
77
78 static int topdirgen(struct chan *c, struct dir *dp)
79 {
80         struct qid q;
81         mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
82         snprintf(get_cur_genbuf(), GENBUF_SZ, "#I%lu", c->dev);
83         return founddevdir(c, q, get_cur_genbuf(), 0, network, 0555, dp);
84 }
85
86
87 static int ip3gen(struct chan *c, int i, struct dir *dp)
88 {
89         struct qid q;
90         struct conv *cv;
91         char *p;
92
93         cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
94         if (cv->owner == NULL)
95                 kstrdup(&cv->owner, eve);
96         mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
97
98         switch (i) {
99                 default:
100                         return -1;
101                 case Qctl:
102                         return founddevdir(c, q, "ctl", 0,
103                                                    cv->owner, cv->perm, dp);
104                 case Qdata:
105                         return founddevdir(c, q, "data", qlen(cv->rq),
106                                                            cv->owner, cv->perm, dp);
107                 case Qerr:
108                         return founddevdir(c, q, "err", qlen(cv->eq),
109                                                            cv->owner, cv->perm, dp);
110                 case Qlisten:
111                         return founddevdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
112                 case Qlocal:
113                         p = "local";
114                         break;
115                 case Qremote:
116                         p = "remote";
117                         break;
118                 case Qsnoop:
119                         if (strcmp(cv->p->name, "ipifc") != 0)
120                                 return -1;
121                         return founddevdir(c, q, "snoop", qlen(cv->sq),
122                                                            cv->owner, 0400, dp);
123                 case Qstatus:
124                         p = "status";
125                         break;
126         }
127         return founddevdir(c, q, p, 0, cv->owner, 0444, dp);
128 }
129
130 static int ip2gen(struct chan *c, int i, struct dir *dp)
131 {
132         struct qid q;
133         mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
134         switch (i) {
135                 case Qclone:
136                         return founddevdir(c, q, "clone", 0, network, 0666, dp);
137                 case Qstats:
138                         return founddevdir(c, q, "stats", 0, network, 0444, dp);
139         }
140         return -1;
141 }
142
143 static int ip1gen(struct chan *c, int i, struct dir *dp)
144 {
145         struct qid q;
146         char *p;
147         int prot;
148         int len = 0;
149         struct Fs *f;
150         extern uint32_t kerndate;
151
152         f = ipfs[c->dev];
153
154         prot = 0666;
155         mkqid(&q, QID(0, 0, i), 0, QTFILE);
156         switch (i) {
157                 default:
158                         return -1;
159                 case Qarp:
160                         p = "arp";
161                         break;
162                 case Qbootp:
163                         if (bootp == NULL)
164                                 return 0;
165                         p = "bootp";
166                         break;
167                 case Qndb:
168                         p = "ndb";
169                         len = strlen(f->ndb);
170                         q.vers = f->ndbvers;
171                         break;
172                 case Qiproute:
173                         p = "iproute";
174                         break;
175                 case Qipselftab:
176                         p = "ipselftab";
177                         prot = 0444;
178                         break;
179                 case Qiprouter:
180                         p = "iprouter";
181                         break;
182                 case Qlog:
183                         p = "log";
184                         break;
185         }
186         devdir(c, q, p, len, network, prot, dp);
187         if (i == Qndb && f->ndbmtime > kerndate)
188                 dp->mtime = f->ndbmtime;
189         return 1;
190 }
191
192 static int
193 ipgen(struct chan *c, char *unused_char_p_t, struct dirtab *d, int unused_int,
194           int s, struct dir *dp)
195 {
196         struct qid q;
197         struct conv *cv;
198         struct Fs *f;
199
200         f = ipfs[c->dev];
201
202         switch (TYPE(c->qid)) {
203                 case Qtopdir:
204                         if (s == DEVDOTDOT)
205                                 return topdirgen(c, dp);
206                         if (s < f->np) {
207                                 if (f->p[s]->connect == NULL)
208                                         return 0;       /* protocol with no user interface */
209                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
210                                 return founddevdir(c, q, f->p[s]->name, 0, network, 0555, dp);
211                         }
212                         s -= f->np;
213                         return ip1gen(c, s + Qtopbase, dp);
214                 case Qarp:
215                 case Qbootp:
216                 case Qndb:
217                 case Qlog:
218                 case Qiproute:
219                 case Qiprouter:
220                 case Qipselftab:
221                         return ip1gen(c, TYPE(c->qid), dp);
222                 case Qprotodir:
223                         if (s == DEVDOTDOT)
224                                 return topdirgen(c, dp);
225                         else if (s < f->p[PROTO(c->qid)]->ac) {
226                                 cv = f->p[PROTO(c->qid)]->conv[s];
227                                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%d", s);
228                                 mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
229                                 return
230                                         founddevdir(c, q, get_cur_genbuf(), 0, cv->owner, 0555, dp);
231                         }
232                         s -= f->p[PROTO(c->qid)]->ac;
233                         return ip2gen(c, s + Qprotobase, dp);
234                 case Qclone:
235                 case Qstats:
236                         return ip2gen(c, TYPE(c->qid), dp);
237                 case Qconvdir:
238                         if (s == DEVDOTDOT) {
239                                 s = PROTO(c->qid);
240                                 mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
241                                 devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
242                                 return 1;
243                         }
244                         return ip3gen(c, s + Qconvbase, dp);
245                 case Qctl:
246                 case Qdata:
247                 case Qerr:
248                 case Qlisten:
249                 case Qlocal:
250                 case Qremote:
251                 case Qstatus:
252                 case Qsnoop:
253                         return ip3gen(c, TYPE(c->qid), dp);
254         }
255         return -1;
256 }
257
/*
 * One-time device initialisation: sets up fslock (the lock ipgetfs() takes
 * around ipfs[]) and calls the link routines of the null and pkt media.
 */
static void ipinit(void)
{
        qlock_init(&fslock);
        nullmediumlink();
        pktmediumlink();
/* if only
        fmtinstall('i', eipfmt);
        fmtinstall('I', eipfmt);
        fmtinstall('E', eipfmt);
        fmtinstall('V', eipfmt);
        fmtinstall('M', eipfmt);
*/
}
271
/* Device reset hook; this device has nothing to do here. */
static void ipreset(void)
{
}
275
276 static struct Fs *ipgetfs(int dev)
277 {
278         extern void (*ipprotoinit[]) (struct Fs *);
279         struct Fs *f;
280         int i;
281
282         if (dev >= Nfs)
283                 return NULL;
284
285         qlock(&fslock);
286         if (ipfs[dev] == NULL) {
287                 f = kzmalloc(sizeof(struct Fs), KMALLOC_WAIT);
288                 rwinit(&f->rwlock);
289                 qlock_init(&f->iprouter.qlock);
290                 ip_init(f);
291                 arpinit(f);
292                 netloginit(f);
293                 for (i = 0; ipprotoinit[i]; i++)
294                         ipprotoinit[i] (f);
295                 f->dev = dev;
296                 ipfs[dev] = f;
297         }
298         qunlock(&fslock);
299
300         return ipfs[dev];
301 }
302
303 struct IPaux *newipaux(char *owner, char *tag)
304 {
305         struct IPaux *a;
306         int n;
307
308         a = kzmalloc(sizeof(*a), 0);
309         kstrdup(&a->owner, owner);
310         memset(a->tag, ' ', sizeof(a->tag));
311         n = strlen(tag);
312         if (n > sizeof(a->tag))
313                 n = sizeof(a->tag);
314         memmove(a->tag, tag, n);
315         return a;
316 }
317
318 #define ATTACHER(c) (((struct IPaux*)((c)->aux))->owner)
319
320 static struct chan *ipattach(char *spec)
321 {
322         struct chan *c;
323         int dev;
324
325         dev = atoi(spec);
326         if (dev >= Nfs)
327                 error("bad specification");
328
329         ipgetfs(dev);
330         c = devattach('I', spec);
331         mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
332         c->dev = dev;
333
334         c->aux = newipaux(commonuser(), "none");
335
336         return c;
337 }
338
339 static struct walkqid *ipwalk(struct chan *c, struct chan *nc, char **name,
340                                                           int nname)
341 {
342         struct IPaux *a = c->aux;
343         struct walkqid *w;
344
345         w = devwalk(c, nc, name, nname, NULL, 0, ipgen);
346         if (w != NULL && w->clone != NULL)
347                 w->clone->aux = newipaux(a->owner, a->tag);
348         return w;
349 }
350
351 static int ipstat(struct chan *c, uint8_t * db, int n)
352 {
353         return devstat(c, db, n, NULL, 0, ipgen);
354 }
355
356 static int should_wake(void *arg)
357 {
358         struct conv *cv = arg;
359         /* signal that the conv is closed */
360         if (qisclosed(cv->rq))
361                 return TRUE;
362         return cv->incall != NULL;
363 }
364
365 static int m2p[] = {
366         [OREAD] 4,
367         [OWRITE] 2,
368         [ORDWR] 6
369 };
370
/*
 * Open handler.  What happens depends on the type of file c refers to:
 *  - ndb: only eve may open it for writing/truncation; a write+truncate open
 *    empties the database text.
 *  - log, iprouter: the matching *open() hook is called.
 *  - directories and the read-only text files: must be opened read-only.
 *  - snoop: read-only, owner or eve only; bumps the snooper count.
 *  - clone: allocates a new conversation and turns c into its ctl file.
 *  - ctl, data, err: permission check, then takes a reference (inuse) on the
 *    conversation; the first opener becomes its owner.
 *  - listen: waits for an incoming call on an announced conversation and
 *    turns c into the ctl file of the new conversation.
 * On success c is marked open and returned; failures are raised via error().
 */
static struct chan *ipopen(struct chan *c, int omode)
{
        ERRSTACK(2);
        struct conv *cv, *nc;
        struct Proto *p;
        int perm;
        struct Fs *f;

        /* permission bits required by this open mode.
         * NOTE(review): m2p only has entries for OREAD, OWRITE and ORDWR;
         * confirm (omode & 3) can never index past the end of the table. */
        perm = m2p[omode & 3];

        f = ipfs[c->dev];

        switch (TYPE(c->qid)) {
                default:
                        break;
                case Qndb:
                        if (omode & (OWRITE | OTRUNC) && !iseve())
                                error(Eperm);
                        /* opening for write with truncate clears the database */
                        if ((omode & (OWRITE | OTRUNC)) == (OWRITE | OTRUNC))
                                f->ndb[0] = 0;
                        break;
                case Qlog:
                        netlogopen(f);
                        break;
                case Qiprouter:
                        iprouteropen(f);
                        break;
                case Qiproute:
                        break;
                case Qtopdir:
                case Qprotodir:
                case Qconvdir:
                case Qstatus:
                case Qremote:
                case Qlocal:
                case Qstats:
                case Qbootp:
                case Qipselftab:
                        if (!IS_RDONLY(omode))
                                error(Eperm);
                        break;
                case Qsnoop:
                        if (!IS_RDONLY(omode))
                                error(Eperm);
                        p = f->p[PROTO(c->qid)];
                        cv = p->conv[CONV(c->qid)];
                        if (strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
                                error(Eperm);
                        /* undone by ipclose() */
                        atomic_inc(&cv->snoopers);
                        break;
                case Qclone:
                        p = f->p[PROTO(c->qid)];
                        qlock(&p->qlock);
                        /* drop the protocol lock if Fsprotoclone raises an error */
                        if (waserror()) {
                                qunlock(&p->qlock);
                                nexterror();
                        }
                        cv = Fsprotoclone(p, ATTACHER(c));
                        qunlock(&p->qlock);
                        poperror();
                        if (cv == NULL) {
                                error(Enodev);
                                break;
                        }
                        /* we only honor nonblock on a clone */
                        if (c->flag & CNONBLOCK)
                                Fsconvnonblock(cv, TRUE);
                        /* the channel now refers to the new conversation's ctl file */
                        mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);
                        break;
                case Qdata:
                case Qctl:
                case Qerr:
                        p = f->p[PROTO(c->qid)];
                        qlock(&p->qlock);
                        cv = p->conv[CONV(c->qid)];
                        qlock(&cv->qlock);
                        /* release both locks, innermost first, on any error below */
                        if (waserror()) {
                                qunlock(&cv->qlock);
                                qunlock(&p->qlock);
                                nexterror();
                        }
                        /* if the bits in (cv->perm >> 6) do not grant the access, the
                         * attacher must be the owner and the unshifted bits must
                         * grant it */
                        if ((perm & (cv->perm >> 6)) != perm) {
                                if (strcmp(ATTACHER(c), cv->owner) != 0)
                                        error(Eperm);
                                if ((perm & cv->perm) != perm)
                                        error(Eperm);

                        }
                        cv->inuse++;
                        /* first opener takes ownership and resets the permissions */
                        if (cv->inuse == 1) {
                                kstrdup(&cv->owner, ATTACHER(c));
                                cv->perm = 0660;
                        }
                        qunlock(&cv->qlock);
                        qunlock(&p->qlock);
                        poperror();
                        break;
                case Qlisten:
                        cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
                        /* same permission rule as for ctl/data/err above */
                        if ((perm & (cv->perm >> 6)) != perm) {
                                if (strcmp(ATTACHER(c), cv->owner) != 0)
                                        error(Eperm);
                                if ((perm & cv->perm) != perm)
                                        error(Eperm);

                        }

                        if (cv->state != Announced)
                                error("not announced");

                        /* outer handler: give back the reference taken just below */
                        if (waserror()) {
                                closeconv(cv);
                                nexterror();
                        }
                        qlock(&cv->qlock);
                        cv->inuse++;
                        qunlock(&cv->qlock);

                        nc = NULL;
                        while (nc == NULL) {
                                /* give up if we got a hangup */
                                if (qisclosed(cv->rq))
                                        error("listen hungup");

                                qlock(&cv->listenq);
                                /* inner handler: drop listenq, then unwind to the outer one */
                                if (waserror()) {
                                        qunlock(&cv->listenq);
                                        nexterror();
                                }
                                /* we can peek at incall without grabbing the cv qlock.  if
                                 * anything is there, it'll remain there until we dequeue it.
                                 * no one else can, since we hold the listenq lock */
                                if (cv->nonblock && !cv->incall) {
                                        set_errno(EAGAIN);
                                        error("listen queue empty");
                                }
                                /* wait for a connect */
                                rendez_sleep(&cv->listenr, should_wake, cv);

                                /* if there is a concurrent hangup, they will hold the qlock
                                 * until the hangup is complete, including closing the cv->rq */
                                qlock(&cv->qlock);
                                nc = cv->incall;
                                if (nc != NULL) {
                                        /* dequeue the call; c becomes its ctl file */
                                        cv->incall = nc->next;
                                        mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
                                        kstrdup(&cv->owner, ATTACHER(c));
                                        /* TODO: If we want to support something like accept4(),
                                         * where the new conversations are nonblocking right away,
                                         * we can do so here. */
                                }
                                qunlock(&cv->qlock);

                                qunlock(&cv->listenq);
                                poperror();
                        }
                        /* done listening: release our reference on the listening conv */
                        closeconv(cv);
                        poperror();
                        break;
        }
        c->mode = openmode(omode);
        c->flag |= COPEN;
        c->offset = 0;
        return c;
}
536
537 static int ipwstat(struct chan *c, uint8_t * dp, int n)
538 {
539         ERRSTACK(2);
540         struct dir *d;
541         struct conv *cv;
542         struct Fs *f;
543         struct Proto *p;
544
545         f = ipfs[c->dev];
546         switch (TYPE(c->qid)) {
547                 default:
548                         error(Eperm);
549                         break;
550                 case Qctl:
551                 case Qdata:
552                         break;
553         }
554
555         d = kzmalloc(sizeof(*d) + n, 0);
556         if (waserror()) {
557                 kfree(d);
558                 nexterror();
559         }
560         n = convM2D(dp, n, d, (char *)&d[1]);
561         if (n == 0)
562                 error(Eshortstat);
563         p = f->p[PROTO(c->qid)];
564         cv = p->conv[CONV(c->qid)];
565         if (!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
566                 error(Eperm);
567         if (!emptystr(d->uid))
568                 kstrdup(&cv->owner, d->uid);
569         if (d->mode != ~0UL)
570                 cv->perm = d->mode & 0777;
571         poperror();
572         kfree(d);
573         return n;
574 }
575
576 /* Should be able to handle any file type chan. Feel free to extend it. */
577 static char *ipchaninfo(struct chan *ch, char *ret, size_t ret_l)
578 {
579         struct conv *conv;
580         struct Proto *proto;
581         char *p;
582         struct Fs *f;
583
584         f = ipfs[ch->dev];
585
586         switch (TYPE(ch->qid)) {
587                 default:
588                         ret = "Unknown type";
589                         break;
590                 case Qdata:
591                         proto = f->p[PROTO(ch->qid)];
592                         conv = proto->conv[CONV(ch->qid)];
593                         snprintf(ret, ret_l, "Qdata, proto %s, conv idx %d", proto->name,
594                                          conv->x);
595                         break;
596                 case Qarp:
597                         ret = "Qarp";
598                         break;
599                 case Qiproute:
600                         ret = "Qiproute";
601                         break;
602                 case Qlog:
603                         ret = "Qlog";
604                         break;
605                 case Qndb:
606                         ret = "Qndb";
607                         break;
608                 case Qctl:
609                         proto = f->p[PROTO(ch->qid)];
610                         conv = proto->conv[CONV(ch->qid)];
611                         snprintf(ret, ret_l, "Qctl, proto %s, conv idx %d", proto->name,
612                                          conv->x);
613                         break;
614         }
615         return ret;
616 }
617
618 static void closeconv(struct conv *cv)
619 {
620         struct conv *nc;
621         struct Ipmulti *mp;
622
623         qlock(&cv->qlock);
624
625         if (--cv->inuse > 0) {
626                 qunlock(&cv->qlock);
627                 return;
628         }
629
630         /* close all incoming calls since no listen will ever happen */
631         for (nc = cv->incall; nc; nc = cv->incall) {
632                 cv->incall = nc->next;
633                 closeconv(nc);
634         }
635         cv->incall = NULL;
636
637         kstrdup(&cv->owner, network);
638         cv->perm = 0660;
639
640         while ((mp = cv->multi) != NULL)
641                 ipifcremmulti(cv, mp->ma, mp->ia);
642
643         cv->r = NULL;
644         cv->rgen = 0;
645         cv->p->close(cv);
646         cv->state = Idle;
647         qunlock(&cv->qlock);
648 }
649
650 static void ipclose(struct chan *c)
651 {
652         struct Fs *f;
653
654         f = ipfs[c->dev];
655         switch (TYPE(c->qid)) {
656                 default:
657                         break;
658                 case Qlog:
659                         if (c->flag & COPEN)
660                                 netlogclose(f);
661                         break;
662                 case Qiprouter:
663                         if (c->flag & COPEN)
664                                 iprouterclose(f);
665                         break;
666                 case Qdata:
667                 case Qctl:
668                 case Qerr:
669                         if (c->flag & COPEN)
670                                 closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
671                         break;
672                 case Qsnoop:
673                         if (c->flag & COPEN)
674                                 atomic_dec(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
675                         break;
676         }
677         kfree(((struct IPaux *)c->aux)->owner);
678         kfree(c->aux);
679 }
680
681 enum {
682         Statelen = 32 * 1024,
683 };
684
685 static long ipread(struct chan *ch, void *a, long n, int64_t off)
686 {
687         struct conv *c;
688         struct Proto *x;
689         char *buf, *p;
690         long rv;
691         struct Fs *f;
692         uint32_t offset = off;
693         size_t sofar;
694
695         f = ipfs[ch->dev];
696
697         p = a;
698         switch (TYPE(ch->qid)) {
699                 default:
700                         error(Eperm);
701                 case Qtopdir:
702                 case Qprotodir:
703                 case Qconvdir:
704                         return devdirread(ch, a, n, 0, 0, ipgen);
705                 case Qarp:
706                         return arpread(f->arp, a, offset, n);
707                 case Qbootp:
708                         return bootpread(a, offset, n);
709                 case Qndb:
710                         return readstr(offset, a, n, f->ndb);
711                 case Qiproute:
712                         return routeread(f, a, offset, n);
713                 case Qiprouter:
714                         return iprouterread(f, a, n);
715                 case Qipselftab:
716                         return ipselftabread(f, a, offset, n);
717                 case Qlog:
718                         return netlogread(f, a, offset, n);
719                 case Qctl:
720                         snprintf(get_cur_genbuf(), GENBUF_SZ, "%lu", CONV(ch->qid));
721                         return readstr(offset, p, n, get_cur_genbuf());
722                 case Qremote:
723                         buf = kzmalloc(Statelen, 0);
724                         x = f->p[PROTO(ch->qid)];
725                         c = x->conv[CONV(ch->qid)];
726                         if (x->remote == NULL) {
727                                 snprintf(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
728                         } else {
729                                 (*x->remote) (c, buf, Statelen - 2);
730                         }
731                         rv = readstr(offset, p, n, buf);
732                         kfree(buf);
733                         return rv;
734                 case Qlocal:
735                         buf = kzmalloc(Statelen, 0);
736                         x = f->p[PROTO(ch->qid)];
737                         c = x->conv[CONV(ch->qid)];
738                         if (x->local == NULL) {
739                                 snprintf(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
740                         } else {
741                                 (*x->local) (c, buf, Statelen - 2);
742                         }
743                         rv = readstr(offset, p, n, buf);
744                         kfree(buf);
745                         return rv;
746                 case Qstatus:
747                         /* this all is a bit screwed up since the size of some state's
748                          * buffers will change from one invocation to another.  a reader
749                          * will come in and read the entire buffer.  then it will come again
750                          * and read from the next offset, expecting EOF.  if the buffer
751                          * changed sizes, it'll reprint the end of the buffer slightly. */
752                         buf = kzmalloc(Statelen, 0);
753                         x = f->p[PROTO(ch->qid)];
754                         c = x->conv[CONV(ch->qid)];
755                         sofar = (*x->state) (c, buf, Statelen - 2);
756                         sofar += snprintf(buf + sofar, Statelen - 2 - sofar, "nonblock %s\n",
757                                           c->nonblock ? "on" : "off");
758                         rv = readstr(offset, p, n, buf);
759                         kfree(buf);
760                         return rv;
761                 case Qdata:
762                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
763                         return qread(c->rq, a, n);
764                 case Qerr:
765                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
766                         return qread(c->eq, a, n);
767                 case Qsnoop:
768                         c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
769                         return qread(c->sq, a, n);
770                 case Qstats:
771                         x = f->p[PROTO(ch->qid)];
772                         if (x->stats == NULL)
773                                 error("stats not implemented");
774                         buf = kzmalloc(Statelen, 0);
775                         (*x->stats) (x, buf, Statelen);
776                         rv = readstr(offset, p, n, buf);
777                         kfree(buf);
778                         return rv;
779         }
780 }
781
782 static struct block *ipbread(struct chan *ch, long n, uint32_t offset)
783 {
784         struct conv *c;
785         struct Proto *x;
786         struct Fs *f;
787
788         switch (TYPE(ch->qid)) {
789                 case Qdata:
790                         f = ipfs[ch->dev];
791                         x = f->p[PROTO(ch->qid)];
792                         c = x->conv[CONV(ch->qid)];
793                         return qbread(c->rq, n);
794                 default:
795                         return devbread(ch, n, offset);
796         }
797 }
798
/*
 *  set local address to be that of the ifc closest to remote address
 *
 *  The choice is made by findlocalip() from c->raddr; the result is stored
 *  in c->laddr.
 */
static void setladdr(struct conv *c)
{
        findlocalip(c->p->f, c->laddr, c->raddr);
}
806
807 /*
808  *  set a local port making sure the quad of raddr,rport,laddr,lport is unique
809  */
810 static char *setluniqueport(struct conv *c, int lport)
811 {
812         struct Proto *p;
813         struct conv *xp;
814         int x;
815
816         p = c->p;
817
818         qlock(&p->qlock);
819         for (x = 0; x < p->nc; x++) {
820                 xp = p->conv[x];
821                 if (xp == NULL)
822                         break;
823                 if (xp == c)
824                         continue;
825                 if ((xp->state == Connected || xp->state == Announced)
826                         && xp->lport == lport
827                         && xp->rport == c->rport
828                         && ipcmp(xp->raddr, c->raddr) == 0
829                         && ipcmp(xp->laddr, c->laddr) == 0) {
830                         qunlock(&p->qlock);
831                         return "address in use";
832                 }
833         }
834         c->lport = lport;
835         qunlock(&p->qlock);
836         return NULL;
837 }
838
839 /*
840  *  pick a local port and set it
841  */
842 static void setlport(struct conv *c)
843 {
844         struct Proto *p;
845         uint16_t *pp;
846         int x, found;
847
848         p = c->p;
849         if (c->restricted)
850                 pp = &p->nextrport;
851         else
852                 pp = &p->nextport;
853         qlock(&p->qlock);
854         for (;; (*pp)++) {
855                 /*
856                  * Fsproto initialises p->nextport to 0 and the restricted
857                  * ports (p->nextrport) to 600.
858                  * Restricted ports must lie between 600 and 1024.
859                  * For the initial condition or if the unrestricted port number
860                  * has wrapped round, select a random port between 5000 and 1<<15
861                  * to start at.
862                  */
863                 if (c->restricted) {
864                         if (*pp >= 1024)
865                                 *pp = 600;
866                 } else
867                         while (*pp < 5000)
868                                 *pp = nrand(1 << 15);
869
870                 found = 0;
871                 for (x = 0; x < p->nc; x++) {
872                         if (p->conv[x] == NULL)
873                                 break;
874                         if (p->conv[x]->lport == *pp) {
875                                 found = 1;
876                                 break;
877                         }
878                 }
879                 if (!found)
880                         break;
881         }
882         c->lport = (*pp)++;
883         qunlock(&p->qlock);
884 }
885
886 /*
887  *  set a local address and port from a string of the form
888  *      [address!]port[!r]
889  */
890 static char *setladdrport(struct conv *c, char *str, int announcing)
891 {
892         char *p;
893         char *rv;
894         uint16_t lport;
895         uint8_t addr[IPaddrlen];
896
897         rv = NULL;
898
899         /*
900          *  ignore restricted part if it exists.  it's
901          *  meaningless on local ports.
902          */
903         p = strchr(str, '!');
904         if (p != NULL) {
905                 *p++ = 0;
906                 if (strcmp(p, "r") == 0)
907                         p = NULL;
908         }
909
910         c->lport = 0;
911         if (p == NULL) {
912                 if (announcing)
913                         ipmove(c->laddr, IPnoaddr);
914                 else
915                         setladdr(c);
916                 p = str;
917         } else {
918                 if (strcmp(str, "*") == 0)
919                         ipmove(c->laddr, IPnoaddr);
920                 else {
921                         parseip(addr, str);
922                         if (ipforme(c->p->f, addr))
923                                 ipmove(c->laddr, addr);
924                         else
925                                 return "not a local IP address";
926                 }
927         }
928
929         /* one process can get all connections */
930         if (announcing && strcmp(p, "*") == 0) {
931                 if (!iseve())
932                         error(Eperm);
933                 return setluniqueport(c, 0);
934         }
935
936         lport = atoi(p);
937         if (lport <= 0)
938                 setlport(c);
939         else
940                 rv = setluniqueport(c, lport);
941         return rv;
942 }
943
944 static char *setraddrport(struct conv *c, char *str)
945 {
946         char *p;
947
948         p = strchr(str, '!');
949         if (p == NULL)
950                 return "malformed address";
951         *p++ = 0;
952         parseip(c->raddr, str);
953         c->rport = atoi(p);
954         p = strchr(p, '!');
955         if (p) {
956                 if (strstr(p, "!r") != NULL)
957                         c->restricted = 1;
958         }
959         return NULL;
960 }
961
962 /*
963  *  called by protocol connect routine to set addresses
964  */
965 char *Fsstdconnect(struct conv *c, char *argv[], int argc)
966 {
967         char *p;
968
969         switch (argc) {
970                 default:
971                         return "bad args to connect";
972                 case 2:
973                         p = setraddrport(c, argv[1]);
974                         if (p != NULL)
975                                 return p;
976                         setladdr(c);
977                         setlport(c);
978                         break;
979                 case 3:
980                         p = setraddrport(c, argv[1]);
981                         if (p != NULL)
982                                 return p;
983                         p = setladdrport(c, argv[2], 0);
984                         if (p != NULL)
985                                 return p;
986         }
987
988         if ((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
989                  memcmp(c->laddr, v4prefix, IPv4off) == 0)
990                 || ipcmp(c->raddr, IPnoaddr) == 0)
991                 c->ipversion = V4;
992         else
993                 c->ipversion = V6;
994
995         return NULL;
996 }
997
998 /*
999  *  initiate connection and sleep till its set up
1000  */
1001 static int connected(void *a)
1002 {
1003         return ((struct conv *)a)->state == Connected;
1004 }
1005
1006 static void connectctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1007 {
1008         ERRSTACK(1);
1009         char *p;
1010
1011         if (c->state != 0)
1012                 error(Econinuse);
1013         c->state = Connecting;
1014         c->cerr[0] = '\0';
1015         if (x->connect == NULL)
1016                 error("connect not supported");
1017         p = x->connect(c, cb->f, cb->nf);
1018         if (p != NULL)
1019                 error(p);
1020
1021         qunlock(&c->qlock);
1022         if (waserror()) {
1023                 qlock(&c->qlock);
1024                 nexterror();
1025         }
1026         rendez_sleep(&c->cr, connected, c);
1027         qlock(&c->qlock);
1028         poperror();
1029
1030         if (c->cerr[0] != '\0')
1031                 error(c->cerr);
1032 }
1033
1034 /*
1035  *  called by protocol announce routine to set addresses
1036  */
1037 char *Fsstdannounce(struct conv *c, char *argv[], int argc)
1038 {
1039         memset(c->raddr, 0, sizeof(c->raddr));
1040         c->rport = 0;
1041         switch (argc) {
1042                 default:
1043                         return "bad args to announce";
1044                 case 2:
1045                         return setladdrport(c, argv[1], 1);
1046         }
1047 }
1048
1049 /*
1050  *  initiate announcement and sleep till its set up
1051  */
1052 static int announced(void *a)
1053 {
1054         return ((struct conv *)a)->state == Announced;
1055 }
1056
1057 static void announcectlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1058 {
1059         ERRSTACK(1);
1060         char *p;
1061
1062         if (c->state != 0)
1063                 error(Econinuse);
1064         c->state = Announcing;
1065         c->cerr[0] = '\0';
1066         if (x->announce == NULL)
1067                 error("announce not supported");
1068         p = x->announce(c, cb->f, cb->nf);
1069         if (p != NULL)
1070                 error(p);
1071
1072         qunlock(&c->qlock);
1073         if (waserror()) {
1074                 qlock(&c->qlock);
1075                 nexterror();
1076         }
1077         rendez_sleep(&c->cr, announced, c);
1078         qlock(&c->qlock);
1079         poperror();
1080
1081         if (c->cerr[0] != '\0')
1082                 error(c->cerr);
1083 }
1084
/*
 *  Called by a protocol's bind routine to set the addresses.
 *
 *  Exactly two arguments are accepted; argv[1] is the local
 *  [address!]port, handed to setladdrport() with announcing clear.
 *
 *  Returns NULL on success or a static error string.
 */
char *Fsstdbind(struct conv *c, char *argv[], int argc)
{
	if (argc != 2)
		return "bad args to bind";
	return setladdrport(c, argv[1], 0);
}
1097
1098 void Fsconvnonblock(struct conv *cv, bool onoff)
1099 {
1100         qnonblock(cv->wq, onoff);
1101         qnonblock(cv->rq, onoff);
1102         cv->nonblock = onoff;
1103 }
1104
1105 static void bindctlmsg(struct Proto *x, struct conv *c, struct cmdbuf *cb)
1106 {
1107         char *p;
1108
1109         if (x->bind == NULL)
1110                 p = Fsstdbind(c, cb->f, cb->nf);
1111         else
1112                 p = x->bind(c, cb->f, cb->nf);
1113         if (p != NULL)
1114                 error(p);
1115 }
1116
1117 static void nonblockctlmsg(struct conv *c, struct cmdbuf *cb)
1118 {
1119         if (cb->nf < 2)
1120                 goto err;
1121         if (!strcmp(cb->f[1], "on"))
1122                 Fsconvnonblock(c, TRUE);
1123         else if (!strcmp(cb->f[1], "off"))
1124                 Fsconvnonblock(c, FALSE);
1125         else
1126                 goto err;
1127         return;
1128 err:
1129         set_errno(EINVAL);
1130         error("nonblock [on|off]");
1131 }
1132
1133 static void tosctlmsg(struct conv *c, struct cmdbuf *cb)
1134 {
1135         if (cb->nf < 2)
1136                 c->tos = 0;
1137         else
1138                 c->tos = atoi(cb->f[1]);
1139 }
1140
1141 static void ttlctlmsg(struct conv *c, struct cmdbuf *cb)
1142 {
1143         if (cb->nf < 2)
1144                 c->ttl = MAXTTL;
1145         else
1146                 c->ttl = atoi(cb->f[1]);
1147 }
1148
1149 static long ipwrite(struct chan *ch, void *v, long n, int64_t off)
1150 {
1151         ERRSTACK(1);
1152         struct conv *c;
1153         struct Proto *x;
1154         char *p;
1155         struct cmdbuf *cb;
1156         uint8_t ia[IPaddrlen], ma[IPaddrlen];
1157         struct Fs *f;
1158         char *a;
1159
1160         a = v;
1161         f = ipfs[ch->dev];
1162
1163         switch (TYPE(ch->qid)) {
1164                 default:
1165                         error(Eperm);
1166                 case Qdata:
1167                         x = f->p[PROTO(ch->qid)];
1168                         c = x->conv[CONV(ch->qid)];
1169
1170                         if (c->wq == NULL)
1171                                 error(Eperm);
1172
1173                         qwrite(c->wq, a, n);
1174                         break;
1175                 case Qarp:
1176                         return arpwrite(f, a, n);
1177                 case Qiproute:
1178                         return routewrite(f, ch, a, n);
1179                 case Qlog:
1180                         netlogctl(f, a, n);
1181                         return n;
1182                 case Qndb:
1183                         return ndbwrite(f, a, off, n);
1184                 case Qctl:
1185                         x = f->p[PROTO(ch->qid)];
1186                         c = x->conv[CONV(ch->qid)];
1187                         cb = parsecmd(a, n);
1188
1189                         qlock(&c->qlock);
1190                         if (waserror()) {
1191                                 qunlock(&c->qlock);
1192                                 kfree(cb);
1193                                 nexterror();
1194                         }
1195                         if (cb->nf < 1)
1196                                 error("short control request");
1197                         if (strcmp(cb->f[0], "connect") == 0)
1198                                 connectctlmsg(x, c, cb);
1199                         else if (strcmp(cb->f[0], "announce") == 0)
1200                                 announcectlmsg(x, c, cb);
1201                         else if (strcmp(cb->f[0], "bind") == 0)
1202                                 bindctlmsg(x, c, cb);
1203                         else if (strcmp(cb->f[0], "nonblock") == 0)
1204                                 nonblockctlmsg(c, cb);
1205                         else if (strcmp(cb->f[0], "ttl") == 0)
1206                                 ttlctlmsg(c, cb);
1207                         else if (strcmp(cb->f[0], "tos") == 0)
1208                                 tosctlmsg(c, cb);
1209                         else if (strcmp(cb->f[0], "ignoreadvice") == 0)
1210                                 c->ignoreadvice = 1;
1211                         else if (strcmp(cb->f[0], "addmulti") == 0) {
1212                                 if (cb->nf < 2)
1213                                         error("addmulti needs interface address");
1214                                 if (cb->nf == 2) {
1215                                         if (!ipismulticast(c->raddr))
1216                                                 error("addmulti for a non multicast address");
1217                                         parseip(ia, cb->f[1]);
1218                                         ipifcaddmulti(c, c->raddr, ia);
1219                                 } else {
1220                                         parseip(ma, cb->f[2]);
1221                                         if (!ipismulticast(ma))
1222                                                 error("addmulti for a non multicast address");
1223                                         parseip(ia, cb->f[1]);
1224                                         ipifcaddmulti(c, ma, ia);
1225                                 }
1226                         } else if (strcmp(cb->f[0], "remmulti") == 0) {
1227                                 if (cb->nf < 2)
1228                                         error("remmulti needs interface address");
1229                                 if (!ipismulticast(c->raddr))
1230                                         error("remmulti for a non multicast address");
1231                                 parseip(ia, cb->f[1]);
1232                                 ipifcremmulti(c, c->raddr, ia);
1233                         } else if (x->ctl != NULL) {
1234                                 p = x->ctl(c, cb->f, cb->nf);
1235                                 if (p != NULL)
1236                                         error(p);
1237                         } else
1238                                 error("unknown control request");
1239                         qunlock(&c->qlock);
1240                         kfree(cb);
1241                         poperror();
1242         }
1243         return n;
1244 }
1245
1246 static long ipbwrite(struct chan *ch, struct block *bp, uint32_t offset)
1247 {
1248         struct conv *c;
1249         struct Proto *x;
1250         struct Fs *f;
1251         int n;
1252
1253         switch (TYPE(ch->qid)) {
1254                 case Qdata:
1255                         f = ipfs[ch->dev];
1256                         x = f->p[PROTO(ch->qid)];
1257                         c = x->conv[CONV(ch->qid)];
1258
1259                         if (c->wq == NULL)
1260                                 error(Eperm);
1261
1262                         if (bp->next)
1263                                 bp = concatblock(bp);
1264                         n = BLEN(bp);
1265                         qbwrite(c->wq, bp);
1266                         return n;
1267                 default:
1268                         return devbwrite(ch, bp, offset);
1269         }
1270 }
1271
1272 struct dev ipdevtab __devtab = {
1273         'I',
1274         "ip",
1275
1276         ipreset,
1277         ipinit,
1278         devshutdown,
1279         ipattach,
1280         ipwalk,
1281         ipstat,
1282         ipopen,
1283         devcreate,
1284         ipclose,
1285         ipread,
1286         ipbread,
1287         ipwrite,
1288         ipbwrite,
1289         devremove,
1290         ipwstat,
1291         devpower,
1292         ipchaninfo,
1293 };
1294
1295 int Fsproto(struct Fs *f, struct Proto *p)
1296 {
1297         if (f->np >= Maxproto)
1298                 return -1;
1299
1300         qlock_init(&p->qlock);
1301         p->f = f;
1302
1303         if (p->ipproto > 0) {
1304                 if (f->t2p[p->ipproto] != NULL)
1305                         return -1;
1306                 f->t2p[p->ipproto] = p;
1307         }
1308
1309         p->qid.type = QTDIR;
1310         p->qid.path = QID(f->np, 0, Qprotodir);
1311         p->conv = kzmalloc(sizeof(struct conv *) * (p->nc + 1), 0);
1312         if (p->conv == NULL)
1313                 panic("Fsproto");
1314
1315         p->x = f->np;
1316         p->nextport = 0;
1317         p->nextrport = 600;
1318         f->p[f->np++] = p;
1319
1320         return 0;
1321 }
1322
1323 /*
1324  *  return true if this protocol is
1325  *  built in
1326  */
1327 int Fsbuiltinproto(struct Fs *f, uint8_t proto)
1328 {
1329         return f->t2p[proto] != NULL;
1330 }
1331
1332 /*
1333  *  called with protocol locked
1334  */
1335 struct conv *Fsprotoclone(struct Proto *p, char *user)
1336 {
1337         struct conv *c, **pp, **ep;
1338
1339 retry:
1340         c = NULL;
1341         ep = &p->conv[p->nc];
1342         for (pp = p->conv; pp < ep; pp++) {
1343                 c = *pp;
1344                 if (c == NULL) {
1345                         c = kzmalloc(sizeof(struct conv), 0);
1346                         if (c == NULL)
1347                                 error(Enomem);
1348                         qlock_init(&c->qlock);
1349                         qlock_init(&c->listenq);
1350                         rendez_init(&c->cr);
1351                         rendez_init(&c->listenr);
1352                         qlock(&c->qlock);
1353                         c->p = p;
1354                         c->x = pp - p->conv;
1355                         if (p->ptclsize != 0) {
1356                                 c->ptcl = kzmalloc(p->ptclsize, 0);
1357                                 if (c->ptcl == NULL) {
1358                                         kfree(c);
1359                                         error(Enomem);
1360                                 }
1361                         }
1362                         *pp = c;
1363                         p->ac++;
1364                         c->eq = qopen(1024, Qmsg, 0, 0);
1365                         (*p->create) (c);
1366                         break;
1367                 }
1368                 if (canqlock(&c->qlock)) {
1369                         /*
1370                          *  make sure both processes and protocol
1371                          *  are done with this Conv
1372                          */
1373                         if (c->inuse == 0 && (p->inuse == NULL || (*p->inuse) (c) == 0))
1374                                 break;
1375
1376                         qunlock(&c->qlock);
1377                 }
1378         }
1379         if (pp >= ep) {
1380                 if (p->gc != NULL && (*p->gc) (p))
1381                         goto retry;
1382                 return NULL;
1383         }
1384
1385         c->inuse = 1;
1386         kstrdup(&c->owner, user);
1387         c->perm = 0660;
1388         c->state = Idle;
1389         ipmove(c->laddr, IPnoaddr);
1390         ipmove(c->raddr, IPnoaddr);
1391         c->r = NULL;
1392         c->rgen = 0;
1393         c->lport = 0;
1394         c->rport = 0;
1395         c->restricted = 0;
1396         c->ttl = MAXTTL;
1397         c->tos = DFLTTOS;
1398         c->nonblock = FALSE;
1399         qreopen(c->rq);
1400         qreopen(c->wq);
1401         qreopen(c->eq);
1402
1403         qunlock(&c->qlock);
1404         return c;
1405 }
1406
1407 int Fsconnected(struct conv *c, char *msg)
1408 {
1409         if (msg != NULL && *msg != '\0')
1410                 strncpy(c->cerr, msg, sizeof(c->cerr));
1411
1412         switch (c->state) {
1413
1414                 case Announcing:
1415                         c->state = Announced;
1416                         break;
1417
1418                 case Connecting:
1419                         c->state = Connected;
1420                         break;
1421         }
1422
1423         rendez_wakeup(&c->cr);
1424         return 0;
1425 }
1426
1427 struct Proto *Fsrcvpcol(struct Fs *f, uint8_t proto)
1428 {
1429         if (f->ipmux)
1430                 return f->ipmux;
1431         else
1432                 return f->t2p[proto];
1433 }
1434
1435 struct Proto *Fsrcvpcolx(struct Fs *f, uint8_t proto)
1436 {
1437         return f->t2p[proto];
1438 }
1439
1440 /*
1441  *  called with protocol locked
1442  */
1443 struct conv *Fsnewcall(struct conv *c, uint8_t * raddr, uint16_t rport,
1444                                            uint8_t * laddr, uint16_t lport, uint8_t version)
1445 {
1446         struct conv *nc;
1447         struct conv **l;
1448         int i;
1449
1450         qlock(&c->qlock);
1451         i = 0;
1452         for (l = &c->incall; *l; l = &(*l)->next)
1453                 i++;
1454         if (i >= Maxincall) {
1455                 qunlock(&c->qlock);
1456                 return NULL;
1457         }
1458
1459         /* find a free conversation */
1460         nc = Fsprotoclone(c->p, network);
1461         if (nc == NULL) {
1462                 qunlock(&c->qlock);
1463                 return NULL;
1464         }
1465         ipmove(nc->raddr, raddr);
1466         nc->rport = rport;
1467         ipmove(nc->laddr, laddr);
1468         nc->lport = lport;
1469         nc->next = NULL;
1470         *l = nc;
1471         nc->state = Connected;
1472         nc->ipversion = version;
1473
1474         qunlock(&c->qlock);
1475
1476         rendez_wakeup(&c->listenr);
1477
1478         return nc;
1479 }
1480
1481 static long ndbwrite(struct Fs *f, char *a, uint32_t off, int n)
1482 {
1483         if (off > strlen(f->ndb))
1484                 error(Eio);
1485         if (off + n >= sizeof(f->ndb) - 1)
1486                 error(Eio);
1487         memmove(f->ndb + off, a, n);
1488         f->ndb[off + n] = 0;
1489         f->ndbvers++;
1490         f->ndbmtime = seconds();
1491         return n;
1492 }
1493
1494 uint32_t scalednconv(void)
1495 {
1496         //if(conf.npage*BY2PG >= 128*MB)
1497         return Nchans * 4;
1498         //  return Nchans;
1499 }